/* BEGIN_ICS_COPYRIGHT4 **************************************** Copyright (c) 2015, Intel Corporation Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ** END_ICS_COPYRIGHT4 ****************************************/ // Public header file #include "ib_cm.h" // Private header file #include "cm_private.h" ////////////////////////////////////////////////////////////////////////// // Connect // // This routine is called by CmConnect() and CmConnectPeer() // // INPUTS: // // // // OUTPUTS: // // None. // // RETURNS: // // FSUCCESS. // // IRQL: // // This routine is called at IRQL_PASSIVE_LEVEL with ListLock held // FSTATUS Connect( CM_CEP_OBJECT* pCEP, const CM_REQUEST_INFO* pConnectRequest, boolean bPeer, // TRUE if peer connect, FALSE otherwise PFN_CM_CALLBACK pfnConnectCB, void* Context ) { FSTATUS Status=FSUCCESS; IB_PORT_ATTRIBUTES *pPortAttr=NULL; uint32 pktlifetime=0; uint64 pktlifetime_us=0; uint32 req_timeout_ms=0; uint64 turnaround_time_us; IB_CA_ATTRIBUTES *pCaAttr = NULL; _DBG_ENTER_LVL(_DBG_LVL_FUNC_TRACE, Connect); DEBUG_ASSERT(pCEP->State == CMS_IDLE); if (pCEP->State != CMS_IDLE) { _DBG_ERROR((" invalid State for Connect!!! <%d>\n", _DBG_PTR(pCEP), pCEP->State)); Status = FINVALID_PARAMETER; goto fail_type; } if (pCEP->Type != CM_RC_TYPE && pCEP->Type != CM_UC_TYPE && pCEP->Type != CM_RD_TYPE ) { _DBG_WARN((" invalid Type for Connect!!! <%d>\n", _DBG_PTR(pCEP), pCEP->Type)); Status = FINVALID_PARAMETER; goto fail_type; } if (pConnectRequest->CEPInfo.QPN == 0 && pConnectRequest->CEPInfo.LocalEECN == 0) { _DBG_WARN((" invalid QPN/EECN for Connect!!! \n", _DBG_PTR(pCEP))); Status = FINVALID_PARAMETER; goto fail_type; } if (pConnectRequest->SID == 0) { _DBG_WARN((" invalid SID for Connect!!! <%"PRIx64">\n", _DBG_PTR(pCEP), pConnectRequest->SID)); Status = FINVALID_PARAMETER; goto fail_type; } // Set up addressing information pCEP->LocalEndPoint.CaGUID = pConnectRequest->CEPInfo.CaGUID; pCEP->bPeer = bPeer; // Set up Bind address, especially important for Peer connect // which will end up on ListenMap which is keyed by // SID, Local&Remote LID&GID, EndPoint and Private Data Discriminator pCEP->SID = pConnectRequest->SID; CopyPathInfoToCepPath(&pCEP->PrimaryPath, &pConnectRequest->PathInfo, pConnectRequest->CEPInfo.AckTimeout); Status = iba_find_ca_gid2(&pConnectRequest->PathInfo.Path.SGID, pConnectRequest->CEPInfo.CaGUID, &pCEP->PrimaryPath.LocalPortGuid, &pCEP->PrimaryPath.LocalGidIndex, &pPortAttr, &pCaAttr); if (Status != FSUCCESS) { _DBG_WARN((" FindCaGid() failed!!! <%s>\n", _DBG_PTR(pCEP), _DBG_PTR(FSTATUS_MSG(Status)))); Status = FINVALID_PARAMETER; goto fail_port; } // as a safety feature we quietly downgrade the RDMA Read resources to // match the CA capabilities if (pCEP->Type == CM_RC_TYPE) { pCEP->LocalInitiatorDepth = MIN(pCaAttr->MaxQPInitiatorDepth, pConnectRequest->CEPInfo.OfferedInitiatorDepth); pCEP->LocalResponderResources = MIN(pCaAttr->MaxQPResponderResources, pConnectRequest->CEPInfo.OfferedResponderResources); } else if (pCEP->Type == CM_RD_TYPE) { pCEP->LocalInitiatorDepth = MIN(pCaAttr->MaxEECInitiatorDepth, pConnectRequest->CEPInfo.OfferedInitiatorDepth); pCEP->LocalResponderResources = MIN(pCaAttr->MaxEECResponderResources, pConnectRequest->CEPInfo.OfferedResponderResources); } else { pCEP->LocalInitiatorDepth = 0; pCEP->LocalResponderResources = 0; } if (! LookupPKey(pPortAttr, pConnectRequest->PathInfo.Path.P_Key, FALSE, &pCEP->PrimaryPath.PkeyIndex)) { _DBG_WARN((" LookupPKey() failed!!!\n", _DBG_PTR(pCEP))); Status = FINVALID_PARAMETER; goto fail_port; } pCEP->PrimaryPath.LocalPathBits = LidToPathBits(pCEP->PrimaryPath.LocalLID, pPortAttr->Address.LMC); MemoryDeallocate(pPortAttr); pPortAttr = NULL; if (pConnectRequest->AlternatePathInfo.Path.SLID != 0) { if (pConnectRequest->PathInfo.Path.P_Key != pConnectRequest->AlternatePathInfo.Path.P_Key) { Status = FINVALID_PARAMETER; goto fail_port; } CopyPathInfoToCepPath(&pCEP->AlternatePath, &pConnectRequest->AlternatePathInfo, pConnectRequest->CEPInfo.AlternateAckTimeout); Status = iba_find_ca_gid(&pConnectRequest->AlternatePathInfo.Path.SGID, pConnectRequest->CEPInfo.CaGUID, &pCEP->AlternatePath.LocalPortGuid, &pCEP->AlternatePath.LocalGidIndex, &pPortAttr, NULL); if (Status != FSUCCESS) { _DBG_WARN((" Alternate FindCaGid() failed!!! <%s>\n", _DBG_PTR(pCEP), _DBG_PTR(FSTATUS_MSG(Status)))); Status = FINVALID_PARAMETER; goto fail_port; } if (! LookupPKey(pPortAttr, pConnectRequest->AlternatePathInfo.Path.P_Key, FALSE, &pCEP->AlternatePath.PkeyIndex)) { _DBG_WARN((" Alternate LookupPKey() failed!!!\n", _DBG_PTR(pCEP))); Status = FINVALID_PARAMETER; goto fail_port; } pCEP->AlternatePath.LocalPathBits = LidToPathBits(pCEP->AlternatePath.LocalLID, pPortAttr->Address.LMC); MemoryDeallocate(pPortAttr); pPortAttr = NULL; } else { MemoryClear(&pCEP->AlternatePath, sizeof(pCEP->AlternatePath)); } pCEP->Mtu = pConnectRequest->PathInfo.Path.Mtu; pCEP->PartitionKey = pConnectRequest->PathInfo.Path.P_Key; pCEP->TargetAckDelay = pCaAttr->LocalCaAckDelay; // best guess to start MemoryDeallocate(pCaAttr); pCaAttr = NULL; // save info for later pCEP->LocalRetryCount = pConnectRequest->CEPInfo.RetryCount; pCEP->LocalRecvPSN= pConnectRequest->CEPInfo.StartingPSN; // Bind the local endpoint addr to the object pCEP->LocalEndPoint.QPN = pConnectRequest->CEPInfo.QPN; pCEP->LocalEndPoint.EECN = pConnectRequest->CEPInfo.LocalEECN; pCEP->RemoteEndPoint.QPN = pCEP->RemoteEndPoint.EECN = 0; // make sure LocalEndPoint is unique if (! CM_MapTryInsert(&gCM->LocalEndPointMap, (uintn)pCEP, &pCEP->LocalEndPointMapEntry, "LOCAL_END_POINT_LIST", pCEP)) { Status = FCM_ADDR_INUSE; goto fail_endpoint; } // For peer connection, put it onto the LISTEN list // this also makes sure the bound listen address is unique // Once we associate the CEP with a remote end point, we will remove the CEP // from the LISTEN list. For the passive end, we will put it onto the // PENDING list to facilitate CmAccept issuing REP if (bPeer && ! CM_MapTryInsert(&gCM->ListenMap, (uintn)pCEP, &pCEP->MapEntry, "LISTEN_LIST", pCEP)) { Status = FCM_ADDR_INUSE; goto fail; } pCEP->EventFlags = 0; pCEP->RetryCount = 0; DListInit(&pCEP->PendingList); pCEP->Mode = CM_MODE_ACTIVE; pCEP->bFailoverSupported = TRUE; // assume the best until we know better // The comm ID is set here so that each connect request is unique since // user can reuse the object. This prevents duplicate msg arriving for // previous connect request using the same object. AssignLocalCommID(pCEP); pCEP->TransactionID = (pCEP->LocalCommID)<<8; _DBG_INFO((" Comm ID set at .\n", _DBG_PTR(pCEP), pCEP->LocalCommID)); pCEP->QKey = pConnectRequest->CEPInfo.QKey; pktlifetime = pConnectRequest->PathInfo.Path.PktLifeTime; if (pktlifetime > 31) pktlifetime = 31; pCEP->PktLifeTime = pktlifetime; pCEP->Timewait = pConnectRequest->CEPInfo.AckTimeout; if (pCEP->Timewait < pCEP->PktLifeTime+1) { // round up Timewait to at least be 2*PktLifeTime // TBD could add in LocalCaAckDelay and adjust // AckTimeout as well pCEP->Timewait = pCEP->PktLifeTime+1; // log2 so +1 is *2 } // Round-trip time in usec pktlifetime_us = TimeoutMultToTimeInUsec(pktlifetime); // turnaround_time_us should include CM and appl software time and // the hfi hardware Ca Ack Delay if (pCEP->turnaround_time_us) { turnaround_time_us = pCEP->turnaround_time_us; // CEP specific time } else { turnaround_time_us = gCM->turnaround_time_us; // use heuristic } // The remote timeout is used by the remote CM to determine if it should // send an MRA, in theory turnaround_time_us should be anticipated // turnaround time of remote end. pCEP->RemoteCMTimeout = TimeoutTimeToMult(turnaround_time_us); // The local timeout is used to timeout all waits for responses // from request packets (REQ, REP, LAP, DREQ), it is the time a remote // CM can expect our local CM to respond within (including fabric time) pCEP->LocalCMTimeout = TimeoutTimeToMult(turnaround_time_us + pktlifetime_us*2); pCEP->MaxCMRetries = gCmParams.MaxReqRetry; _DBG_INFO((" Timeout setting .\n", _DBG_PTR(pCEP), pCEP->PktLifeTime, pktlifetime_us*2, turnaround_time_us, pCEP->LocalCMTimeout, pCEP->RemoteCMTimeout)); // Save user callback info pCEP->pfnUserCallback = pfnConnectCB; pCEP->UserContext = Context; // If CEP is being reused, it may have a send still in progress or it // may not have a Dgrm any longer if (pCEP->pDgrmElement && CmDgrmIsInUse(pCEP->pDgrmElement)) { _DBG_INFO((" Releasing the dgrm .\n", _DBG_PTR(pCEP), _DBG_PTR(pCEP->pDgrmElement))); CmDgrmSetReleaseFlag(pCEP->pDgrmElement); pCEP->pDgrmElement = NULL; } if (pCEP->pDgrmElement == NULL) { pCEP->pDgrmElement = CmDgrmGet(); if (!pCEP->pDgrmElement) { _DBG_WARNING((" Connection attempt failed: Out of CM Dgrms!!!\n", _DBG_PTR(pCEP))); Status = FINSUFFICIENT_RESOURCES; goto fail_dgrm; } } DEBUG_ASSERT(pCEP->pDgrmElement->Element.pBufferList->ByteCount == MAD_BLOCK_SIZE); if (pCEP->pDgrmElement->Element.pBufferList->ByteCount != MAD_BLOCK_SIZE) { _DBG_ERROR((" Disgarding Corrupt Datagram!!! pDgrmElement = %p, pCEP->pDgrmElement->Element.pBufferList = %p, pCEP->pDgrmElement->Element.pBufferList->ByteCount = %d\n", _DBG_PTR(pCEP), _DBG_PTR(pCEP->pDgrmElement), _DBG_PTR(pCEP->pDgrmElement->Element.pBufferList), pCEP->pDgrmElement->Element.pBufferList->ByteCount)); Status = FINSUFFICIENT_RESOURCES; pCEP->pDgrmElement = NULL; goto fail_dgrm; } // Format the REQ msg FormatREQ((CM_MAD*)GsiDgrmGetSendMad(pCEP->pDgrmElement), pConnectRequest, pCEP->TransactionID, pCEP->LocalCommID, QP1_WELL_KNOWN_Q_KEY, pCEP->Type, pCEP->LocalCMTimeout, pCEP->RemoteCMTimeout, pCEP->MaxCMRetries, pCEP->LocalInitiatorDepth, pCEP->LocalResponderResources); // Setup the dgrm's dest addr _DBG_INFO((" REQ's dgrm address field \n", _DBG_PTR(pCEP), pCEP->PrimaryPath.LocalPortGuid, pCEP->PrimaryPath.RemoteLID, pCEP->PrimaryPath.SL, pCEP->PrimaryPath.LocalPathBits, pCEP->PrimaryPath.StaticRate)); CmDgrmAddrSet(pCEP->pDgrmElement, pCEP->PrimaryPath.LocalPortGuid, pCEP->PrimaryPath.RemoteLID, pCEP->PrimaryPath.SL, pCEP->PrimaryPath.LocalPathBits, pCEP->PrimaryPath.StaticRate, (! pCEP->PrimaryPath.bSubnetLocal), pCEP->PrimaryPath.RemoteGID, pCEP->PrimaryPath.FlowLabel, pCEP->PrimaryPath.LocalGidIndex, pCEP->PrimaryPath.HopLimit, (uint8)pCEP->PrimaryPath.TClass, pCEP->PrimaryPath.PkeyIndex); // Pass the REQ down to GSA. // At this point, we may get the send or receive complete callback but the lock // will prevent nested call. // Also, we update the state and start the timer here instead of in the send callback // because we assume the send is done here (i.e. unreliable send) Status = CmDgrmSend(pCEP->pDgrmElement); if (Status != FSUCCESS) { _DBG_WARN((" DgrmSend() failed for REQ!!! <%s>\n", _DBG_PTR(pCEP), _DBG_PTR(FSTATUS_MSG(Status)))); // fall through and let timer retry the send later Status = FSUCCESS; } else { AtomicIncrementVoid(&gCM->Sent.Req); } _DBG_INFO((" REQ sent for .\n", _DBG_PTR(pCEP), pCEP->LocalCommID, pCEP->PrimaryPath.LocalLID, pCEP->PrimaryPath.RemoteLID)); Status = FPENDING; CepSetState(pCEP, CMS_REQ_SENT); // Start the REQ timer. Use the Local CM timeout. req_timeout_ms = TimeoutMultToTimeInMs(pCEP->LocalCMTimeout); CmTimerStart(pCEP, req_timeout_ms, REQ_TIMER); _DBG_LEAVE_LVL(_DBG_LVL_FUNC_TRACE); return Status; fail_dgrm: if (bPeer) CM_MapRemoveEntry(&gCM->ListenMap, &pCEP->MapEntry, LISTEN_LIST, pCEP); pCEP->Mode = CM_MODE_NONE; fail: CM_MapRemoveEntry(&gCM->LocalEndPointMap, &pCEP->LocalEndPointMapEntry, LOCAL_END_POINT_LIST, pCEP); fail_endpoint: fail_port: if (pCaAttr != NULL) MemoryDeallocate(pCaAttr); if (pPortAttr != NULL) MemoryDeallocate(pPortAttr); // cleanup CEP pCEP->bPeer = FALSE; pCEP->SID = 0; MemoryClear(&pCEP->PrimaryPath, sizeof(pCEP->PrimaryPath)); MemoryClear(&pCEP->AlternatePath, sizeof(pCEP->AlternatePath)); MemoryClear(&pCEP->LocalEndPoint, sizeof(pCEP->LocalEndPoint)); MemoryClear(&pCEP->RemoteEndPoint, sizeof(pCEP->RemoteEndPoint)); pCEP->Mtu = 0; pCEP->PartitionKey = 0; pCEP->LocalInitiatorDepth = 0; pCEP->LocalResponderResources = 0; pCEP->LocalRetryCount = 0; pCEP->LocalRecvPSN = 0; pCEP->TargetAckDelay = 0; fail_type: _DBG_LEAVE_LVL(_DBG_LVL_FUNC_TRACE); return Status; } // Connect() ////////////////////////////////////////////////////////////////////////// // GetConnInfo // Supports Connection Info (for use in preparing callback arguments) // for both Active and Passive sides. Where there are differences between // active and passive behaviours, asserts or tests of pCEP->Mode are used // // // INPUTS: // // // // OUTPUTS: // // None. // // RETURNS: // // FSUCCESS or FTIMEOUT. // // IRQL: // // This routine is called at IRQL_PASSIVE_LEVEL. with ListLock held // FSTATUS GetConnInfo( IN CM_CEP_OBJECT* pCEP, OUT CM_CONN_INFO* pConnInfo ) { FSTATUS Status=FSUCCESS; boolean bKeepQueued = FALSE; // should additional queued events be kept CM_CEP_OBJECT* pCEPEntry = NULL; DLIST_ENTRY* pListEntry = NULL; _DBG_ENTER_LVL(_DBG_LVL_FUNC_TRACE, GetConnInfo); pConnInfo->Status = 0; // Find out what state switch (pCEP->State) { case CMS_IDLE: if (BitTest(pCEP->EventFlags, SYSE_DISCONNECT)) { // CA has been removed BitClear(pCEP->EventFlags, SYSE_DISCONNECT); if (pCEP->bTimewaitCallback) pConnInfo->Status = FCM_CA_REMOVED; else pConnInfo->Status = FCM_DISCONNECTED; // legacy API } else if (BitTest(pCEP->EventFlags, USRE_CANCEL)) { // user has cancelled listen, sidr_req or connect attempt BitClear(pCEP->EventFlags, USRE_CANCEL); pConnInfo->Status = FCM_CONNECT_CANCEL; } else if (BitTest(pCEP->EventFlags, CME_TIMEOUT_REQ)) { // We timed out on the REQ msg i.e. no REP/REJ msg received. ASSERT(pCEP->Mode == CM_MODE_ACTIVE); BitClear(pCEP->EventFlags, CME_TIMEOUT_REQ); pConnInfo->Status = FCM_CONNECT_TIMEOUT; } else if (BitTest(pCEP->EventFlags, CME_TIMEOUT_REP)) { // We timed out while waiting for the RTU to arrive ASSERT(pCEP->Mode == CM_MODE_PASSIVE); BitClear(pCEP->EventFlags, CME_TIMEOUT_REP); pConnInfo->Status = FCM_CONNECT_TIMEOUT; } else if (BitTest(pCEP->EventFlags, CME_RCVD_REJ)) { // a REJ was received BitClear(pCEP->EventFlags, CME_RCVD_REJ); CopyMadToConnInfo(&pConnInfo->Info, (CM_MAD*)GsiDgrmGetSendMad(pCEP->pDgrmElement)); pConnInfo->Status = FCM_CONNECT_REJECT; bKeepQueued = TRUE; /* for RC_STALE, CME_TIMEOUT_TIMEWAIT queued */ } else if (BitTest(pCEP->EventFlags, CME_TIMEOUT_DREQ)) { // We timed out on the DREQ msg i.e. no DREP msg received. BitClear(pCEP->EventFlags, CME_TIMEOUT_DREQ); // FCM_DISCONNECTED is same value as FCM_DISCONNECT_TIMEOUT pConnInfo->Status = FCM_DISCONNECTED; } else if (BitTest(pCEP->EventFlags, CME_RCVD_DREP)) { // received a DREP BitClear(pCEP->EventFlags, CME_RCVD_DREP); CopyMadToConnInfo(&pConnInfo->Info, (CM_MAD*)GsiDgrmGetSendMad(pCEP->pDgrmElement)); pConnInfo->Status = FCM_DISCONNECT_REPLY; bKeepQueued = TRUE; /* leave CME_TIMEOUT_TIMEWAIT queued */ } else if (BitTest(pCEP->EventFlags, CME_RCVD_DREQ)) { // We received a DREQ but the user already sent a DREP // odd sequence, user must have done CmDisconnect with a reply // and it raced with a receipt of a DREQ BitClear(pCEP->EventFlags, CME_RCVD_DREQ); pConnInfo->Status = FCM_DISCONNECTED; } else if (BitTest(pCEP->EventFlags, CME_SIM_RCVD_DREQ)) { // We received a REJ but the user was already issuing a Disconnect // odd sequence, user must have done CmDisconnect with a reply // and it raced with a receipt of a REJ BitClear(pCEP->EventFlags, CME_SIM_RCVD_DREQ); pConnInfo->Status = FCM_DISCONNECTED; } else if (BitTest(pCEP->EventFlags, CME_TIMEOUT_TIMEWAIT)) { // Timewait state completed BitClear(pCEP->EventFlags, CME_TIMEOUT_TIMEWAIT); if (pCEP->bTimewaitCallback) { pConnInfo->Status = FCM_DISCONNECTED; } else { Status = FERROR; } } else if (BitTest(pCEP->EventFlags, CME_RCVD_SIDR_RESP)) { // SIDR_RESP was received ASSERT(pCEP->Mode == CM_MODE_ACTIVE); BitClear(pCEP->EventFlags, CME_RCVD_SIDR_RESP); CopyMadToConnInfo(&pConnInfo->Info, (CM_MAD*)GsiDgrmGetSendMad(pCEP->pDgrmElement)); if (pConnInfo->Info.SidrResponse.Status == SRS_VALID_QPN) pConnInfo->Status = FSIDR_RESPONSE; else pConnInfo->Status = FSIDR_RESPONSE_ERR; } else if (BitTest(pCEP->EventFlags, CME_TIMEOUT_SIDR_REQ)) { // We timed out while waiting for a response to our sidr req ASSERT(pCEP->Mode == CM_MODE_ACTIVE); BitClear(pCEP->EventFlags, CME_TIMEOUT_SIDR_REQ); pConnInfo->Status = FSIDR_REQUEST_TIMEOUT; } else { Status = FERROR; } break; case CMS_LISTEN: case CMS_REQ_RCVD: case CMS_MRA_REQ_SENT: // We received a client connect or peer connect request. ASSERT(pCEP->Mode == CM_MODE_PASSIVE); //ASSERT(BitTest(pCEP->EventFlags, CME_RCVD_REQ)); BitClear(pCEP->EventFlags, CME_RCVD_REQ); if (pCEP->State == CMS_LISTEN) { _DBG_INFO((" *** We received a connect request ***\n", _DBG_PTR(pCEP))); } else { _DBG_INFO((" *** We received a peer request ***\n", _DBG_PTR(pCEP))); } // Get the new CEP object from the pending list if (DListIsEmpty(&pCEP->PendingList)) { _DBG_WARNING((" Pending list is empty!\n", _DBG_PTR(pCEP))); Status = FERROR; break; } pListEntry = DListGetNext(&pCEP->PendingList); pCEPEntry = PARENT_STRUCT(pListEntry, CM_CEP_OBJECT, PendingListEntry); // Make sure this is not the same one we returned previously. This can // happen if another request arrived before the user accept /* if (BitTest(pCEPEntry->EventFlags, USRE_WAIT)) { _DBG_WARNING((" Object is in signaled state but user is still processing the current event!\n", _DBG_PTR(pCEP))); // The user is still processing the current one. To maintain FIFO order, // we cannot notify the user of the new request until the current one is // accepted, rejected or timed out. Status = FERROR; //bWaitAgain = TRUE; break; } // Mark it so that we dont return this again in another wait() // We clear this in AcceptP() or RejectP() //BitSet(pCEPEntry->EventFlags, USRE_WAIT); */ CopyMadToConnInfo(&pConnInfo->Info, (CM_MAD*)GsiDgrmGetSendMad(pCEPEntry->pDgrmElement)); // port on which REQ arrived pConnInfo->Info.Request.CEPInfo.PortGUID = pCEPEntry->pDgrmElement->PortGuid; // when we processed REQ, we negotiated these down based on // CA capabilities pConnInfo->Info.Request.CEPInfo.OfferedInitiatorDepth = pCEPEntry->LocalResponderResources; pConnInfo->Info.Request.CEPInfo.OfferedResponderResources = pCEPEntry->LocalInitiatorDepth; pConnInfo->Status = FCM_CONNECT_REQUEST; break; case CMS_REP_RCVD: case CMS_MRA_REP_SENT: ASSERT(pCEP->Mode == CM_MODE_ACTIVE); // We received a reply to our connect request if (BitTest(pCEP->EventFlags, CME_RCVD_REP)) { BitClear(pCEP->EventFlags, CME_RCVD_REP); CopyMadToConnInfo(&pConnInfo->Info, (CM_MAD*)GsiDgrmGetSendMad(pCEP->pDgrmElement)); pConnInfo->Status = FCM_CONNECT_REPLY; } else { Status = FERROR; } break; case CMS_ESTABLISHED: if (BitTest(pCEP->EventFlags, CME_RCVD_RTU)) { // received the RTU for our REP ASSERT(pCEP->Mode == CM_MODE_PASSIVE); BitClear(pCEP->EventFlags, CME_RCVD_RTU); CopyMadToConnInfo(&pConnInfo->Info, (CM_MAD*)GsiDgrmGetSendMad(pCEP->pDgrmElement)); pConnInfo->Status = FCM_CONNECT_ESTABLISHED; } else if (BitTest(pCEP->EventFlags, CME_RCVD_APR)) { ASSERT(pCEP->Mode == CM_MODE_ACTIVE); BitClear(pCEP->EventFlags, CME_RCVD_APR); CopyMadToConnInfo(&pConnInfo->Info, (CM_MAD*)GsiDgrmGetSendMad(pCEP->pDgrmElement)); if (pConnInfo->Info.AltPathReply.APStatus == APS_PATH_LOADED) { // we populate CEP with additional information to help // application do ModifyQP CopyCepPathToPathInfo(&pConnInfo->Info.AltPathReply.u.Alternate.PathInfo, &pConnInfo->Info.AltPathReply.u.Alternate.AckTimeout, &pCEP->AlternatePath); pConnInfo->Info.AltPathReply.u.Alternate.PathInfo.Path.Mtu = pCEP->Mtu; pConnInfo->Info.AltPathReply.u.Alternate.PathInfo.Path.P_Key = pCEP->PartitionKey; pConnInfo->Status = FCM_ALTPATH_REPLY; } else { pConnInfo->Status = FCM_ALTPATH_REJECT; } } else if (BitTest(pCEP->EventFlags, CME_TIMEOUT_LAP)) { ASSERT(pCEP->Mode == CM_MODE_ACTIVE); BitClear(pCEP->EventFlags, CME_TIMEOUT_LAP); pConnInfo->Status = FCM_ALTPATH_TIMEOUT; } else { Status = FERROR; } break; case CMS_LAP_RCVD: case CMS_MRA_LAP_SENT: if (BitTest(pCEP->EventFlags, CME_RCVD_RTU)) { // RTU and LAP events are both queued // by the time the signal is delivered to us ASSERT(pCEP->Mode == CM_MODE_PASSIVE); BitClear(pCEP->EventFlags, CME_RCVD_RTU); // We have not yet delivered the RTU, however the pCEP->pDgrmElement // is now the LAP, so we deliver an RTU with no private data // (much the way a QP Async Event for Communication Established // would simulate an RTU) // this callback will be followed immediately by the // LAP request MemoryClear(&pConnInfo->Info.Rtu, sizeof(pConnInfo->Info.Rtu)); pConnInfo->Status = FCM_CONNECT_ESTABLISHED; bKeepQueued = TRUE; /* leave CME_RCVD_LAP queued */ } else if (BitTest(pCEP->EventFlags, CME_RCVD_LAP)) { // received LAP BitClear(pCEP->EventFlags, CME_RCVD_LAP); CopyMadToConnInfo(&pConnInfo->Info, (CM_MAD*)GsiDgrmGetSendMad(pCEP->pDgrmElement)); // additional fields in AltPathInfo pConnInfo->Info.AltPathRequest.AlternatePathInfo.Path.P_Key= pCEP->PartitionKey; pConnInfo->Info.AltPathRequest.AlternatePathInfo.Path.Mtu = pCEP->Mtu; pConnInfo->Status = FCM_ALTPATH_REQUEST; } else { Status = FERROR; } break; case CMS_DREQ_RCVD: if (BitTest(pCEP->EventFlags,CME_RCVD_RTU)) { // RTU and DREQ events are both queued // by the time the signal is delivered to us ASSERT(pCEP->Mode == CM_MODE_PASSIVE); BitClear(pCEP->EventFlags,CME_RCVD_RTU); // We have not yet delivered the RTU, however the pCEP->pDgrmElement // is now the DREQ, so we deliver an RTU with no private data // (much the way a QP Async Event for Communication Established // would simulate an RTU) // this callback will be followed immediately by the // disconnect request MemoryClear(&pConnInfo->Info.Rtu, sizeof(pConnInfo->Info.Rtu)); pConnInfo->Status = FCM_CONNECT_ESTABLISHED; bKeepQueued = TRUE; /* leave CME_RCVD_DREQ queued */ } else if (BitTest(pCEP->EventFlags, CME_RCVD_DREQ)) { // Received disconnect request BitClear(pCEP->EventFlags, CME_RCVD_DREQ); CopyMadToConnInfo(&pConnInfo->Info, (CM_MAD*)GsiDgrmGetSendMad(pCEP->pDgrmElement)); pConnInfo->Status = FCM_DISCONNECT_REQUEST; } else { Status = FERROR; } break; case CMS_SIM_DREQ_RCVD: if (BitTest(pCEP->EventFlags,CME_RCVD_RTU)) { // RTU and SIM DREQ events are both queued // by the time the signal is delivered to us // // Note this is only applicable to passive side, in present code // CMS_SIM_DREQ_RCVD could only happen on active side. However // this is implemented for completeness and in case this state // is later utilized for a passive side simulated DREQ as well ASSERT(pCEP->Mode == CM_MODE_PASSIVE); BitClear(pCEP->EventFlags,CME_RCVD_RTU); // We have not yet delivered the RTU, however the pCEP->pDgrmElement // is now the DREQ, so we deliver an RTU with no private data // (much the way a QP Async Event for Communication Established // would simulate an RTU) // this callback will be followed immediately by the // disconnect request MemoryClear(&pConnInfo->Info.Rtu, sizeof(pConnInfo->Info.Rtu)); pConnInfo->Status = FCM_CONNECT_ESTABLISHED; bKeepQueued = TRUE; /* leave CME_SIM_RCVD_DREQ queued */ } else if (BitTest(pCEP->EventFlags, CME_SIM_RCVD_DREQ)) { // We received a REJ but are simulating a DREQ MemoryClear(&pConnInfo->Info.DisconnectRequest, sizeof(pConnInfo->Info.DisconnectRequest)); BitClear(pCEP->EventFlags, CME_SIM_RCVD_DREQ); pConnInfo->Status = FCM_DISCONNECT_REQUEST; } else { Status = FERROR; } break; case CMS_TIMEWAIT: if (BitTest(pCEP->EventFlags, CME_RCVD_REJ)) { // a REJ was received (must be RC_STALE_CONN) BitClear(pCEP->EventFlags, CME_RCVD_REJ); CopyMadToConnInfo(&pConnInfo->Info, (CM_MAD*)GsiDgrmGetSendMad(pCEP->pDgrmElement)); pConnInfo->Status = FCM_CONNECT_REJECT; } else if (BitTest(pCEP->EventFlags, CME_TIMEOUT_REP)) { // timeout waiting for RTU ASSERT(pCEP->Mode == CM_MODE_PASSIVE); if (pCEP->bTimewaitCallback) { // wait until timewait finishes to provide FCM_CONNECT_TIMEOUT Status = FERROR; bKeepQueued = TRUE; /* keep CME_TIMEOUT_REP set till Idle */ } else { BitClear(pCEP->EventFlags, CME_TIMEOUT_REP); pConnInfo->Status = FCM_CONNECT_TIMEOUT; } } else if (BitTest(pCEP->EventFlags, CME_RCVD_DREP)) { // Received disconnect reply BitClear(pCEP->EventFlags, CME_RCVD_DREP); CopyMadToConnInfo(&pConnInfo->Info, (CM_MAD*)GsiDgrmGetSendMad(pCEP->pDgrmElement)); pConnInfo->Status = FCM_DISCONNECT_REPLY; } else if (BitTest(pCEP->EventFlags, CME_RCVD_DREQ)) { // We received a DREQ but the user already sent a DREP // odd sequence, user must have done CmDisconnect with a reply // and it raced with a receipt of a DREQ BitClear(pCEP->EventFlags, CME_RCVD_DREQ); if (pCEP->bTimewaitCallback) { // wait until timewait finishes to provide FCM_DISCONNECTED Status = FERROR; } else { pConnInfo->Status = FCM_DISCONNECTED; } } else if (BitTest(pCEP->EventFlags, CME_SIM_RCVD_DREQ)) { // We received a REJ but the user was already issuing a Disconnect // odd sequence, user must have done CmDisconnect with a reply // and it raced with a receipt of a REJ BitClear(pCEP->EventFlags, CME_SIM_RCVD_DREQ); if (pCEP->bTimewaitCallback) { // wait until timewait finishes to provide FCM_DISCONNECTED Status = FERROR; } else { pConnInfo->Status = FCM_DISCONNECTED; } } else if (BitTest(pCEP->EventFlags, CME_TIMEOUT_DREQ)) { // We timed out on the DREQ msg i.e. no DREP msg received. BitClear(pCEP->EventFlags, CME_TIMEOUT_DREQ); if (pCEP->bTimewaitCallback) { // wait until timewait finishes to provide FCM_DISCONNECTED Status = FERROR; } else { // FCM_DISCONNECTED is same value as FCM_DISCONNECT_TIMEOUT pConnInfo->Status = FCM_DISCONNECTED; } } else { Status = FERROR; } break; case CMS_REGISTERED: ASSERT(pCEP->Mode == CM_MODE_PASSIVE); if (BitTest(pCEP->EventFlags, CME_RCVD_SIDR_REQ)) { // Received SIDR request BitClear(pCEP->EventFlags, CME_RCVD_SIDR_REQ); if (DListIsEmpty(&pCEP->PendingList)) { _DBG_WARNING((" Pending list is empty!\n", _DBG_PTR(pCEP))); Status = FERROR; break; } pListEntry = DListGetNext(&pCEP->PendingList); pCEPEntry = PARENT_STRUCT(pListEntry, CM_CEP_OBJECT, PendingListEntry); // Make sure this is not the same one we returned previously /* if (BitTest(pCEPEntry->EventFlags, USRE_WAIT)) { _DBG_WARNING((" Object is in signaled state but user is still processing the current event!\n", _DBG_PTR(pCEP))); // The user is still processing the current one. To maintain FIFO order, // we cannot notify the user of the new request until the current one is // accepted, rejected or timed out. //bWaitAgain = TRUE; Status = FERROR; break; } // Mark it so that we dont return this again in another wait() // We clear this in SIDRResponse() BitSet(pCEPEntry->EventFlags, USRE_WAIT); */ CopyMadToConnInfo(&pConnInfo->Info, (CM_MAD*)GsiDgrmGetSendMad(pCEPEntry->pDgrmElement)); // Copy additional info to the SidrRequest. pConnInfo->Info.SidrRequest.PathInfo.Path.DLID = pCEPEntry->PrimaryPath.RemoteLID; pConnInfo->Info.SidrRequest.SID = pCEPEntry->SID; pConnInfo->Info.SidrRequest.PortGuid = pCEPEntry->pDgrmElement->PortGuid; pConnInfo->Info.SidrRequest.PathInfo.Path.NumbPath = 1; // only 1 path pConnInfo->Info.SidrRequest.PathInfo.Path.SLID = pCEPEntry->pDgrmElement->PathBits; // TBD BUGBUG | local port BaseLID; // IB does not have Static rate in UD LRH // so we can't be sure what rate of remote port is // we use a constant value for GSI QPs, so this is not accurate pConnInfo->Info.SidrRequest.PathInfo.Path.Rate = pCEPEntry->pDgrmElement->StaticRate; pConnInfo->Info.SidrRequest.PathInfo.Path.u2.s.SL = pCEPEntry->pDgrmElement->ServiceLevel; // TBD CopyMadToConnInfo has set Path.Pkey based on SIDR_REQ // alternately could lookup pDgrmElement->PkeyIndex if (pCEPEntry->pDgrmElement->GlobalRoute) { pConnInfo->Info.SidrRequest.PathInfo.bSubnetLocal = 0; memcpy(&pConnInfo->Info.SidrRequest.PathInfo.Path.DGID.Raw, pCEPEntry->pDgrmElement->GRHInfo.DestGID.Raw, sizeof(IB_GID)); // BUGBUG! //memcpy(&pConnInfo->Info.SidrRequest.PathInfo.Path.SGID.Raw, pConnInfo->Info.SidrRequest.PathInfo.Path.SGID.Type.Global.SubnetPrefix = DEFAULT_SUBNET_PREFIX; pConnInfo->Info.SidrRequest.PathInfo.Path.SGID.Type.Global.InterfaceID = pCEPEntry->pDgrmElement->PortGuid; pConnInfo->Info.SidrRequest.PathInfo.Path.u1.s.FlowLabel = pCEPEntry->pDgrmElement->GRHInfo.FlowLabel; pConnInfo->Info.SidrRequest.PathInfo.Path.u1.s.HopLimit = pCEPEntry->pDgrmElement->GRHInfo.HopLimit; pConnInfo->Info.SidrRequest.PathInfo.Path.TClass = pCEPEntry->pDgrmElement->GRHInfo.TrafficClass; } else { pConnInfo->Info.SidrRequest.PathInfo.bSubnetLocal = 1; } pConnInfo->Status = FSIDR_REQUEST; } else { Status = FERROR; } break; #if 0 case CMS_SIDR_RESP_RCVD: // Received SIDR response ASSERT(pCEP->Mode == CM_MODE_ACTIVE); if (BitTest(pCEP->EventFlags, CME_RCVD_SIDR_RESP)) { BitClear(pCEP->EventFlags, CME_RCVD_SIDR_RESP); // Copy the SIDR response to user buffer CopyMadToConnInfo(&pConnInfo->Info, (CM_MAD*)GsiDgrmGetSendMad(pCEP->pDgrmElement)); pConnInfo->Status = FSIDR_RESPONSE; } else { Status = FERROR; } break; #endif default: Status = FERROR; break; } // switch() // Clear the event flags if (! bKeepQueued) pCEP->EventFlags = 0; if (pCEP->State == CMS_IDLE && pCEP->EventFlags == 0) pCEP->Mode = CM_MODE_NONE; _DBG_LEAVE_LVL(_DBG_LVL_FUNC_TRACE); return Status; } // GetConnInfo() ////////////////////////////////////////////////////////////////////////// // WaitA // // // INPUTS: // // // // OUTPUTS: // // None. // // RETURNS: // // FSUCCESS or FTIMEOUT. // // IRQL: // // This routine is called at IRQL_PASSIVE_LEVEL. // FSTATUS WaitA( IN CM_CEP_OBJECT* pCEP, OUT CM_CONN_INFO* pConnInfo, uint32 Timeout_us // TODO: Put a max limit on this in addition to infinite ) { FSTATUS Status=FSUCCESS; uint32 WaitTimeUs=Timeout_us; uint64 TimeoutUs=Timeout_us; uint64 StartTimeUs=GetTimeStamp(); uint64 CurrTimeUs=0; uint64 TimeElapsedUs=0; _DBG_ENTER_LVL(_DBG_LVL_FUNC_TRACE, WaitA); waitagain: // Update the timeout if ((int32)WaitTimeUs != EVENT_NO_TIMEOUT) { if (CurrTimeUs >= StartTimeUs) { TimeElapsedUs = (CurrTimeUs - StartTimeUs); } else // wrap-around { TimeElapsedUs = (CurrTimeUs + ((uint64)(-1) - StartTimeUs)); } if (TimeoutUs > TimeElapsedUs) { WaitTimeUs = (uint32) (TimeoutUs - TimeElapsedUs); } else { WaitTimeUs = 0; } } //_DBG_INFO((" Waiting on obj... \n", // _DBG_PTR(pCEP), WaitTimeUs/1000, pCEP->LocalCommID, // _DBG_PTR(CmGetStateString(pCEP->State)))); Status = EventWaitOn(pCEP->pEventObj, WaitTimeUs); //_DBG_INFO((" Waiting on obj... complete <%s %s>\n", // _DBG_PTR(pCEP), _DBG_PTR(FSTATUS_MSG(Status)), // _DBG_PTR(CmGetStateString(pCEP->State)))); if (Status == FTIMEOUT) { if (Timeout_us == 0) { //_DBG_INFO((" Obj state is nonsignaled.\n", // _DBG_PTR(pCEP))); } else { _DBG_WARNING((" EventWaitOn() returns FTIMEOUT!\n", _DBG_PTR(pCEP))); } _DBG_LEAVE_LVL(_DBG_LVL_FUNC_TRACE); return FTIMEOUT; } ASSERT(Status == FSUCCESS); _DBG_INFO((" Obj state is signaled <%s>.\n", _DBG_PTR(pCEP), _DBG_PTR(CmGetStateString(pCEP->State)))); // Get mutual ex to the object SpinLockAcquire(&gCM->ListLock); Status = GetConnInfo(pCEP, pConnInfo); if (Status != FSUCCESS) { // Handle the case when we get signal but we already process the event // with the previous signal // eg evt1 --> signal; evt2 --> signal // ->wait() returned _DBG_WARNING((" GetConnInfo() returns %s!\n", _DBG_PTR(pCEP), _DBG_PTR(FSTATUS_MSG(Status)))); ASSERT(pConnInfo->Status == 0); SpinLockRelease(&gCM->ListLock); goto waitagain; } SpinLockRelease(&gCM->ListLock); _DBG_LEAVE_LVL(_DBG_LVL_FUNC_TRACE); return Status; } // WaitA() ////////////////////////////////////////////////////////////////////////// // AcceptA // // // INPUTS: // // // // OUTPUTS: // // None. // // RETURNS: // // FSUCCESS - The function has completed successfully. For server, this indicates the initialization. // >FCM_CONNECT_ESTABLISHED // >FCM_CONNECT_REJECT // >FCM_CONNECT_TIMEOUT // >FCM_CONNECT_CANCEL // // FINVALID_STATE - The endpoint is not in the valid state for this call // FERROR - Unable to send the reply ack packet // FTIMEOUT - The timeout interval expires // // // IRQL: // // This routine is called at IRQL_PASSIVE_LEVEL. // FSTATUS AcceptA( IN CM_CEP_OBJECT* pCEP, IN CM_CONN_INFO* pSendConnInfo // Send RTU ) { FSTATUS Status=FSUCCESS; uint64 elapsed_us=0; _DBG_ENTER_LVL(_DBG_LVL_FUNC_TRACE, AcceptA); SpinLockAcquire(&gCM->ListLock); switch (pCEP->State) { case CMS_IDLE: if (BitTest(pCEP->EventFlags, USRE_CANCEL)) { Status = FCM_CONNECT_CANCEL; } else { Status = FERROR; } break; case CMS_REP_RCVD: case CMS_MRA_REP_SENT: // // Active accept - // Accepting the connect reply on a connect or peer endpoint. // Send a reply ack msg. // // Update the turnaround time elapsed_us = CmGetElapsedTime(pCEP); gCM->turnaround_time_us = UpdateTurnaroundTime(gCM->turnaround_time_us, elapsed_us); _DBG_INFO(("Update turnaround time .\n", gCM->turnaround_time_us)); // Prepare RTU FormatRTU((CM_MAD*)GsiDgrmGetSendMad(pCEP->pDgrmElement), pSendConnInfo?pSendConnInfo->Info.Rtu.PrivateData:NULL, pSendConnInfo?CMM_RTU_USER_LEN:0, pCEP->TransactionID, pCEP->LocalCommID, pCEP->RemoteCommID); _DBG_INFO((" RTU's dgrm address field \n", _DBG_PTR(pCEP), pCEP->pDgrmElement->PortGuid, pCEP->pDgrmElement->RemoteLID, pCEP->pDgrmElement->ServiceLevel, pCEP->pDgrmElement->PathBits, pCEP->pDgrmElement->StaticRate)); // Send out the RTU Status = CmDgrmSend(pCEP->pDgrmElement); if (Status != FSUCCESS) { // fall through and let timer retry the send later _DBG_WARN((" DgrmSend() failed for RTU!!! <%s>\n", _DBG_PTR(pCEP), _DBG_PTR(FSTATUS_MSG(Status)))); Status = FSUCCESS; } else { AtomicIncrementVoid(&gCM->Sent.Rtu); } // Update the status Status = FCM_CONNECT_ESTABLISHED; if (pSendConnInfo) pSendConnInfo->Status = Status; CepSetState(pCEP, CMS_ESTABLISHED); _DBG_INFO((" *** Connection established *** .\n", _DBG_PTR(pCEP), pCEP->LocalCommID, pCEP->RemoteCommID, pCEP->PrimaryPath.LocalLID, pCEP->PrimaryPath.RemoteLID)); break; default: Status = FINVALID_STATE; break; } // switch() SpinLockRelease(&gCM->ListLock); _DBG_LEAVE_LVL(_DBG_LVL_FUNC_TRACE); return Status; } // AcceptA() ////////////////////////////////////////////////////////////////////////// // RejectA // // // INPUTS: // // // // OUTPUTS: // // None. // // RETURNS: // // FSUCCESS. // // IRQL: // // This routine is called at IRQL_PASSIVE_LEVEL. // FSTATUS RejectA( IN CM_CEP_OBJECT* pCEP, IN const CM_REJECT_INFO* pConnectReject ) { FSTATUS Status=FSUCCESS; _DBG_ENTER_LVL(_DBG_LVL_FUNC_TRACE, RejectA); switch (pCEP->State) { case CMS_IDLE: if (BitTest(pCEP->EventFlags, USRE_CANCEL)) { Status = FCM_CONNECT_CANCEL; } else { Status = FERROR; } break; case CMS_REP_RCVD: case CMS_MRA_REP_SENT: // // Reject the connect reply from the server // // Format the REJ msg FormatREJ((CM_MAD*)GsiDgrmGetSendMad(pCEP->pDgrmElement), CM_REJECT_REPLY, (uint16)pConnectReject->Reason, pConnectReject->RejectInfo, pConnectReject->RejectInfoLen, pConnectReject->PrivateData, CMM_REJ_USER_LEN, pCEP->TransactionID, pCEP->LocalCommID, pCEP->RemoteCommID); // Send out the REJ. We do not need to handle failures since // the other end will eventually time-out if it does not received // either RTU or REJ Status = CmDgrmSend(pCEP->pDgrmElement); if (Status != FSUCCESS) { // fall through and let timer retry the send later _DBG_WARN((" DgrmSend() failed for REJ!!! <%s>\n", _DBG_PTR(pCEP), _DBG_PTR(FSTATUS_MSG(Status)))); Status = FSUCCESS; } else { AtomicIncrementVoid(&gCM->Sent.RejRep); } _DBG_INFO((" REJ sent, rejecting REP .\n", _DBG_PTR(pCEP), pCEP->LocalCommID, pCEP->RemoteCommID, pCEP->PrimaryPath.LocalLID, pCEP->PrimaryPath.RemoteLID, pConnectReject->Reason)); CepToIdle(pCEP); break; //case CMS_REQ_SENT: //case CMS_REP_WAIT: // TODO: Client canceling the pending connection attempt //break; default: Status = FINVALID_STATE; break; } // switch() _DBG_LEAVE_LVL(_DBG_LVL_FUNC_TRACE); return Status; } // RejectA() ////////////////////////////////////////////////////////////////////////// // CancelA // CmCancel or CEPDestroy on an active CEP // CA removal while connecting // // // INPUTS: // // // // OUTPUTS: // // None. // // RETURNS: // // FSUCCESS. // // IRQL: // // This routine is called at IRQL_PASSIVE_LEVEL. // FSTATUS CancelA( IN CM_CEP_OBJECT* pCEP ) { FSTATUS Status=FPENDING; switch (pCEP->State) { case CMS_REQ_SENT: case CMS_REP_WAIT: // // Cancel the connect request // // Cancel the REQ timer CmTimerStop(pCEP, REQ_TIMER); if (pCEP->bPeer && pCEP->RemoteEndPoint.CaGUID == 0) { CM_MapRemoveEntry(&gCM->ListenMap, &pCEP->MapEntry, LISTEN_LIST, pCEP); } CepToIdle(pCEP); // Set the cancel event SetNotification(pCEP, USRE_CANCEL); break; case CMS_REP_RCVD: case CMS_MRA_REP_SENT: // No need to check the return status. We are guaranteed to succeed // since we are calling RejectA() within the lock (i.e. no state changed) { // keep const and static to save kernel stack space static CM_REJECT_INFO ConnectReject = { Reason:RC_USER_REJ }; RejectA(pCEP, &ConnectReject); } // CEP is now Idle // Set the cancel event SetNotification(pCEP, USRE_CANCEL); break; case CMS_SIDR_REQ_SENT: // Cancel the SIDR_REQ timer CmTimerStop(pCEP, SIDR_REQ_TIMER); // Move from QUERY to INIT list CM_MapRemoveEntry(&gCM->QueryMap, &pCEP->MapEntry, QUERY_LIST, pCEP); // ClearEndPoint in CepToIdle is benign CepToIdle(pCEP); // Set the cancel event SetNotification(pCEP, USRE_CANCEL); break; default: Status = FINVALID_STATE; break; } // switch() return Status; } // CancelA() // build QP Attributes to move QP to Init, RTR and RTS // and prepare a Rtu FSTATUS ProcessReplyA( IN CM_CEP_OBJECT* pCEP, OUT CM_PROCESS_REPLY_INFO* Info ) { FSTATUS Status=FSUCCESS; _DBG_ENTER_LVL(_DBG_LVL_FUNC_TRACE, ProcessReplyA); switch (pCEP->State) { case CMS_IDLE: if (BitTest(pCEP->EventFlags, USRE_CANCEL)) { Status = FCM_CONNECT_CANCEL; } else { Status = FERROR; } break; case CMS_REP_RCVD: case CMS_MRA_REP_SENT: MemoryClear(Info, sizeof(*Info)); // just to be safe // set up attributes to move QP to Init // we used to return Init, but spec says this should be done before // send REQ so we removed it to reduce size of return structure //GetInitAttrFromCep(pCEP, &Info->QpAttrInit); // set up attributes to move QP to RTR GetRtrAttrFromCep(pCEP, pCEP->bFailoverSupported, &Info->QpAttrRtr); // set up attributes to move QP to RTS GetRtsAttrFromCep(pCEP, &Info->QpAttrRts); break; default: Status = FINVALID_STATE; break; } _DBG_LEAVE_LVL(_DBG_LVL_FUNC_TRACE); return Status; } ////////////////////////////////////////////////////////////////////////// // AltPathRequestA // // Request a new alternate path // Only allowed for Client/Active side of connection // // INPUTS: // // // // OUTPUTS: // // None. // // RETURNS: // // FPENDING - LAP queued to be sent. // FINVALID_STATE - The endpoint is not in the valid state for this call // FERROR - Unable to send the lap packet // FINVALID_PARAMETER - invalid pLapInfo // // // IRQL: // // This routine is called at IRQL_PASSIVE_LEVEL. // FSTATUS AltPathRequestA( IN CM_CEP_OBJECT* pCEP, IN const CM_ALTPATH_INFO* pLapInfo // Send LAP ) { FSTATUS Status; IB_PORT_ATTRIBUTES *pPortAttr=NULL; uint8 LocalCaAckDelay; _DBG_ENTER_LVL(_DBG_LVL_FUNC_TRACE, AltPathRequestA); ASSERT(pCEP->Mode == CM_MODE_ACTIVE); if (pCEP->State != CMS_ESTABLISHED) { Status = FINVALID_STATE; goto done; } if (! pCEP->bFailoverSupported) { // Remote End REP did not accept Failover Status = FINVALID_STATE; goto done; } #if ! defined(CM_APM_RELOAD) // do not allow alternate path to be reloaded if (pCEP->AlternatePath.LocalLID != 0) { Status = FINVALID_STATE; goto done; } #endif if (0 == CompareCepPathToPathInfo(&pCEP->PrimaryPath, &pLapInfo->AlternatePathInfo)) { Status = FINVALID_PARAMETER; goto done; } Status = iba_find_ca_gid(&pLapInfo->AlternatePathInfo.Path.SGID, pCEP->LocalEndPoint.CaGUID, &pCEP->TempAlternateLocalPortGuid, &pCEP->TempAlternateLocalGidIndex, &pPortAttr, &LocalCaAckDelay); if (Status != FSUCCESS) { Status = FINVALID_PARAMETER; goto done; } if (! LookupPKey(pPortAttr, pCEP->PartitionKey, FALSE, &pCEP->TempAlternatePkeyIndex)) { Status = FINVALID_PARAMETER; goto done; } pCEP->TempAlternateLocalPathBits = LidToPathBits(pLapInfo->AlternatePathInfo.Path.SLID, pPortAttr->Address.LMC); pCEP->TempAlternateLocalAckTimeout = ComputeAckTimeout( pLapInfo->AlternatePathInfo.Path.PktLifeTime, pCEP->TargetAckDelay); MemoryDeallocate(pPortAttr); // Prepare LAP // RTU could still be on send Q, get a fresh Dgrm if needed Status = PrepareCepDgrm(pCEP); if (Status != FSUCCESS) { _DBG_WARNING((" Unable to send LAP: Out of CM Dgrms!!!\n", _DBG_PTR(pCEP))); goto done; } pCEP->bLapSent = 1; // pCEP->pDgrmElement is no longer an RTU FormatLAP((CM_MAD*)GsiDgrmGetSendMad(pCEP->pDgrmElement), pCEP->RemoteEndPoint.QPN, // TBD EECN pCEP->RemoteCMTimeout, pLapInfo, (pLapInfo->AlternateAckTimeout ?pLapInfo->AlternateAckTimeout : ComputeAckTimeout( pLapInfo->AlternatePathInfo.Path.PktLifeTime, LocalCaAckDelay)), pCEP->TransactionID, pCEP->LocalCommID, pCEP->RemoteCommID); _DBG_INFO((" LAP's dgrm address field \n", _DBG_PTR(pCEP), pCEP->pDgrmElement->PortGuid, pCEP->pDgrmElement->RemoteLID, pCEP->pDgrmElement->ServiceLevel, pCEP->pDgrmElement->PathBits, pCEP->pDgrmElement->StaticRate)); // Send out the LAP Status = CmDgrmSend(pCEP->pDgrmElement); if (Status != FSUCCESS) { // fall through and let timer retry the send later _DBG_WARN((" DgrmSend() failed for LAP!!! <%s>\n", _DBG_PTR(pCEP), _DBG_PTR(FSTATUS_MSG(Status)))); Status = FSUCCESS; } else { AtomicIncrementVoid(&gCM->Sent.Lap); } Status = FPENDING; CepSetState(pCEP, CMS_LAP_SENT); pCEP->RetryCount = 0; CmTimerStart(pCEP, TimeoutMultToTimeInMs(pCEP->LocalCMTimeout), LAP_TIMER); done: _DBG_LEAVE_LVL(_DBG_LVL_FUNC_TRACE); return Status; } // AltPathRequestA() // build QP Attributes to load new alternate path into QP FSTATUS ProcessAltPathReplyA( IN CM_CEP_OBJECT* pCEP, IN const CM_ALTPATH_REPLY_INFO* AltPathReply, OUT IB_QP_ATTRIBUTES_MODIFY* QpAttrRts ) { FSTATUS Status=FSUCCESS; _DBG_ENTER_LVL(_DBG_LVL_FUNC_TRACE, ProcessAltPathReplyA); ASSERT(pCEP->Mode == CM_MODE_ACTIVE); if (pCEP->State != CMS_ESTABLISHED) { Status = FINVALID_STATE; goto done; } // possible risk - we trust pCEP->AlternatePath, however if the // application issues a Lap and gets a reply before this function // completes, the pCEP->AlternatePath and pCEP->TempAlternate fields // could change on us. If this risk is real we would need to work // strictly from the AltPathReply MemoryClear(QpAttrRts, sizeof(*QpAttrRts)); // just to be safe QpAttrRts->RequestState = QPStateReadyToSend; QpAttrRts->APMState = APMStateRearm; GetAVFromCepPath(&pCEP->AlternatePath, &QpAttrRts->AltDestAV); QpAttrRts->AltPortGUID = pCEP->AlternatePath.LocalPortGuid; QpAttrRts->AltPkeyIndex = pCEP->AlternatePath.PkeyIndex; QpAttrRts->Attrs = IB_QP_ATTR_APMSTATE | IB_QP_ATTR_ALTDESTAV|IB_QP_ATTR_ALTPORTGUID | IB_QP_ATTR_ALTPKEYINDEX; if (pCEP->Type != CM_UC_TYPE) { QpAttrRts->AltLocalAckTimeout = pCEP->AlternatePath.LocalAckTimeout; QpAttrRts->Attrs |= IB_QP_ATTR_ALTLOCALACKTIMEOUT; } // caller must iba_modify_qp(...) done: _DBG_LEAVE_LVL(_DBG_LVL_FUNC_TRACE); return Status; } ////////////////////////////////////////////////////////////////////////// // MigratedA // // Inform CM that alternate path has been migrated to by client or server // TBD - hook into Async Event callbacks // // INPUTS: // // // // OUTPUTS: // // None. // // RETURNS: // // FSUCCESS - CEP adjusted to reflect migration // FINVALID_STATE - The endpoint is not in the valid state for this call // // // IRQL: // // This routine is called at IRQL_PASSIVE_LEVEL. // FSTATUS MigratedA( IN CM_CEP_OBJECT* pCEP ) { FSTATUS Status = FSUCCESS; uint64 old_acktimeout_us; uint64 new_acktimeout_us; uint64 timeout_us; _DBG_ENTER_LVL(_DBG_LVL_FUNC_TRACE, MigratedA); if (pCEP->State != CMS_ESTABLISHED) { // if CM_APM_RELOAD could be a race of migration with LAP Status = FINVALID_STATE; goto done; } if (pCEP->AlternatePath.LocalLID == 0) { // No alternate path Status = FINVALID_STATE; goto done; } // New path will have a different PktLifetime, so adjust our CM timers // accordingly, AckTimeout has a 2*PktLifetime component old_acktimeout_us = TimeoutMultToTimeInUsec(pCEP->PrimaryPath.AckTimeout); new_acktimeout_us = TimeoutMultToTimeInUsec(pCEP->AlternatePath.AckTimeout); // LocalCMTimeout has a 2*PktLifetime component timeout_us = TimeoutMultToTimeInUsec(pCEP->LocalCMTimeout) + (new_acktimeout_us - old_acktimeout_us); pCEP->LocalCMTimeout = TimeoutTimeToMult(timeout_us); // RemoteCMTimeout has no PktLifetime component pCEP->Timewait = pCEP->AlternatePath.LocalAckTimeout; // Ack timeout is calculated as round-trip // This computation will be rounded up by CA Ack Delay/2 if (pCEP->AlternatePath.AckTimeout >= 1) { // -1 for a log2 value is the same as /2 pCEP->PktLifeTime = (pCEP->AlternatePath.AckTimeout -1); } else { pCEP->PktLifeTime = 0; } pCEP->PrimaryPath = pCEP->AlternatePath; MemoryClear(&pCEP->AlternatePath, sizeof(pCEP->AlternatePath)); // adjust remote path for future CM dgrms to use CmDgrmAddrSet(pCEP->pDgrmElement, pCEP->PrimaryPath.LocalPortGuid, pCEP->PrimaryPath.RemoteLID, pCEP->PrimaryPath.SL, pCEP->PrimaryPath.LocalPathBits, pCEP->PrimaryPath.StaticRate, (! pCEP->PrimaryPath.bSubnetLocal), pCEP->PrimaryPath.RemoteGID, pCEP->PrimaryPath.FlowLabel, pCEP->PrimaryPath.LocalGidIndex, pCEP->PrimaryPath.HopLimit, (uint8)pCEP->PrimaryPath.TClass, pCEP->PrimaryPath.PkeyIndex); done: _DBG_LEAVE_LVL(_DBG_LVL_FUNC_TRACE); return Status; } // MigratedA() FSTATUS MigratedReloadA( IN CM_CEP_OBJECT* pCEP, OUT CM_PATH_INFO* NewPrimaryPath OPTIONAL, OUT CM_ALTPATH_INFO* AltPathRequest OPTIONAL ) { FSTATUS Status; _DBG_ENTER_LVL(_DBG_LVL_FUNC_TRACE, MigratedReloadA); ASSERT(pCEP->Mode == CM_MODE_ACTIVE); if (NewPrimaryPath) { CopyCepPathToPathInfo(&NewPrimaryPath->PathInfo, &NewPrimaryPath->AckTimeout, &pCEP->AlternatePath); NewPrimaryPath->PathInfo.Path.P_Key = pCEP->PartitionKey; NewPrimaryPath->PathInfo.Path.Mtu = pCEP->Mtu; //NewPrimaryPath->PathInfo.Path.PktLifeTime may be inaccurate } if (AltPathRequest) { CopyCepPathToPathInfo(&AltPathRequest->AlternatePathInfo, &AltPathRequest->AlternateAckTimeout, &pCEP->PrimaryPath); AltPathRequest->AlternatePathInfo.Path.P_Key = pCEP->PartitionKey; AltPathRequest->AlternatePathInfo.Path.Mtu = pCEP->Mtu; //AltPathRequest->AlternatePathInfo.Path.PktLifeTime may be inaccurate } Status = MigratedA(pCEP); _DBG_LEAVE_LVL(_DBG_LVL_FUNC_TRACE); return Status; } // MigratedReloadA() ////////////////////////////////////////////////////////////////////////// // ShutdownA // CA has been removed while disconnecting // used for both Active and Passive CEPs // // // INPUTS: // // // // OUTPUTS: // // None. // // RETURNS: // // FSUCCESS. // // IRQL: // // This routine is called at IRQL_PASSIVE_LEVEL. // FSTATUS ShutdownA( IN CM_CEP_OBJECT* pCEP ) { FSTATUS Status=FPENDING;//FSUCCESS; switch (pCEP->State) { case CMS_DREQ_SENT: // Cancel the DREQ timer CmTimerStop(pCEP, DREQ_TIMER); /* FALLSTHROUGH */ case CMS_DREQ_RCVD: case CMS_SIM_DREQ_RCVD: // Skip the CMS_TIMEWAIT, CA is gone CepToIdle(pCEP); // Set the shutdown event SetNotification(pCEP, SYSE_DISCONNECT); break; case CMS_TIMEWAIT: // Cancel the TIMEWAIT timer CmTimerStop(pCEP, TIMEWAIT_TIMER); // Skip the CMS_TIMEWAIT, CA is gone CepToIdle(pCEP); // Is this obj marked for destruction ? if (BitTest(pCEP->EventFlags, USRE_DESTROY)) { // handle the case when the user callback returns and this routine // gets run because the timewait timer expired. // We let the CmSystemCallback() proceed with the destruction. if (AtomicRead(&pCEP->CallbackRefCnt) == 0) { DestroyCEP(pCEP); } } else { // make sure user gets an event to indicate end of timewait period if (pCEP->bTimewaitCallback) { SetNotification(pCEP, SYSE_DISCONNECT); } } Status = FSUCCESS; break; default: Status = FINVALID_STATE; break; } // switch() return Status; } // ShutdownA() ////////////////////////////////////////////////////////////////////////// // DisconnectA // User has called CmDisconnect // used for both Active and Passive CEPs // // INPUTS: // // // // OUTPUTS: // // None. // // RETURNS: // // FSUCCESS. // // IRQL: // // This routine is called at IRQL_PASSIVE_LEVEL. // FSTATUS DisconnectA( IN CM_CEP_OBJECT* pCEP, IN const CM_DREQUEST_INFO* pDRequest, // Send DREQ IN const CM_DREPLY_INFO* pDReply // Send DREP ) { FSTATUS Status=FSUCCESS; // FPENDING ?? uint32 dreq_timeout_ms=0; _DBG_ENTER_LVL(_DBG_LVL_FUNC_TRACE, DisconnectA); switch (pCEP->State) { case CMS_ESTABLISHED: case CMS_LAP_SENT: case CMS_MRA_LAP_RCVD: case CMS_LAP_RCVD: case CMS_MRA_LAP_SENT: if (pDRequest) { //pDRequest->Reserved3 = pCEP->LocalEndPoint.QPN; // an RTU or LAP/APR could be on the SendQ // RTU --> // DREQ --> Status = PrepareCepDgrm(pCEP); if (Status != FSUCCESS) { _DBG_WARNING((" Unable to send DREQ: Out of CM Dgrms!!!\n", _DBG_PTR(pCEP))); break; } // Format the DREQ msg FormatDREQ((CM_MAD*)GsiDgrmGetSendMad(pCEP->pDgrmElement), pCEP->RemoteEndPoint.QPN, pDRequest->PrivateData, CMM_DREQ_USER_LEN, pCEP->TransactionID, pCEP->LocalCommID, pCEP->RemoteCommID); // Send out the DREQ Status = CmDgrmSend(pCEP->pDgrmElement); if (Status != FSUCCESS) { // fall through and let timer retry the send later _DBG_WARN((" DgrmSend() failed for DREQ!!! <%s>\n", _DBG_PTR(pCEP), _DBG_PTR(FSTATUS_MSG(Status)))); Status = FSUCCESS; } else { AtomicIncrementVoid(&gCM->Sent.Dreq); } //Status = FPENDING; if (pCEP->State == CMS_LAP_SENT || pCEP->State == CMS_MRA_LAP_RCVD) { CmTimerStop(pCEP, REQ_TIMER); } CepToDisconnecting(pCEP, CMS_DREQ_SENT); pCEP->RetryCount = 0; dreq_timeout_ms = TimeoutMultToTimeInMs(pCEP->LocalCMTimeout); CmTimerStart(pCEP, dreq_timeout_ms, DREQ_TIMER); } else { _DBG_ERROR((" Invalid state!!! <%s>\n", _DBG_PTR(pCEP), _DBG_PTR(CmGetStateString(pCEP->State)))); Status = FINVALID_STATE; } break; case CMS_DREQ_RCVD: if (pDReply) { // uncomment out these 3 lines to have DREQ processing time // factored into average turnaround time //uint64 elapsed_us; //elapsed_us = CmGetElapsedTime(pCEP); //gCM->turnaround_time_us = UpdateTurnaroundTime(gCM->turnaround_time_us, elapsed_us); // just in case a LAP/APR or DREQ is in Send Q Status = PrepareCepDgrm(pCEP); if (Status != FSUCCESS) { _DBG_WARNING((" Unable to send DREP: Out of CM Dgrms!!!\n", _DBG_PTR(pCEP))); break; } // Format the DREP msg FormatDREP((CM_MAD*)GsiDgrmGetSendMad(pCEP->pDgrmElement), pDReply->PrivateData, CMM_DREP_USER_LEN, pCEP->TransactionID, pCEP->LocalCommID, pCEP->RemoteCommID); // Send out the DREP Status = CmDgrmSend(pCEP->pDgrmElement); if (Status != FSUCCESS) { // fall through when remote end retries, we will resend DREP _DBG_WARN((" DgrmSend() failed for DREP!!! <%s>\n", _DBG_PTR(pCEP), _DBG_PTR(FSTATUS_MSG(Status)))); Status = FSUCCESS; } else { AtomicIncrementVoid(&gCM->Sent.Drep); } CepToTimewait(pCEP); } else { _DBG_INFO((" Already disconnecting!!! <%s>\n", _DBG_PTR(pCEP), _DBG_PTR(CmGetStateString(pCEP->State)))); Status = FCM_ALREADY_DISCONNECTING; } break; case CMS_SIM_DREQ_RCVD: // to handle the case of a REJ coming in after we sent RTU (possibly // because server never got our RTU), we simulated an inbound DREQ and // sent up a DISCONNECT_REQUEST callback. // As a result the application will call CmDisconnect, // but we don't want to put a DREP on the wire here. // Instead we simply move to the timewait state. // This permits the incoming REJ to simulate a DREQ API sequence. if (pDReply) { // uncomment out these 3 lines to have DREQ processing time // factored into average turnaround time //uint64 elapsed_us; //elapsed_us = CmGetElapsedTime(pCEP); //gCM->turnaround_time_us = UpdateTurnaroundTime(gCM->turnaround_time_us, elapsed_us); CepToTimewait(pCEP); Status = FSUCCESS; } else { _DBG_INFO((" Already disconnecting!!! <%s>\n", _DBG_PTR(pCEP), _DBG_PTR(CmGetStateString(pCEP->State)))); Status = FCM_ALREADY_DISCONNECTING; } break; default: _DBG_ERROR((" Invalid state!!! .\n", _DBG_PTR(pCEP), pCEP->LocalCommID, _DBG_PTR(CmGetStateString(pCEP->State)))); Status = FINVALID_STATE; break; } // switch() _DBG_LEAVE_LVL(_DBG_LVL_FUNC_TRACE); return Status; } // DisconnectA() // EOF