|
Packit |
961e70 |
/*
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
This file is provided under a dual BSD/GPLv2 license. When using or
|
|
Packit |
961e70 |
redistributing this file, you may do so under either license.
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
GPL LICENSE SUMMARY
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
Copyright(c) 2015 Intel Corporation.
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
This program is free software; you can redistribute it and/or modify
|
|
Packit |
961e70 |
it under the terms of version 2 of the GNU General Public License as
|
|
Packit |
961e70 |
published by the Free Software Foundation.
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
This program is distributed in the hope that it will be useful, but
|
|
Packit |
961e70 |
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
Packit |
961e70 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Packit |
961e70 |
General Public License for more details.
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
Contact Information:
|
|
Packit |
961e70 |
Intel Corporation, www.intel.com
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
BSD LICENSE
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
Copyright(c) 2015 Intel Corporation.
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
Redistribution and use in source and binary forms, with or without
|
|
Packit |
961e70 |
modification, are permitted provided that the following conditions
|
|
Packit |
961e70 |
are met:
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
* Redistributions of source code must retain the above copyright
|
|
Packit |
961e70 |
notice, this list of conditions and the following disclaimer.
|
|
Packit |
961e70 |
* Redistributions in binary form must reproduce the above copyright
|
|
Packit |
961e70 |
notice, this list of conditions and the following disclaimer in
|
|
Packit |
961e70 |
the documentation and/or other materials provided with the
|
|
Packit |
961e70 |
distribution.
|
|
Packit |
961e70 |
* Neither the name of Intel Corporation nor the names of its
|
|
Packit |
961e70 |
contributors may be used to endorse or promote products derived
|
|
Packit |
961e70 |
from this software without specific prior written permission.
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
Packit |
961e70 |
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
Packit |
961e70 |
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
Packit |
961e70 |
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
Packit |
961e70 |
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
Packit |
961e70 |
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
Packit |
961e70 |
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
Packit |
961e70 |
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
Packit |
961e70 |
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
Packit |
961e70 |
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
Packit |
961e70 |
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
*/
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
/* Copyright (c) 2003-2015 Intel Corporation. All rights reserved. */
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
#include "psm_user.h"
|
|
Packit |
961e70 |
#include "psm2_hal.h"
|
|
Packit |
961e70 |
#include "psm_mq_internal.h"
|
|
Packit |
961e70 |
#include "ptl_ips/ips_proto_header.h"
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
#ifdef PSM_CUDA
|
|
Packit |
961e70 |
#include "psm_gdrcpy.h"
|
|
Packit |
961e70 |
#endif
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
#if 0
/*
 * Not exposed in public psm, but may extend parts of PSM 2.1 to support
 * this feature before 2.3.
 *
 * Installs fn as the unexpected-message callback of mq and returns the
 * callback that was installed before the call.  Currently compiled out.
 */
psm_mq_unexpected_callback_fn_t
psmi_mq_register_unexpected_callback(psm2_mq_t mq,
				     psm_mq_unexpected_callback_fn_t fn)
{
	psm_mq_unexpected_callback_fn_t previous = mq->unexpected_callback;

	mq->unexpected_callback = fn;
	return previous;
}
#endif
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
/*
 * Finish a rendezvous request: account it in the statistics, mark it
 * complete and, unless it is an internal request, publish it on the
 * completed queue of its MQ.
 */
void psmi_mq_handle_rts_complete(psm2_mq_req_t req)
{
	psm2_mq_t mq = req->mq;

	/* Rendezvous statistics are updated before the state changes. */
	psmi_mq_stats_rts_account(req);

	req->state = MQ_STATE_COMPLETE;
	/* The state store is issued before the request is queued. */
	ips_barrier();

	if (!psmi_is_req_internal(req)) {
		mq_qq_append(&mq->completed_q, req);
	}

	_HFI_VDBG("RTS complete, req=%p, recv_msglen = %d\n",
		  req, req->req_data.recv_msglen);
}
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
static void
|
|
Packit |
961e70 |
psmi_mq_req_copy(psm2_mq_req_t req,
|
|
Packit |
961e70 |
uint32_t offset, const void *buf, uint32_t nbytes)
|
|
Packit |
961e70 |
{
|
|
Packit |
961e70 |
/* recv_msglen may be changed by unexpected receive req_data.buf. */
|
|
Packit |
961e70 |
uint32_t msglen_this, end;
|
|
Packit |
961e70 |
uint8_t *msgptr = (uint8_t *) req->req_data.buf + offset;
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
/* out of receiving range. */
|
|
Packit |
961e70 |
if (offset >= req->req_data.recv_msglen) {
|
|
Packit |
961e70 |
req->send_msgoff += nbytes;
|
|
Packit |
961e70 |
return;
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
end = offset + nbytes;
|
|
Packit |
961e70 |
if (end > req->req_data.recv_msglen) {
|
|
Packit |
961e70 |
msglen_this = req->req_data.recv_msglen - offset;
|
|
Packit |
961e70 |
end = req->req_data.recv_msglen;
|
|
Packit |
961e70 |
} else {
|
|
Packit |
961e70 |
msglen_this = nbytes;
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
psmi_mq_mtucpy(msgptr, buf, msglen_this);
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
if (req->recv_msgoff < end) {
|
|
Packit |
961e70 |
req->recv_msgoff = end;
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
req->send_msgoff += nbytes;
|
|
Packit |
961e70 |
return;
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
int
|
|
Packit |
961e70 |
psmi_mq_handle_data(psm2_mq_t mq, psm2_mq_req_t req,
|
|
Packit |
961e70 |
uint32_t offset, const void *buf, uint32_t nbytes)
|
|
Packit |
961e70 |
{
|
|
Packit |
961e70 |
psmi_assert(req != NULL);
|
|
Packit |
961e70 |
int rc;
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
if (req->state == MQ_STATE_MATCHED)
|
|
Packit |
961e70 |
rc = MQ_RET_MATCH_OK;
|
|
Packit |
961e70 |
else {
|
|
Packit |
961e70 |
psmi_assert(req->state == MQ_STATE_UNEXP);
|
|
Packit |
961e70 |
rc = MQ_RET_UNEXP_OK;
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
psmi_mq_req_copy(req, offset, buf, nbytes);
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
/*
|
|
Packit |
961e70 |
* the reason to use >= is because send_msgoff
|
|
Packit |
961e70 |
* may be DW pad included.
|
|
Packit |
961e70 |
*/
|
|
Packit |
961e70 |
if (req->send_msgoff >= req->req_data.send_msglen) {
|
|
Packit |
961e70 |
if (req->type & MQE_TYPE_EAGER_QUEUE) {
|
|
Packit |
961e70 |
STAILQ_REMOVE(&mq->eager_q, req, psm2_mq_req, nextq);
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
if (req->state == MQ_STATE_MATCHED) {
|
|
Packit |
961e70 |
req->state = MQ_STATE_COMPLETE;
|
|
Packit |
961e70 |
ips_barrier();
|
|
Packit |
961e70 |
mq_qq_append(&mq->completed_q, req);
|
|
Packit |
961e70 |
} else { /* MQ_STATE_UNEXP */
|
|
Packit |
961e70 |
req->state = MQ_STATE_COMPLETE;
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
return rc;
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
static
|
|
Packit |
961e70 |
void mq_add_to_unexpected_hashes(psm2_mq_t mq, psm2_mq_req_t req)
|
|
Packit |
961e70 |
{
|
|
Packit |
961e70 |
int table;
|
|
Packit |
961e70 |
mq_qq_append(&mq->unexpected_q, req);
|
|
Packit |
961e70 |
req->q[PSM2_ANYTAG_ANYSRC] = &mq->unexpected_q;
|
|
Packit |
961e70 |
mq->unexpected_list_len++;
|
|
Packit |
961e70 |
if_pt (mq->nohash_fastpath) {
|
|
Packit |
961e70 |
if_pf (mq->unexpected_list_len >= HASH_THRESHOLD)
|
|
Packit |
961e70 |
psmi_mq_fastpath_disable(mq);
|
|
Packit |
961e70 |
return;
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
for (table = PSM2_TAG_SRC; table < PSM2_ANYTAG_ANYSRC; table++)
|
|
Packit |
961e70 |
mq_qq_append_which(mq->unexpected_htab,
|
|
Packit |
961e70 |
table, mq->hashvals[table], req);
|
|
Packit |
961e70 |
mq->unexpected_hash_len++;
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
psm2_mq_req_t
|
|
Packit |
961e70 |
mq_list_scan(struct mqq *q, psm2_epaddr_t src, psm2_mq_tag_t *tag, int which, uint64_t *time_threshold)
|
|
Packit |
961e70 |
{
|
|
Packit |
961e70 |
psm2_mq_req_t *curp, cur;
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
for (curp = &q->first;
|
|
Packit |
961e70 |
((cur = *curp) != NULL) && (cur->timestamp < *time_threshold);
|
|
Packit |
961e70 |
curp = &cur->next[which]) {
|
|
Packit |
961e70 |
if ((cur->req_data.peer == PSM2_MQ_ANY_ADDR || src == cur->req_data.peer) &&
|
|
Packit |
961e70 |
!((tag->tag[0] ^ cur->req_data.tag.tag[0]) & cur->req_data.tagsel.tag[0]) &&
|
|
Packit |
961e70 |
!((tag->tag[1] ^ cur->req_data.tag.tag[1]) & cur->req_data.tagsel.tag[1]) &&
|
|
Packit |
961e70 |
!((tag->tag[2] ^ cur->req_data.tag.tag[2]) & cur->req_data.tagsel.tag[2])) {
|
|
Packit |
961e70 |
*time_threshold = cur->timestamp;
|
|
Packit |
961e70 |
return cur;
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
return NULL;
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
psm2_mq_req_t
|
|
Packit |
961e70 |
mq_req_match(psm2_mq_t mq, psm2_epaddr_t src, psm2_mq_tag_t *tag, int remove)
|
|
Packit |
961e70 |
{
|
|
Packit |
961e70 |
psm2_mq_req_t match[4];
|
|
Packit |
961e70 |
int table;
|
|
Packit |
961e70 |
uint64_t best_ts = -1;
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
if (mq->nohash_fastpath) {
|
|
Packit |
961e70 |
table = PSM2_ANYTAG_ANYSRC;
|
|
Packit |
961e70 |
match[table] =
|
|
Packit |
961e70 |
mq_list_scan(&mq->expected_q,
|
|
Packit |
961e70 |
src, tag, PSM2_ANYTAG_ANYSRC, &best_ts);
|
|
Packit |
961e70 |
if (match[table] && remove) {
|
|
Packit |
961e70 |
mq->expected_list_len--;
|
|
Packit |
961e70 |
mq_qq_remove_which(match[table], table);
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
return match[table];
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
mq->hashvals[PSM2_TAG_SRC] = hash_64(*(uint64_t *) tag->tag) % NUM_HASH_BUCKETS;
|
|
Packit |
961e70 |
mq->hashvals[PSM2_TAG_ANYSRC] = hash_32(tag->tag[0]) % NUM_HASH_BUCKETS;
|
|
Packit |
961e70 |
mq->hashvals[PSM2_ANYTAG_SRC] = hash_32(tag->tag[1]) % NUM_HASH_BUCKETS;
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
for (table = PSM2_TAG_SRC; table < PSM2_ANYTAG_ANYSRC; table++)
|
|
Packit |
961e70 |
match[table] =
|
|
Packit |
961e70 |
mq_list_scan(&mq->expected_htab[table][mq->hashvals[table]],
|
|
Packit |
961e70 |
src, tag, table, &best_ts);
|
|
Packit |
961e70 |
table = PSM2_ANYTAG_ANYSRC;
|
|
Packit |
961e70 |
match[table] = mq_list_scan(&mq->expected_q, src, tag, table, &best_ts);
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
table = min_timestamp_4(match);
|
|
Packit |
961e70 |
if (table == -1)
|
|
Packit |
961e70 |
return NULL;
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
if (remove) {
|
|
Packit |
961e70 |
if_pt (table == PSM2_ANYTAG_ANYSRC)
|
|
Packit |
961e70 |
mq->expected_list_len--;
|
|
Packit |
961e70 |
else
|
|
Packit |
961e70 |
mq->expected_hash_len--;
|
|
Packit |
961e70 |
mq_qq_remove_which(match[table], table);
|
|
Packit |
961e70 |
psmi_mq_fastpath_try_reenable(mq);
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
return match[table];
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
/*
|
|
Packit |
961e70 |
* This handles the rendezvous MPI envelopes, the packet might have the whole
|
|
Packit |
961e70 |
* message payload, or zero payload.
|
|
Packit |
961e70 |
*/
|
|
Packit |
961e70 |
int
|
|
Packit |
961e70 |
psmi_mq_handle_rts(psm2_mq_t mq, psm2_epaddr_t src, psm2_mq_tag_t *tag,
|
|
Packit |
961e70 |
uint32_t send_msglen, const void *payload, uint32_t paylen,
|
|
Packit |
961e70 |
int msgorder, mq_rts_callback_fn_t cb, psm2_mq_req_t *req_o)
|
|
Packit |
961e70 |
{
|
|
Packit |
961e70 |
psm2_mq_req_t req;
|
|
Packit |
961e70 |
uint32_t msglen;
|
|
Packit |
961e70 |
int rc;
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
PSMI_LOCK_ASSERT(mq->progress_lock);
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
if (msgorder && (req = mq_req_match(mq, src, tag, 1))) {
|
|
Packit |
961e70 |
/* we have a match, no need to callback */
|
|
Packit |
961e70 |
msglen = mq_set_msglen(req, req->req_data.buf_len, send_msglen);
|
|
Packit |
961e70 |
/* reset send_msglen because sender only sends this many */
|
|
Packit |
961e70 |
req->req_data.send_msglen = msglen;
|
|
Packit |
961e70 |
req->state = MQ_STATE_MATCHED;
|
|
Packit |
961e70 |
req->req_data.peer = src;
|
|
Packit |
961e70 |
req->req_data.tag = *tag;
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
if (paylen > msglen) paylen = msglen;
|
|
Packit |
961e70 |
if (paylen) {
|
|
Packit |
961e70 |
psmi_mq_mtucpy(req->req_data.buf, payload, paylen);
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
req->recv_msgoff = req->send_msgoff = paylen;
|
|
Packit |
961e70 |
*req_o = req; /* yes match */
|
|
Packit |
961e70 |
PSM2_LOG_EPM(OPCODE_LONG_RTS,PSM2_LOG_RX,src->epid,mq->ep->epid,
|
|
Packit |
961e70 |
"req->rts_reqidx_peer: %d",req->rts_reqidx_peer);
|
|
Packit |
961e70 |
rc = MQ_RET_MATCH_OK;
|
|
Packit |
961e70 |
} else if (msgorder > 1) {
|
|
Packit |
961e70 |
/* There is NO request match, and this is the first time
|
|
Packit |
961e70 |
* to try to process this packet, we leave the packet in
|
|
Packit |
961e70 |
* hardware queue for retry in hope there is a request
|
|
Packit |
961e70 |
* match next time, this is for performance
|
|
Packit |
961e70 |
* consideration.
|
|
Packit |
961e70 |
*/
|
|
Packit |
961e70 |
rc = MQ_RET_UNEXP_NO_RESOURCES;
|
|
Packit |
961e70 |
} else { /* No match, keep track of callback */
|
|
Packit |
961e70 |
req = psmi_mq_req_alloc(mq, MQE_TYPE_RECV);
|
|
Packit |
961e70 |
psmi_assert(req != NULL);
|
|
Packit |
961e70 |
/* We don't know recv_msglen yet but we set it here for
|
|
Packit |
961e70 |
* mq_iprobe */
|
|
Packit |
961e70 |
req->req_data.send_msglen = req->req_data.recv_msglen = send_msglen;
|
|
Packit |
961e70 |
PSM2_LOG_EPM_COND(req->req_data.send_msglen > mq->hfi_thresh_rv,
|
|
Packit |
961e70 |
OPCODE_LONG_RTS,PSM2_LOG_RX,src->epid,mq->ep->epid,
|
|
Packit |
961e70 |
"req->rts_reqidx_peer: %d",req->rts_reqidx_peer);
|
|
Packit |
961e70 |
req->state = MQ_STATE_UNEXP_RV;
|
|
Packit |
961e70 |
req->req_data.peer = src;
|
|
Packit |
961e70 |
req->req_data.tag = *tag;
|
|
Packit |
961e70 |
req->rts_callback = cb;
|
|
Packit |
961e70 |
if (paylen > send_msglen) paylen = send_msglen;
|
|
Packit |
961e70 |
if (paylen) {
|
|
Packit |
961e70 |
req->req_data.buf = psmi_mq_sysbuf_alloc(mq, paylen);
|
|
Packit |
961e70 |
psmi_assert(paylen == 0 || req->req_data.buf != NULL);
|
|
Packit |
961e70 |
mq->stats.rx_sysbuf_num++;
|
|
Packit |
961e70 |
mq->stats.rx_sysbuf_bytes += paylen;
|
|
Packit |
961e70 |
psmi_mq_mtucpy(req->req_data.buf, payload, paylen);
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
req->recv_msgoff = req->send_msgoff = paylen;
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
if (msgorder) {
|
|
Packit |
961e70 |
mq_add_to_unexpected_hashes(mq, req);
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
/* caller will handle out of order case */
|
|
Packit |
961e70 |
*req_o = req; /* no match, will callback */
|
|
Packit |
961e70 |
rc = MQ_RET_UNEXP_OK;
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
#ifdef PSM_DEBUG
|
|
Packit |
961e70 |
if (req)
|
|
Packit |
961e70 |
_HFI_VDBG("match=%s (req=%p) src=%s mqtag=%08x.%08x.%08x recvlen=%d "
|
|
Packit |
961e70 |
"sendlen=%d errcode=%d\n",
|
|
Packit |
961e70 |
rc == MQ_RET_MATCH_OK ? "YES" : "NO", req,
|
|
Packit |
961e70 |
psmi_epaddr_get_name(src->epid),
|
|
Packit |
961e70 |
req->req_data.tag.tag[0], req->req_data.tag.tag[1], req->req_data.tag.tag[2],
|
|
Packit |
961e70 |
req->req_data.recv_msglen, req->req_data.send_msglen, req->req_data.error_code);
|
|
Packit |
961e70 |
else
|
|
Packit |
961e70 |
_HFI_VDBG("match=%s (req=%p) src=%s\n",
|
|
Packit |
961e70 |
rc == MQ_RET_MATCH_OK ? "YES" : "NO", req,
|
|
Packit |
961e70 |
psmi_epaddr_get_name(src->epid));
|
|
Packit |
961e70 |
#endif /* #ifdef PSM_DEBUG */
|
|
Packit |
961e70 |
return rc;
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
/*
|
|
Packit |
961e70 |
* This handles the regular (i.e. non-rendezvous MPI envelopes)
|
|
Packit |
961e70 |
*/
|
|
Packit |
961e70 |
int
|
|
Packit |
961e70 |
psmi_mq_handle_envelope(psm2_mq_t mq, psm2_epaddr_t src, psm2_mq_tag_t *tag,
|
|
Packit |
961e70 |
uint32_t send_msglen, uint32_t offset,
|
|
Packit |
961e70 |
const void *payload, uint32_t paylen, int msgorder,
|
|
Packit |
961e70 |
uint32_t opcode, psm2_mq_req_t *req_o)
|
|
Packit |
961e70 |
{
|
|
Packit |
961e70 |
psm2_mq_req_t req;
|
|
Packit |
961e70 |
uint32_t msglen;
|
|
Packit Service |
7ed5cc |
psmi_mtucpy_fn_t psmi_mtucpy_fn;
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
if (msgorder && (req = mq_req_match(mq, src, tag, 1))) {
|
|
Packit |
961e70 |
/* we have a match */
|
|
Packit Service |
7ed5cc |
void *user_buffer = req->req_data.buf;
|
|
Packit |
961e70 |
psmi_assert(MQE_TYPE_IS_RECV(req->type));
|
|
Packit |
961e70 |
req->req_data.peer = src;
|
|
Packit |
961e70 |
req->req_data.tag = *tag;
|
|
Packit |
961e70 |
msglen = mq_set_msglen(req, req->req_data.buf_len, send_msglen);
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
_HFI_VDBG("match=YES (req=%p) opcode=%x src=%s mqtag=%x.%x.%x"
|
|
Packit |
961e70 |
" msglen=%d paylen=%d\n", req, opcode,
|
|
Packit |
961e70 |
psmi_epaddr_get_name(src->epid),
|
|
Packit |
961e70 |
tag->tag[0], tag->tag[1], tag->tag[2], msglen,
|
|
Packit |
961e70 |
paylen);
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
switch (opcode) {
|
|
Packit |
961e70 |
case MQ_MSG_TINY:
|
|
Packit |
961e70 |
/* mq_copy_tiny() can handle zero byte */
|
|
Packit |
961e70 |
#ifdef PSM_CUDA
|
|
Packit |
961e70 |
if (PSMI_USE_GDR_COPY(req, msglen)) {
|
|
Packit Service |
7ed5cc |
user_buffer = gdr_convert_gpu_to_host_addr(GDR_FD,
|
|
Packit |
961e70 |
(unsigned long)req->req_data.buf,
|
|
Packit |
961e70 |
msglen, 1, src->proto);
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
#endif
|
|
Packit Service |
7ed5cc |
mq_copy_tiny((uint32_t *) user_buffer, (uint32_t *) payload, msglen);
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
req->state = MQ_STATE_COMPLETE;
|
|
Packit |
961e70 |
ips_barrier();
|
|
Packit |
961e70 |
mq_qq_append(&mq->completed_q, req);
|
|
Packit |
961e70 |
break;
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
case MQ_MSG_SHORT: /* message fits in 1 payload */
|
|
Packit Service |
7ed5cc |
psmi_mtucpy_fn = psmi_mq_mtucpy;
|
|
Packit |
961e70 |
#ifdef PSM_CUDA
|
|
Packit |
961e70 |
if (PSMI_USE_GDR_COPY(req, msglen)) {
|
|
Packit |
961e70 |
user_buffer = gdr_convert_gpu_to_host_addr(GDR_FD,
|
|
Packit |
961e70 |
(unsigned long)req->req_data.buf,
|
|
Packit |
961e70 |
msglen, 1, src->proto);
|
|
Packit |
961e70 |
psmi_mtucpy_fn = psmi_mq_mtucpy_host_mem;
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
#endif
|
|
Packit |
961e70 |
if (msglen <= paylen) {
|
|
Packit |
961e70 |
psmi_mtucpy_fn(user_buffer, payload, msglen);
|
|
Packit |
961e70 |
} else {
|
|
Packit |
961e70 |
psmi_assert((msglen & ~0x3) == paylen);
|
|
Packit |
961e70 |
psmi_mtucpy_fn(user_buffer, payload, paylen);
|
|
Packit |
961e70 |
/*
|
|
Packit |
961e70 |
* there are nonDW bytes attached in header,
|
|
Packit |
961e70 |
* copy after the DW payload.
|
|
Packit |
961e70 |
*/
|
|
Packit |
961e70 |
mq_copy_tiny((uint32_t *)(user_buffer+paylen),
|
|
Packit |
961e70 |
(uint32_t *)&offset, msglen & 0x3);
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
req->state = MQ_STATE_COMPLETE;
|
|
Packit |
961e70 |
ips_barrier();
|
|
Packit |
961e70 |
mq_qq_append(&mq->completed_q, req);
|
|
Packit |
961e70 |
break;
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
case MQ_MSG_EAGER:
|
|
Packit |
961e70 |
req->state = MQ_STATE_MATCHED;
|
|
Packit |
961e70 |
req->type |= MQE_TYPE_EAGER_QUEUE;
|
|
Packit |
961e70 |
req->send_msgoff = req->recv_msgoff = 0;
|
|
Packit |
961e70 |
STAILQ_INSERT_TAIL(&mq->eager_q, req, nextq);
|
|
Packit |
961e70 |
_HFI_VDBG("exp MSG_EAGER of length %d bytes pay=%d\n",
|
|
Packit |
961e70 |
msglen, paylen);
|
|
Packit |
961e70 |
#ifdef PSM_CUDA
|
|
Packit |
961e70 |
if (PSMI_USE_GDR_COPY(req, req->req_data.send_msglen)) {
|
|
Packit |
961e70 |
req->req_data.buf = gdr_convert_gpu_to_host_addr(GDR_FD,
|
|
Packit |
961e70 |
(unsigned long)req->user_gpu_buffer,
|
|
Packit |
961e70 |
req->req_data.send_msglen, 1, src->proto);
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
#endif
|
|
Packit |
961e70 |
if (paylen > 0)
|
|
Packit |
961e70 |
psmi_mq_handle_data(mq, req, offset, payload,
|
|
Packit |
961e70 |
paylen);
|
|
Packit |
961e70 |
break;
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
default:
|
|
Packit |
961e70 |
psmi_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR,
|
|
Packit |
961e70 |
"Internal error, unknown packet 0x%x",
|
|
Packit |
961e70 |
opcode);
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
mq->stats.rx_user_bytes += msglen;
|
|
Packit |
961e70 |
mq->stats.rx_user_num++;
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
*req_o = req; /* yes match */
|
|
Packit |
961e70 |
return MQ_RET_MATCH_OK;
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
/* unexpected message or out of order message. */
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
#if 0
|
|
Packit |
961e70 |
/*
|
|
Packit |
961e70 |
* Keep a callback here in case we want to fit some other high-level
|
|
Packit |
961e70 |
* protocols over MQ (i.e. shmem). These protocols would bypass the
|
|
Packit |
961e70 |
* normal message handling and go to higher-level message handlers.
|
|
Packit |
961e70 |
*/
|
|
Packit |
961e70 |
if (msgorder && mq->unexpected_callback) {
|
|
Packit |
961e70 |
mq->unexpected_callback(mq, opcode, epaddr, req_data.tag, send_msglen,
|
|
Packit |
961e70 |
payload, paylen);
|
|
Packit |
961e70 |
*req_o = NULL;
|
|
Packit |
961e70 |
return MQ_RET_UNEXP_OK;
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
#endif
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
if (msgorder > 1) {
|
|
Packit |
961e70 |
/* There is NO request match, and this is the first time
|
|
Packit |
961e70 |
* to try to process this packet, we leave the packet in
|
|
Packit |
961e70 |
* hardware queue for retry in hope there is a request
|
|
Packit |
961e70 |
* match nex time, this is for performance
|
|
Packit |
961e70 |
* consideration.
|
|
Packit |
961e70 |
*/
|
|
Packit |
961e70 |
return MQ_RET_UNEXP_NO_RESOURCES;
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
req = psmi_mq_req_alloc(mq, MQE_TYPE_RECV);
|
|
Packit |
961e70 |
psmi_assert(req != NULL);
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
req->req_data.peer = src;
|
|
Packit |
961e70 |
req->req_data.tag = *tag;
|
|
Packit |
961e70 |
req->recv_msgoff = 0;
|
|
Packit |
961e70 |
req->req_data.recv_msglen = req->req_data.send_msglen = req->req_data.buf_len = msglen =
|
|
Packit |
961e70 |
send_msglen;
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
_HFI_VDBG("match=NO (req=%p) opcode=%x src=%s mqtag=%08x.%08x.%08x"
|
|
Packit |
961e70 |
" send_msglen=%d\n", req, opcode,
|
|
Packit |
961e70 |
psmi_epaddr_get_name(src->epid),
|
|
Packit |
961e70 |
tag->tag[0], tag->tag[1], tag->tag[2], send_msglen);
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
switch (opcode) {
|
|
Packit |
961e70 |
case MQ_MSG_TINY:
|
|
Packit |
961e70 |
if (msglen > 0) {
|
|
Packit |
961e70 |
req->req_data.buf = psmi_mq_sysbuf_alloc(mq, msglen);
|
|
Packit |
961e70 |
psmi_assert(msglen == 0 || req->req_data.buf != NULL);
|
|
Packit |
961e70 |
mq->stats.rx_sysbuf_num++;
|
|
Packit |
961e70 |
mq->stats.rx_sysbuf_bytes += paylen;
|
|
Packit |
961e70 |
mq_copy_tiny((uint32_t *) req->req_data.buf,
|
|
Packit |
961e70 |
(uint32_t *) payload, msglen);
|
|
Packit |
961e70 |
} else
|
|
Packit |
961e70 |
req->req_data.buf = NULL;
|
|
Packit |
961e70 |
req->state = MQ_STATE_COMPLETE;
|
|
Packit |
961e70 |
break;
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
case MQ_MSG_SHORT:
|
|
Packit |
961e70 |
req->req_data.buf = psmi_mq_sysbuf_alloc(mq, msglen);
|
|
Packit |
961e70 |
psmi_assert(msglen == 0 || req->req_data.buf != NULL);
|
|
Packit |
961e70 |
mq->stats.rx_sysbuf_num++;
|
|
Packit |
961e70 |
mq->stats.rx_sysbuf_bytes += paylen;
|
|
Packit |
961e70 |
if (msglen <= paylen) {
|
|
Packit |
961e70 |
psmi_mq_mtucpy(req->req_data.buf, payload, msglen);
|
|
Packit |
961e70 |
} else {
|
|
Packit |
961e70 |
psmi_assert((msglen & ~0x3) == paylen);
|
|
Packit |
961e70 |
psmi_mq_mtucpy(req->req_data.buf, payload, paylen);
|
|
Packit |
961e70 |
/*
|
|
Packit |
961e70 |
* there are nonDW bytes attached in header,
|
|
Packit |
961e70 |
* copy after the DW payload.
|
|
Packit |
961e70 |
*/
|
|
Packit |
961e70 |
mq_copy_tiny((uint32_t *)(req->req_data.buf+paylen),
|
|
Packit |
961e70 |
(uint32_t *)&offset, msglen & 0x3);
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
req->state = MQ_STATE_COMPLETE;
|
|
Packit |
961e70 |
break;
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
case MQ_MSG_EAGER:
|
|
Packit |
961e70 |
req->send_msgoff = 0;
|
|
Packit |
961e70 |
req->req_data.buf = psmi_mq_sysbuf_alloc(mq, msglen);
|
|
Packit |
961e70 |
psmi_assert(msglen == 0 || req->req_data.buf != NULL);
|
|
Packit |
961e70 |
mq->stats.rx_sysbuf_num++;
|
|
Packit |
961e70 |
mq->stats.rx_sysbuf_bytes += paylen;
|
|
Packit |
961e70 |
req->state = MQ_STATE_UNEXP;
|
|
Packit |
961e70 |
req->type |= MQE_TYPE_EAGER_QUEUE;
|
|
Packit |
961e70 |
STAILQ_INSERT_TAIL(&mq->eager_q, req, nextq);
|
|
Packit |
961e70 |
_HFI_VDBG("unexp MSG_EAGER of length %d bytes pay=%d\n",
|
|
Packit |
961e70 |
msglen, paylen);
|
|
Packit |
961e70 |
if (paylen > 0)
|
|
Packit |
961e70 |
psmi_mq_handle_data(mq, req, offset, payload, paylen);
|
|
Packit |
961e70 |
break;
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
default:
|
|
Packit |
961e70 |
psmi_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR,
|
|
Packit |
961e70 |
"Internal error, unknown packet 0x%x",
|
|
Packit |
961e70 |
opcode);
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
mq->stats.rx_sys_bytes += msglen;
|
|
Packit |
961e70 |
mq->stats.rx_sys_num++;
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
if (msgorder) {
|
|
Packit |
961e70 |
mq_add_to_unexpected_hashes(mq, req);
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
/* caller will handle out of order case */
|
|
Packit |
961e70 |
*req_o = req; /* no match, will callback */
|
|
Packit |
961e70 |
return MQ_RET_UNEXP_OK;
|
|
Packit |
961e70 |
}
|
|
Packit |
961e70 |
|
|
Packit |
961e70 |
/*
 * Try to pair an unexpected request (ureq) with a posted receive.
 *
 * If no posted receive matches, ureq is added to the unexpected queues.
 * Otherwise the matching receive takes over ureq's peer, tag and
 * progress according to ureq's state, and ureq itself is freed.  An
 * unrecognised state is reported on stderr and the process aborts.
 *
 * Always returns 0.
 */
int psmi_mq_handle_outoforder(psm2_mq_t mq, psm2_mq_req_t ureq)
{
	psm2_mq_req_t ereq;
	uint32_t msglen;

	ereq = mq_req_match(mq, ureq->req_data.peer, &ureq->req_data.tag, 1);
	if (ereq == NULL) {
		/* Nothing posted yet: keep it as an unexpected message. */
		mq_add_to_unexpected_hashes(mq, ureq);
		return 0;
	}

	psmi_assert(MQE_TYPE_IS_RECV(ereq->type));
	ereq->req_data.peer = ureq->req_data.peer;
	ereq->req_data.tag = ureq->req_data.tag;
	msglen = mq_set_msglen(ereq, ereq->req_data.buf_len, ureq->req_data.send_msglen);

	switch (ureq->state) {
	case MQ_STATE_COMPLETE:
		/* A 0-byte message has no system buffer to copy or free. */
		if (ureq->req_data.buf != NULL) {
			psmi_mq_mtucpy(ereq->req_data.buf,
				       (const void *)ureq->req_data.buf, msglen);
			psmi_mq_sysbuf_free(mq, ureq->req_data.buf);
		}
		ereq->state = MQ_STATE_COMPLETE;
		ips_barrier();
		mq_qq_append(&mq->completed_q, ereq);
		break;

	case MQ_STATE_UNEXP:
		/* not done yet */
		ereq->state = MQ_STATE_MATCHED;
		ereq->msg_seqnum = ureq->msg_seqnum;
		ereq->ptl_req_ptr = ureq->ptl_req_ptr;
		ereq->send_msgoff = ureq->send_msgoff;
		ereq->recv_msgoff = min(ureq->recv_msgoff, msglen);
		if (ereq->recv_msgoff) {
			/* Carry over the data that has arrived so far. */
			psmi_mq_mtucpy(ereq->req_data.buf,
				       (const void *)ureq->req_data.buf,
				       ereq->recv_msgoff);
		}
		psmi_mq_sysbuf_free(mq, ureq->req_data.buf);
		ereq->type = ureq->type;
		/* Put ereq at ureq's position in the eager queue. */
		STAILQ_INSERT_AFTER(&mq->eager_q, ureq, ereq, nextq);
		STAILQ_REMOVE(&mq->eager_q, ureq, psm2_mq_req, nextq);
		break;

	case MQ_STATE_UNEXP_RV:
		/* rendez-vous ... */
		ereq->state = MQ_STATE_MATCHED;
		ereq->rts_peer = ureq->rts_peer;
		ereq->rts_sbuf = ureq->rts_sbuf;
		ereq->send_msgoff = ureq->send_msgoff;
		ereq->recv_msgoff = min(ureq->recv_msgoff, msglen);
		if (ereq->recv_msgoff) {
			psmi_mq_mtucpy(ereq->req_data.buf,
				       (const void *)ureq->req_data.buf,
				       ereq->recv_msgoff);
		}
		/* The system buffer is released only when send_msgoff != 0. */
		if (ereq->send_msgoff) {
			psmi_mq_sysbuf_free(mq, ureq->req_data.buf);
		}
		ereq->rts_callback = ureq->rts_callback;
		ereq->rts_reqidx_peer = ureq->rts_reqidx_peer;
		ereq->type = ureq->type;
		/* Invoked last, once ereq carries all rendezvous state. */
		ereq->rts_callback(ereq, 0);
		break;

	default:
		fprintf(stderr, "Unexpected state %d in req %p\n", ureq->state,
			ureq);
		fprintf(stderr, "type=%d, mq=%p, tag=%08x.%08x.%08x\n",
			ureq->type, ureq->mq, ureq->req_data.tag.tag[0],
			ureq->req_data.tag.tag[1], ureq->req_data.tag.tag[2]);
		abort();
	}

	psmi_mq_req_free(ureq);
	return 0;
}
|