Blame psm_mq_recv.c

Packit 961e70
/*
Packit 961e70
Packit 961e70
  This file is provided under a dual BSD/GPLv2 license.  When using or
Packit 961e70
  redistributing this file, you may do so under either license.
Packit 961e70
Packit 961e70
  GPL LICENSE SUMMARY
Packit 961e70
Packit 961e70
  Copyright(c) 2015 Intel Corporation.
Packit 961e70
Packit 961e70
  This program is free software; you can redistribute it and/or modify
Packit 961e70
  it under the terms of version 2 of the GNU General Public License as
Packit 961e70
  published by the Free Software Foundation.
Packit 961e70
Packit 961e70
  This program is distributed in the hope that it will be useful, but
Packit 961e70
  WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 961e70
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 961e70
  General Public License for more details.
Packit 961e70
Packit 961e70
  Contact Information:
Packit 961e70
  Intel Corporation, www.intel.com
Packit 961e70
Packit 961e70
  BSD LICENSE
Packit 961e70
Packit 961e70
  Copyright(c) 2015 Intel Corporation.
Packit 961e70
Packit 961e70
  Redistribution and use in source and binary forms, with or without
Packit 961e70
  modification, are permitted provided that the following conditions
Packit 961e70
  are met:
Packit 961e70
Packit 961e70
    * Redistributions of source code must retain the above copyright
Packit 961e70
      notice, this list of conditions and the following disclaimer.
Packit 961e70
    * Redistributions in binary form must reproduce the above copyright
Packit 961e70
      notice, this list of conditions and the following disclaimer in
Packit 961e70
      the documentation and/or other materials provided with the
Packit 961e70
      distribution.
Packit 961e70
    * Neither the name of Intel Corporation nor the names of its
Packit 961e70
      contributors may be used to endorse or promote products derived
Packit 961e70
      from this software without specific prior written permission.
Packit 961e70
Packit 961e70
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
Packit 961e70
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
Packit 961e70
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
Packit 961e70
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
Packit 961e70
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
Packit 961e70
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
Packit 961e70
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
Packit 961e70
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
Packit 961e70
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
Packit 961e70
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
Packit 961e70
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Packit 961e70
Packit 961e70
*/
Packit 961e70
Packit 961e70
/* Copyright (c) 2003-2015 Intel Corporation. All rights reserved. */
Packit 961e70
Packit 961e70
#include "psm_user.h"
Packit 961e70
#include "psm2_hal.h"
Packit 961e70
#include "psm_mq_internal.h"
Packit 961e70
#include "ptl_ips/ips_proto_header.h"
Packit 961e70
Packit 961e70
#ifdef PSM_CUDA
Packit 961e70
#include "psm_gdrcpy.h"
Packit 961e70
#endif
Packit 961e70
Packit 961e70
#if 0
Packit 961e70
/* Not exposed in public psm, but may extend parts of PSM 2.1 to support
Packit 961e70
 * this feature before 2.3 */
Packit 961e70
psm_mq_unexpected_callback_fn_t
Packit 961e70
psmi_mq_register_unexpected_callback(psm2_mq_t mq,
Packit 961e70
				     psm_mq_unexpected_callback_fn_t fn)
Packit 961e70
{
Packit 961e70
	psm_mq_unexpected_callback_fn_t old_fn = mq->unexpected_callback;
Packit 961e70
	mq->unexpected_callback = fn;
Packit 961e70
	return old_fn;
Packit 961e70
}
Packit 961e70
#endif
Packit 961e70
Packit 961e70
/*
 * Finish a rendezvous receive request.
 *
 * The request is accounted through psmi_mq_stats_rts_account(), moved to
 * MQ_STATE_COMPLETE and, unless psmi_is_req_internal() reports it as an
 * internal request, appended to the completed queue of the MQ it belongs to.
 */
void psmi_mq_handle_rts_complete(psm2_mq_req_t req)
{
	psm2_mq_t mq;

	mq = req->mq;

	/* Stats on rendez-vous messages */
	psmi_mq_stats_rts_account(req);

	/* The barrier is issued after the state store and before the append. */
	req->state = MQ_STATE_COMPLETE;
	ips_barrier();

	if (!psmi_is_req_internal(req)) {
		mq_qq_append(&mq->completed_q, req);
	}

	_HFI_VDBG("RTS complete, req=%p, recv_msglen = %d\n",
		  req, req->req_data.recv_msglen);
}
Packit 961e70
Packit 961e70
static void
Packit 961e70
psmi_mq_req_copy(psm2_mq_req_t req,
Packit 961e70
		 uint32_t offset, const void *buf, uint32_t nbytes)
Packit 961e70
{
Packit 961e70
	/* recv_msglen may be changed by unexpected receive req_data.buf. */
Packit 961e70
	uint32_t msglen_this, end;
Packit 961e70
	uint8_t *msgptr = (uint8_t *) req->req_data.buf + offset;
Packit 961e70
Packit 961e70
	/* out of receiving range. */
Packit 961e70
	if (offset >= req->req_data.recv_msglen) {
Packit 961e70
		req->send_msgoff += nbytes;
Packit 961e70
		return;
Packit 961e70
	}
Packit 961e70
Packit 961e70
	end = offset + nbytes;
Packit 961e70
	if (end > req->req_data.recv_msglen) {
Packit 961e70
		msglen_this = req->req_data.recv_msglen - offset;
Packit 961e70
		end = req->req_data.recv_msglen;
Packit 961e70
	} else {
Packit 961e70
		msglen_this = nbytes;
Packit 961e70
	}
Packit 961e70
Packit 961e70
	psmi_mq_mtucpy(msgptr, buf, msglen_this);
Packit 961e70
Packit 961e70
	if (req->recv_msgoff < end) {
Packit 961e70
		req->recv_msgoff = end;
Packit 961e70
	}
Packit 961e70
Packit 961e70
	req->send_msgoff += nbytes;
Packit 961e70
	return;
Packit 961e70
}
Packit 961e70
Packit 961e70
int
Packit 961e70
psmi_mq_handle_data(psm2_mq_t mq, psm2_mq_req_t req,
Packit 961e70
		    uint32_t offset, const void *buf, uint32_t nbytes)
Packit 961e70
{
Packit 961e70
	psmi_assert(req != NULL);
Packit 961e70
	int rc;
Packit 961e70
Packit 961e70
	if (req->state == MQ_STATE_MATCHED)
Packit 961e70
		rc = MQ_RET_MATCH_OK;
Packit 961e70
	else {
Packit 961e70
		psmi_assert(req->state == MQ_STATE_UNEXP);
Packit 961e70
		rc = MQ_RET_UNEXP_OK;
Packit 961e70
	}
Packit 961e70
Packit 961e70
	psmi_mq_req_copy(req, offset, buf, nbytes);
Packit 961e70
Packit 961e70
	/*
Packit 961e70
	 * the reason to use >= is because send_msgoff
Packit 961e70
	 * may be DW pad included.
Packit 961e70
	 */
Packit 961e70
	if (req->send_msgoff >= req->req_data.send_msglen) {
Packit 961e70
		if (req->type & MQE_TYPE_EAGER_QUEUE) {
Packit 961e70
			STAILQ_REMOVE(&mq->eager_q, req, psm2_mq_req, nextq);
Packit 961e70
		}
Packit 961e70
Packit 961e70
		if (req->state == MQ_STATE_MATCHED) {
Packit 961e70
			req->state = MQ_STATE_COMPLETE;
Packit 961e70
			ips_barrier();
Packit 961e70
			mq_qq_append(&mq->completed_q, req);
Packit 961e70
		} else {	/* MQ_STATE_UNEXP */
Packit 961e70
			req->state = MQ_STATE_COMPLETE;
Packit 961e70
		}
Packit 961e70
	}
Packit 961e70
Packit 961e70
	return rc;
Packit 961e70
}
Packit 961e70
Packit 961e70
static
Packit 961e70
void mq_add_to_unexpected_hashes(psm2_mq_t mq, psm2_mq_req_t req)
Packit 961e70
{
Packit 961e70
	int table;
Packit 961e70
	mq_qq_append(&mq->unexpected_q, req);
Packit 961e70
	req->q[PSM2_ANYTAG_ANYSRC] = &mq->unexpected_q;
Packit 961e70
	mq->unexpected_list_len++;
Packit 961e70
	if_pt (mq->nohash_fastpath) {
Packit 961e70
		if_pf (mq->unexpected_list_len >= HASH_THRESHOLD)
Packit 961e70
			psmi_mq_fastpath_disable(mq);
Packit 961e70
		return;
Packit 961e70
	}
Packit 961e70
Packit 961e70
	for (table = PSM2_TAG_SRC; table < PSM2_ANYTAG_ANYSRC; table++)
Packit 961e70
		mq_qq_append_which(mq->unexpected_htab,
Packit 961e70
				   table, mq->hashvals[table], req);
Packit 961e70
	mq->unexpected_hash_len++;
Packit 961e70
}
Packit 961e70
Packit 961e70
Packit 961e70
psm2_mq_req_t
Packit 961e70
mq_list_scan(struct mqq *q, psm2_epaddr_t src, psm2_mq_tag_t *tag, int which, uint64_t *time_threshold)
Packit 961e70
{
Packit 961e70
	psm2_mq_req_t *curp, cur;
Packit 961e70
Packit 961e70
	for (curp = &q->first;
Packit 961e70
	     ((cur = *curp) != NULL) && (cur->timestamp < *time_threshold);
Packit 961e70
	     curp = &cur->next[which]) {
Packit 961e70
		if ((cur->req_data.peer == PSM2_MQ_ANY_ADDR || src == cur->req_data.peer) &&
Packit 961e70
		    !((tag->tag[0] ^ cur->req_data.tag.tag[0]) & cur->req_data.tagsel.tag[0]) &&
Packit 961e70
		    !((tag->tag[1] ^ cur->req_data.tag.tag[1]) & cur->req_data.tagsel.tag[1]) &&
Packit 961e70
		    !((tag->tag[2] ^ cur->req_data.tag.tag[2]) & cur->req_data.tagsel.tag[2])) {
Packit 961e70
			*time_threshold = cur->timestamp;
Packit 961e70
			return cur;
Packit 961e70
		}
Packit 961e70
	}
Packit 961e70
	return NULL;
Packit 961e70
}
Packit 961e70
Packit 961e70
psm2_mq_req_t
Packit 961e70
mq_req_match(psm2_mq_t mq, psm2_epaddr_t src, psm2_mq_tag_t *tag, int remove)
Packit 961e70
{
Packit 961e70
	psm2_mq_req_t match[4];
Packit 961e70
	int table;
Packit 961e70
	uint64_t best_ts = -1;
Packit 961e70
Packit 961e70
	if (mq->nohash_fastpath) {
Packit 961e70
		table = PSM2_ANYTAG_ANYSRC;
Packit 961e70
		match[table] =
Packit 961e70
			mq_list_scan(&mq->expected_q,
Packit 961e70
				     src, tag, PSM2_ANYTAG_ANYSRC, &best_ts);
Packit 961e70
		if (match[table] && remove) {
Packit 961e70
			mq->expected_list_len--;
Packit 961e70
			mq_qq_remove_which(match[table], table);
Packit 961e70
		}
Packit 961e70
		return match[table];
Packit 961e70
	}
Packit 961e70
Packit 961e70
	mq->hashvals[PSM2_TAG_SRC] = hash_64(*(uint64_t *) tag->tag) % NUM_HASH_BUCKETS;
Packit 961e70
	mq->hashvals[PSM2_TAG_ANYSRC] = hash_32(tag->tag[0]) % NUM_HASH_BUCKETS;
Packit 961e70
	mq->hashvals[PSM2_ANYTAG_SRC] = hash_32(tag->tag[1]) % NUM_HASH_BUCKETS;
Packit 961e70
Packit 961e70
	for (table = PSM2_TAG_SRC; table < PSM2_ANYTAG_ANYSRC; table++)
Packit 961e70
		match[table] =
Packit 961e70
			mq_list_scan(&mq->expected_htab[table][mq->hashvals[table]],
Packit 961e70
				     src, tag, table, &best_ts);
Packit 961e70
	table = PSM2_ANYTAG_ANYSRC;
Packit 961e70
	match[table] = mq_list_scan(&mq->expected_q, src, tag, table, &best_ts);
Packit 961e70
Packit 961e70
	table = min_timestamp_4(match);
Packit 961e70
	if (table == -1)
Packit 961e70
		return NULL;
Packit 961e70
Packit 961e70
	if (remove) {
Packit 961e70
		if_pt (table == PSM2_ANYTAG_ANYSRC)
Packit 961e70
			mq->expected_list_len--;
Packit 961e70
		else
Packit 961e70
			mq->expected_hash_len--;
Packit 961e70
		mq_qq_remove_which(match[table], table);
Packit 961e70
		psmi_mq_fastpath_try_reenable(mq);
Packit 961e70
	}
Packit 961e70
	return match[table];
Packit 961e70
}
Packit 961e70
/*
Packit 961e70
 * This handles the rendezvous MPI envelopes, the packet might have the whole
Packit 961e70
 * message payload, or zero payload.
Packit 961e70
 */
Packit 961e70
int
Packit 961e70
psmi_mq_handle_rts(psm2_mq_t mq, psm2_epaddr_t src, psm2_mq_tag_t *tag,
Packit 961e70
		   uint32_t send_msglen, const void *payload, uint32_t paylen,
Packit 961e70
		   int msgorder, mq_rts_callback_fn_t cb, psm2_mq_req_t *req_o)
Packit 961e70
{
Packit 961e70
	psm2_mq_req_t req;
Packit 961e70
	uint32_t msglen;
Packit 961e70
	int rc;
Packit 961e70
Packit 961e70
	PSMI_LOCK_ASSERT(mq->progress_lock);
Packit 961e70
Packit 961e70
	if (msgorder && (req = mq_req_match(mq, src, tag, 1))) {
Packit 961e70
		/* we have a match, no need to callback */
Packit 961e70
		msglen = mq_set_msglen(req, req->req_data.buf_len, send_msglen);
Packit 961e70
		/* reset send_msglen because sender only sends this many */
Packit 961e70
		req->req_data.send_msglen = msglen;
Packit 961e70
		req->state = MQ_STATE_MATCHED;
Packit 961e70
		req->req_data.peer = src;
Packit 961e70
		req->req_data.tag = *tag;
Packit 961e70
Packit 961e70
		if (paylen > msglen) paylen = msglen;
Packit 961e70
		if (paylen) {
Packit 961e70
			psmi_mq_mtucpy(req->req_data.buf, payload, paylen);
Packit 961e70
		}
Packit 961e70
		req->recv_msgoff = req->send_msgoff = paylen;
Packit 961e70
		*req_o = req;	/* yes match */
Packit 961e70
		PSM2_LOG_EPM(OPCODE_LONG_RTS,PSM2_LOG_RX,src->epid,mq->ep->epid,
Packit 961e70
			    "req->rts_reqidx_peer: %d",req->rts_reqidx_peer);
Packit 961e70
		rc = MQ_RET_MATCH_OK;
Packit 961e70
	} else if (msgorder > 1) {
Packit 961e70
		/* There is NO request match, and this is the first time
Packit 961e70
		 * to try to process this packet, we leave the packet in
Packit 961e70
		 * hardware queue for retry in hope there is a request
Packit 961e70
		 * match next time, this is for performance
Packit 961e70
		 * consideration.
Packit 961e70
		 */
Packit 961e70
		rc = MQ_RET_UNEXP_NO_RESOURCES;
Packit 961e70
	} else {		/* No match, keep track of callback */
Packit 961e70
		req = psmi_mq_req_alloc(mq, MQE_TYPE_RECV);
Packit 961e70
		psmi_assert(req != NULL);
Packit 961e70
		/* We don't know recv_msglen yet but we set it here for
Packit 961e70
		 * mq_iprobe */
Packit 961e70
		req->req_data.send_msglen = req->req_data.recv_msglen = send_msglen;
Packit 961e70
		PSM2_LOG_EPM_COND(req->req_data.send_msglen > mq->hfi_thresh_rv,
Packit 961e70
				 OPCODE_LONG_RTS,PSM2_LOG_RX,src->epid,mq->ep->epid,
Packit 961e70
				    "req->rts_reqidx_peer: %d",req->rts_reqidx_peer);
Packit 961e70
		req->state = MQ_STATE_UNEXP_RV;
Packit 961e70
		req->req_data.peer = src;
Packit 961e70
		req->req_data.tag = *tag;
Packit 961e70
		req->rts_callback = cb;
Packit 961e70
		if (paylen > send_msglen) paylen = send_msglen;
Packit 961e70
		if (paylen) {
Packit 961e70
			req->req_data.buf = psmi_mq_sysbuf_alloc(mq, paylen);
Packit 961e70
			psmi_assert(paylen == 0 || req->req_data.buf != NULL);
Packit 961e70
			mq->stats.rx_sysbuf_num++;
Packit 961e70
			mq->stats.rx_sysbuf_bytes += paylen;
Packit 961e70
			psmi_mq_mtucpy(req->req_data.buf, payload, paylen);
Packit 961e70
		}
Packit 961e70
		req->recv_msgoff = req->send_msgoff = paylen;
Packit 961e70
Packit 961e70
		if (msgorder) {
Packit 961e70
			mq_add_to_unexpected_hashes(mq, req);
Packit 961e70
		}
Packit 961e70
		/* caller will handle out of order case */
Packit 961e70
		*req_o = req;	/* no match, will callback */
Packit 961e70
		rc = MQ_RET_UNEXP_OK;
Packit 961e70
	}
Packit 961e70
Packit 961e70
#ifdef PSM_DEBUG
Packit 961e70
	if (req)
Packit 961e70
		_HFI_VDBG("match=%s (req=%p) src=%s mqtag=%08x.%08x.%08x recvlen=%d "
Packit 961e70
			  "sendlen=%d errcode=%d\n",
Packit 961e70
			  rc == MQ_RET_MATCH_OK ? "YES" : "NO", req,
Packit 961e70
			  psmi_epaddr_get_name(src->epid),
Packit 961e70
			  req->req_data.tag.tag[0], req->req_data.tag.tag[1], req->req_data.tag.tag[2],
Packit 961e70
			  req->req_data.recv_msglen, req->req_data.send_msglen, req->req_data.error_code);
Packit 961e70
	else
Packit 961e70
		_HFI_VDBG("match=%s (req=%p) src=%s\n",
Packit 961e70
			  rc == MQ_RET_MATCH_OK ? "YES" : "NO", req,
Packit 961e70
			  psmi_epaddr_get_name(src->epid));
Packit 961e70
#endif /* #ifdef PSM_DEBUG */
Packit 961e70
	return rc;
Packit 961e70
}
Packit 961e70
Packit 961e70
/*
Packit 961e70
 * This handles the regular (i.e. non-rendezvous MPI envelopes)
Packit 961e70
 */
Packit 961e70
int
Packit 961e70
psmi_mq_handle_envelope(psm2_mq_t mq, psm2_epaddr_t src, psm2_mq_tag_t *tag,
Packit 961e70
			uint32_t send_msglen, uint32_t offset,
Packit 961e70
			const void *payload, uint32_t paylen, int msgorder,
Packit 961e70
			uint32_t opcode, psm2_mq_req_t *req_o)
Packit 961e70
{
Packit 961e70
	psm2_mq_req_t req;
Packit 961e70
	uint32_t msglen;
Packit Service 7ed5cc
	psmi_mtucpy_fn_t psmi_mtucpy_fn;
Packit 961e70
Packit 961e70
	if (msgorder && (req = mq_req_match(mq, src, tag, 1))) {
Packit 961e70
		/* we have a match */
Packit Service 7ed5cc
		void *user_buffer = req->req_data.buf;
Packit 961e70
		psmi_assert(MQE_TYPE_IS_RECV(req->type));
Packit 961e70
		req->req_data.peer = src;
Packit 961e70
		req->req_data.tag = *tag;
Packit 961e70
		msglen = mq_set_msglen(req, req->req_data.buf_len, send_msglen);
Packit 961e70
Packit 961e70
		_HFI_VDBG("match=YES (req=%p) opcode=%x src=%s mqtag=%x.%x.%x"
Packit 961e70
			  " msglen=%d paylen=%d\n", req, opcode,
Packit 961e70
			  psmi_epaddr_get_name(src->epid),
Packit 961e70
			  tag->tag[0], tag->tag[1], tag->tag[2], msglen,
Packit 961e70
			  paylen);
Packit 961e70
Packit 961e70
		switch (opcode) {
Packit 961e70
		case MQ_MSG_TINY:
Packit 961e70
			/* mq_copy_tiny() can handle zero byte */
Packit 961e70
#ifdef PSM_CUDA
Packit 961e70
			if (PSMI_USE_GDR_COPY(req, msglen)) {
Packit Service 7ed5cc
				user_buffer = gdr_convert_gpu_to_host_addr(GDR_FD,
Packit 961e70
								(unsigned long)req->req_data.buf,
Packit 961e70
								msglen, 1, src->proto);
Packit 961e70
			}
Packit 961e70
#endif
Packit Service 7ed5cc
			mq_copy_tiny((uint32_t *) user_buffer, (uint32_t *) payload, msglen);
Packit 961e70
Packit 961e70
			req->state = MQ_STATE_COMPLETE;
Packit 961e70
			ips_barrier();
Packit 961e70
			mq_qq_append(&mq->completed_q, req);
Packit 961e70
			break;
Packit 961e70
Packit 961e70
		case MQ_MSG_SHORT:	/* message fits in 1 payload */
Packit Service 7ed5cc
			psmi_mtucpy_fn = psmi_mq_mtucpy;
Packit 961e70
#ifdef PSM_CUDA
Packit 961e70
			if (PSMI_USE_GDR_COPY(req, msglen)) {
Packit 961e70
				user_buffer = gdr_convert_gpu_to_host_addr(GDR_FD,
Packit 961e70
							(unsigned long)req->req_data.buf,
Packit 961e70
							msglen, 1, src->proto);
Packit 961e70
				psmi_mtucpy_fn = psmi_mq_mtucpy_host_mem;
Packit 961e70
			}
Packit 961e70
#endif
Packit 961e70
			if (msglen <= paylen) {
Packit 961e70
				psmi_mtucpy_fn(user_buffer, payload, msglen);
Packit 961e70
			} else {
Packit 961e70
				psmi_assert((msglen & ~0x3) == paylen);
Packit 961e70
				psmi_mtucpy_fn(user_buffer, payload, paylen);
Packit 961e70
				/*
Packit 961e70
				 * there are nonDW bytes attached in header,
Packit 961e70
				 * copy after the DW payload.
Packit 961e70
				 */
Packit 961e70
				mq_copy_tiny((uint32_t *)(user_buffer+paylen),
Packit 961e70
					(uint32_t *)&offset, msglen & 0x3);
Packit 961e70
			}
Packit 961e70
			req->state = MQ_STATE_COMPLETE;
Packit 961e70
			ips_barrier();
Packit 961e70
			mq_qq_append(&mq->completed_q, req);
Packit 961e70
			break;
Packit 961e70
Packit 961e70
		case MQ_MSG_EAGER:
Packit 961e70
			req->state = MQ_STATE_MATCHED;
Packit 961e70
			req->type |= MQE_TYPE_EAGER_QUEUE;
Packit 961e70
			req->send_msgoff = req->recv_msgoff = 0;
Packit 961e70
			STAILQ_INSERT_TAIL(&mq->eager_q, req, nextq);
Packit 961e70
			_HFI_VDBG("exp MSG_EAGER of length %d bytes pay=%d\n",
Packit 961e70
				  msglen, paylen);
Packit 961e70
#ifdef PSM_CUDA
Packit 961e70
			if (PSMI_USE_GDR_COPY(req, req->req_data.send_msglen)) {
Packit 961e70
				req->req_data.buf = gdr_convert_gpu_to_host_addr(GDR_FD,
Packit 961e70
						(unsigned long)req->user_gpu_buffer,
Packit 961e70
						req->req_data.send_msglen, 1, src->proto);
Packit 961e70
			}
Packit 961e70
#endif
Packit 961e70
			if (paylen > 0)
Packit 961e70
				psmi_mq_handle_data(mq, req, offset, payload,
Packit 961e70
						    paylen);
Packit 961e70
			break;
Packit 961e70
Packit 961e70
		default:
Packit 961e70
			psmi_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR,
Packit 961e70
					  "Internal error, unknown packet 0x%x",
Packit 961e70
					  opcode);
Packit 961e70
		}
Packit 961e70
Packit 961e70
		mq->stats.rx_user_bytes += msglen;
Packit 961e70
		mq->stats.rx_user_num++;
Packit 961e70
Packit 961e70
		*req_o = req;	/* yes match */
Packit 961e70
		return MQ_RET_MATCH_OK;
Packit 961e70
	}
Packit 961e70
Packit 961e70
	/* unexpected message or out of order message. */
Packit 961e70
Packit 961e70
#if 0
Packit 961e70
	/*
Packit 961e70
	 * Keep a callback here in case we want to fit some other high-level
Packit 961e70
	 * protocols over MQ (i.e. shmem).  These protocols would bypass the
Packit 961e70
	 * normal message handling and go to higher-level message handlers.
Packit 961e70
	 */
Packit 961e70
	if (msgorder && mq->unexpected_callback) {
Packit 961e70
		mq->unexpected_callback(mq, opcode, epaddr, req_data.tag, send_msglen,
Packit 961e70
					payload, paylen);
Packit 961e70
		*req_o = NULL;
Packit 961e70
		return MQ_RET_UNEXP_OK;
Packit 961e70
	}
Packit 961e70
#endif
Packit 961e70
Packit 961e70
	if (msgorder > 1) {
Packit 961e70
		/* There is NO request match, and this is the first time
Packit 961e70
		 * to try to process this packet, we leave the packet in
Packit 961e70
		 * hardware queue for retry in hope there is a request
Packit 961e70
		 * match nex time, this is for performance
Packit 961e70
		 * consideration.
Packit 961e70
		 */
Packit 961e70
		return MQ_RET_UNEXP_NO_RESOURCES;
Packit 961e70
	}
Packit 961e70
Packit 961e70
	req = psmi_mq_req_alloc(mq, MQE_TYPE_RECV);
Packit 961e70
	psmi_assert(req != NULL);
Packit 961e70
Packit 961e70
	req->req_data.peer = src;
Packit 961e70
	req->req_data.tag = *tag;
Packit 961e70
	req->recv_msgoff = 0;
Packit 961e70
	req->req_data.recv_msglen = req->req_data.send_msglen = req->req_data.buf_len = msglen =
Packit 961e70
	    send_msglen;
Packit 961e70
Packit 961e70
	_HFI_VDBG("match=NO (req=%p) opcode=%x src=%s mqtag=%08x.%08x.%08x"
Packit 961e70
		  " send_msglen=%d\n", req, opcode,
Packit 961e70
		  psmi_epaddr_get_name(src->epid),
Packit 961e70
		  tag->tag[0], tag->tag[1], tag->tag[2], send_msglen);
Packit 961e70
Packit 961e70
	switch (opcode) {
Packit 961e70
	case MQ_MSG_TINY:
Packit 961e70
		if (msglen > 0) {
Packit 961e70
			req->req_data.buf = psmi_mq_sysbuf_alloc(mq, msglen);
Packit 961e70
			psmi_assert(msglen == 0 || req->req_data.buf != NULL);
Packit 961e70
			mq->stats.rx_sysbuf_num++;
Packit 961e70
			mq->stats.rx_sysbuf_bytes += paylen;
Packit 961e70
			mq_copy_tiny((uint32_t *) req->req_data.buf,
Packit 961e70
				     (uint32_t *) payload, msglen);
Packit 961e70
		} else
Packit 961e70
			req->req_data.buf = NULL;
Packit 961e70
		req->state = MQ_STATE_COMPLETE;
Packit 961e70
		break;
Packit 961e70
Packit 961e70
	case MQ_MSG_SHORT:
Packit 961e70
		req->req_data.buf = psmi_mq_sysbuf_alloc(mq, msglen);
Packit 961e70
		psmi_assert(msglen == 0 || req->req_data.buf != NULL);
Packit 961e70
		mq->stats.rx_sysbuf_num++;
Packit 961e70
		mq->stats.rx_sysbuf_bytes += paylen;
Packit 961e70
		if (msglen <= paylen) {
Packit 961e70
			psmi_mq_mtucpy(req->req_data.buf, payload, msglen);
Packit 961e70
		} else {
Packit 961e70
			psmi_assert((msglen & ~0x3) == paylen);
Packit 961e70
			psmi_mq_mtucpy(req->req_data.buf, payload, paylen);
Packit 961e70
			/*
Packit 961e70
			 * there are nonDW bytes attached in header,
Packit 961e70
			 * copy after the DW payload.
Packit 961e70
			 */
Packit 961e70
			mq_copy_tiny((uint32_t *)(req->req_data.buf+paylen),
Packit 961e70
				(uint32_t *)&offset, msglen & 0x3);
Packit 961e70
		}
Packit 961e70
		req->state = MQ_STATE_COMPLETE;
Packit 961e70
		break;
Packit 961e70
Packit 961e70
	case MQ_MSG_EAGER:
Packit 961e70
		req->send_msgoff = 0;
Packit 961e70
		req->req_data.buf = psmi_mq_sysbuf_alloc(mq, msglen);
Packit 961e70
		psmi_assert(msglen == 0 || req->req_data.buf != NULL);
Packit 961e70
		mq->stats.rx_sysbuf_num++;
Packit 961e70
		mq->stats.rx_sysbuf_bytes += paylen;
Packit 961e70
		req->state = MQ_STATE_UNEXP;
Packit 961e70
		req->type |= MQE_TYPE_EAGER_QUEUE;
Packit 961e70
		STAILQ_INSERT_TAIL(&mq->eager_q, req, nextq);
Packit 961e70
		_HFI_VDBG("unexp MSG_EAGER of length %d bytes pay=%d\n",
Packit 961e70
			  msglen, paylen);
Packit 961e70
		if (paylen > 0)
Packit 961e70
			psmi_mq_handle_data(mq, req, offset, payload, paylen);
Packit 961e70
		break;
Packit 961e70
Packit 961e70
	default:
Packit 961e70
		psmi_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR,
Packit 961e70
				  "Internal error, unknown packet 0x%x",
Packit 961e70
				  opcode);
Packit 961e70
	}
Packit 961e70
Packit 961e70
	mq->stats.rx_sys_bytes += msglen;
Packit 961e70
	mq->stats.rx_sys_num++;
Packit 961e70
Packit 961e70
	if (msgorder) {
Packit 961e70
		mq_add_to_unexpected_hashes(mq, req);
Packit 961e70
	}
Packit 961e70
	/* caller will handle out of order case */
Packit 961e70
	*req_o = req;		/* no match, will callback */
Packit 961e70
	return MQ_RET_UNEXP_OK;
Packit 961e70
}
Packit 961e70
Packit 961e70
/*
 * Process an unexpected request (ureq) that was held back from matching.
 *
 * If no posted receive matches ureq's peer and tag, ureq is added to the
 * unexpected queues and kept.  Otherwise the matching posted receive (ereq)
 * takes over from ureq according to ureq's state:
 *
 *  - MQ_STATE_COMPLETE: the buffered data is copied into ereq's buffer, the
 *    system buffer is released and ereq is queued as completed.
 *  - MQ_STATE_UNEXP: ereq inherits ureq's progress and replaces it at the
 *    same position in the eager queue.
 *  - MQ_STATE_UNEXP_RV: ereq inherits the rendezvous bookkeeping and the
 *    RTS callback is invoked on it.
 *
 * Any other state prints a diagnostic to stderr and aborts.  In the matched
 * cases ureq is freed before returning.  Always returns 0.
 */
int psmi_mq_handle_outoforder(psm2_mq_t mq, psm2_mq_req_t ureq)
{
	psm2_mq_req_t ereq;
	uint32_t msglen;

	ereq = mq_req_match(mq, ureq->req_data.peer, &ureq->req_data.tag, 1);
	if (ereq == NULL) {
		mq_add_to_unexpected_hashes(mq, ureq);
		return 0;
	}

	psmi_assert(MQE_TYPE_IS_RECV(ereq->type));
	ereq->req_data.peer = ureq->req_data.peer;
	ereq->req_data.tag = ureq->req_data.tag;
	msglen = mq_set_msglen(ereq, ereq->req_data.buf_len, ureq->req_data.send_msglen);

	switch (ureq->state) {
	case MQ_STATE_COMPLETE:
		if (ureq->req_data.buf != NULL) {	/* 0-byte don't alloc a sysreq_data.buf */
			psmi_mq_mtucpy(ereq->req_data.buf, (const void *)ureq->req_data.buf,
				       msglen);
			psmi_mq_sysbuf_free(mq, ureq->req_data.buf);
		}
		ereq->state = MQ_STATE_COMPLETE;
		ips_barrier();
		mq_qq_append(&mq->completed_q, ereq);
		break;
	case MQ_STATE_UNEXP:	/* not done yet */
		ereq->state = MQ_STATE_MATCHED;
		ereq->msg_seqnum = ureq->msg_seqnum;
		ereq->ptl_req_ptr = ureq->ptl_req_ptr;
		ereq->send_msgoff = ureq->send_msgoff;
		/* carry over only what fits in msglen */
		ereq->recv_msgoff = min(ureq->recv_msgoff, msglen);
		if (ereq->recv_msgoff) {
			psmi_mq_mtucpy(ereq->req_data.buf,
				       (const void *)ureq->req_data.buf,
				       ereq->recv_msgoff);
		}
		psmi_mq_sysbuf_free(mq, ureq->req_data.buf);
		ereq->type = ureq->type;
		/* insert ereq right after ureq, then drop ureq from the queue */
		STAILQ_INSERT_AFTER(&mq->eager_q, ureq, ereq, nextq);
		STAILQ_REMOVE(&mq->eager_q, ureq, psm2_mq_req, nextq);
		break;
	case MQ_STATE_UNEXP_RV:	/* rendez-vous ... */
		ereq->state = MQ_STATE_MATCHED;
		ereq->rts_peer = ureq->rts_peer;
		ereq->rts_sbuf = ureq->rts_sbuf;
		ereq->send_msgoff = ureq->send_msgoff;
		ereq->recv_msgoff = min(ureq->recv_msgoff, msglen);
		if (ereq->recv_msgoff) {
			psmi_mq_mtucpy(ereq->req_data.buf,
				       (const void *)ureq->req_data.buf,
				       ereq->recv_msgoff);
		}
		/* the system buffer is released only when send_msgoff is non-zero */
		if (ereq->send_msgoff) {
			psmi_mq_sysbuf_free(mq, ureq->req_data.buf);
		}
		ereq->rts_callback = ureq->rts_callback;
		ereq->rts_reqidx_peer = ureq->rts_reqidx_peer;
		ereq->type = ureq->type;
		ereq->rts_callback(ereq, 0);
		break;
	default:
		/* %p requires a void * argument, hence the explicit casts */
		fprintf(stderr, "Unexpected state %d in req %p\n", ureq->state,
			(void *)ureq);
		fprintf(stderr, "type=%d, mq=%p, tag=%08x.%08x.%08x\n",
			ureq->type, (void *)ureq->mq, ureq->req_data.tag.tag[0],
			ureq->req_data.tag.tag[1], ureq->req_data.tag.tag[2]);
		abort();
	}

	psmi_mq_req_free(ureq);
	return 0;
}