/*
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 * Intel Corporation, www.intel.com
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Copyright (c) 2003-2015 Intel Corporation. All rights reserved. */

#include "psm_user.h"
#include "psm2_hal.h"

#include "ips_epstate.h"
#include "ips_proto.h"
#include "ips_expected_proto.h"
#include "ips_proto_help.h"
#include "ips_proto_internal.h"

/*
 * Receive header queue initialization.
 */
psm2_error_t
ips_recvhdrq_init(const psmi_context_t *context,
		  const struct ips_epstate *epstate,
		  const struct ips_proto *proto,
		  const struct ips_recvhdrq_callbacks *callbacks,
		  uint32_t subcontext,
		  struct ips_recvhdrq *recvq,
		  struct ips_recvhdrq_state *recvq_state,
		  psmi_hal_cl_q psm_hal_cl_hdrq)
{
	psm2_error_t err = PSM2_OK;

	memset(recvq, 0, sizeof(*recvq));
	recvq->proto = (struct ips_proto *)proto;
	recvq->state = recvq_state;
	recvq->context = context;
	recvq->subcontext = subcontext;
	recvq->psm_hal_cl_hdrq = psm_hal_cl_hdrq;
	pthread_spin_init(&recvq->hdrq_lock, PTHREAD_PROCESS_SHARED);

	recvq->hdrq_elemlast =
	    ((psmi_hal_get_rx_hdr_q_cnt(context->psm_hw_ctxt) - 1) *
	     (psmi_hal_get_rx_hdr_q_ent_size(context->psm_hw_ctxt) >>
	      BYTE2DWORD_SHIFT));

	recvq->epstate = epstate;
	recvq->recvq_callbacks = *callbacks;	/* deep copy */
	SLIST_INIT(&recvq->pending_acks);

	recvq->state->hdrq_head = 0;
	recvq->state->rcv_egr_index_head = NO_EAGER_UPDATE;
	recvq->state->num_hdrq_done = 0;
	recvq->state->num_egrq_done = 0;
	recvq->state->hdr_countdown = 0;
	recvq->state->hdrq_cachedlastscan = 0;

	{
		union psmi_envvar_val env_hdr_update;

		psmi_getenv("PSM2_HEAD_UPDATE",
			    "header queue update interval (0 to update after all entries are processed). Default is 64",
			    PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_UINT_FLAGS,
			    (union psmi_envvar_val) 64, &env_hdr_update);

		/* Cap the max header update interval to the size of the
		 * header/eager queue */
		recvq->state->head_update_interval =
		    min(env_hdr_update.e_uint,
			psmi_hal_get_rx_hdr_q_cnt(context->psm_hw_ctxt) - 1);
		recvq->state->egrq_update_interval = 1;
	}
	return err;
}

/*
 * Flush the eager buffers by setting the eager index head to the eager index
 * tail if the eager buffer queue is full.
 *
 * Called when we had eager buffer overflows (ERR_TID/HFI_RHF_H_TIDERR was set
 * in the RHF errors) and no good eager packets were received, so the eager
 * head wasn't advanced.
 */
#if 0
static void ips_flush_egrq_if_required(struct ips_recvhdrq *recvq)
{
	const uint32_t tail = ips_recvq_tail_get(&recvq->egrq);
	const uint32_t head = ips_recvq_head_get(&recvq->egrq);
	uint32_t egr_cnt = recvq->egrq.elemcnt;

	if ((head % egr_cnt) == ((tail + 1) % egr_cnt)) {
		_HFI_DBG("eager array full after overflow, flushing "
			 "(head %llx, tail %llx)\n",
			 (long long)head, (long long)tail);
		recvq->proto->stats.egr_overflow++;
	}
	return;
}
#endif
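/*
 * Illustrative sketch (not part of the build): PSM2_HEAD_UPDATE, read in
 * ips_recvhdrq_init() above, controls how lazily the software head index is
 * published back to hardware.  The self-contained example below shows only
 * the batching idea; it uses a plain array and a fake head register as
 * assumptions for illustration, not the PSM2 HAL interface.
 */
#if 0
static void example_lazy_head_update(volatile uint32_t *hw_head_reg,
				     const uint32_t *queue, uint32_t elemcnt,
				     uint32_t tail, uint32_t interval)
{
	uint32_t head = *hw_head_reg;
	uint32_t done = 0;

	while (head != tail) {
		(void)queue[head];		/* consume one entry */
		head = (head + 1) % elemcnt;

		/* Publish the head to "hardware" only every 'interval'
		 * entries to reduce MMIO writes, mirroring what
		 * head_update_interval does for the real header queue. */
		if (interval && ++done == interval) {
			*hw_head_reg = head;
			done = 0;
		}
	}
	*hw_head_reg = head;	/* final update once the queue is drained */
}
#endif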
/*
 * Helpers for ips_recvhdrq_progress.
 */

static __inline__ int
_get_proto_subcontext(const struct ips_message_header *p_hdr)
{
	return ((__be32_to_cpu(p_hdr->bth[1]) >>
		 HFI_BTH_SUBCTXT_SHIFT) & HFI_BTH_SUBCTXT_MASK);
}

static __inline__ void _dump_invalid_pkt(struct ips_recvhdrq_event *rcv_ev)
{
	char *payload = ips_recvhdrq_event_payload(rcv_ev);
	uint32_t paylen = ips_recvhdrq_event_paylen(rcv_ev) +
	    ((__be32_to_cpu(rcv_ev->p_hdr->bth[0]) >> 20) & 3);

#ifdef PSM_DEBUG
	ips_proto_show_header((struct ips_message_header *)
			      rcv_ev->p_hdr, "received invalid pkt");
#endif
	if (hfi_debug & __HFI_PKTDBG) {
		ips_proto_dump_frame(rcv_ev->p_hdr, HFI_MESSAGE_HDR_SIZE,
				     "header");
		if (paylen)
			ips_proto_dump_frame(payload, paylen, "data");
	}
}

static __inline__ void
_update_error_stats(struct ips_proto *proto, uint32_t err)
{
	if (err & PSMI_HAL_RHF_ERR_ICRC)
		proto->error_stats.num_icrc_err++;
	if (err & PSMI_HAL_RHF_ERR_ECC)
		proto->error_stats.num_ecc_err++;
	if (err & PSMI_HAL_RHF_ERR_LEN)
		proto->error_stats.num_len_err++;
	if (err & PSMI_HAL_RHF_ERR_TID)
		proto->error_stats.num_tid_err++;
	if (err & PSMI_HAL_RHF_ERR_DC)
		proto->error_stats.num_dc_err++;
	if (err & PSMI_HAL_RHF_ERR_DCUN)
		proto->error_stats.num_dcunc_err++;
	if (err & PSMI_HAL_RHF_ERR_KHDRLEN)
		proto->error_stats.num_khdrlen_err++;
}

#ifdef PSM_DEBUG

static int _check_headers(struct ips_recvhdrq_event *rcv_ev, psmi_hal_cl_q cl_q)
{
	struct ips_recvhdrq *recvq = (struct ips_recvhdrq *)rcv_ev->recvq;
	struct ips_proto *proto = rcv_ev->proto;
	uint32_t *lrh = (uint32_t *) rcv_ev->p_hdr;
	uint32_t dest_context;
	const uint16_t pkt_dlid = __be16_to_cpu(rcv_ev->p_hdr->lrh[1]);
	const uint16_t base_dlid =
	    __be16_to_cpu(recvq->proto->epinfo.ep_base_lid);

	/* Check that the receive header queue entry has a sane sequence number */
	if (psmi_hal_check_rhf_sequence_number(psmi_hal_rhf_get_seq(rcv_ev->psm_hal_rhf))
	    != PSM_HAL_ERROR_OK) {
		unsigned int seqno = 0;

		psmi_hal_get_rhf_expected_sequence_number(&seqno, cl_q,
							  recvq->context->psm_hw_ctxt);
		psmi_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR,
				  "ErrPkt: Invalid header queue entry! RHF Sequence in Hdrq Seq: %d, Recvq State Seq: %d. LRH[0]: 0x%08x, LRH[1] (PktCount): 0x%08x\n",
				  psmi_hal_rhf_get_seq(rcv_ev->psm_hal_rhf),
				  seqno, lrh[0], lrh[1]);
		return -1;
	}

	/* Verify that the packet was destined for our context */
	dest_context = ips_proto_dest_context_from_header(proto, rcv_ev->p_hdr);
	if_pf(dest_context != recvq->proto->epinfo.ep_context) {

		struct ips_recvhdrq_state *state = recvq->state;

		/* Packet not targeted at us. Drop packet and continue */
		ips_proto_dump_err_stats(proto);
		_dump_invalid_pkt(rcv_ev);

		psmi_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR,
				  "ErrPkt: Received packet for context %d on context %d. Receive Header Queue offset: 0x%x. Exiting.\n",
				  dest_context, recvq->proto->epinfo.ep_context,
				  state->hdrq_head);

		return -1;
	}

	/* Verify that the RHF packet length matches the length in the LRH */
	if_pf(psmi_hal_rhf_get_packet_length(rcv_ev->psm_hal_rhf) !=
	      (__be16_to_cpu(rcv_ev->p_hdr->lrh[2]) << BYTE2DWORD_SHIFT)) {
		_HFI_EPDBG
		    ("ErrPkt: RHF Packet Len (0x%x) does not match LRH (0x%x).\n",
		     psmi_hal_rhf_get_packet_length(rcv_ev->psm_hal_rhf) >> 2,
		     __be16_to_cpu(rcv_ev->p_hdr->lrh[2]));

		ips_proto_dump_err_stats(proto);
		_dump_invalid_pkt(rcv_ev);
		return -1;
	}

	/* Verify that the DLID matches our local LID. */
	if_pf(!((base_dlid <= pkt_dlid) &&
		(pkt_dlid <=
		 (base_dlid + (1 << recvq->proto->epinfo.ep_lmc))))) {
		_HFI_EPDBG
		    ("ErrPkt: DLID in LRH (0x%04x) does not match local LID (0x%04x) Skipping packet!\n",
		     rcv_ev->p_hdr->lrh[1], recvq->proto->epinfo.ep_base_lid);
		ips_proto_dump_err_stats(proto);
		_dump_invalid_pkt(rcv_ev);
		return -1;
	}

	return 0;
}
#endif

static __inline__ int do_pkt_cksum(struct ips_recvhdrq_event *rcv_ev)
{
	char *payload = ips_recvhdrq_event_payload(rcv_ev);
	uint32_t paylen = ips_recvhdrq_event_paylen(rcv_ev) +
	    ((__be32_to_cpu(rcv_ev->p_hdr->bth[0]) >> 20) & 3);
	uint32_t *ckptr;
	uint32_t recv_cksum, cksum, dest_subcontext;

	/* With checksums enabled, every packet has a payload */
	psmi_assert_always(payload);

	ckptr = (uint32_t *) (payload + paylen);
	recv_cksum = ckptr[0];

	/* Calculate checksum over hdr + payload (includes any padding words) */
	cksum = 0xffffffff;
	cksum = ips_crc_calculate(HFI_MESSAGE_HDR_SIZE,
				  (uint8_t *) rcv_ev->p_hdr, cksum);
	if (paylen)
		cksum = ips_crc_calculate(paylen, (uint8_t *) payload, cksum);

	if ((cksum != recv_cksum) || (ckptr[0] != ckptr[1])) {
		struct ips_epstate_entry *epstaddr;
		uint32_t lcontext;
		psmi_hal_cl_idx hd, tl;

		epstaddr = ips_epstate_lookup(rcv_ev->recvq->epstate,
					      rcv_ev->p_hdr->connidx);
		epstaddr = (epstaddr && epstaddr->ipsaddr) ? epstaddr : NULL;

		lcontext = epstaddr ? rcv_ev->proto->epinfo.ep_context : -1;

		hd = psmi_hal_get_cl_q_head_index(PSM_HAL_CL_Q_RX_HDR_Q,
						  rcv_ev->recvq->context->psm_hw_ctxt);
		tl = psmi_hal_get_cl_q_tail_index(PSM_HAL_CL_Q_RX_HDR_Q,
						  rcv_ev->recvq->context->psm_hw_ctxt);

		dest_subcontext = _get_proto_subcontext(rcv_ev->p_hdr);

		_HFI_ERROR
		    ("ErrPkt: SharedContext: %s. Local Context: %i, Checksum mismatch from LID %d! Received Checksum: 0x%08x, Expected: 0x%08x & 0x%08x. Opcode: 0x%08x, Error Flag: 0x%08x. hdrq hd 0x%x tl 0x%x rhf 0x%" PRIx64 ", rhfseq 0x%x\n",
		     (dest_subcontext != rcv_ev->recvq->subcontext) ? "Yes" : "No",
		     lcontext,
		     epstaddr ? __be16_to_cpu(epstaddr->ipsaddr->pathgrp->pg_base_dlid) : -1,
		     cksum, ckptr[0], ckptr[1],
		     _get_proto_hfi_opcode(rcv_ev->p_hdr),
		     psmi_hal_rhf_get_all_err_flags(rcv_ev->psm_hal_rhf),
		     hd, tl, rcv_ev->psm_hal_rhf.raw_rhf,
		     psmi_hal_rhf_get_seq(rcv_ev->psm_hal_rhf));

		/* Dump the packet */
		_dump_invalid_pkt(rcv_ev);
		return 0;	/* Packet checksum error */
	}

	return 1;
}

PSMI_ALWAYS_INLINE(
void process_pending_acks(struct ips_recvhdrq *recvq))
{
	ips_scb_t ctrlscb;
	struct ips_message_header *msg_hdr = NULL;

	/* If any acks are pending, dispatch them now */
	while (!SLIST_EMPTY(&recvq->pending_acks)) {
		struct ips_flow *flow = SLIST_FIRST(&recvq->pending_acks);

		SLIST_REMOVE_HEAD(&recvq->pending_acks, next);
		SLIST_NEXT(flow, next) = NULL;

		ctrlscb.scb_flags = 0;
		msg_hdr = &ctrlscb.ips_lrh;
		msg_hdr->ack_seq_num = flow->recv_seq_num.psn_num;

		if (flow->flags & IPS_FLOW_FLAG_PENDING_ACK) {
			psmi_assert_always((flow->flags &
					    IPS_FLOW_FLAG_PENDING_NAK) == 0);

			flow->flags &= ~IPS_FLOW_FLAG_PENDING_ACK;
			ips_proto_send_ctrl_message(flow, OPCODE_ACK,
						    &flow->ipsaddr->ctrl_msg_queued,
						    &ctrlscb, ctrlscb.cksum, 0);
		} else {
			psmi_assert_always(flow->flags &
					   IPS_FLOW_FLAG_PENDING_NAK);

			flow->flags &= ~IPS_FLOW_FLAG_PENDING_NAK;
			ips_proto_send_ctrl_message(flow, OPCODE_NAK,
						    &flow->ipsaddr->ctrl_msg_queued,
						    &ctrlscb, ctrlscb.cksum, 0);
		}
	}
}
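/*
 * Illustrative sketch (not part of the build): when software checksums are
 * enabled, do_pkt_cksum() above expects the payload to be followed by the
 * same 32-bit checksum word stored twice.  The self-contained example below
 * shows only that trailer layout and the double comparison; example_sum32()
 * is a trivial stand-in, not the real ips_crc_calculate() algorithm.
 */
#if 0
static uint32_t example_sum32(const uint8_t *buf, uint32_t len, uint32_t seed)
{
	uint32_t s = seed;

	while (len--)
		s += *buf++;
	return s;
}

static int example_verify_trailer(const uint8_t *hdr, uint32_t hdrlen,
				  const uint8_t *payload, uint32_t paylen)
{
	/* Two copies of the checksum follow the (padded) payload. */
	const uint32_t *ckptr = (const uint32_t *)(payload + paylen);
	uint32_t cksum = example_sum32(hdr, hdrlen, 0xffffffff);

	if (paylen)
		cksum = example_sum32(payload, paylen, cksum);

	/* Both trailer words must match each other and the recomputed value. */
	return (ckptr[0] == ckptr[1]) && (cksum == ckptr[0]);
}
#endif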
/*
 * Core receive progress function.
 *
 * recvhdrq_progress is the core function that services the receive header
 * queue and, optionally, the eager queue.  At the lowest level, it identifies
 * packets marked with errors by the chip and also detects and corrects eager
 * overflow conditions when they occur.  At the highest level, it queries the
 * 'epstate' interface to classify packets as coming from "known" or "unknown"
 * endpoints.  In order to support shared contexts, it can also handle packets
 * destined for other contexts (or "subcontexts").
 */
psm2_error_t ips_recvhdrq_progress(struct ips_recvhdrq *recvq)
{
	/* When PSM_PERF is enabled, the following line causes the PMU to
	   start a stop watch to measure instruction cycles of the RX
	   speedpath of PSM.  The stop watch is stopped below. */
	GENERIC_PERF_BEGIN(PSM_RX_SPEEDPATH_CTR);

	struct ips_recvhdrq_state *state = recvq->state;
	PSMI_CACHEALIGN struct ips_recvhdrq_event rcv_ev = {.proto = recvq->proto,
							    .recvq = recvq
	};
	struct ips_epstate_entry *epstaddr;
	uint32_t num_hdrq_done = 0;
	const uint32_t num_hdrq_todo =
	    psmi_hal_get_rx_hdr_q_cnt(recvq->context->psm_hw_ctxt);
	uint32_t dest_subcontext;
	const uint32_t hdrq_elemsz =
	    psmi_hal_get_rx_hdr_q_ent_size(recvq->context->psm_hw_ctxt) >>
	    BYTE2DWORD_SHIFT;
	int ret = IPS_RECVHDRQ_CONTINUE;
	int done = 0, empty = 0;
	int do_hdr_update = 0;
	const psmi_hal_cl_q psm_hal_hdr_q = recvq->psm_hal_cl_hdrq;
	const psmi_hal_cl_q psm_hal_egr_q = psm_hal_hdr_q + 1;

	/* Returns whether the currently set 'rcv_hdr'/head is a readable entry */
#define next_hdrq_is_ready()  (! empty )

	if (psmi_hal_cl_q_empty(state->hdrq_head, psm_hal_hdr_q,
				recvq->context->psm_hw_ctxt))
		return PSM2_OK;

	PSM2_LOG_MSG("entering");

	done = !next_hdrq_is_ready();

	rcv_ev.psm_hal_hdr_q = psm_hal_hdr_q;

	while (!done) {
		psmi_hal_get_receive_event(state->hdrq_head,
					   recvq->context->psm_hw_ctxt,
					   &rcv_ev);
		rcv_ev.has_cksum = ((recvq->proto->flags & IPS_PROTO_FLAG_CKSUM) &&
				    (rcv_ev.p_hdr->flags & IPS_SEND_FLAG_PKTCKSUM));
		_HFI_VDBG("new packet: rcv_hdr %p, rhf %" PRIx64 "\n",
			  rcv_ev.p_hdr, rcv_ev.psm_hal_rhf.raw_rhf);

#ifdef PSM_DEBUG
		if_pf(_check_headers(&rcv_ev, psm_hal_hdr_q))
			goto skip_packet;
#endif
		dest_subcontext = _get_proto_subcontext(rcv_ev.p_hdr);

		/* If the destination is not our subcontext, process the
		 * message as a subcontext message (shared contexts) */
		if (dest_subcontext != recvq->subcontext) {
			rcv_ev.ipsaddr = NULL;

			ret = recvq->recvq_callbacks.callback_subcontext
						(&rcv_ev, dest_subcontext);
			if (ret == IPS_RECVHDRQ_REVISIT) {
				PSM2_LOG_MSG("leaving");
				/* When PSM_PERF is enabled, the following line
				   causes the PMU to stop a stop watch to
				   measure instruction cycles of the RX
				   speedpath of PSM.  The stop watch was
				   started above. */
				GENERIC_PERF_END(PSM_RX_SPEEDPATH_CTR);
				return PSM2_OK_NO_PROGRESS;
			}

			goto skip_packet;
		}

		if_pf(psmi_hal_rhf_get_all_err_flags(rcv_ev.psm_hal_rhf)) {

			_update_error_stats(recvq->proto,
					    psmi_hal_rhf_get_all_err_flags(rcv_ev.psm_hal_rhf));

			recvq->recvq_callbacks.callback_error(&rcv_ev);

			if ((psmi_hal_rhf_get_rx_type(rcv_ev.psm_hal_rhf) !=
			     PSM_HAL_RHF_RX_TYPE_EAGER) ||
			    (!(psmi_hal_rhf_get_all_err_flags(rcv_ev.psm_hal_rhf) &
			       PSMI_HAL_RHF_ERR_TID)))
				goto skip_packet;

			/* No eager update is pending and the header queue is
			 * not currently being traced. */
			if (state->hdr_countdown == 0 &&
			    state->rcv_egr_index_head == NO_EAGER_UPDATE) {
				uint32_t egr_cnt =
				    psmi_hal_get_rx_egr_tid_cnt(recvq->context->psm_hw_ctxt);
				psmi_hal_cl_idx etail = 0, ehead = 0;

				ehead = psmi_hal_get_cl_q_head_index(
						psm_hal_egr_q,
						rcv_ev.recvq->context->psm_hw_ctxt);
				etail = psmi_hal_get_cl_q_tail_index(
						psm_hal_egr_q,
						rcv_ev.recvq->context->psm_hw_ctxt);
				if (ehead == ((etail + 1) % egr_cnt)) {
					/* eager queue is full,
					 * trace existing header entries */
					uint32_t hdr_size =
					    recvq->hdrq_elemlast + hdrq_elemsz;
					psmi_hal_cl_idx htail = 0;

					htail = psmi_hal_get_cl_q_tail_index(
							psm_hal_hdr_q,
							rcv_ev.recvq->context->psm_hw_ctxt);
					const uint32_t hhead = state->hdrq_head;

					state->hdr_countdown =
					    (htail > hhead) ?
					    (htail - hhead) :
					    (htail + hdr_size - hhead);
				}
			}

			/* Eager packet with a TID error.
			 * Don't consider updating the eager head unless we're
			 * in the congested state.  If we're congested, we
			 * should try to keep the eager buffers free. */
			if (!rcv_ev.is_congested)
				goto skip_packet_no_egr_update;
			else
				goto skip_packet;
		}

		/* If checksums are enabled, verify that the checksum is valid */
		if_pf(rcv_ev.has_cksum && !do_pkt_cksum(&rcv_ev))
			goto skip_packet;

		if (_HFI_VDBG_ON) {
			psmi_hal_cl_idx egr_buff_q_head, egr_buff_q_tail;

			egr_buff_q_head = psmi_hal_get_cl_q_head_index(
						psm_hal_egr_q,
						rcv_ev.recvq->context->psm_hw_ctxt);
			egr_buff_q_tail = psmi_hal_get_cl_q_tail_index(
						psm_hal_egr_q,
						rcv_ev.recvq->context->psm_hw_ctxt);

			_HFI_VDBG_ALWAYS(
				"hdrq_head %d, p_hdr: %p, opcode %x, payload %p paylen %d; "
				"egrhead %x egrtail %x; "
				"useegrbit %x egrindex %x, egroffset %x, egrindexhead %x\n",
				state->hdrq_head,
				rcv_ev.p_hdr,
				_get_proto_hfi_opcode(rcv_ev.p_hdr),
				ips_recvhdrq_event_payload(&rcv_ev),
				ips_recvhdrq_event_paylen(&rcv_ev),
				egr_buff_q_head, egr_buff_q_tail,
				psmi_hal_rhf_get_use_egr_buff(rcv_ev.psm_hal_rhf),
				psmi_hal_rhf_get_egr_buff_index(rcv_ev.psm_hal_rhf),
				psmi_hal_rhf_get_egr_buff_offset(rcv_ev.psm_hal_rhf),
				state->rcv_egr_index_head);
		}

		PSM2_LOG_PKT_STRM(PSM2_LOG_RX, rcv_ev.p_hdr,
				  &rcv_ev.psm_hal_rhf.raw_rhf, "PKT_STRM:");

		/* Classify the packet as coming from a known or unknown endpoint */
		epstaddr = ips_epstate_lookup(recvq->epstate,
					      rcv_ev.p_hdr->connidx);

		if_pf((epstaddr == NULL) || (epstaddr->ipsaddr == NULL)) {
			rcv_ev.ipsaddr = NULL;
			recvq->recvq_callbacks.callback_packet_unknown(&rcv_ev);
		} else {
			rcv_ev.ipsaddr = epstaddr->ipsaddr;
			ret = ips_proto_process_packet(&rcv_ev);
			if (ret == IPS_RECVHDRQ_REVISIT) {
				PSM2_LOG_MSG("leaving");
				/* When PSM_PERF is enabled, the following line
				   causes the PMU to stop a stop watch to
				   measure instruction cycles of the RX
				   speedpath of PSM.  The stop watch was
				   started above. */
				GENERIC_PERF_END(PSM_RX_SPEEDPATH_CTR);
				return PSM2_OK_NO_PROGRESS;
			}
		}

skip_packet:
		/*
		 * If an eager buffer was used, record the index.
		 */
		if (psmi_hal_rhf_get_use_egr_buff(rcv_ev.psm_hal_rhf)) {
			/* set only when a new entry is used */
			if (psmi_hal_rhf_get_egr_buff_offset(rcv_ev.psm_hal_rhf) == 0) {
				state->rcv_egr_index_head =
				    psmi_hal_rhf_get_egr_buff_index(rcv_ev.psm_hal_rhf);
				state->num_egrq_done++;
			}
			/* a header entry is using an eager entry, stop tracing. */
			state->hdr_countdown = 0;
		}

skip_packet_no_egr_update:
		/* Note that state->hdrq_head is sampled speculatively by the
		 * code in ips_ptl_shared_poll() when context sharing, so it
		 * is not safe for this shared variable to temporarily exceed
		 * the last element. */
*/ _HFI_VDBG ("head %d, elemsz %d elemlast %d\n", state->hdrq_head, hdrq_elemsz, recvq->hdrq_elemlast); psmi_hal_retire_hdr_q_entry(&state->hdrq_head, psm_hal_hdr_q, recvq->context->psm_hw_ctxt, hdrq_elemsz, recvq->hdrq_elemlast, &empty); state->num_hdrq_done++; num_hdrq_done++; done = (!next_hdrq_is_ready() || (ret == IPS_RECVHDRQ_BREAK) || (num_hdrq_done == num_hdrq_todo)); do_hdr_update = (state->head_update_interval ? (state->num_hdrq_done == state->head_update_interval) : done); if (do_hdr_update) { psmi_hal_set_cl_q_head_index( state->hdrq_head, psm_hal_hdr_q, rcv_ev.recvq->context->psm_hw_ctxt); /* Reset header queue entries processed */ state->num_hdrq_done = 0; } if (state->num_egrq_done >= state->egrq_update_interval) { /* Lazy update of egrq */ if (state->rcv_egr_index_head != NO_EAGER_UPDATE) { psmi_hal_set_cl_q_head_index( state->rcv_egr_index_head, psm_hal_egr_q, recvq->context->psm_hw_ctxt); state->rcv_egr_index_head = NO_EAGER_UPDATE; state->num_egrq_done = 0; } } if (state->hdr_countdown > 0) { /* a header entry is consumed. */ state->hdr_countdown -= hdrq_elemsz; if (state->hdr_countdown == 0) { /* header entry count reaches zero. */ psmi_hal_cl_idx tail=0; tail = psmi_hal_get_cl_q_tail_index( psm_hal_egr_q, recvq->context->psm_hw_ctxt); psmi_hal_cl_idx head=0; head = psmi_hal_get_cl_q_head_index( psm_hal_egr_q, recvq->context->psm_hw_ctxt); uint32_t egr_cnt = psmi_hal_get_rx_egr_tid_cnt(recvq->context->psm_hw_ctxt); /* Checks eager-full again. This is a real false-egr-full */ if (head == ((tail + 1) % egr_cnt)) { psmi_hal_set_cl_q_tail_index( tail, psm_hal_egr_q, recvq->context->psm_hw_ctxt); _HFI_DBG ("eager array full after overflow, flushing " "(head %llx, tail %llx)\n", (long long)head, (long long)tail); recvq->proto->stats.egr_overflow++; } else _HFI_ERROR ("PSM BUG: EgrOverflow: eager queue is not full\n"); } } } /* while (hdrq_entries_to_read) */ /* Process any pending acks before exiting */ process_pending_acks(recvq); PSM2_LOG_MSG("leaving"); /* When PSM_PERF is enabled, the following line causes the PMU to stop a stop watch to measure instruction cycles of the RX speedpath of PSM. The stop watch was started above. */ GENERIC_PERF_END(PSM_RX_SPEEDPATH_CTR); return num_hdrq_done ? PSM2_OK : PSM2_OK_NO_PROGRESS; } /* This function is designed to implement RAPID CCA. It iterates through the recvq, checking each element for set FECN or BECN bits. In the case of finding one, the proper response is executed, and the bits are cleared. 
/*
 * This function is designed to implement RAPID CCA.  It iterates through the
 * recvq, checking each element for set FECN or BECN bits.  If one is found,
 * the proper response is executed and the bits are cleared.
 */
psm2_error_t ips_recvhdrq_scan_cca(struct ips_recvhdrq *recvq)
{
	/* Look at a header index and determine if it is the last item in the queue */
#define is_last_hdr(idx)						\
	psmi_hal_cl_q_empty(idx, psm_hal_hdr_q, recvq->context->psm_hw_ctxt)

	struct ips_recvhdrq_state *state = recvq->state;
	PSMI_CACHEALIGN struct ips_recvhdrq_event rcv_ev = {.proto = recvq->proto,
							    .recvq = recvq
	};

	uint32_t num_hdrq_done = state->hdrq_cachedlastscan /
	    (psmi_hal_get_rx_hdr_q_ent_size(recvq->context->psm_hw_ctxt) >>
	     BYTE2DWORD_SHIFT);
	const int num_hdrq_todo =
	    psmi_hal_get_rx_hdr_q_cnt(recvq->context->psm_hw_ctxt);
	const uint32_t hdrq_elemsz =
	    psmi_hal_get_rx_hdr_q_ent_size(recvq->context->psm_hw_ctxt) >>
	    BYTE2DWORD_SHIFT;

	int done;
	uint32_t scan_head = state->hdrq_head + state->hdrq_cachedlastscan;
	const psmi_hal_cl_q psm_hal_hdr_q = recvq->psm_hal_cl_hdrq;

	/* Skip the first element, since we're going to process it soon anyway */
	if (state->hdrq_cachedlastscan == 0) {
		scan_head += hdrq_elemsz;
		num_hdrq_done++;
	}

	PSM2_LOG_MSG("entering");

	done = !is_last_hdr(scan_head);
	rcv_ev.psm_hal_hdr_q = psm_hal_hdr_q;

	while (!done) {
		psmi_hal_get_receive_event(scan_head, recvq->context->psm_hw_ctxt,
					   &rcv_ev);
		rcv_ev.has_cksum = ((recvq->proto->flags & IPS_PROTO_FLAG_CKSUM) &&
				    (rcv_ev.p_hdr->flags & IPS_SEND_FLAG_PKTCKSUM));

		_HFI_VDBG("scanning new packet for CCA: rcv_hdr %p, rhf %" PRIx64 "\n",
			  rcv_ev.p_hdr, rcv_ev.psm_hal_rhf.raw_rhf);

		if_pt(_is_cca_fecn_set(rcv_ev.p_hdr) & IPS_RECV_EVENT_FECN) {
			struct ips_epstate_entry *epstaddr =
			    ips_epstate_lookup(recvq->epstate,
					       rcv_ev.p_hdr->connidx);

			if (epstaddr != NULL && epstaddr->ipsaddr != NULL) {
				rcv_ev.ipsaddr = epstaddr->ipsaddr;

				/* Send a BECN back */
				ips_epaddr_t *ipsaddr = rcv_ev.ipsaddr;
				struct ips_message_header *p_hdr = rcv_ev.p_hdr;
				ips_epaddr_flow_t flowid = ips_proto_flowid(p_hdr);
				struct ips_flow *flow;
				ips_scb_t ctrlscb;

				psmi_assert(flowid < EP_FLOW_LAST);
				flow = &ipsaddr->flows[flowid];
				ctrlscb.scb_flags = 0;
				ctrlscb.ips_lrh.data[0].u32w0 = flow->cca_ooo_pkts;

				rcv_ev.proto->epaddr_stats.congestion_pkts++;
				/* Clear the FECN event */
				rcv_ev.is_congested &= ~IPS_RECV_EVENT_FECN;

				ips_proto_send_ctrl_message(flow,
							    OPCODE_BECN,
							    &flow->ipsaddr->ctrl_msg_queued,
							    &ctrlscb, ctrlscb.cksum, 0);
			}
		}
		else if_pt(0 != (_is_cca_becn_set(rcv_ev.p_hdr) <<
				 (IPS_RECV_EVENT_BECN - 1))) {
			struct ips_epstate_entry *epstaddr =
			    ips_epstate_lookup(recvq->epstate,
					       rcv_ev.p_hdr->connidx);

			if (epstaddr != NULL && epstaddr->ipsaddr != NULL) {
				rcv_ev.ipsaddr = epstaddr->ipsaddr;

				/* Adjust the flow */
				struct ips_proto *proto = rcv_ev.proto;
				struct ips_message_header *p_hdr = rcv_ev.p_hdr;
				ips_epaddr_t *ipsaddr = rcv_ev.ipsaddr;
				struct ips_flow *flow;
				ips_epaddr_flow_t flowid = ips_proto_flowid(p_hdr);

				psmi_assert(flowid < EP_FLOW_LAST);
				flow = &ipsaddr->flows[flowid];

				if ((flow->path->pr_ccti +
				     proto->cace[flow->path->pr_sl].ccti_increase) <=
				    proto->ccti_limit) {
					ips_cca_adjust_rate(flow->path,
							    proto->cace[flow->path->pr_sl].ccti_increase);
					/* Clear the congestion event */
					rcv_ev.is_congested &= ~IPS_RECV_EVENT_BECN;
				}
			}
		}

		num_hdrq_done++;
		scan_head += hdrq_elemsz;
		state->hdrq_cachedlastscan += hdrq_elemsz;

		done = (num_hdrq_done == num_hdrq_todo && !is_last_hdr(scan_head));
	}			/* while (hdrq_entries_to_read) */

	PSM2_LOG_MSG("leaving");

	return PSM2_OK;
}
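/*
 * Illustrative sketch (not part of the build): a caller-side poll loop over
 * ips_recvhdrq_progress() as it might look from the receive PTL.  The
 * surrounding setup (an initialized struct ips_recvhdrq) is assumed; only
 * the return codes used here are taken from the function above.
 */
#if 0
static void example_poll_until_idle(struct ips_recvhdrq *recvq)
{
	psm2_error_t err;

	do {
		/* PSM2_OK means at least one header entry was retired;
		 * PSM2_OK_NO_PROGRESS means the queue was empty or a packet
		 * asked to be revisited later. */
		err = ips_recvhdrq_progress(recvq);
	} while (err == PSM2_OK);
}
#endif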