/*
* Copyright (c) 2006-2009 Voltaire, Inc. All rights reserved.
* Copyright (c) 2009 HNR Consulting. All rights reserved.
* Copyright (c) 2012 Lawrence Livermore National Lab. All rights reserved.
* Copyright (c) 2014 Mellanox Technologies LTD. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
/*
* Abstract:
* OSM Congestion Control configuration implementation
*
* Author:
* Albert Chu, LLNL
*/
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <stdlib.h>
#include <string.h>
#include <iba/ib_types.h>
#include <complib/cl_debug.h>
#include <opensm/osm_file_ids.h>
#define FILE_ID OSM_FILE_CONGESTION_CONTROL_C
#include <opensm/osm_subnet.h>
#include <opensm/osm_opensm.h>
#include <opensm/osm_log.h>
#include <opensm/osm_subnet.h>
#include <opensm/osm_congestion_control.h>
#define CONGESTION_CONTROL_INITIAL_TID_VALUE 0x7A93
static void cc_mad_post(osm_congestion_control_t *p_cc,
osm_madw_t *p_madw,
osm_node_t *p_node,
osm_physp_t *p_physp,
ib_net16_t attr_id,
ib_net32_t attr_mod)
{
osm_subn_opt_t *p_opt = &p_cc->subn->opt;
ib_cc_mad_t *p_cc_mad;
uint8_t port;
OSM_LOG_ENTER(p_cc->log);
port = osm_physp_get_port_num(p_physp);
p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
p_cc_mad->header.base_ver = 1;
p_cc_mad->header.mgmt_class = IB_MCLASS_CC;
p_cc_mad->header.class_ver = 2;
p_cc_mad->header.method = IB_MAD_METHOD_SET;
p_cc_mad->header.status = 0;
p_cc_mad->header.class_spec = 0;
p_cc_mad->header.trans_id =
cl_hton64((uint64_t) cl_atomic_inc(&p_cc->trans_id) &
(uint64_t) (0xFFFFFFFF));
if (p_cc_mad->header.trans_id == 0)
p_cc_mad->header.trans_id =
cl_hton64((uint64_t) cl_atomic_inc(&p_cc->trans_id) &
(uint64_t) (0xFFFFFFFF));
p_cc_mad->header.attr_id = attr_id;
p_cc_mad->header.resv = 0;
p_cc_mad->header.attr_mod = attr_mod;
p_cc_mad->cc_key = p_opt->cc_key;
memset(p_cc_mad->log_data, '\0', IB_CC_LOG_DATA_SIZE);
p_madw->mad_addr.dest_lid = osm_node_get_base_lid(p_node, port);
p_madw->mad_addr.addr_type.gsi.remote_qp = IB_QP1;
p_madw->mad_addr.addr_type.gsi.remote_qkey =
cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
p_madw->resp_expected = TRUE;
p_madw->fail_msg = CL_DISP_MSGID_NONE;
p_madw->context.cc_context.node_guid = osm_node_get_node_guid(p_node);
p_madw->context.cc_context.port_guid = osm_physp_get_port_guid(p_physp);
p_madw->context.cc_context.port = port;
p_madw->context.cc_context.mad_method = IB_MAD_METHOD_SET;
p_madw->context.cc_context.attr_mod = attr_mod;
cl_spinlock_acquire(&p_cc->mad_queue_lock);
cl_atomic_inc(&p_cc->outstanding_mads);
cl_qlist_insert_tail(&p_cc->mad_queue, &p_madw->list_item);
cl_spinlock_release(&p_cc->mad_queue_lock);
cl_event_signal(&p_cc->cc_poller_wakeup);
OSM_LOG_EXIT(p_cc->log);
}
static void cc_setup_mad_data(osm_sm_t * p_sm)
{
osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc;
osm_subn_opt_t *p_opt = &p_sm->p_subn->opt;
uint16_t ccti_limit;
unsigned i;
/* Switch Congestion Setting */
p_cc->sw_cong_setting.control_map = p_opt->cc_sw_cong_setting_control_map;
memcpy(p_cc->sw_cong_setting.victim_mask,
p_opt->cc_sw_cong_setting_victim_mask,
IB_CC_PORT_MASK_DATA_SIZE);
memcpy(p_cc->sw_cong_setting.credit_mask,
p_opt->cc_sw_cong_setting_credit_mask,
IB_CC_PORT_MASK_DATA_SIZE);
/* threshold is 4 bits, takes up upper nibble of byte */
p_cc->sw_cong_setting.threshold_resv = (p_opt->cc_sw_cong_setting_threshold << 4);
p_cc->sw_cong_setting.packet_size = p_opt->cc_sw_cong_setting_packet_size;
/* cs threshold is 4 bits, takes up upper nibble of short */
p_cc->sw_cong_setting.cs_threshold_resv =
cl_hton16(p_opt->cc_sw_cong_setting_credit_starvation_threshold << 12);
p_cc->sw_cong_setting.cs_return_delay =
cl_hton16(p_opt->cc_sw_cong_setting_credit_starvation_return_delay.shift << 14
| p_opt->cc_sw_cong_setting_credit_starvation_return_delay.multiplier);
p_cc->sw_cong_setting.marking_rate = p_opt->cc_sw_cong_setting_marking_rate;
/* CA Congestion Setting */
p_cc->ca_cong_setting.port_control = p_opt->cc_ca_cong_setting_port_control;
p_cc->ca_cong_setting.control_map = p_opt->cc_ca_cong_setting_control_map;
for (i = 0; i < IB_CA_CONG_ENTRY_DATA_SIZE; i++) {
ib_ca_cong_entry_t *p_entry;
p_entry = &p_cc->ca_cong_setting.entry_list[i];
p_entry->ccti_timer = p_opt->cc_ca_cong_entries[i].ccti_timer;
p_entry->ccti_increase = p_opt->cc_ca_cong_entries[i].ccti_increase;
p_entry->trigger_threshold = p_opt->cc_ca_cong_entries[i].trigger_threshold;
p_entry->ccti_min = p_opt->cc_ca_cong_entries[i].ccti_min;
p_entry->resv0 = 0;
p_entry->resv1 = 0;
}
/* Congestion Control Table */
/* if no entries, we will always send at least 1 mad to set ccti_limit = 0 */
if (!p_opt->cc_cct.entries_len)
p_cc->cc_tbl_mads = 1;
else {
p_cc->cc_tbl_mads = p_opt->cc_cct.entries_len - 1;
p_cc->cc_tbl_mads /= IB_CC_TBL_ENTRY_LIST_MAX;
p_cc->cc_tbl_mads += 1;
}
CL_ASSERT(p_cc->cc_tbl_mads <= OSM_CCT_ENTRY_MAD_BLOCKS);
if (!p_opt->cc_cct.entries_len)
ccti_limit = 0;
else
ccti_limit = p_opt->cc_cct.entries_len - 1;
for (i = 0; i < p_cc->cc_tbl_mads; i++) {
int j;
p_cc->cc_tbl[i].ccti_limit = cl_hton16(ccti_limit);
p_cc->cc_tbl[i].resv = 0;
memset(p_cc->cc_tbl[i].entry_list,
'\0',
sizeof(p_cc->cc_tbl[i].entry_list));
if (!ccti_limit)
break;
for (j = 0; j < IB_CC_TBL_ENTRY_LIST_MAX; j++) {
int k;
k = (i * IB_CC_TBL_ENTRY_LIST_MAX) + j;
p_cc->cc_tbl[i].entry_list[j].shift_multiplier =
cl_hton16(p_opt->cc_cct.entries[k].shift << 14
| p_opt->cc_cct.entries[k].multiplier);
}
}
}
static ib_api_status_t cc_send_sw_cong_setting(osm_sm_t * p_sm,
osm_node_t *p_node)
{
osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc;
unsigned force_update;
osm_physp_t *p_physp;
osm_madw_t *p_madw = NULL;
ib_cc_mad_t *p_cc_mad = NULL;
ib_sw_cong_setting_t *p_sw_cong_setting = NULL;
OSM_LOG_ENTER(p_sm->p_log);
p_physp = osm_node_get_physp_ptr(p_node, 0);
force_update = p_physp->need_update || p_sm->p_subn->need_update;
if (!force_update
&& !memcmp(&p_cc->sw_cong_setting,
&p_physp->cc.sw.sw_cong_setting,
sizeof(p_cc->sw_cong_setting)))
return IB_SUCCESS;
p_madw = osm_mad_pool_get(p_cc->mad_pool, p_cc->bind_handle,
MAD_BLOCK_SIZE, NULL);
if (p_madw == NULL) {
OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR C101: "
"failed to allocate mad\n");
return IB_INSUFFICIENT_MEMORY;
}
p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
p_sw_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
memcpy(p_sw_cong_setting,
&p_cc->sw_cong_setting,
sizeof(p_cc->sw_cong_setting));
cc_mad_post(p_cc, p_madw, p_node, p_physp,
IB_MAD_ATTR_SW_CONG_SETTING, 0);
OSM_LOG_EXIT(p_sm->p_log);
return IB_SUCCESS;
}
static ib_api_status_t cc_send_ca_cong_setting(osm_sm_t * p_sm,
osm_node_t *p_node,
osm_physp_t *p_physp)
{
osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc;
unsigned force_update;
osm_madw_t *p_madw = NULL;
ib_cc_mad_t *p_cc_mad = NULL;
ib_ca_cong_setting_t *p_ca_cong_setting = NULL;
OSM_LOG_ENTER(p_sm->p_log);
force_update = p_physp->need_update || p_sm->p_subn->need_update;
if (!force_update
&& !memcmp(&p_cc->ca_cong_setting,
&p_physp->cc.ca.ca_cong_setting,
sizeof(p_cc->ca_cong_setting)))
return IB_SUCCESS;
p_madw = osm_mad_pool_get(p_cc->mad_pool, p_cc->bind_handle,
MAD_BLOCK_SIZE, NULL);
if (p_madw == NULL) {
OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR C102: "
"failed to allocate mad\n");
return IB_INSUFFICIENT_MEMORY;
}
p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
p_ca_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
memcpy(p_ca_cong_setting,
&p_cc->ca_cong_setting,
sizeof(p_cc->ca_cong_setting));
cc_mad_post(p_cc, p_madw, p_node, p_physp,
IB_MAD_ATTR_CA_CONG_SETTING, 0);
OSM_LOG_EXIT(p_sm->p_log);
return IB_SUCCESS;
}
static ib_api_status_t cc_send_cct(osm_sm_t * p_sm,
osm_node_t *p_node,
osm_physp_t *p_physp)
{
osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc;
unsigned force_update;
osm_madw_t *p_madw = NULL;
ib_cc_mad_t *p_cc_mad = NULL;
ib_cc_tbl_t *p_cc_tbl = NULL;
unsigned int index = 0;
OSM_LOG_ENTER(p_sm->p_log);
force_update = p_physp->need_update || p_sm->p_subn->need_update;
for (index = 0; index < p_cc->cc_tbl_mads; index++) {
if (!force_update
&& !memcmp(&p_cc->cc_tbl[index],
&p_physp->cc.ca.cc_tbl[index],
sizeof(p_cc->cc_tbl[index])))
continue;
p_madw = osm_mad_pool_get(p_cc->mad_pool, p_cc->bind_handle,
MAD_BLOCK_SIZE, NULL);
if (p_madw == NULL) {
OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR C103: "
"failed to allocate mad\n");
return IB_INSUFFICIENT_MEMORY;
}
p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
p_cc_tbl = (ib_cc_tbl_t *)ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
memcpy(p_cc_tbl,
&p_cc->cc_tbl[index],
sizeof(p_cc->cc_tbl[index]));
cc_mad_post(p_cc, p_madw, p_node, p_physp,
IB_MAD_ATTR_CC_TBL, cl_hton32(index));
}
OSM_LOG_EXIT(p_sm->p_log);
return IB_SUCCESS;
}
int osm_congestion_control_setup(struct osm_opensm *p_osm)
{
cl_qmap_t *p_tbl;
cl_map_item_t *p_next;
int ret = 0;
if (!p_osm->subn.opt.congestion_control)
return 0;
OSM_LOG_ENTER(&p_osm->log);
/*
* Do nothing unless the most recent routing attempt was successful.
*/
if (!p_osm->routing_engine_used)
return 0;
cc_setup_mad_data(&p_osm->sm);
cl_plock_acquire(&p_osm->lock);
p_tbl = &p_osm->subn.port_guid_tbl;
p_next = cl_qmap_head(p_tbl);
while (p_next != cl_qmap_end(p_tbl)) {
osm_port_t *p_port = (osm_port_t *) p_next;
osm_node_t *p_node = p_port->p_node;
ib_api_status_t status;
p_next = cl_qmap_next(p_next);
if (p_port->cc_unavailable_flag)
continue;
if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) {
status = cc_send_sw_cong_setting(&p_osm->sm, p_node);
if (status != IB_SUCCESS)
ret = -1;
} else if (osm_node_get_type(p_node) == IB_NODE_TYPE_CA) {
status = cc_send_ca_cong_setting(&p_osm->sm,
p_node,
p_port->p_physp);
if (status != IB_SUCCESS)
ret = -1;
status = cc_send_cct(&p_osm->sm,
p_node,
p_port->p_physp);
if (status != IB_SUCCESS)
ret = -1;
}
}
cl_plock_release(&p_osm->lock);
OSM_LOG_EXIT(&p_osm->log);
return ret;
}
int osm_congestion_control_wait_pending_transactions(struct osm_opensm *p_osm)
{
osm_congestion_control_t *cc = &p_osm->cc;
if (!p_osm->subn.opt.congestion_control)
return 0;
while (1) {
unsigned count = cc->outstanding_mads;
if (!count || osm_exit_flag)
break;
cl_event_wait_on(&cc->outstanding_mads_done_event,
EVENT_NO_TIMEOUT,
TRUE);
}
return osm_exit_flag;
}
static inline void decrement_outstanding_mads(osm_congestion_control_t *p_cc)
{
uint32_t outstanding;
outstanding = cl_atomic_dec(&p_cc->outstanding_mads);
if (!outstanding)
cl_event_signal(&p_cc->outstanding_mads_done_event);
cl_atomic_dec(&p_cc->outstanding_mads_on_wire);
cl_event_signal(&p_cc->sig_mads_on_wire_continue);
}
static void cc_rcv_mad(void *context, void *data)
{
osm_congestion_control_t *p_cc = context;
osm_opensm_t *p_osm = p_cc->osm;
osm_madw_t *p_madw = data;
ib_cc_mad_t *p_cc_mad;
osm_madw_context_t *p_mad_context = &p_madw->context;
ib_mad_t *p_mad = osm_madw_get_mad_ptr(p_madw);
ib_net64_t node_guid = p_mad_context->cc_context.node_guid;
ib_net64_t port_guid = p_mad_context->cc_context.port_guid;
uint8_t port = p_mad_context->cc_context.port;
osm_port_t *p_port;
OSM_LOG_ENTER(p_cc->log);
OSM_LOG(p_cc->log, OSM_LOG_VERBOSE,
"Processing received MAD status 0x%x for "
"attr ID %u mod 0x%x node 0x%" PRIx64 " port %u\n",
cl_ntoh16(p_mad->status), cl_ntoh16(p_mad->attr_id),
cl_ntoh32(p_mad_context->cc_context.attr_mod),
cl_ntoh64(node_guid), port);
p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
cl_plock_acquire(&p_osm->lock);
p_port = osm_get_port_by_guid(p_cc->subn, port_guid);
if (!p_port) {
OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C109: "
"Port GUID 0x%" PRIx64 " not in table\n",
cl_ntoh64(port_guid));
cl_plock_release(&p_osm->lock);
goto Exit;
}
p_port->cc_timeout_count = 0;
if (p_cc_mad->header.status) {
if (p_cc_mad->header.status & IB_MAD_STATUS_UNSUP_CLASS_VER
|| p_cc_mad->header.status & IB_MAD_STATUS_UNSUP_METHOD
|| p_cc_mad->header.status & IB_MAD_STATUS_UNSUP_METHOD_ATTR)
p_port->cc_unavailable_flag = TRUE;
cl_plock_release(&p_osm->lock);
goto Exit;
}
else
p_port->cc_unavailable_flag = FALSE;
if (p_cc_mad->header.attr_id == IB_MAD_ATTR_SW_CONG_SETTING) {
ib_sw_cong_setting_t *p_sw_cong_setting;
p_sw_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
p_port->p_physp->cc.sw.sw_cong_setting = *p_sw_cong_setting;
}
else if (p_cc_mad->header.attr_id == IB_MAD_ATTR_CA_CONG_SETTING) {
ib_ca_cong_setting_t *p_ca_cong_setting;
p_ca_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
p_port->p_physp->cc.ca.ca_cong_setting = *p_ca_cong_setting;
}
else if (p_cc_mad->header.attr_id == IB_MAD_ATTR_CC_TBL) {
ib_net32_t attr_mod = p_mad_context->cc_context.attr_mod;
uint32_t index = cl_ntoh32(attr_mod);
ib_cc_tbl_t *p_cc_tbl;
p_cc_tbl = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
p_port->p_physp->cc.ca.cc_tbl[index] = *p_cc_tbl;
}
else
OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C10A: "
"Unexpected MAD attribute ID %u received\n",
cl_ntoh16(p_cc_mad->header.attr_id));
cl_plock_release(&p_osm->lock);
Exit:
decrement_outstanding_mads(p_cc);
osm_mad_pool_put(p_cc->mad_pool, p_madw);
OSM_LOG_EXIT(p_cc->log);
}
static void cc_poller_send(osm_congestion_control_t *p_cc,
osm_madw_t *p_madw)
{
osm_subn_opt_t *p_opt = &p_cc->subn->opt;
ib_api_status_t status;
cl_status_t sts;
osm_madw_context_t mad_context = p_madw->context;
status = osm_vendor_send(p_cc->bind_handle, p_madw, TRUE);
if (status == IB_SUCCESS) {
cl_atomic_inc(&p_cc->outstanding_mads_on_wire);
while (p_cc->outstanding_mads_on_wire >
(int32_t)p_opt->cc_max_outstanding_mads) {
wait:
sts = cl_event_wait_on(&p_cc->sig_mads_on_wire_continue,
EVENT_NO_TIMEOUT, TRUE);
if (sts != CL_SUCCESS)
goto wait;
}
} else
OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C104: "
"send failed to node 0x%" PRIx64 "port %u\n",
cl_ntoh64(mad_context.cc_context.node_guid),
mad_context.cc_context.port);
}
static void cc_poller(void *p_ptr)
{
osm_congestion_control_t *p_cc = p_ptr;
osm_madw_t *p_madw;
OSM_LOG_ENTER(p_cc->log);
if (p_cc->thread_state == OSM_THREAD_STATE_NONE)
p_cc->thread_state = OSM_THREAD_STATE_RUN;
while (p_cc->thread_state == OSM_THREAD_STATE_RUN) {
cl_spinlock_acquire(&p_cc->mad_queue_lock);
p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_cc->mad_queue);
cl_spinlock_release(&p_cc->mad_queue_lock);
if (p_madw != (osm_madw_t *) cl_qlist_end(&p_cc->mad_queue))
cc_poller_send(p_cc, p_madw);
else
cl_event_wait_on(&p_cc->cc_poller_wakeup,
EVENT_NO_TIMEOUT, TRUE);
}
OSM_LOG_EXIT(p_cc->log);
}
ib_api_status_t osm_congestion_control_init(osm_congestion_control_t * p_cc,
struct osm_opensm *p_osm,
const osm_subn_opt_t * p_opt)
{
ib_api_status_t status = IB_SUCCESS;
OSM_LOG_ENTER(&p_osm->log);
memset(p_cc, 0, sizeof(*p_cc));
p_cc->osm = p_osm;
p_cc->subn = &p_osm->subn;
p_cc->sm = &p_osm->sm;
p_cc->log = &p_osm->log;
p_cc->mad_pool = &p_osm->mad_pool;
p_cc->trans_id = CONGESTION_CONTROL_INITIAL_TID_VALUE;
p_cc->vendor = p_osm->p_vendor;
p_cc->cc_disp_h = cl_disp_register(&p_osm->disp, OSM_MSG_MAD_CC,
cc_rcv_mad, p_cc);
if (p_cc->cc_disp_h == CL_DISP_INVALID_HANDLE)
goto Exit;
cl_qlist_init(&p_cc->mad_queue);
status = cl_spinlock_init(&p_cc->mad_queue_lock);
if (status != IB_SUCCESS)
goto Exit;
cl_event_construct(&p_cc->cc_poller_wakeup);
status = cl_event_init(&p_cc->cc_poller_wakeup, FALSE);
if (status != IB_SUCCESS)
goto Exit;
cl_event_construct(&p_cc->outstanding_mads_done_event);
status = cl_event_init(&p_cc->outstanding_mads_done_event, FALSE);
if (status != IB_SUCCESS)
goto Exit;
cl_event_construct(&p_cc->sig_mads_on_wire_continue);
status = cl_event_init(&p_cc->sig_mads_on_wire_continue, FALSE);
if (status != IB_SUCCESS)
goto Exit;
p_cc->thread_state = OSM_THREAD_STATE_NONE;
status = cl_thread_init(&p_cc->cc_poller, cc_poller, p_cc,
"cc poller");
if (status != IB_SUCCESS)
goto Exit;
status = IB_SUCCESS;
Exit:
OSM_LOG_EXIT(p_cc->log);
return status;
}
static void cc_mad_recv_callback(osm_madw_t * p_madw, void *bind_context,
osm_madw_t * p_req_madw)
{
osm_congestion_control_t *p_cc = bind_context;
OSM_LOG_ENTER(p_cc->log);
CL_ASSERT(p_madw);
/* HACK - should be extended when supporting CC traps */
CL_ASSERT(p_req_madw != NULL);
osm_madw_copy_context(p_madw, p_req_madw);
osm_mad_pool_put(p_cc->mad_pool, p_req_madw);
/* Do not decrement outstanding mads here, do it in the dispatcher */
if (cl_disp_post(p_cc->cc_disp_h, OSM_MSG_MAD_CC,
p_madw, NULL, NULL) != CL_SUCCESS) {
OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C105: "
"Congestion Control Dispatcher post failed\n");
osm_mad_pool_put(p_cc->mad_pool, p_madw);
}
OSM_LOG_EXIT(p_cc->log);
}
static void cc_mad_send_err_callback(void *bind_context,
osm_madw_t * p_madw)
{
osm_congestion_control_t *p_cc = bind_context;
osm_madw_context_t *p_madw_context = &p_madw->context;
osm_opensm_t *p_osm = p_cc->osm;
uint64_t node_guid = p_madw_context->cc_context.node_guid;
uint64_t port_guid = p_madw_context->cc_context.port_guid;
uint8_t port = p_madw_context->cc_context.port;
osm_port_t *p_port;
int log_flag = 1;
OSM_LOG_ENTER(p_cc->log);
cl_plock_acquire(&p_osm->lock);
p_port = osm_get_port_by_guid(p_cc->subn, port_guid);
if (!p_port) {
OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C10B: "
"Port GUID 0x%" PRIx64 " not in table\n",
cl_ntoh64(port_guid));
cl_plock_release(&p_osm->lock);
goto Exit;
}
/* If timed out before, don't bothering logging again
* we assume no CC support
*/
if (p_madw->status == IB_TIMEOUT
&& p_port->cc_timeout_count)
log_flag = 0;
if (log_flag)
OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C106: MAD Error (%s): "
"attr id = %u LID %u GUID 0x%016" PRIx64 " port %u "
"TID 0x%" PRIx64 "\n",
ib_get_err_str(p_madw->status),
p_madw->p_mad->attr_id,
cl_ntoh16(p_madw->mad_addr.dest_lid),
cl_ntoh64(node_guid),
port,
cl_ntoh64(p_madw->p_mad->trans_id));
if (p_madw->status == IB_TIMEOUT) {
p_port->cc_timeout_count++;
if (p_port->cc_timeout_count > OSM_CC_TIMEOUT_COUNT_THRESHOLD
&& !p_port->cc_unavailable_flag) {
p_port->cc_unavailable_flag = TRUE;
p_port->cc_timeout_count = 0;
}
} else
p_cc->subn->subnet_initialization_error = TRUE;
cl_plock_release(&p_osm->lock);
Exit:
osm_mad_pool_put(p_cc->mad_pool, p_madw);
decrement_outstanding_mads(p_cc);
OSM_LOG_EXIT(p_cc->log);
}
ib_api_status_t osm_congestion_control_bind(osm_congestion_control_t * p_cc,
ib_net64_t port_guid)
{
osm_bind_info_t bind_info;
ib_api_status_t status = IB_SUCCESS;
OSM_LOG_ENTER(p_cc->log);
bind_info.port_guid = p_cc->port_guid = port_guid;
bind_info.mad_class = IB_MCLASS_CC;
bind_info.class_version = 2;
bind_info.is_responder = FALSE;
bind_info.is_report_processor = FALSE;
bind_info.is_trap_processor = FALSE;
bind_info.recv_q_size = OSM_SM_DEFAULT_QP1_RCV_SIZE;
bind_info.send_q_size = OSM_SM_DEFAULT_QP1_SEND_SIZE;
bind_info.timeout = p_cc->subn->opt.transaction_timeout;
bind_info.retries = p_cc->subn->opt.transaction_retries;
OSM_LOG(p_cc->log, OSM_LOG_VERBOSE,
"Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid));
p_cc->bind_handle = osm_vendor_bind(p_cc->vendor, &bind_info,
p_cc->mad_pool,
cc_mad_recv_callback,
cc_mad_send_err_callback, p_cc);
if (p_cc->bind_handle == OSM_BIND_INVALID_HANDLE) {
status = IB_ERROR;
OSM_LOG(p_cc->log, OSM_LOG_ERROR,
"ERR C107: Vendor specific bind failed (%s)\n",
ib_get_err_str(status));
goto Exit;
}
Exit:
OSM_LOG_EXIT(p_cc->log);
return status;
}
void osm_congestion_control_shutdown(osm_congestion_control_t * p_cc)
{
OSM_LOG_ENTER(p_cc->log);
if (p_cc->bind_handle == OSM_BIND_INVALID_HANDLE) {
OSM_LOG(p_cc->log, OSM_LOG_ERROR,
"ERR C108: No previous bind\n");
goto Exit;
}
cl_disp_unregister(p_cc->cc_disp_h);
Exit:
OSM_LOG_EXIT(p_cc->log);
}
void osm_congestion_control_destroy(osm_congestion_control_t * p_cc)
{
osm_madw_t *p_madw;
OSM_LOG_ENTER(p_cc->log);
p_cc->thread_state = OSM_THREAD_STATE_EXIT;
cl_event_signal(&p_cc->sig_mads_on_wire_continue);
cl_event_signal(&p_cc->cc_poller_wakeup);
cl_thread_destroy(&p_cc->cc_poller);
cl_spinlock_acquire(&p_cc->mad_queue_lock);
while (!cl_is_qlist_empty(&p_cc->mad_queue)) {
p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_cc->mad_queue);
osm_mad_pool_put(p_cc->mad_pool, p_madw);
}
cl_spinlock_release(&p_cc->mad_queue_lock);
cl_spinlock_destroy(&p_cc->mad_queue_lock);
cl_event_destroy(&p_cc->cc_poller_wakeup);
cl_event_destroy(&p_cc->outstanding_mads_done_event);
cl_event_destroy(&p_cc->sig_mads_on_wire_continue);
OSM_LOG_EXIT(p_cc->log);
}