/* * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009-2011 ZIH, TU Dresden, Federal Republic of Germany. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ /* * Abstract: * Implementation of osm_link_mgr_t. * This file implements the Link Manager object. */ #if HAVE_CONFIG_H # include #endif /* HAVE_CONFIG_H */ #include #include #include #include #define FILE_ID OSM_FILE_LINK_MGR_C #include #include #include #include #include #include #include static uint8_t link_mgr_get_smsl(IN osm_sm_t * sm, IN osm_physp_t * p_physp) { osm_opensm_t *p_osm = sm->p_subn->p_osm; struct osm_routing_engine *re = p_osm->routing_engine_used; ib_net16_t slid; ib_net16_t smlid; uint8_t sl; OSM_LOG_ENTER(sm->p_log); if (!(re && re->path_sl && (slid = osm_physp_get_base_lid(p_physp)))) { /* * Use default SL if routing engine does not provide a * path SL lookup callback. */ OSM_LOG_EXIT(sm->p_log); return sm->p_subn->opt.sm_sl; } smlid = sm->p_subn->sm_base_lid; /* Call into routing engine to find proper SL */ sl = re->path_sl(re->context, sm->p_subn->opt.sm_sl, slid, smlid); OSM_LOG_EXIT(sm->p_log); return sl; } static int link_mgr_set_physp_pi(osm_sm_t * sm, IN osm_physp_t * p_physp, IN uint8_t port_state) { uint8_t payload[IB_SMP_DATA_SIZE], payload2[IB_SMP_DATA_SIZE]; ib_port_info_t *p_pi = (ib_port_info_t *) payload; ib_mlnx_ext_port_info_t *p_epi = (ib_mlnx_ext_port_info_t *) payload2; const ib_port_info_t *p_old_pi; const ib_mlnx_ext_port_info_t *p_old_epi; osm_madw_context_t context; osm_node_t *p_node; ib_api_status_t status; uint8_t port_num, mtu, op_vls, smsl = OSM_DEFAULT_SL; boolean_t esp0 = FALSE, send_set = FALSE, send_set2 = FALSE; osm_physp_t *p_remote_physp, *physp0 = NULL; int issue_ext = 0, fdr10_change = 0; int ret = 0; ib_net32_t attr_mod, cap_mask; boolean_t update_mkey = FALSE; ib_net64_t m_key = 0; ib_net16_t capability_mask2; osm_port_t *p_port; OSM_LOG_ENTER(sm->p_log); p_node = osm_physp_get_node_ptr(p_physp); CL_ASSERT(p_node); p_old_pi = &p_physp->port_info; port_num = osm_physp_get_port_num(p_physp); memcpy(payload, p_old_pi, sizeof(ib_port_info_t)); if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH || port_num == 0) { /* Need to make sure LID and SMLID fields in PortInfo are not 0 */ if (!p_pi->base_lid) { p_port = osm_get_port_by_guid(sm->p_subn, osm_physp_get_port_guid(p_physp)); CL_ASSERT(p_port); p_pi->base_lid = p_port->lid; sm->lid_mgr.dirty = TRUE; send_set = TRUE; } /* we are initializing the ports with our local sm_base_lid */ p_pi->master_sm_base_lid = sm->p_subn->sm_base_lid; if (p_pi->master_sm_base_lid != p_old_pi->master_sm_base_lid) send_set = TRUE; } if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) physp0 = osm_node_get_physp_ptr(p_node, 0); if (port_num == 0) { /* CAs don't have a port 0, and for switch port 0, we need to check if this is enhanced or base port 0. For base port 0 the following parameters are not valid (IBA 1.2.1 p.830 table 146). */ if (!p_node->sw) { OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4201: " "Cannot find switch by guid: 0x%" PRIx64 "\n", cl_ntoh64(p_node->node_info.node_guid)); goto Exit; } if (ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info) == FALSE) { /* Even for base port 0 we might have to set smsl (if we are using lash routing) */ smsl = link_mgr_get_smsl(sm, p_physp); if (smsl != ib_port_info_get_master_smsl(p_old_pi)) { send_set = TRUE; OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Setting SMSL to %d on port 0 GUID 0x%016" PRIx64 "\n", smsl, cl_ntoh64(osm_physp_get_port_guid (p_physp))); /* Enter if base lid and master_sm_lid didn't change */ } else if (send_set == FALSE) { /* This means the switch doesn't support enhanced port 0 and we don't need to change SMSL. Can skip it. */ OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Skipping port 0, GUID 0x%016" PRIx64 "\n", cl_ntoh64(osm_physp_get_port_guid (p_physp))); goto Exit; } } else esp0 = TRUE; } /* Should never write back a value that is bigger then 3 in the PortPhysicalState field - so can not simply copy! Actually we want to write there: port physical state - no change, link down default state = polling port state - as requested. */ p_pi->state_info2 = 0x02; ib_port_info_set_port_state(p_pi, port_state); /* Determine ports' M_Key and CapabilityMask2 */ if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && osm_physp_get_port_num(p_physp) != 0) { m_key = ib_port_info_get_m_key(&physp0->port_info); capability_mask2 = physp0->port_info.capability_mask2; } else { m_key = ib_port_info_get_m_key(p_pi); capability_mask2 = p_pi->capability_mask2; } /* Check whether this is base port0 smsl handling only */ if (port_num == 0 && esp0 == FALSE) { ib_port_info_set_master_smsl(p_pi, smsl); goto Send; } /* PAST THIS POINT WE ARE HANDLING EITHER A NON PORT 0 OR ENHANCED PORT 0 */ if (ib_port_info_get_link_down_def_state(p_pi) != ib_port_info_get_link_down_def_state(p_old_pi)) send_set = TRUE; /* didn't get PortInfo before */ if (!ib_port_info_get_port_state(p_old_pi)) send_set = TRUE; /* we only change port fields if we do not change state */ if (port_state == IB_LINK_NO_CHANGE) { /* The following fields are relevant only for CA port, router, or Enh. SP0 */ if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH || port_num == 0) { p_pi->m_key = sm->p_subn->opt.m_key; if (memcmp(&p_pi->m_key, &p_old_pi->m_key, sizeof(p_pi->m_key))) { update_mkey = TRUE; send_set = TRUE; } p_pi->subnet_prefix = sm->p_subn->opt.subnet_prefix; if (memcmp(&p_pi->subnet_prefix, &p_old_pi->subnet_prefix, sizeof(p_pi->subnet_prefix))) send_set = TRUE; smsl = link_mgr_get_smsl(sm, p_physp); if (smsl != ib_port_info_get_master_smsl(p_old_pi)) { ib_port_info_set_master_smsl(p_pi, smsl); OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Setting SMSL to %d on GUID 0x%016" PRIx64 ", port %d\n", smsl, cl_ntoh64(osm_physp_get_port_guid (p_physp)), port_num); send_set = TRUE; } p_pi->m_key_lease_period = sm->p_subn->opt.m_key_lease_period; if (memcmp(&p_pi->m_key_lease_period, &p_old_pi->m_key_lease_period, sizeof(p_pi->m_key_lease_period))) send_set = TRUE; p_pi->mkey_lmc = 0; ib_port_info_set_mpb(p_pi, sm->p_subn->opt.m_key_protect_bits); if (esp0 == FALSE || sm->p_subn->opt.lmc_esp0) ib_port_info_set_lmc(p_pi, sm->p_subn->opt.lmc); if (ib_port_info_get_lmc(p_old_pi) != ib_port_info_get_lmc(p_pi) || ib_port_info_get_mpb(p_old_pi) != ib_port_info_get_mpb(p_pi)) send_set = TRUE; ib_port_info_set_timeout(p_pi, sm->p_subn->opt. subnet_timeout); if (ib_port_info_get_timeout(p_pi) != ib_port_info_get_timeout(p_old_pi)) send_set = TRUE; } /* Several timeout mechanisms: */ p_remote_physp = osm_physp_get_remote(p_physp); if (port_num != 0 && p_remote_physp) { if (osm_node_get_type(osm_physp_get_node_ptr(p_physp)) == IB_NODE_TYPE_ROUTER) { ib_port_info_set_hoq_lifetime(p_pi, sm->p_subn-> opt. leaf_head_of_queue_lifetime); } else if (osm_node_get_type (osm_physp_get_node_ptr(p_physp)) == IB_NODE_TYPE_SWITCH) { /* Is remote end CA or router (a leaf port) ? */ if (osm_node_get_type (osm_physp_get_node_ptr(p_remote_physp)) != IB_NODE_TYPE_SWITCH) { ib_port_info_set_hoq_lifetime(p_pi, sm-> p_subn-> opt. leaf_head_of_queue_lifetime); ib_port_info_set_vl_stall_count(p_pi, sm-> p_subn-> opt. leaf_vl_stall_count); } else { ib_port_info_set_hoq_lifetime(p_pi, sm-> p_subn-> opt. head_of_queue_lifetime); ib_port_info_set_vl_stall_count(p_pi, sm-> p_subn-> opt. vl_stall_count); } } if (ib_port_info_get_hoq_lifetime(p_pi) != ib_port_info_get_hoq_lifetime(p_old_pi) || ib_port_info_get_vl_stall_count(p_pi) != ib_port_info_get_vl_stall_count(p_old_pi)) send_set = TRUE; } ib_port_info_set_phy_and_overrun_err_thd(p_pi, sm->p_subn->opt. local_phy_errors_threshold, sm->p_subn->opt. overrun_errors_threshold); if (p_pi->error_threshold != p_old_pi->error_threshold) send_set = TRUE; /* Set the easy common parameters for all port types, then determine the neighbor MTU. */ if (sm->p_subn->opt.force_link_width && (sm->p_subn->opt.force_link_width < IB_LINK_WIDTH_ACTIVE_2X || (capability_mask2 & IB_PORT_CAP2_IS_LINK_WIDTH_2X_SUPPORTED)) && (sm->p_subn->opt.force_link_width != IB_LINK_WIDTH_SET_LWS || p_pi->link_width_enabled != p_pi->link_width_supported)) { p_pi->link_width_enabled = sm->p_subn->opt.force_link_width; if (p_pi->link_width_enabled != p_old_pi->link_width_enabled) send_set = TRUE; } if (sm->p_subn->opt.force_link_speed && (sm->p_subn->opt.force_link_speed != IB_LINK_SPEED_SET_LSS || ib_port_info_get_link_speed_enabled(p_pi) != ib_port_info_get_link_speed_sup(p_pi))) { ib_port_info_set_link_speed_enabled(p_pi, sm->p_subn->opt. force_link_speed); if (p_pi->link_speed != p_old_pi->link_speed) send_set = TRUE; } if (sm->p_subn->opt.fdr10 && p_physp->ext_port_info.link_speed_supported & FDR10) { if (sm->p_subn->opt.fdr10 == 1) { /* enable */ if (!(p_physp->ext_port_info.link_speed_enabled & FDR10)) fdr10_change = 1; } else { /* disable */ if (p_physp->ext_port_info.link_speed_enabled & FDR10) fdr10_change = 1; } if (fdr10_change) { p_old_epi = &p_physp->ext_port_info; memcpy(payload2, p_old_epi, sizeof(ib_mlnx_ext_port_info_t)); p_epi->state_change_enable = 0x01; if (sm->p_subn->opt.fdr10 == 1) p_epi->link_speed_enabled = FDR10; else p_epi->link_speed_enabled = 0; send_set2 = TRUE; } } if (osm_node_get_type(p_physp->p_node) == IB_NODE_TYPE_SWITCH && osm_physp_get_port_num(p_physp) != 0) { cap_mask = physp0->port_info.capability_mask; } else cap_mask = p_pi->capability_mask; if (cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS) issue_ext = 1; /* Do peer ports support extended link speeds ? */ if (port_num != 0 && p_remote_physp) { osm_physp_t *rphysp0; ib_net32_t rem_cap_mask; if (osm_node_get_type(p_remote_physp->p_node) == IB_NODE_TYPE_SWITCH) { rphysp0 = osm_node_get_physp_ptr(p_remote_physp->p_node, 0); rem_cap_mask = rphysp0->port_info.capability_mask; } else rem_cap_mask = p_remote_physp->port_info.capability_mask; if (cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS && rem_cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS) { if (sm->p_subn->opt.force_link_speed_ext && (sm->p_subn->opt.force_link_speed_ext != IB_LINK_SPEED_EXT_SET_LSES || p_pi->link_speed_ext_enabled != ib_port_info_get_link_speed_ext_sup(p_pi))) { p_pi->link_speed_ext_enabled = sm->p_subn->opt.force_link_speed_ext; if (p_pi->link_speed_ext_enabled != p_old_pi->link_speed_ext_enabled) send_set = TRUE; } } } /* calc new op_vls and mtu */ op_vls = osm_physp_calc_link_op_vls(sm->p_log, sm->p_subn, p_physp, ib_port_info_get_op_vls(p_old_pi)); mtu = osm_physp_calc_link_mtu(sm->p_log, p_physp, ib_port_info_get_neighbor_mtu(p_old_pi)); ib_port_info_set_neighbor_mtu(p_pi, mtu); if (ib_port_info_get_neighbor_mtu(p_pi) != ib_port_info_get_neighbor_mtu(p_old_pi)) send_set = TRUE; ib_port_info_set_op_vls(p_pi, op_vls); if (ib_port_info_get_op_vls(p_pi) != ib_port_info_get_op_vls(p_old_pi)) send_set = TRUE; /* provide the vl_high_limit from the qos mgr */ if (sm->p_subn->opt.qos && p_physp->vl_high_limit != p_old_pi->vl_high_limit) { send_set = TRUE; p_pi->vl_high_limit = p_physp->vl_high_limit; } } Send: context.pi_context.active_transition = FALSE; if (port_state != IB_LINK_NO_CHANGE && port_state != ib_port_info_get_port_state(p_old_pi)) { send_set = TRUE; if (port_state == IB_LINK_ACTIVE) context.pi_context.active_transition = TRUE; } context.pi_context.node_guid = osm_node_get_node_guid(p_node); context.pi_context.port_guid = osm_physp_get_port_guid(p_physp); context.pi_context.set_method = TRUE; context.pi_context.light_sweep = FALSE; context.pi_context.client_rereg = FALSE; /* We need to send the PortInfoSet request with the new sm_lid in the following cases: 1. There is a change in the values (send_set == TRUE) 2. This is a switch external port (so it wasn't handled yet by osm_lid_mgr) and first_time_master_sweep flag on the subnet is TRUE, which means the SM just became master, and it then needs to send at PortInfoSet to every port. */ if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && port_num && sm->p_subn->first_time_master_sweep == TRUE) send_set = TRUE; if (!send_set) goto SEND_EPI; attr_mod = cl_hton32(port_num); if (issue_ext) attr_mod |= cl_hton32(1 << 31); /* AM SMSupportExtendedSpeeds */ status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_physp), payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO, attr_mod, FALSE, m_key, 0, CL_DISP_MSGID_NONE, &context); if (status) ret = -1; /* If we sent a new mkey above, update our guid2mkey map now, on the assumption that the SubnSet succeeds */ if (update_mkey) osm_db_guid2mkey_set(sm->p_subn->p_g2m, cl_ntoh64(p_physp->port_guid), cl_ntoh64(p_pi->m_key)); SEND_EPI: if (send_set2) { status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_physp), payload2, sizeof(payload2), IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO, cl_hton32(port_num), FALSE, m_key, 0, CL_DISP_MSGID_NONE, &context); if (status) ret = -1; } Exit: OSM_LOG_EXIT(sm->p_log); return ret; } static int link_mgr_process_node(osm_sm_t * sm, IN osm_node_t * p_node, IN const uint8_t link_state) { osm_physp_t *p_physp, *p_physp_remote; uint32_t i, num_physp; int ret = 0; uint8_t current_state; OSM_LOG_ENTER(sm->p_log); OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Node 0x%" PRIx64 " going to %s\n", cl_ntoh64(osm_node_get_node_guid(p_node)), ib_get_port_state_str(link_state)); /* Set the PortInfo for every Physical Port associated with this Port. Start iterating with port 1, since the linkstate is not applicable to the management port on switches. */ num_physp = osm_node_get_num_physp(p_node); for (i = 0; i < num_physp; i++) { /* Don't bother doing anything if this Physical Port is not valid. or if the state of the port is already better then the specified state. */ p_physp = osm_node_get_physp_ptr(p_node, (uint8_t) i); if (!p_physp) continue; current_state = osm_physp_get_port_state(p_physp); if (current_state == IB_LINK_DOWN) continue; /* Set PortState to DOWN in case Remote Physical Port is unreachable. We have to check this for all ports, except port zero. */ p_physp_remote = osm_physp_get_remote(p_physp); if ((i != 0) && (!p_physp_remote || !osm_physp_is_valid(p_physp_remote))) { if (current_state != IB_LINK_INIT) link_mgr_set_physp_pi(sm, p_physp, IB_LINK_DOWN); continue; } /* Normally we only send state update if state is lower then required state. However, we need to send update if no state change required. */ if (link_state != IB_LINK_NO_CHANGE && link_state <= current_state) OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Physical port %u already %s. Skipping\n", p_physp->port_num, ib_get_port_state_str(current_state)); else if (link_mgr_set_physp_pi(sm, p_physp, link_state)) ret = -1; } OSM_LOG_EXIT(sm->p_log); return ret; } int osm_link_mgr_process(osm_sm_t * sm, IN const uint8_t link_state) { cl_qmap_t *p_node_guid_tbl; osm_node_t *p_node; int ret = 0; OSM_LOG_ENTER(sm->p_log); p_node_guid_tbl = &sm->p_subn->node_guid_tbl; CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); for (p_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl); p_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl); p_node = (osm_node_t *) cl_qmap_next(&p_node->map_item)) if (link_mgr_process_node(sm, p_node, link_state)) ret = -1; CL_PLOCK_RELEASE(sm->p_lock); OSM_LOG_EXIT(sm->p_log); return ret; }