Blob Blame History Raw
/*
 * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved.
 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
 * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2009-2011 ZIH, TU Dresden, Federal Republic of Germany. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

/*
 * Abstract:
 *    Implementation of osm_link_mgr_t.
 * This file implements the Link Manager object.
 */

#if HAVE_CONFIG_H
#  include <config.h>
#endif				/* HAVE_CONFIG_H */

#include <string.h>
#include <iba/ib_types.h>
#include <complib/cl_debug.h>
#include <opensm/osm_file_ids.h>
#define FILE_ID OSM_FILE_LINK_MGR_C
#include <opensm/osm_sm.h>
#include <opensm/osm_node.h>
#include <opensm/osm_switch.h>
#include <opensm/osm_helper.h>
#include <opensm/osm_msgdef.h>
#include <opensm/osm_opensm.h>
#include <opensm/osm_db_pack.h>

static uint8_t link_mgr_get_smsl(IN osm_sm_t * sm, IN osm_physp_t * p_physp)
{
	osm_opensm_t *p_osm = sm->p_subn->p_osm;
	struct osm_routing_engine *re = p_osm->routing_engine_used;
	ib_net16_t slid;
	ib_net16_t smlid;
	uint8_t sl;

	OSM_LOG_ENTER(sm->p_log);

	if (!(re && re->path_sl &&
	      (slid = osm_physp_get_base_lid(p_physp)))) {
		/*
		 * Use default SL if routing engine does not provide a
		 * path SL lookup callback.
		 */
		OSM_LOG_EXIT(sm->p_log);
		return sm->p_subn->opt.sm_sl;
	}

	smlid = sm->p_subn->sm_base_lid;

	/* Call into routing engine to find proper SL */
	sl = re->path_sl(re->context, sm->p_subn->opt.sm_sl,
			 slid, smlid);

	OSM_LOG_EXIT(sm->p_log);
	return sl;
}

static int link_mgr_set_physp_pi(osm_sm_t * sm, IN osm_physp_t * p_physp,
				 IN uint8_t port_state)
{
	uint8_t payload[IB_SMP_DATA_SIZE], payload2[IB_SMP_DATA_SIZE];
	ib_port_info_t *p_pi = (ib_port_info_t *) payload;
	ib_mlnx_ext_port_info_t *p_epi = (ib_mlnx_ext_port_info_t *) payload2;
	const ib_port_info_t *p_old_pi;
	const ib_mlnx_ext_port_info_t *p_old_epi;
	osm_madw_context_t context;
	osm_node_t *p_node;
	ib_api_status_t status;
	uint8_t port_num, mtu, op_vls, smsl = OSM_DEFAULT_SL;
	boolean_t esp0 = FALSE, send_set = FALSE, send_set2 = FALSE;
	osm_physp_t *p_remote_physp, *physp0 = NULL;
	int issue_ext = 0, fdr10_change = 0;
	int ret = 0;
	ib_net32_t attr_mod, cap_mask;
	boolean_t update_mkey = FALSE;
	ib_net64_t m_key = 0;
	ib_net16_t capability_mask2;
	osm_port_t *p_port;

	OSM_LOG_ENTER(sm->p_log);

	p_node = osm_physp_get_node_ptr(p_physp);
	CL_ASSERT(p_node);

	p_old_pi = &p_physp->port_info;

	port_num = osm_physp_get_port_num(p_physp);

	memcpy(payload, p_old_pi, sizeof(ib_port_info_t));

	if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH ||
	    port_num == 0) {
		/* Need to make sure LID and SMLID fields in PortInfo are not 0 */
		if (!p_pi->base_lid) {
			p_port = osm_get_port_by_guid(sm->p_subn,
						      osm_physp_get_port_guid(p_physp));
			CL_ASSERT(p_port);
			p_pi->base_lid = p_port->lid;
			sm->lid_mgr.dirty = TRUE;
			send_set = TRUE;
		}

		/* we are initializing the ports with our local sm_base_lid */
		p_pi->master_sm_base_lid = sm->p_subn->sm_base_lid;
		if (p_pi->master_sm_base_lid != p_old_pi->master_sm_base_lid)
			send_set = TRUE;
	}

	if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH)
		physp0 = osm_node_get_physp_ptr(p_node, 0);

	if (port_num == 0) {
		/*
		   CAs don't have a port 0, and for switch port 0,
		   we need to check if this is enhanced or base port 0.
		   For base port 0 the following parameters are not valid
		   (IBA 1.2.1 p.830 table 146).
		 */
		if (!p_node->sw) {
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4201: "
				"Cannot find switch by guid: 0x%" PRIx64 "\n",
				cl_ntoh64(p_node->node_info.node_guid));
			goto Exit;
		}

		if (ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info)
		    == FALSE) {

			/* Even for base port 0 we might have to set smsl
			   (if we are using lash routing) */
			smsl = link_mgr_get_smsl(sm, p_physp);
			if (smsl != ib_port_info_get_master_smsl(p_old_pi)) {
				send_set = TRUE;
				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
					"Setting SMSL to %d on port 0 GUID 0x%016"
					PRIx64 "\n", smsl,
					cl_ntoh64(osm_physp_get_port_guid
						  (p_physp)));
			/* Enter if base lid and master_sm_lid didn't change */
			} else if (send_set == FALSE) {
				/* This means the switch doesn't support
				   enhanced port 0 and we don't need to
				   change SMSL. Can skip it. */
				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
					"Skipping port 0, GUID 0x%016" PRIx64
					"\n",
					cl_ntoh64(osm_physp_get_port_guid
						  (p_physp)));
				goto Exit;
			}
		} else
			esp0 = TRUE;
	}

	/*
	   Should never write back a value that is bigger then 3 in
	   the PortPhysicalState field - so can not simply copy!

	   Actually we want to write there:
	   port physical state - no change,
	   link down default state = polling
	   port state - as requested.
	 */
	p_pi->state_info2 = 0x02;
	ib_port_info_set_port_state(p_pi, port_state);

	/* Determine ports' M_Key and CapabilityMask2 */
	if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH &&
	    osm_physp_get_port_num(p_physp) != 0) {
		m_key = ib_port_info_get_m_key(&physp0->port_info);
		capability_mask2 = physp0->port_info.capability_mask2;
	} else {
		m_key = ib_port_info_get_m_key(p_pi);
		capability_mask2 = p_pi->capability_mask2;
	}

	/* Check whether this is base port0 smsl handling only */
	if (port_num == 0 && esp0 == FALSE) {
		ib_port_info_set_master_smsl(p_pi, smsl);
		goto Send;
	}

	/*
	   PAST THIS POINT WE ARE HANDLING EITHER A NON PORT 0 OR ENHANCED PORT 0
	 */

	if (ib_port_info_get_link_down_def_state(p_pi) !=
	    ib_port_info_get_link_down_def_state(p_old_pi))
		send_set = TRUE;

	/* didn't get PortInfo before */
	if (!ib_port_info_get_port_state(p_old_pi))
		send_set = TRUE;

	/* we only change port fields if we do not change state */
	if (port_state == IB_LINK_NO_CHANGE) {
		/* The following fields are relevant only for CA port, router, or Enh. SP0 */
		if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH ||
		    port_num == 0) {
			p_pi->m_key = sm->p_subn->opt.m_key;
			if (memcmp(&p_pi->m_key, &p_old_pi->m_key,
				   sizeof(p_pi->m_key))) {
				update_mkey = TRUE;
				send_set = TRUE;
			}

			p_pi->subnet_prefix = sm->p_subn->opt.subnet_prefix;
			if (memcmp(&p_pi->subnet_prefix,
				   &p_old_pi->subnet_prefix,
				   sizeof(p_pi->subnet_prefix)))
				send_set = TRUE;

			smsl = link_mgr_get_smsl(sm, p_physp);
			if (smsl != ib_port_info_get_master_smsl(p_old_pi)) {

				ib_port_info_set_master_smsl(p_pi, smsl);

				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
					"Setting SMSL to %d on GUID 0x%016"
					PRIx64 ", port %d\n", smsl,
					cl_ntoh64(osm_physp_get_port_guid
						  (p_physp)), port_num);

				send_set = TRUE;
			}

			p_pi->m_key_lease_period =
			    sm->p_subn->opt.m_key_lease_period;
			if (memcmp(&p_pi->m_key_lease_period,
				   &p_old_pi->m_key_lease_period,
				   sizeof(p_pi->m_key_lease_period)))
				send_set = TRUE;

			p_pi->mkey_lmc = 0;
			ib_port_info_set_mpb(p_pi, sm->p_subn->opt.m_key_protect_bits);
			if (esp0 == FALSE || sm->p_subn->opt.lmc_esp0)
				ib_port_info_set_lmc(p_pi, sm->p_subn->opt.lmc);
			if (ib_port_info_get_lmc(p_old_pi) !=
			    ib_port_info_get_lmc(p_pi) ||
			    ib_port_info_get_mpb(p_old_pi) !=
			    ib_port_info_get_mpb(p_pi))
				send_set = TRUE;

			ib_port_info_set_timeout(p_pi,
						 sm->p_subn->opt.
						 subnet_timeout);
			if (ib_port_info_get_timeout(p_pi) !=
			    ib_port_info_get_timeout(p_old_pi))
				send_set = TRUE;
		}

		/*
		   Several timeout mechanisms:
		 */
		p_remote_physp = osm_physp_get_remote(p_physp);
		if (port_num != 0 && p_remote_physp) {
			if (osm_node_get_type(osm_physp_get_node_ptr(p_physp))
			    == IB_NODE_TYPE_ROUTER) {
				ib_port_info_set_hoq_lifetime(p_pi,
							      sm->p_subn->
							      opt.
							      leaf_head_of_queue_lifetime);
			} else
			    if (osm_node_get_type
				(osm_physp_get_node_ptr(p_physp)) ==
				IB_NODE_TYPE_SWITCH) {
				/* Is remote end CA or router (a leaf port) ? */
				if (osm_node_get_type
				    (osm_physp_get_node_ptr(p_remote_physp)) !=
				    IB_NODE_TYPE_SWITCH) {
					ib_port_info_set_hoq_lifetime(p_pi,
								      sm->
								      p_subn->
								      opt.
								      leaf_head_of_queue_lifetime);
					ib_port_info_set_vl_stall_count(p_pi,
									sm->
									p_subn->
									opt.
									leaf_vl_stall_count);
				} else {
					ib_port_info_set_hoq_lifetime(p_pi,
								      sm->
								      p_subn->
								      opt.
								      head_of_queue_lifetime);
					ib_port_info_set_vl_stall_count(p_pi,
									sm->
									p_subn->
									opt.
									vl_stall_count);
				}
			}
			if (ib_port_info_get_hoq_lifetime(p_pi) !=
			    ib_port_info_get_hoq_lifetime(p_old_pi) ||
			    ib_port_info_get_vl_stall_count(p_pi) !=
			    ib_port_info_get_vl_stall_count(p_old_pi))
				send_set = TRUE;
		}

		ib_port_info_set_phy_and_overrun_err_thd(p_pi,
							 sm->p_subn->opt.
							 local_phy_errors_threshold,
							 sm->p_subn->opt.
							 overrun_errors_threshold);
		if (p_pi->error_threshold != p_old_pi->error_threshold)
			send_set = TRUE;

		/*
		   Set the easy common parameters for all port types,
		   then determine the neighbor MTU.
		 */
		if (sm->p_subn->opt.force_link_width &&
		    (sm->p_subn->opt.force_link_width < IB_LINK_WIDTH_ACTIVE_2X ||
		     (capability_mask2 &
		      IB_PORT_CAP2_IS_LINK_WIDTH_2X_SUPPORTED)) &&
		    (sm->p_subn->opt.force_link_width != IB_LINK_WIDTH_SET_LWS ||
		     p_pi->link_width_enabled != p_pi->link_width_supported)) {
			p_pi->link_width_enabled = sm->p_subn->opt.force_link_width;
			if (p_pi->link_width_enabled != p_old_pi->link_width_enabled)
				send_set = TRUE;
		}

		if (sm->p_subn->opt.force_link_speed &&
		    (sm->p_subn->opt.force_link_speed != IB_LINK_SPEED_SET_LSS ||
		     ib_port_info_get_link_speed_enabled(p_pi) !=
		     ib_port_info_get_link_speed_sup(p_pi))) {
			ib_port_info_set_link_speed_enabled(p_pi,
							    sm->p_subn->opt.
							    force_link_speed);
			if (p_pi->link_speed != p_old_pi->link_speed)
				send_set = TRUE;
		}

		if (sm->p_subn->opt.fdr10 &&
		    p_physp->ext_port_info.link_speed_supported & FDR10) {
			if (sm->p_subn->opt.fdr10 == 1) { /* enable */
				if (!(p_physp->ext_port_info.link_speed_enabled & FDR10))
					fdr10_change = 1;
			} else {	/* disable */
				if (p_physp->ext_port_info.link_speed_enabled & FDR10)
					fdr10_change = 1;
			}
			if (fdr10_change) {
				p_old_epi = &p_physp->ext_port_info;
				memcpy(payload2, p_old_epi,
				       sizeof(ib_mlnx_ext_port_info_t));
				p_epi->state_change_enable = 0x01;
				if (sm->p_subn->opt.fdr10 == 1)
					p_epi->link_speed_enabled = FDR10;
				else
					p_epi->link_speed_enabled = 0;
				send_set2 = TRUE;
			}
		}

		if (osm_node_get_type(p_physp->p_node) == IB_NODE_TYPE_SWITCH &&
		    osm_physp_get_port_num(p_physp) != 0) {
			cap_mask = physp0->port_info.capability_mask;
		} else
			cap_mask = p_pi->capability_mask;

		if (cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS)
			issue_ext = 1;

		/* Do peer ports support extended link speeds ? */
		if (port_num != 0 && p_remote_physp) {
			osm_physp_t *rphysp0;
			ib_net32_t rem_cap_mask;

			if (osm_node_get_type(p_remote_physp->p_node) ==
			    IB_NODE_TYPE_SWITCH) {
				rphysp0 = osm_node_get_physp_ptr(p_remote_physp->p_node, 0);
				rem_cap_mask = rphysp0->port_info.capability_mask;
			} else
				rem_cap_mask = p_remote_physp->port_info.capability_mask;

			if (cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS &&
			    rem_cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS) {
				if (sm->p_subn->opt.force_link_speed_ext &&
				    (sm->p_subn->opt.force_link_speed_ext != IB_LINK_SPEED_EXT_SET_LSES ||
				     p_pi->link_speed_ext_enabled !=
				     ib_port_info_get_link_speed_ext_sup(p_pi))) {
					p_pi->link_speed_ext_enabled = sm->p_subn->opt.force_link_speed_ext;
					if (p_pi->link_speed_ext_enabled !=
					    p_old_pi->link_speed_ext_enabled)
						send_set = TRUE;
				}
			}
		}

		/* calc new op_vls and mtu */
		op_vls =
		    osm_physp_calc_link_op_vls(sm->p_log, sm->p_subn, p_physp,
					       ib_port_info_get_op_vls(p_old_pi));
		mtu = osm_physp_calc_link_mtu(sm->p_log, p_physp,
					      ib_port_info_get_neighbor_mtu(p_old_pi));

		ib_port_info_set_neighbor_mtu(p_pi, mtu);
		if (ib_port_info_get_neighbor_mtu(p_pi) !=
		    ib_port_info_get_neighbor_mtu(p_old_pi))
			send_set = TRUE;

		ib_port_info_set_op_vls(p_pi, op_vls);
		if (ib_port_info_get_op_vls(p_pi) !=
		    ib_port_info_get_op_vls(p_old_pi))
			send_set = TRUE;

		/* provide the vl_high_limit from the qos mgr */
		if (sm->p_subn->opt.qos &&
		    p_physp->vl_high_limit != p_old_pi->vl_high_limit) {
			send_set = TRUE;
			p_pi->vl_high_limit = p_physp->vl_high_limit;
		}
	}

Send:
	context.pi_context.active_transition = FALSE;
	if (port_state != IB_LINK_NO_CHANGE &&
	    port_state != ib_port_info_get_port_state(p_old_pi)) {
		send_set = TRUE;
		if (port_state == IB_LINK_ACTIVE)
			context.pi_context.active_transition = TRUE;
	}

	context.pi_context.node_guid = osm_node_get_node_guid(p_node);
	context.pi_context.port_guid = osm_physp_get_port_guid(p_physp);
	context.pi_context.set_method = TRUE;
	context.pi_context.light_sweep = FALSE;
	context.pi_context.client_rereg = FALSE;

	/* We need to send the PortInfoSet request with the new sm_lid
	   in the following cases:
	   1. There is a change in the values (send_set == TRUE)
	   2. This is a switch external port (so it wasn't handled yet by
	   osm_lid_mgr) and first_time_master_sweep flag on the subnet is TRUE,
	   which means the SM just became master, and it then needs to send at
	   PortInfoSet to every port.
	 */
	if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && port_num
	    && sm->p_subn->first_time_master_sweep == TRUE)
		send_set = TRUE;

	if (!send_set)
		goto SEND_EPI;

	attr_mod = cl_hton32(port_num);
	if (issue_ext)
		attr_mod |= cl_hton32(1 << 31);	/* AM SMSupportExtendedSpeeds */
	status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_physp),
			     payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO,
			     attr_mod, FALSE, m_key,
			     0, CL_DISP_MSGID_NONE, &context);
	if (status)
		ret = -1;

	/* If we sent a new mkey above, update our guid2mkey map
	   now, on the assumption that the SubnSet succeeds
	 */
	if (update_mkey)
		osm_db_guid2mkey_set(sm->p_subn->p_g2m,
				     cl_ntoh64(p_physp->port_guid),
				     cl_ntoh64(p_pi->m_key));

SEND_EPI:
	if (send_set2) {
		status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_physp),
				     payload2, sizeof(payload2),
				     IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO,
				     cl_hton32(port_num), FALSE, m_key,
				     0, CL_DISP_MSGID_NONE, &context);
		if (status)
			ret = -1;
	}

Exit:
	OSM_LOG_EXIT(sm->p_log);
	return ret;
}

static int link_mgr_process_node(osm_sm_t * sm, IN osm_node_t * p_node,
				 IN const uint8_t link_state)
{
	osm_physp_t *p_physp, *p_physp_remote;
	uint32_t i, num_physp;
	int ret = 0;
	uint8_t current_state;

	OSM_LOG_ENTER(sm->p_log);

	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
		"Node 0x%" PRIx64 " going to %s\n",
		cl_ntoh64(osm_node_get_node_guid(p_node)),
		ib_get_port_state_str(link_state));

	/*
	   Set the PortInfo for every Physical Port associated
	   with this Port.  Start iterating with port 1, since the linkstate
	   is not applicable to the management port on switches.
	 */
	num_physp = osm_node_get_num_physp(p_node);
	for (i = 0; i < num_physp; i++) {
		/*
		   Don't bother doing anything if this Physical Port is not valid.
		   or if the state of the port is already better then the
		   specified state.
		 */
		p_physp = osm_node_get_physp_ptr(p_node, (uint8_t) i);
		if (!p_physp)
			continue;

		current_state = osm_physp_get_port_state(p_physp);
		if (current_state == IB_LINK_DOWN)
			continue;

		/*
		    Set PortState to DOWN in case Remote Physical Port is
		    unreachable. We have to check this for all ports, except
		    port zero.
		 */
		p_physp_remote = osm_physp_get_remote(p_physp);
		if ((i != 0) && (!p_physp_remote ||
		    !osm_physp_is_valid(p_physp_remote))) {
			if (current_state != IB_LINK_INIT)
				link_mgr_set_physp_pi(sm, p_physp, IB_LINK_DOWN);
			continue;
		}

		/*
		   Normally we only send state update if state is lower
		   then required state. However, we need to send update if
		   no state change required.
		 */
		if (link_state != IB_LINK_NO_CHANGE &&
		    link_state <= current_state)
			OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
				"Physical port %u already %s. Skipping\n",
				p_physp->port_num,
				ib_get_port_state_str(current_state));
		else if (link_mgr_set_physp_pi(sm, p_physp, link_state))
			ret = -1;
	}

	OSM_LOG_EXIT(sm->p_log);
	return ret;
}

int osm_link_mgr_process(osm_sm_t * sm, IN const uint8_t link_state)
{
	cl_qmap_t *p_node_guid_tbl;
	osm_node_t *p_node;
	int ret = 0;

	OSM_LOG_ENTER(sm->p_log);

	p_node_guid_tbl = &sm->p_subn->node_guid_tbl;

	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);

	for (p_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl);
	     p_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl);
	     p_node = (osm_node_t *) cl_qmap_next(&p_node->map_item))
		if (link_mgr_process_node(sm, p_node, link_state))
			ret = -1;

	CL_PLOCK_RELEASE(sm->p_lock);

	OSM_LOG_EXIT(sm->p_log);
	return ret;
}