Blame opensm/osm_switch.c

Packit 13e616
/*
Packit 13e616
 * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
Packit 13e616
 * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved.
Packit 13e616
 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
Packit 13e616
 * Copyright (c) 2009 HNR Consulting. All rights reserved.
Packit 13e616
 *
Packit 13e616
 * This software is available to you under a choice of one of two
Packit 13e616
 * licenses.  You may choose to be licensed under the terms of the GNU
Packit 13e616
 * General Public License (GPL) Version 2, available from the file
Packit 13e616
 * COPYING in the main directory of this source tree, or the
Packit 13e616
 * OpenIB.org BSD license below:
Packit 13e616
 *
Packit 13e616
 *     Redistribution and use in source and binary forms, with or
Packit 13e616
 *     without modification, are permitted provided that the following
Packit 13e616
 *     conditions are met:
Packit 13e616
 *
Packit 13e616
 *      - Redistributions of source code must retain the above
Packit 13e616
 *        copyright notice, this list of conditions and the following
Packit 13e616
 *        disclaimer.
Packit 13e616
 *
Packit 13e616
 *      - Redistributions in binary form must reproduce the above
Packit 13e616
 *        copyright notice, this list of conditions and the following
Packit 13e616
 *        disclaimer in the documentation and/or other materials
Packit 13e616
 *        provided with the distribution.
Packit 13e616
 *
Packit 13e616
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Packit 13e616
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Packit 13e616
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Packit 13e616
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
Packit 13e616
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
Packit 13e616
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
Packit 13e616
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
Packit 13e616
 * SOFTWARE.
Packit 13e616
 *
Packit 13e616
 */
Packit 13e616
Packit 13e616
/*
Packit 13e616
 * Abstract:
Packit 13e616
 *    Implementation of osm_switch_t.
Packit 13e616
 * This object represents an InfiniBand switch.
Packit 13e616
 * This object is part of the opensm family of objects.
Packit 13e616
 */
Packit 13e616
Packit 13e616
#if HAVE_CONFIG_H
Packit 13e616
#  include <config.h>
Packit 13e616
#endif				/* HAVE_CONFIG_H */
Packit 13e616
Packit 13e616
#include <stdlib.h>
Packit 13e616
#include <string.h>
Packit 13e616
#include <complib/cl_math.h>
Packit 13e616
#include <iba/ib_types.h>
Packit 13e616
#include <opensm/osm_file_ids.h>
Packit 13e616
#define FILE_ID OSM_FILE_SWITCH_C
Packit 13e616
#include <opensm/osm_switch.h>
Packit 13e616
Packit 13e616
/*
 * One candidate egress port collected by osm_switch_recommend_path()
 * while it scans the equal-hop ports; the array of these is walked a
 * second time when port shifting is requested.
 */
struct switch_port_path {
	/* egress port number on the switch */
	uint8_t port_num;
	/* path count read from the port profile for this port */
	uint32_t path_count;
	/* non-zero when the remote system GUID was already tracked */
	int found_sys_guid;
	/* non-zero when the remote node GUID was already tracked */
	int found_node_guid;
	/* forwarded_to of the matching tracked remote node, 0 if none */
	uint32_t forwarded_to;
};
Packit 13e616
Packit 13e616
/*
 * Record that lid_ho is reachable through port_num in num_hops hops.
 *
 * The per-LID row is allocated on first use and filled with OSM_NO_PATH.
 * Entry 0 of the row is lowered whenever a smaller hop count is stored,
 * so it never exceeds any value written through this function.
 *
 * Returns 0 on success, -1 when lid_ho is 0 or above max_lid_ho, when
 * port_num is not below num_ports, or when the row cannot be allocated.
 */
cl_status_t osm_switch_set_hops(IN osm_switch_t * p_sw, IN uint16_t lid_ho,
				IN uint8_t port_num, IN uint8_t num_hops)
{
	uint8_t *row;

	if (lid_ho == 0 || lid_ho > p_sw->max_lid_ho ||
	    port_num >= p_sw->num_ports)
		return -1;

	row = p_sw->hops[lid_ho];
	if (row == NULL) {
		row = malloc(p_sw->num_ports);
		if (row == NULL)
			return -1;
		memset(row, OSM_NO_PATH, p_sw->num_ports);
		p_sw->hops[lid_ho] = row;
	}

	row[port_num] = num_hops;
	if (num_hops < row[0])
		row[0] = num_hops;

	return 0;
}
Packit 13e616
Packit 13e616
/*
 * Release a switch object and everything it owns, then clear the
 * caller's pointer.
 *
 * pp_sw must point to a valid pointer variable; *pp_sw may be NULL, in
 * which case nothing is done.
 */
void osm_switch_delete(IN OUT osm_switch_t ** pp_sw)
{
	osm_switch_t *p_sw = *pp_sw;
	unsigned i;

	/* Deleting nothing is a no-op, mirroring free(NULL) */
	if (!p_sw)
		return;

	osm_mcast_tbl_destroy(&p_sw->mcast_tbl);

	/* free() accepts NULL, so the members need no individual guards */
	free(p_sw->p_prof);
	free(p_sw->search_ordering_ports);
	free(p_sw->lft);
	free(p_sw->new_lft);

	/* hops is an array of num_hops separately allocated rows */
	if (p_sw->hops) {
		for (i = 0; i < p_sw->num_hops; i++)
			free(p_sw->hops[i]);
		free(p_sw->hops);
	}

	free(p_sw);
	*pp_sw = NULL;
}
Packit 13e616
Packit 13e616
/*
 * Allocate and initialise a switch object for p_node from the SwitchInfo
 * payload carried by p_madw.
 *
 * Returns the new object, or NULL when the SwitchInfo reports a zero
 * lin_cap or when an allocation fails.
 */
osm_switch_t *osm_switch_new(IN osm_node_t * p_node,
			     IN const osm_madw_t * p_madw)
{
	osm_switch_t *p_new_sw;
	ib_switch_info_t *p_sw_info;
	ib_smp_t *p_smp;
	uint8_t port_cnt;
	uint32_t idx;

	CL_ASSERT(p_madw);
	CL_ASSERT(p_node);

	p_smp = osm_madw_get_smp_ptr(p_madw);
	p_sw_info = ib_smp_get_payload_ptr(p_smp);
	port_cnt = osm_node_get_num_physp(p_node);

	CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_SWITCH_INFO);

	/* The switch doesn't support LFT */
	if (!p_sw_info->lin_cap)
		return NULL;

	/* Zero-filled so that every owned pointer starts out NULL */
	p_new_sw = calloc(1, sizeof(*p_new_sw));
	if (p_new_sw == NULL)
		return NULL;

	p_new_sw->p_node = p_node;
	p_new_sw->switch_info = *p_sw_info;
	p_new_sw->num_ports = port_cnt;
	p_new_sw->need_update = 2;

	/* One zeroed profile slot per port */
	p_new_sw->p_prof = calloc(port_cnt, sizeof(*p_new_sw->p_prof));
	if (p_new_sw->p_prof == NULL) {
		osm_switch_delete(&p_new_sw);
		return NULL;
	}

	osm_mcast_tbl_init(&p_new_sw->mcast_tbl,
			   osm_node_get_num_physp(p_node),
			   cl_ntoh16(p_sw_info->mcast_cap));

	for (idx = 0; idx < port_cnt; idx++)
		osm_port_prof_construct(&p_new_sw->p_prof[idx]);

	return p_new_sw;
}
Packit 13e616
Packit 13e616
/*
 * Copy one IB_SMP_DATA_SIZE-byte block of the switch's new_lft table
 * into p_block.
 *
 * block_id selects the block; its first LID is
 * block_id * IB_SMP_DATA_SIZE.
 *
 * Returns TRUE when the block was copied, FALSE when the block starts
 * above max_lid_ho or when new_lft has not been allocated.
 */
boolean_t osm_switch_get_lft_block(IN const osm_switch_t * p_sw,
				   IN uint16_t block_id, OUT uint8_t * p_block)
{
	/*
	 * Keep the product in 32 bits.  Stored in a uint16_t it wraps for
	 * large block ids and comes back as a low, valid-looking LID that
	 * slips past the range check below.
	 */
	uint32_t base_lid_ho = (uint32_t) block_id * IB_SMP_DATA_SIZE;

	CL_ASSERT(p_sw);
	CL_ASSERT(p_block);

	if (base_lid_ho > p_sw->max_lid_ho)
		return FALSE;

	/* Nothing to copy from if the table was never allocated */
	if (!p_sw->new_lft)
		return FALSE;

	CL_ASSERT(base_lid_ho + IB_SMP_DATA_SIZE - 1 <= IB_LID_UCAST_END_HO);
	memcpy(p_block, &(p_sw->new_lft[base_lid_ho]), IB_SMP_DATA_SIZE);
	return TRUE;
}
Packit 13e616
Packit 13e616
static struct osm_remote_node *
Packit 13e616
switch_find_guid_common(IN const osm_switch_t * p_sw,
Packit 13e616
			IN struct osm_remote_guids_count *r,
Packit 13e616
			IN uint8_t port_num, IN int find_sys_guid,
Packit 13e616
			IN int find_node_guid)
Packit 13e616
{
Packit 13e616
	struct osm_remote_node *p_remote_guid = NULL;
Packit 13e616
	osm_physp_t *p_physp;
Packit 13e616
	osm_physp_t *p_rem_physp;
Packit 13e616
	osm_node_t *p_rem_node;
Packit 13e616
	uint64_t sys_guid;
Packit 13e616
	uint64_t node_guid;
Packit 13e616
	unsigned int i;
Packit 13e616
Packit 13e616
	CL_ASSERT(p_sw);
Packit 13e616
Packit 13e616
	if (!r)
Packit 13e616
		goto out;
Packit 13e616
Packit 13e616
	p_physp = osm_node_get_physp_ptr(p_sw->p_node, port_num);
Packit 13e616
	if (!p_physp)
Packit 13e616
		goto out;
Packit 13e616
Packit 13e616
	p_rem_physp = osm_physp_get_remote(p_physp);
Packit 13e616
	p_rem_node = osm_physp_get_node_ptr(p_rem_physp);
Packit 13e616
	sys_guid = p_rem_node->node_info.sys_guid;
Packit 13e616
	node_guid = p_rem_node->node_info.node_guid;
Packit 13e616
Packit 13e616
	for (i = 0; i < r->count; i++) {
Packit 13e616
		if ((!find_sys_guid
Packit 13e616
		     || r->guids[i].node->node_info.sys_guid == sys_guid)
Packit 13e616
		    && (!find_node_guid
Packit 13e616
			|| r->guids[i].node->node_info.node_guid == node_guid)) {
Packit 13e616
			p_remote_guid = &r->guids[i];
Packit 13e616
			break;
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
out:
Packit 13e616
	return p_remote_guid;
Packit 13e616
}
Packit 13e616
Packit 13e616
static struct osm_remote_node *
Packit 13e616
switch_find_sys_guid_count(IN const osm_switch_t * p_sw,
Packit 13e616
			   IN struct osm_remote_guids_count *r,
Packit 13e616
			   IN uint8_t port_num)
Packit 13e616
{
Packit 13e616
	return switch_find_guid_common(p_sw, r, port_num, 1, 0);
Packit 13e616
}
Packit 13e616
Packit 13e616
static struct osm_remote_node *
Packit 13e616
switch_find_node_guid_count(IN const osm_switch_t * p_sw,
Packit 13e616
			    IN struct osm_remote_guids_count *r,
Packit 13e616
			    IN uint8_t port_num)
Packit 13e616
{
Packit 13e616
	return switch_find_guid_common(p_sw, r, port_num, 0, 1);
Packit 13e616
}
Packit 13e616
Packit 13e616
/*
 * Recommend the egress port of p_sw for unicast traffic towards lid_ho.
 *
 * p_sw            - switch whose port is being chosen.
 * p_port          - destination port; when routing_for_lmc is set its
 *                   priv field is read as a struct osm_remote_guids_count.
 * lid_ho          - destination LID in host order, used to look up an
 *                   existing forwarding entry.
 * start_from      - index at which the port scan starts; the scan wraps
 *                   modulo the number of ports.
 * ignore_existing - when FALSE, an existing forwarding entry is reused
 *                   if its port is healthy, linked and on a least-hop
 *                   path.
 * routing_for_lmc - prefer ports leading to a system/node not already
 *                   tracked for this destination.
 * dor             - only consider ports leading to the same remote node
 *                   as the first eligible port found.
 * port_shifting   - re-run the selection over the eligible ports in a
 *                   rotated order.
 * scatter_ports   - when non-zero, pick at random among the ports tied
 *                   for the lowest path count.
 * lft_enum        - forwarding table selector passed to
 *                   osm_switch_get_port_by_lid().
 *
 * Returns 0 when the destination is this switch, the attached port
 * number when the destination hangs directly off this switch, the
 * selected port otherwise, or OSM_NO_PATH when no usable port exists.
 */
uint8_t osm_switch_recommend_path(IN const osm_switch_t * p_sw,
				  IN osm_port_t * p_port, IN uint16_t lid_ho,
				  IN unsigned start_from,
				  IN boolean_t ignore_existing,
				  IN boolean_t routing_for_lmc,
				  IN boolean_t dor,
				  IN boolean_t port_shifting,
				  IN uint32_t scatter_ports,
				  IN osm_lft_type_enum lft_enum)
{
	/*
	   We support an enhanced LMC aware routing mode:
	   In the case of LMC > 0, we can track the remote side
	   system and node for all of the lids of the target
	   and try and avoid routing again through the same
	   system / node.

	   If routing_for_lmc is true this procedure is expected to be
	   provided the tracking array and counter via p_port->priv;
	   a missing (NULL) array is treated as "nothing tracked yet".
	 */
	uint16_t base_lid;
	uint8_t hops;
	uint8_t least_hops;
	uint8_t port_num;
	uint8_t num_ports;
	uint32_t least_paths = 0xFFFFFFFF;
	unsigned i;
	/*
	   The following will track the least paths if the
	   route should go through a new system/node
	 */
	uint32_t least_paths_other_sys = 0xFFFFFFFF;
	uint32_t least_paths_other_nodes = 0xFFFFFFFF;
	uint32_t least_forwarded_to = 0xFFFFFFFF;
	uint32_t check_count;
	uint8_t best_port = 0;
	/*
	   These vars track the best port if it connects to
	   not used system/node.
	 */
	uint8_t best_port_other_sys = 0;
	uint8_t best_port_other_node = 0;
	boolean_t port_found = FALSE;
	osm_physp_t *p_physp;
	osm_physp_t *p_rem_physp;
	osm_node_t *p_rem_node;
	osm_node_t *p_rem_node_first = NULL;
	struct osm_remote_node *p_remote_guid = NULL;
	struct osm_remote_node null_remote_node = {NULL, 0, 0};
	struct switch_port_path port_paths[IB_NODE_NUM_PORTS_MAX];
	unsigned int port_paths_total_paths = 0;
	unsigned int port_paths_count = 0;
	uint8_t scatter_possible_ports[IB_NODE_NUM_PORTS_MAX];
	unsigned int scatter_possible_ports_count = 0;
	int found_sys_guid = 0;
	int found_node_guid = 0;

	CL_ASSERT(lid_ho > 0);

	if (p_port->p_node->sw) {
		/* The destination is a switch: route to its own base LID */
		if (p_port->p_node->sw == p_sw)
			return 0;
		base_lid = osm_port_get_base_lid(p_port);
	} else {
		/* Otherwise route to the switch the destination hangs off */
		p_physp = p_port->p_physp;
		if (!p_physp || !p_physp->p_remote_physp ||
		    !p_physp->p_remote_physp->p_node->sw)
			return OSM_NO_PATH;

		if (p_physp->p_remote_physp->p_node->sw == p_sw)
			return p_physp->p_remote_physp->port_num;
		base_lid =
		    osm_node_get_base_lid(p_physp->p_remote_physp->p_node, 0);
	}
	base_lid = cl_ntoh16(base_lid);

	num_ports = p_sw->num_ports;

	least_hops = osm_switch_get_least_hops(p_sw, base_lid);
	if (least_hops == OSM_NO_PATH)
		return OSM_NO_PATH;

	/*
	   First, inquire with the forwarding table for an existing
	   route.  If one is found, honor it only when all of these hold:
	   1. the ignore existing flag is not set.
	   2. the physical port is a valid one and healthy
	   3. the physical port has a remote port (the link is up)
	   4. the port has min-hops to the target (avoid loops)
	 */
	if (!ignore_existing) {
		port_num = osm_switch_get_port_by_lid(p_sw, lid_ho, lft_enum);

		if (port_num != OSM_NO_PATH) {
			CL_ASSERT(port_num < num_ports);

			p_physp =
			    osm_node_get_physp_ptr(p_sw->p_node, port_num);
			/*
			   Don't be too trusting of the current forwarding table!
			   Verify that the port number is legal and that the
			   LID is reachable through this port.
			 */
			if (p_physp && osm_physp_is_healthy(p_physp) &&
			    osm_physp_get_remote(p_physp)) {
				hops =
				    osm_switch_get_hop_count(p_sw, base_lid,
							     port_num);
				/*
				   If we aren't using pre-defined user routes
				   function, then we need to make sure that the
				   current path is the minimum one. In case of
				   having such a user function - this check will
				   not be done, and the old routing will be used.
				   Note: This means that it is the user's job to
				   clean all data in the forwarding tables that
				   he wants to be overridden by the minimum
				   hop function.
				 */
				if (hops == least_hops)
					return port_num;
			}
		}
	}

	/*
	   This algorithm selects a port based on a static load balanced
	   selection across equal hop-count ports.
	   There is lots of room for improved sophistication here,
	   possibly guided by user configuration info.
	 */

	/*
	   OpenSM routing is "local" - not considering a full lid to lid
	   path. As such we can not guarantee a path will not loop if we
	   do not always follow least hops.
	   So we must abort if not least hops.
	 */

	/* port number starts with one and num_ports is 1 + num phys ports */
	for (i = start_from; i < start_from + num_ports; i++) {
		port_num = osm_switch_get_dimn_port(p_sw, i % num_ports);
		if (!port_num ||
		    osm_switch_get_hop_count(p_sw, base_lid, port_num) !=
		    least_hops)
			continue;

		/* let us make sure it is not down or unhealthy */
		p_physp = osm_node_get_physp_ptr(p_sw->p_node, port_num);
		if (!p_physp || !osm_physp_is_healthy(p_physp) ||
		    /*
		       we require all - non sma ports to be linked
		       to be routed through
		     */
		    !osm_physp_get_remote(p_physp))
			continue;

		/*
		   We located a least-hop port, possibly one of many.
		   For this port, check the running total count of
		   the number of paths through this port.  Select
		   the port routing the least number of paths.
		 */
		check_count =
		    osm_port_prof_path_count_get(&p_sw->p_prof[port_num]);


		if (dor) {
			/* Get the Remote Node */
			p_rem_physp = osm_physp_get_remote(p_physp);
			p_rem_node = osm_physp_get_node_ptr(p_rem_physp);
			/* use the first dimension, but spread traffic
			 * out among the group of ports representing
			 * that dimension */
			if (!p_rem_node_first)
				p_rem_node_first = p_rem_node;
			else if (p_rem_node != p_rem_node_first)
				continue;
			if (routing_for_lmc) {
				struct osm_remote_guids_count *r = p_port->priv;
				uint8_t rem_port = osm_physp_get_port_num(p_rem_physp);
				/*
				 * The non-DOR branch tolerates a missing
				 * tracking array (the lookup helper returns
				 * NULL for it); do the same here instead of
				 * dereferencing a NULL pointer.
				 */
				unsigned int tracked = r ? r->count : 0;
				unsigned int j;

				for (j = 0; j < tracked; j++) {
					p_remote_guid = &r->guids[j];
					if ((p_remote_guid->node == p_rem_node)
					    && (p_remote_guid->port == rem_port))
						break;
				}
				/* no tracked entry for this remote node/port */
				if (j == tracked)
					p_remote_guid = &null_remote_node;
			}
		/*
		   Advanced LMC routing requires tracking of the
		   best port by the node connected to the other side of
		   it.
		 */
		} else if (routing_for_lmc) {
			/* Is the sys guid already used ? */
			p_remote_guid = switch_find_sys_guid_count(p_sw,
								   p_port->priv,
								   port_num);

			/* If not update the least hops for this case */
			if (!p_remote_guid) {
				if (check_count < least_paths_other_sys) {
					least_paths_other_sys = check_count;
					best_port_other_sys = port_num;
					least_forwarded_to = 0;
				}
				found_sys_guid = 0;
			} else {	/* same sys found - try node */


				/* Else is the node guid already used ? */
				p_remote_guid = switch_find_node_guid_count(p_sw,
									    p_port->priv,
									    port_num);

				/* If not update the least hops for this case */
				if (!p_remote_guid
				    && check_count < least_paths_other_nodes) {
					least_paths_other_nodes = check_count;
					best_port_other_node = port_num;
					least_forwarded_to = 0;
				}
				/* else prior sys and node guid already used */

				if (!p_remote_guid)
					found_node_guid = 0;
				else
					found_node_guid = 1;
				found_sys_guid = 1;
			}	/* same sys found */
		}

		/* Remember this candidate for the optional port-shifting pass */
		port_paths[port_paths_count].port_num = port_num;
		port_paths[port_paths_count].path_count = check_count;
		if (routing_for_lmc) {
			port_paths[port_paths_count].found_sys_guid = found_sys_guid;
			port_paths[port_paths_count].found_node_guid = found_node_guid;
		}
		if (routing_for_lmc && p_remote_guid)
			port_paths[port_paths_count].forwarded_to = p_remote_guid->forwarded_to;
		else
			port_paths[port_paths_count].forwarded_to = 0;
		port_paths_total_paths += check_count;
		port_paths_count++;

		/* routing for LMC mode */
		/*
		   the count is min but also lower then the max subscribed
		 */
		if (check_count < least_paths) {
			port_found = TRUE;
			best_port = port_num;
			least_paths = check_count;
			/* a new minimum restarts the list of tied ports */
			scatter_possible_ports_count = 0;
			scatter_possible_ports[scatter_possible_ports_count++] = port_num;
			if (routing_for_lmc
			    && p_remote_guid
			    && p_remote_guid->forwarded_to < least_forwarded_to)
				least_forwarded_to = p_remote_guid->forwarded_to;
		} else if (scatter_ports
			   && check_count == least_paths) {
			scatter_possible_ports[scatter_possible_ports_count++] = port_num;
		} else if (routing_for_lmc
			   && p_remote_guid
			   && check_count == least_paths
			   && p_remote_guid->forwarded_to < least_forwarded_to) {
			least_forwarded_to = p_remote_guid->forwarded_to;
			best_port = port_num;
		}
	}

	if (port_found == FALSE)
		return OSM_NO_PATH;

	if (port_shifting && port_paths_count) {
		/* In the port_paths[] array, we now have all the ports that we
		 * can route out of.  Using some shifting math below, possibly
		 * select a different one so that lids won't align in LFTs
		 *
		 * If lmc > 0, we need to loop through these ports to find the
		 * least_forwarded_to port, best_port_other_sys, and
		 * best_port_other_node just like before but through the different
		 * ordering.
		 */

		least_paths = 0xFFFFFFFF;
		least_paths_other_sys = 0xFFFFFFFF;
		least_paths_other_nodes = 0xFFFFFFFF;
		least_forwarded_to = 0xFFFFFFFF;
		best_port = 0;
		best_port_other_sys = 0;
		best_port_other_node = 0;

		for (i = 0; i < port_paths_count; i++) {
			unsigned int idx;

			/* rotate the starting point by the average path count */
			idx = (port_paths_total_paths/port_paths_count + i) % port_paths_count;

			if (routing_for_lmc) {
				if (!port_paths[idx].found_sys_guid
				    && port_paths[idx].path_count < least_paths_other_sys) {
					least_paths_other_sys = port_paths[idx].path_count;
					best_port_other_sys = port_paths[idx].port_num;
					least_forwarded_to = 0;
				}
				else if (!port_paths[idx].found_node_guid
					 && port_paths[idx].path_count < least_paths_other_nodes) {
					least_paths_other_nodes = port_paths[idx].path_count;
					best_port_other_node = port_paths[idx].port_num;
					least_forwarded_to = 0;
				}
			}

			if (port_paths[idx].path_count < least_paths) {
				best_port = port_paths[idx].port_num;
				least_paths = port_paths[idx].path_count;
				if (routing_for_lmc
				    && (port_paths[idx].found_sys_guid
					|| port_paths[idx].found_node_guid)
				    && port_paths[idx].forwarded_to < least_forwarded_to)
					least_forwarded_to = port_paths[idx].forwarded_to;
			}
			else if (routing_for_lmc
				 && (port_paths[idx].found_sys_guid
				     || port_paths[idx].found_node_guid)
				 && port_paths[idx].path_count == least_paths
				 && port_paths[idx].forwarded_to < least_forwarded_to) {
				least_forwarded_to = port_paths[idx].forwarded_to;
				best_port = port_paths[idx].port_num;
			}

		}
	}

	/*
	   if we are in enhanced routing mode and the best port is not
	   the local port 0
	 */
	if (routing_for_lmc && best_port && !scatter_ports) {
		/* Select the least hop port of the non used sys first */
		if (best_port_other_sys)
			best_port = best_port_other_sys;
		else if (best_port_other_node)
			best_port = best_port_other_node;
	} else if (scatter_ports) {
		/*
		 * There is some danger that this random could "rebalance" the routes
		 * every time, to combat this there is a global srandom that
		 * occurs at the start of every sweep.
		 */
		unsigned int idx = random() % scatter_possible_ports_count;
		best_port = scatter_possible_ports[idx];
	}
	return best_port;
}
Packit 13e616
Packit 13e616
/*
 * Reset every allocated hop row of the switch to OSM_NO_PATH.
 * Rows that were never allocated are left as NULL.
 */
void osm_switch_clear_hops(IN osm_switch_t * p_sw)
{
	unsigned lid;

	for (lid = 0; lid < p_sw->num_hops; lid++) {
		uint8_t *row = p_sw->hops[lid];

		if (row == NULL)
			continue;
		memset(row, OSM_NO_PATH, p_sw->num_ports);
	}
}
Packit 13e616
Packit 13e616
/*
 * Grow p_sw->lft so that it can hold entries for LIDs 0..lids, rounded
 * up to whole IB_SMP_DATA_SIZE-byte blocks.  Newly added bytes are set
 * to OSM_NO_PATH.  The table is never shrunk.
 *
 * Returns 0 on success, -1 when the required size does not fit in the
 * 16-bit size used here or when the reallocation fails (the existing
 * table is left untouched in both cases).
 */
static int alloc_lft(IN osm_switch_t * p_sw, uint16_t lids)
{
	uint32_t wanted;
	uint16_t lft_size;

	/* Ensure LFT is in units of LFT block size */
	wanted = ((uint32_t) lids / IB_SMP_DATA_SIZE + 1) * IB_SMP_DATA_SIZE;

	/*
	 * Computed directly in 16 bits this wraps to 0 for the largest
	 * LID values, which would report success without growing the
	 * table.  Reject such requests instead.
	 */
	if (wanted > 0xFFFF)
		return -1;
	lft_size = (uint16_t) wanted;

	if (lft_size > p_sw->lft_size) {
		uint8_t *new_lft = realloc(p_sw->lft, lft_size);
		if (!new_lft)
			return -1;
		/* only the freshly added tail needs initialising */
		memset(new_lft + p_sw->lft_size, OSM_NO_PATH,
		       lft_size - p_sw->lft_size);
		p_sw->lft = new_lft;
		p_sw->lft_size = lft_size;
	}

	return 0;
}
Packit 13e616
Packit 13e616
/*
 * Get the switch ready for a fresh routing computation covering LIDs
 * 0..max_lids: size the LFT, reset the per-port profiles, clear the
 * existing hop rows, reset new_lft to OSM_NO_PATH and make sure the hop
 * row index has max_lids + 1 slots.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
int osm_switch_prepare_path_rebuild(IN osm_switch_t * p_sw, IN uint16_t max_lids)
{
	const unsigned rows_needed = (unsigned) max_lids + 1;
	uint8_t **hop_rows;
	uint8_t *shadow_lft;
	unsigned port;

	if (alloc_lft(p_sw, max_lids) != 0)
		return -1;

	for (port = 0; port < p_sw->num_ports; port++)
		osm_port_prof_construct(&p_sw->p_prof[port]);

	osm_switch_clear_hops(p_sw);

	/* new_lft always mirrors the size of lft */
	shadow_lft = realloc(p_sw->new_lft, p_sw->lft_size);
	if (shadow_lft == NULL)
		return -1;

	memset(shadow_lft, OSM_NO_PATH, p_sw->lft_size);
	p_sw->new_lft = shadow_lft;

	if (p_sw->hops == NULL) {
		/* first use: a zeroed index, rows are allocated lazily */
		hop_rows = calloc(rows_needed, sizeof(hop_rows[0]));
		if (hop_rows == NULL)
			return -1;
		p_sw->hops = hop_rows;
		p_sw->num_hops = rows_needed;
	} else if (rows_needed > p_sw->num_hops) {
		/* grow the index and zero only the newly added slots */
		hop_rows = realloc(p_sw->hops,
				   rows_needed * sizeof(hop_rows[0]));
		if (hop_rows == NULL)
			return -1;
		memset(hop_rows + p_sw->num_hops, 0,
		       (rows_needed - p_sw->num_hops) * sizeof(hop_rows[0]));
		p_sw->hops = hop_rows;
		p_sw->num_hops = rows_needed;
	}

	p_sw->max_lid_ho = max_lids;

	return 0;
}
Packit 13e616
Packit 13e616
/*
 * Return the least number of hops from p_sw to p_port.
 *
 * For a port on a switch this is the least hop count to that switch's
 * base LID (0 when it is p_sw itself).  For any other port it is one
 * more than the least hop count to the switch the port is attached to
 * (1 when that switch is p_sw).  OSM_NO_PATH is returned when the port
 * is not attached to a switch or no path is known.
 */
uint8_t osm_switch_get_port_least_hops(IN const osm_switch_t * p_sw,
				       IN const osm_port_t * p_port)
{
	osm_physp_t *p_physp;
	osm_node_t *p_peer;
	uint16_t lid;
	uint8_t hops;

	/* Destination lives on a switch */
	if (p_port->p_node->sw) {
		if (p_port->p_node->sw == p_sw)
			return 0;
		lid = osm_node_get_base_lid(p_port->p_node, 0);
		return osm_switch_get_least_hops(p_sw, cl_ntoh16(lid));
	}

	/* Otherwise go through the switch on the other end of the link */
	p_physp = p_port->p_physp;
	if (!p_physp || !p_physp->p_remote_physp)
		return OSM_NO_PATH;

	p_peer = p_physp->p_remote_physp->p_node;
	if (!p_peer->sw)
		return OSM_NO_PATH;
	if (p_peer->sw == p_sw)
		return 1;

	lid = osm_node_get_base_lid(p_peer, 0);
	hops = osm_switch_get_least_hops(p_sw, cl_ntoh16(lid));
	if (hops == OSM_NO_PATH)
		return OSM_NO_PATH;

	/* account for the final hop from the peer switch to the port */
	return hops + 1;
}
Packit 13e616
Packit 13e616
/*
 * Recommend the egress port of p_sw for multicast group mlid_ho towards
 * p_port.
 *
 * When ignore_existing is FALSE, the first port already in the group's
 * multicast table entry that has any path to the target is returned.
 * Otherwise (or when no such port exists) the first port with the least
 * hop count is returned; no load balancing is done.
 *
 * Returns 0 when the target is this switch, the attached port number
 * when the target hangs directly off this switch, the chosen port
 * otherwise, or OSM_NO_PATH when no path exists.
 */
uint8_t osm_switch_recommend_mcast_path(IN osm_switch_t * p_sw,
					IN osm_port_t * p_port,
					IN uint16_t mlid_ho,
					IN boolean_t ignore_existing)
{
	uint16_t base_lid;
	uint8_t hops;
	uint8_t port_num;
	uint8_t num_ports;
	uint8_t least_hops;

	CL_ASSERT(mlid_ho >= IB_LID_MCAST_START_HO);

	if (p_port->p_node->sw) {
		if (p_port->p_node->sw == p_sw)
			return 0;
		base_lid = osm_port_get_base_lid(p_port);
	} else {
		osm_physp_t *p_physp = p_port->p_physp;
		if (!p_physp || !p_physp->p_remote_physp ||
		    !p_physp->p_remote_physp->p_node->sw)
			return OSM_NO_PATH;
		if (p_physp->p_remote_physp->p_node->sw == p_sw)
			return p_physp->p_remote_physp->port_num;
		base_lid =
		    osm_node_get_base_lid(p_physp->p_remote_physp->p_node, 0);
	}
	base_lid = cl_ntoh16(base_lid);
	num_ports = p_sw->num_ports;

	/*
	   If the user wants us to ignore existing multicast routes,
	   then simply return the shortest hop count path to the
	   target port.

	   Otherwise, return the first port that has a path to the target,
	   picking from the ports that are already in the multicast group.
	 */
	if (!ignore_existing) {
		for (port_num = 1; port_num < num_ports; port_num++) {
			if (!osm_mcast_tbl_is_port
			    (&p_sw->mcast_tbl, mlid_ho, port_num))
				continue;
			/*
			   Don't be too trusting of the current forwarding table!
			   Verify that the LID is reachable through this port.
			 */
			hops =
			    osm_switch_get_hop_count(p_sw, base_lid, port_num);
			if (hops != OSM_NO_PATH)
				return port_num;
		}
	}

	/*
	   Either no existing mcast paths reach this port or we are
	   ignoring existing paths.

	   Determine the best multicast path to the target.  Note that this
	   algorithm is slightly different from the one used for unicast route
	   recommendation.  In this case (multicast), we must NOT
	   perform any sort of load balancing.  We MUST take the FIRST
	   port found that has <= the lowest hop count path.  This prevents
	   more than one multicast path to the same remote switch which
	   prevents a multicast loop.  Multicast loops are bad since the same
	   multicast packet will go around and around, inevitably creating
	   a black hole that will destroy the Earth in a fiery conflagration.
	 */
	least_hops = osm_switch_get_least_hops(p_sw, base_lid);
	if (least_hops == OSM_NO_PATH)
		return OSM_NO_PATH;
	for (port_num = 1; port_num < num_ports; port_num++)
		if (osm_switch_get_hop_count(p_sw, base_lid, port_num) ==
		    least_hops)
			break;

	CL_ASSERT(port_num < num_ports);
	/*
	 * NOTE(review): in builds where CL_ASSERT is a no-op (assumed for
	 * non-debug builds - confirm against complib), falling out of the
	 * loop would hand the caller num_ports, which is not a valid port.
	 * Report "no path" instead.
	 */
	if (port_num >= num_ports)
		return OSM_NO_PATH;
	return port_num;
}