Blame opensm/osm_mcast_mgr.c

Packit 13e616
/*
Packit 13e616
 * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
Packit 13e616
 * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved.
Packit 13e616
 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
Packit 13e616
 * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
Packit 13e616
 * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
Packit 13e616
 * Copyright (c) 2010 HNR Consulting. All rights reserved.
Packit 13e616
 * Copyright (C) 2012-2013 Tokyo Institute of Technology. All rights reserved.
Packit 13e616
 *
Packit 13e616
 * This software is available to you under a choice of one of two
Packit 13e616
 * licenses.  You may choose to be licensed under the terms of the GNU
Packit 13e616
 * General Public License (GPL) Version 2, available from the file
Packit 13e616
 * COPYING in the main directory of this source tree, or the
Packit 13e616
 * OpenIB.org BSD license below:
Packit 13e616
 *
Packit 13e616
 *     Redistribution and use in source and binary forms, with or
Packit 13e616
 *     without modification, are permitted provided that the following
Packit 13e616
 *     conditions are met:
Packit 13e616
 *
Packit 13e616
 *      - Redistributions of source code must retain the above
Packit 13e616
 *        copyright notice, this list of conditions and the following
Packit 13e616
 *        disclaimer.
Packit 13e616
 *
Packit 13e616
 *      - Redistributions in binary form must reproduce the above
Packit 13e616
 *        copyright notice, this list of conditions and the following
Packit 13e616
 *        disclaimer in the documentation and/or other materials
Packit 13e616
 *        provided with the distribution.
Packit 13e616
 *
Packit 13e616
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Packit 13e616
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Packit 13e616
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Packit 13e616
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
Packit 13e616
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
Packit 13e616
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
Packit 13e616
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
Packit 13e616
 * SOFTWARE.
Packit 13e616
 *
Packit 13e616
 */
Packit 13e616
Packit 13e616
/*
Packit 13e616
 * Abstract:
Packit 13e616
 *    Implementation of osm_mcast_mgr_t.
Packit 13e616
 * This file implements the Multicast Manager object.
Packit 13e616
 */
Packit 13e616
Packit 13e616
#if HAVE_CONFIG_H
Packit 13e616
#  include <config.h>
Packit 13e616
#endif				/* HAVE_CONFIG_H */
Packit 13e616
Packit 13e616
#include <stdlib.h>
Packit 13e616
#include <string.h>
Packit 13e616
#include <iba/ib_types.h>
Packit 13e616
#include <complib/cl_debug.h>
Packit 13e616
#include <opensm/osm_file_ids.h>
Packit 13e616
#define FILE_ID OSM_FILE_MCAST_MGR_C
Packit 13e616
#include <opensm/osm_opensm.h>
Packit 13e616
#include <opensm/osm_sm.h>
Packit 13e616
#include <opensm/osm_multicast.h>
Packit 13e616
#include <opensm/osm_node.h>
Packit 13e616
#include <opensm/osm_switch.h>
Packit 13e616
#include <opensm/osm_helper.h>
Packit 13e616
#include <opensm/osm_msgdef.h>
Packit 13e616
#include <opensm/osm_mcast_mgr.h>
Packit 13e616
Packit 13e616
/*
 * Allocate a multicast work object that refers to p_port.
 *
 * The object must start out zeroed: inserting it into a qlist would
 * otherwise trip the assertion in cl_qlist_insert_tail():
 * CL_ASSERT(p_list_item->p_list != p_list).
 *
 * Returns the new object, or NULL if the allocation failed.
 */
static osm_mcast_work_obj_t *mcast_work_obj_new(IN osm_port_t * p_port)
{
	osm_mcast_work_obj_t *p_new = calloc(1, sizeof(*p_new));

	if (p_new == NULL)
		return NULL;

	p_new->p_port = p_port;
	return p_new;
}
Packit 13e616
Packit 13e616
/*
 * Release a work object created by mcast_work_obj_new().
 * The object must already be unlinked from any list or map; only the
 * memory is freed here.
 */
static void mcast_work_obj_delete(IN osm_mcast_work_obj_t * p_wobj)
{
	free(p_wobj);
}
Packit 13e616
Packit 13e616
int osm_mcast_make_port_list_and_map(cl_qlist_t * list, cl_qmap_t * map,
Packit 13e616
				     osm_mgrp_box_t * mbox)
Packit 13e616
{
Packit 13e616
	cl_map_item_t *map_item;
Packit 13e616
	cl_list_item_t *list_item;
Packit 13e616
	osm_mgrp_t *mgrp;
Packit 13e616
	osm_mcm_port_t *mcm_port;
Packit 13e616
	osm_mcast_work_obj_t *wobj;
Packit 13e616
Packit 13e616
	cl_qmap_init(map);
Packit 13e616
	cl_qlist_init(list);
Packit 13e616
Packit 13e616
	for (list_item = cl_qlist_head(&mbox->mgrp_list);
Packit 13e616
	     list_item != cl_qlist_end(&mbox->mgrp_list);
Packit 13e616
	     list_item = cl_qlist_next(list_item)) {
Packit 13e616
		mgrp = cl_item_obj(list_item, mgrp, list_item);
Packit 13e616
		for (map_item = cl_qmap_head(&mgrp->mcm_port_tbl);
Packit 13e616
		     map_item != cl_qmap_end(&mgrp->mcm_port_tbl);
Packit 13e616
		     map_item = cl_qmap_next(map_item)) {
Packit 13e616
			/* Acquire the port object for this port guid, then
Packit 13e616
			   create the new worker object to build the list. */
Packit 13e616
			mcm_port = cl_item_obj(map_item, mcm_port, map_item);
Packit 13e616
			if (cl_qmap_get(map, mcm_port->port->guid) !=
Packit 13e616
			    cl_qmap_end(map))
Packit 13e616
				continue;
Packit 13e616
			wobj = mcast_work_obj_new(mcm_port->port);
Packit 13e616
			if (!wobj)
Packit 13e616
				return -1;
Packit 13e616
			cl_qlist_insert_tail(list, &wobj->list_item);
Packit 13e616
			cl_qmap_insert(map, mcm_port->port->guid,
Packit 13e616
				       &wobj->map_item);
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
	return 0;
Packit 13e616
}
Packit 13e616
Packit 13e616
/*
 * Empty a list of work objects, freeing each one as it is unlinked.
 * The list itself remains usable (and empty) afterwards.
 */
void osm_mcast_drop_port_list(cl_qlist_t * list)
{
	osm_mcast_work_obj_t *wobj;

	while (cl_qlist_count(list) != 0) {
		wobj = (osm_mcast_work_obj_t *) cl_qlist_remove_head(list);
		mcast_work_obj_delete(wobj);
	}
}
Packit 13e616
Packit 13e616
/*
 * Destroy the multicast tree attached to mbox, if there is one, and
 * leave mbox->root set to NULL.
 */
void osm_purge_mtree(osm_sm_t * sm, IN osm_mgrp_box_t * mbox)
{
	OSM_LOG_ENTER(sm->p_log);

	if (mbox->root != NULL) {
		osm_mtree_destroy(mbox->root);
		mbox->root = NULL;
	}

	OSM_LOG_EXIT(sm->p_log);
}
Packit 13e616
Packit 13e616
/*
 * Collect, in map m (keyed by node GUID, linked through mgrp_item), the
 * switches that serve the ports on port_list, and record on each switch
 * how it participates:
 *   - a port that belongs to a switch marks that switch is_mc_member;
 *   - any other port with a connected remote physp bumps num_of_mcm on
 *     the switch at the far end of the link.
 * Ports with no remote physp, or whose neighbour has no switch object,
 * contribute nothing.
 *
 * m is initialized here.  destroy_mgrp_switch_map() undoes both the map
 * and the per-switch counters.
 */
static void create_mgrp_switch_map(cl_qmap_t * m, cl_qlist_t * port_list)
{
	osm_mcast_work_obj_t *wobj;
	osm_port_t *port;
	osm_switch_t *sw;
	ib_net64_t guid;
	cl_list_item_t *i;

	cl_qmap_init(m);
	for (i = cl_qlist_head(port_list); i != cl_qlist_end(port_list);
	     i = cl_qlist_next(i)) {
		wobj = cl_item_obj(i, wobj, list_item);
		port = wobj->p_port;
		if (port->p_node->sw) {
			sw = port->p_node->sw;
			sw->is_mc_member = 1;
		} else if (port->p_physp->p_remote_physp) {
			sw = port->p_physp->p_remote_physp->p_node->sw;
			/* The neighbour is not a switch: there is nothing
			   to account this member to. */
			if (!sw)
				continue;
			sw->num_of_mcm++;
		} else
			continue;
		guid = osm_node_get_node_guid(sw->p_node);
		/* A switch is inserted once even if it serves many members. */
		if (cl_qmap_get(m, guid) == cl_qmap_end(m))
			cl_qmap_insert(m, guid, &sw->mgrp_item);
	}
}
Packit 13e616
Packit 13e616
/*
 * Undo create_mgrp_switch_map(): clear the membership bookkeeping
 * (num_of_mcm, is_mc_member) on every switch in the map, then empty
 * the map.  The switches themselves are not freed.
 */
static void destroy_mgrp_switch_map(cl_qmap_t * m)
{
	cl_map_item_t *item = cl_qmap_head(m);
	osm_switch_t *member_sw;

	while (item != cl_qmap_end(m)) {
		member_sw = cl_item_obj(item, member_sw, mgrp_item);
		member_sw->num_of_mcm = 0;
		member_sw->is_mc_member = 0;
		item = cl_qmap_next(item);
	}

	cl_qmap_remove_all(m);
}
Packit 13e616
Packit 13e616
/**********************************************************************
Packit 13e616
 Calculate the maximal "min hops" from the given switch to any
Packit 13e616
 of the group HCAs
Packit 13e616
 **********************************************************************/
Packit 13e616
#ifdef OSM_VENDOR_INTF_ANAFA
Packit 13e616
static float mcast_mgr_compute_avg_hops(osm_sm_t * sm, cl_qmap_t * m,
Packit 13e616
					const osm_switch_t * this_sw)
Packit 13e616
{
Packit 13e616
	float avg_hops = 0;
Packit 13e616
	uint32_t hops = 0;
Packit 13e616
	uint32_t num_ports = 0;
Packit 13e616
	uint16_t lid;
Packit 13e616
	uint32_t least_hops;
Packit 13e616
	cl_map_item_t *i;
Packit 13e616
	osm_switch_t *sw;
Packit 13e616
Packit 13e616
	OSM_LOG_ENTER(sm->p_log);
Packit 13e616
Packit 13e616
	for (i = cl_qmap_head(m); i != cl_qmap_end(m); i = cl_qmap_next(i)) {
Packit 13e616
		sw = cl_item_obj(i, sw, mcast_item);
Packit 13e616
		lid = cl_ntoh16(osm_node_get_base_lid(sw->p_node, 0));
Packit 13e616
		least_hops = osm_switch_get_least_hops(this_sw, lid);
Packit 13e616
		/* for all host that are MC members and attached to the switch,
Packit 13e616
		   we should add the (least_hops + 1) * number_of_such_hosts.
Packit 13e616
		   If switch itself is in the MC, we should add the least_hops only */
Packit 13e616
		hops += (least_hops + 1) * sw->num_of_mcm +
Packit 13e616
		    least_hops * sw->is_mc_member;
Packit 13e616
		num_ports += sw->num_of_mcm + sw->is_mc_member;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* We shouldn't be here if there aren't any ports in the group. */
Packit 13e616
	CL_ASSERT(num_ports);
Packit 13e616
Packit 13e616
	avg_hops = (float)(hops / num_ports);
Packit 13e616
Packit 13e616
	OSM_LOG_EXIT(sm->p_log);
Packit 13e616
	return avg_hops;
Packit 13e616
}
Packit 13e616
#else
Packit 13e616
static float mcast_mgr_compute_max_hops(osm_sm_t * sm, cl_qmap_t * m,
Packit 13e616
					const osm_switch_t * this_sw)
Packit 13e616
{
Packit 13e616
	uint32_t max_hops = 0, hops;
Packit 13e616
	uint16_t lid;
Packit 13e616
	cl_map_item_t *i;
Packit 13e616
	osm_switch_t *sw;
Packit 13e616
Packit 13e616
	OSM_LOG_ENTER(sm->p_log);
Packit 13e616
Packit 13e616
	/*
Packit 13e616
	   For each member of the multicast group, compute the
Packit 13e616
	   number of hops to its base LID.
Packit 13e616
	 */
Packit 13e616
	for (i = cl_qmap_head(m); i != cl_qmap_end(m); i = cl_qmap_next(i)) {
Packit 13e616
		sw = cl_item_obj(i, sw, mgrp_item);
Packit 13e616
		lid = cl_ntoh16(osm_node_get_base_lid(sw->p_node, 0));
Packit 13e616
		hops = osm_switch_get_least_hops(this_sw, lid);
Packit 13e616
		if (!sw->is_mc_member)
Packit 13e616
			hops += 1;
Packit 13e616
		if (hops > max_hops)
Packit 13e616
			max_hops = hops;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* Note that at this point we might get (max_hops == 0),
Packit 13e616
	   which means that there's only one member in the mcast
Packit 13e616
	   group, and it's the current switch */
Packit 13e616
Packit 13e616
	OSM_LOG_EXIT(sm->p_log);
Packit 13e616
	return (float)max_hops;
Packit 13e616
}
Packit 13e616
#endif
Packit 13e616
Packit 13e616
/**********************************************************************
Packit 13e616
   This function attempts to locate the optimal switch for the
Packit 13e616
   center of the spanning tree.  The current algorithm chooses
Packit 13e616
   a switch with the lowest average hop count to the members
Packit 13e616
   of the multicast group.
Packit 13e616
**********************************************************************/
Packit 13e616
static osm_switch_t *mcast_mgr_find_optimal_switch(osm_sm_t * sm,
Packit 13e616
						   cl_qlist_t * list)
Packit 13e616
{
Packit 13e616
	cl_qmap_t mgrp_sw_map;
Packit 13e616
	cl_qmap_t *p_sw_tbl;
Packit 13e616
	osm_switch_t *p_sw, *p_best_sw = NULL;
Packit 13e616
	float hops = 0;
Packit 13e616
	float best_hops = 10000;	/* any big # will do */
Packit 13e616
Packit 13e616
	OSM_LOG_ENTER(sm->p_log);
Packit 13e616
Packit 13e616
	p_sw_tbl = &sm->p_subn->sw_guid_tbl;
Packit 13e616
Packit 13e616
	create_mgrp_switch_map(&mgrp_sw_map, list);
Packit 13e616
	for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
Packit 13e616
	     p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl);
Packit 13e616
	     p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) {
Packit 13e616
		if (!osm_switch_supports_mcast(p_sw))
Packit 13e616
			continue;
Packit 13e616
Packit 13e616
#ifdef OSM_VENDOR_INTF_ANAFA
Packit 13e616
		hops = mcast_mgr_compute_avg_hops(sm, &mgrp_sw_map, p_sw);
Packit 13e616
#else
Packit 13e616
		hops = mcast_mgr_compute_max_hops(sm, &mgrp_sw_map, p_sw);
Packit 13e616
#endif
Packit 13e616
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
Packit 13e616
			"Switch 0x%016" PRIx64 ", hops = %f\n",
Packit 13e616
			cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)), hops);
Packit 13e616
Packit 13e616
		if (hops < best_hops) {
Packit 13e616
			p_best_sw = p_sw;
Packit 13e616
			best_hops = hops;
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
	if (p_best_sw)
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
Packit 13e616
			"Best switch is 0x%" PRIx64 " (%s), hops = %f\n",
Packit 13e616
			cl_ntoh64(osm_node_get_node_guid(p_best_sw->p_node)),
Packit 13e616
			p_best_sw->p_node->print_desc, best_hops);
Packit 13e616
	else
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
Packit 13e616
			"No multicast capable switches detected\n");
Packit 13e616
Packit 13e616
	destroy_mgrp_switch_map(&mgrp_sw_map);
Packit 13e616
	OSM_LOG_EXIT(sm->p_log);
Packit 13e616
	return p_best_sw;
Packit 13e616
}
Packit 13e616
Packit 13e616
/**********************************************************************
Packit 13e616
   This function returns the existing or optimal root switch for the tree.
Packit 13e616
**********************************************************************/
Packit 13e616
osm_switch_t *osm_mcast_mgr_find_root_switch(osm_sm_t * sm, cl_qlist_t *list)
Packit 13e616
{
Packit 13e616
	osm_switch_t *p_sw = NULL;
Packit 13e616
Packit 13e616
	OSM_LOG_ENTER(sm->p_log);
Packit 13e616
Packit 13e616
	/*
Packit 13e616
	   We always look for the best multicast tree root switch.
Packit 13e616
	   Otherwise since we always start with a a single join
Packit 13e616
	   the root will be always on the first switch attached to it.
Packit 13e616
	   - Very bad ...
Packit 13e616
	 */
Packit 13e616
	p_sw = mcast_mgr_find_optimal_switch(sm, list);
Packit 13e616
Packit 13e616
	OSM_LOG_EXIT(sm->p_log);
Packit 13e616
	return p_sw;
Packit 13e616
}
Packit 13e616
Packit 13e616
/*
 * Send one multicast forwarding table block to a switch.
 *
 * sm        - SM object used to issue the request and to log.
 * p_sw      - switch whose table block is written.
 * block_num - block index within the table.
 * position  - position of the block; it is placed in the top four
 *             bits of the attribute modifier.
 *
 * Nothing is sent when osm_mcast_tbl_get_block() returns zero for the
 * requested block.
 *
 * Returns 0 if nothing had to be sent or the request was issued, and
 * -1 if osm_req_set() reported a failure.
 */
static int mcast_mgr_set_mft_block(osm_sm_t * sm, IN osm_switch_t * p_sw,
				   uint32_t block_num, uint32_t position)
{
	ib_net16_t mft_block[IB_MCAST_BLOCK_SIZE];
	osm_madw_context_t mad_ctx;
	osm_node_t *node;
	osm_physp_t *physp0;
	osm_dr_path_t *dr_path;
	osm_mcast_tbl_t *mcast_tbl;
	ib_api_status_t rc;
	uint32_t attr_mod_ho;
	int result = 0;

	CL_ASSERT(sm);

	OSM_LOG_ENTER(sm->p_log);

	CL_ASSERT(p_sw);
	node = p_sw->p_node;
	CL_ASSERT(node);

	/* The request is addressed along the path stored on port 0. */
	physp0 = osm_node_get_physp_ptr(node, 0);
	dr_path = osm_physp_get_dr_path_ptr(physp0);

	mad_ctx.mft_context.node_guid = osm_node_get_node_guid(node);
	mad_ctx.mft_context.set_method = TRUE;

	mcast_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);

	if (!osm_mcast_tbl_get_block(mcast_tbl, (uint16_t) block_num,
				     (uint8_t) position, mft_block))
		goto Exit;

	attr_mod_ho = block_num + (position << 28);

	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
		"Writing MFT block %u position %u to switch 0x%" PRIx64
		"\n", block_num, position,
		cl_ntoh64(mad_ctx.mft_context.node_guid));

	rc = osm_req_set(sm, dr_path, (void *)mft_block, sizeof(mft_block),
			 IB_MAD_ATTR_MCAST_FWD_TBL,
			 cl_hton32(attr_mod_ho), FALSE,
			 ib_port_info_get_m_key(&physp0->port_info),
			 0, CL_DISP_MSGID_NONE, &mad_ctx);
	if (rc != IB_SUCCESS) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A02: "
			"Sending multicast fwd. tbl. block 0x%X to %s "
			"failed (%s)\n", attr_mod_ho,
			node->print_desc, ib_get_err_str(rc));
		result = -1;
	}

Exit:
	OSM_LOG_EXIT(sm->p_log);
	return result;
}
Packit 13e616
Packit 13e616
/**********************************************************************
Packit 13e616
  This is part of the recursive function to compute the paths in the
Packit 13e616
  spanning tree that emanate from this switch.  On input, the p_list
Packit 13e616
  contains the group members that must be routed from this switch.
Packit 13e616
**********************************************************************/
Packit 13e616
static void mcast_mgr_subdivide(osm_sm_t * sm, uint16_t mlid_ho,
Packit 13e616
				osm_switch_t * p_sw, cl_qlist_t * p_list,
Packit 13e616
				cl_qlist_t * list_array, uint8_t array_size)
Packit 13e616
{
Packit 13e616
	uint8_t port_num;
Packit 13e616
	boolean_t ignore_existing;
Packit 13e616
	osm_mcast_work_obj_t *p_wobj;
Packit 13e616
Packit 13e616
	OSM_LOG_ENTER(sm->p_log);
Packit 13e616
Packit 13e616
	/*
Packit 13e616
	   For Multicast Groups, we don't want to count on previous
Packit 13e616
	   configurations - since we can easily generate a storm
Packit 13e616
	   by loops.
Packit 13e616
	 */
Packit 13e616
	ignore_existing = TRUE;
Packit 13e616
Packit 13e616
	/*
Packit 13e616
	   Subdivide the set of ports into non-overlapping subsets
Packit 13e616
	   that will be routed to other switches.
Packit 13e616
	 */
Packit 13e616
	while ((p_wobj =
Packit 13e616
		(osm_mcast_work_obj_t *) cl_qlist_remove_head(p_list)) !=
Packit 13e616
	       (osm_mcast_work_obj_t *) cl_qlist_end(p_list)) {
Packit 13e616
		port_num =
Packit 13e616
		    osm_switch_recommend_mcast_path(p_sw, p_wobj->p_port,
Packit 13e616
						    mlid_ho, ignore_existing);
Packit 13e616
		if (port_num == OSM_NO_PATH) {
Packit 13e616
			/*
Packit 13e616
			   This typically occurs if the switch does not support
Packit 13e616
			   multicast and the multicast tree must branch at this
Packit 13e616
			   switch.
Packit 13e616
			 */
Packit 13e616
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A03: "
Packit 13e616
				"Error routing MLID 0x%X through switch 0x%"
Packit 13e616
				PRIx64 " %s\n"
Packit 13e616
				"\t\t\t\tNo multicast paths from this switch "
Packit 13e616
				"for port with LID %u\n", mlid_ho,
Packit 13e616
				cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)),
Packit 13e616
				p_sw->p_node->print_desc,
Packit 13e616
				cl_ntoh16(osm_port_get_base_lid
Packit 13e616
					  (p_wobj->p_port)));
Packit 13e616
			mcast_work_obj_delete(p_wobj);
Packit 13e616
			continue;
Packit 13e616
		}
Packit 13e616
Packit 13e616
		if (port_num >= array_size) {
Packit 13e616
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A04: "
Packit 13e616
				"Error routing MLID 0x%X through switch 0x%"
Packit 13e616
				PRIx64 " %s\n"
Packit 13e616
				"\t\t\t\tNo multicast paths from this switch "
Packit 13e616
				"to port with LID %u\n", mlid_ho,
Packit 13e616
				cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)),
Packit 13e616
				p_sw->p_node->print_desc,
Packit 13e616
				cl_ntoh16(osm_port_get_base_lid
Packit 13e616
					  (p_wobj->p_port)));
Packit 13e616
			mcast_work_obj_delete(p_wobj);
Packit 13e616
			continue;
Packit 13e616
		}
Packit 13e616
Packit 13e616
		cl_qlist_insert_tail(&list_array[port_num], &p_wobj->list_item);
Packit 13e616
	}
Packit 13e616
Packit 13e616
	OSM_LOG_EXIT(sm->p_log);
Packit 13e616
}
Packit 13e616
Packit 13e616
/*
 * Give up on every member still on 'list': report each one as
 * unroutable for 'mlid' (only when error logging is active), then free
 * all the work objects.  The list is empty on return.
 */
static void mcast_mgr_purge_list(osm_sm_t * sm, uint16_t mlid, cl_qlist_t * list)
{
	cl_list_item_t *item;
	osm_mcast_work_obj_t *victim;

	if (OSM_LOG_IS_ACTIVE_V2(sm->p_log, OSM_LOG_ERROR)) {
		item = cl_qlist_head(list);
		while (item != cl_qlist_end(list)) {
			victim = cl_item_obj(item, victim, list_item);
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A06: "
				"Unable to route MLID 0x%X for port 0x%" PRIx64 "\n",
				mlid, cl_ntoh64(osm_port_get_guid(victim->p_port)));
			item = cl_qlist_next(item);
		}
	}

	osm_mcast_drop_port_list(list);
}
Packit 13e616
Packit 13e616
/**********************************************************************
Packit 13e616
  This is the recursive function to compute the paths in the spanning
Packit 13e616
  tree that emanate from this switch.  On input, the p_list contains
Packit 13e616
  the group members that must be routed from this switch.
Packit 13e616
Packit 13e616
  The function returns the newly created mtree node element.
Packit 13e616
**********************************************************************/
Packit 13e616
static osm_mtree_node_t *mcast_mgr_branch(osm_sm_t * sm, uint16_t mlid_ho,
Packit 13e616
					  osm_switch_t * p_sw,
Packit 13e616
					  cl_qlist_t * p_list, uint8_t depth,
Packit 13e616
					  uint8_t upstream_port,
Packit 13e616
					  uint8_t * p_max_depth)
Packit 13e616
{
Packit 13e616
	uint8_t max_children;
Packit 13e616
	osm_mtree_node_t *p_mtn = NULL;
Packit 13e616
	cl_qlist_t *list_array = NULL;
Packit 13e616
	uint8_t i;
Packit 13e616
	ib_net64_t node_guid;
Packit 13e616
	osm_mcast_work_obj_t *p_wobj;
Packit 13e616
	cl_qlist_t *p_port_list;
Packit 13e616
	size_t count;
Packit 13e616
	osm_mcast_tbl_t *p_tbl;
Packit 13e616
Packit 13e616
	OSM_LOG_ENTER(sm->p_log);
Packit 13e616
Packit 13e616
	CL_ASSERT(p_sw);
Packit 13e616
	CL_ASSERT(p_list);
Packit 13e616
	CL_ASSERT(p_max_depth);
Packit 13e616
Packit 13e616
	node_guid = osm_node_get_node_guid(p_sw->p_node);
Packit 13e616
Packit 13e616
	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
Packit 13e616
		"Routing MLID 0x%X through switch 0x%" PRIx64
Packit 13e616
		" %s, %u nodes at depth %u\n",
Packit 13e616
		mlid_ho, cl_ntoh64(node_guid), p_sw->p_node->print_desc,
Packit 13e616
		cl_qlist_count(p_list), depth);
Packit 13e616
Packit 13e616
	CL_ASSERT(cl_qlist_count(p_list) > 0);
Packit 13e616
Packit 13e616
	depth++;
Packit 13e616
Packit 13e616
	if (depth >= 64) {
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A21: "
Packit 13e616
			"Maximal hops number is reached for MLID 0x%x."
Packit 13e616
			" Break processing\n", mlid_ho);
Packit 13e616
		mcast_mgr_purge_list(sm, mlid_ho, p_list);
Packit 13e616
		goto Exit;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	if (depth > *p_max_depth) {
Packit 13e616
		CL_ASSERT(depth == *p_max_depth + 1);
Packit 13e616
		*p_max_depth = depth;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	if (osm_switch_supports_mcast(p_sw) == FALSE) {
Packit 13e616
		/*
Packit 13e616
		   This switch doesn't do multicast.  Clean-up.
Packit 13e616
		 */
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A14: "
Packit 13e616
			"Switch 0x%" PRIx64 " %s does not support multicast\n",
Packit 13e616
			cl_ntoh64(node_guid), p_sw->p_node->print_desc);
Packit 13e616
Packit 13e616
		/*
Packit 13e616
		   Deallocate all the work objects on this branch of the tree.
Packit 13e616
		 */
Packit 13e616
		mcast_mgr_purge_list(sm, mlid_ho, p_list);
Packit 13e616
		goto Exit;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	p_mtn = osm_mtree_node_new(p_sw);
Packit 13e616
	if (p_mtn == NULL) {
Packit 13e616
		/*
Packit 13e616
		   We are unable to continue routing down this
Packit 13e616
		   leg of the tree.  Clean-up.
Packit 13e616
		 */
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A15: "
Packit 13e616
			"Insufficient memory to build multicast tree\n");
Packit 13e616
Packit 13e616
		/*
Packit 13e616
		   Deallocate all the work objects on this branch of the tree.
Packit 13e616
		 */
Packit 13e616
		mcast_mgr_purge_list(sm, mlid_ho, p_list);
Packit 13e616
		goto Exit;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	max_children = osm_mtree_node_get_max_children(p_mtn);
Packit 13e616
Packit 13e616
	CL_ASSERT(max_children > 1);
Packit 13e616
Packit 13e616
	/*
Packit 13e616
	   Prepare an empty list for each port in the switch.
Packit 13e616
	   TO DO - this list array could probably be moved
Packit 13e616
	   inside the switch element to save on malloc thrashing.
Packit 13e616
	 */
Packit 13e616
	list_array = malloc(sizeof(cl_qlist_t) * max_children);
Packit 13e616
	if (list_array == NULL) {
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A16: "
Packit 13e616
			"Unable to allocate list array\n");
Packit 13e616
		mcast_mgr_purge_list(sm, mlid_ho, p_list);
Packit 13e616
		osm_mtree_destroy(p_mtn);
Packit 13e616
		p_mtn = NULL;
Packit 13e616
		goto Exit;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	memset(list_array, 0, sizeof(cl_qlist_t) * max_children);
Packit 13e616
Packit 13e616
	for (i = 0; i < max_children; i++)
Packit 13e616
		cl_qlist_init(&list_array[i]);
Packit 13e616
Packit 13e616
	mcast_mgr_subdivide(sm, mlid_ho, p_sw, p_list, list_array, max_children);
Packit 13e616
Packit 13e616
	p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);
Packit 13e616
Packit 13e616
	/*
Packit 13e616
	   Add the upstream port to the forwarding table unless
Packit 13e616
	   we're at the root of the spanning tree.
Packit 13e616
	 */
Packit 13e616
	if (depth > 1) {
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
Packit 13e616
			"Adding upstream port %u\n", upstream_port);
Packit 13e616
Packit 13e616
		CL_ASSERT(upstream_port);
Packit 13e616
		osm_mcast_tbl_set(p_tbl, mlid_ho, upstream_port);
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/*
Packit 13e616
	   For each port that was allocated some routes,
Packit 13e616
	   recurse into this function to continue building the tree
Packit 13e616
	   if the node on the other end of that port is another switch.
Packit 13e616
	   Otherwise, the node is an endpoint, and we've found a leaf
Packit 13e616
	   of the tree.  Mark leaves with our special pointer value.
Packit 13e616
	 */
Packit 13e616
Packit 13e616
	for (i = 0; i < max_children; i++) {
Packit 13e616
		const osm_physp_t *p_physp;
Packit 13e616
		const osm_physp_t *p_remote_physp;
Packit 13e616
		osm_node_t *p_node;
Packit 13e616
		const osm_node_t *p_remote_node;
Packit 13e616
Packit 13e616
		p_port_list = &list_array[i];
Packit 13e616
Packit 13e616
		count = cl_qlist_count(p_port_list);
Packit 13e616
Packit 13e616
		/*
Packit 13e616
		   There should be no children routed through the upstream port!
Packit 13e616
		 */
Packit 13e616
		CL_ASSERT(upstream_port == 0 || i != upstream_port ||
Packit 13e616
			  (i == upstream_port && count == 0));
Packit 13e616
Packit 13e616
		if (count == 0)
Packit 13e616
			continue;	/* No routes down this port. */
Packit 13e616
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
Packit 13e616
			"Routing %zu destinations via switch port %u\n",
Packit 13e616
			count, i);
Packit 13e616
Packit 13e616
		if (i == 0) {
Packit 13e616
			/* This means we are adding the switch to the MC group.
Packit 13e616
			   We do not need to continue looking at the remote
Packit 13e616
			   port, just needed to add the port to the table */
Packit 13e616
			CL_ASSERT(count == 1);
Packit 13e616
Packit 13e616
			osm_mcast_tbl_set(p_tbl, mlid_ho, i);
Packit 13e616
Packit 13e616
			p_wobj = (osm_mcast_work_obj_t *)
Packit 13e616
			    cl_qlist_remove_head(p_port_list);
Packit 13e616
			mcast_work_obj_delete(p_wobj);
Packit 13e616
			continue;
Packit 13e616
		}
Packit 13e616
Packit 13e616
		p_node = p_sw->p_node;
Packit 13e616
		p_remote_node = osm_node_get_remote_node(p_node, i, NULL);
Packit 13e616
		if (!p_remote_node) {
Packit 13e616
			/*
Packit 13e616
			 * If we reached here, it means the minhop table has
Packit 13e616
			 * invalid entries that leads to disconnected ports.
Packit 13e616
			 *
Packit 13e616
			 * A possible reason for the code to reach here is
Packit 13e616
			 * that ucast cache is enabled, and a leaf switch that
Packit 13e616
			 * is used as a non-leaf switch in a multicast has been
Packit 13e616
			 * removed from the fabric.
Packit 13e616
			 *
Packit 13e616
			 * When it happens, we should invalidate the cache
Packit 13e616
			 * and force rerouting of the fabric.
Packit 13e616
			 */
Packit 13e616
Packit 13e616
			OSM_LOG(sm->p_log, OSM_LOG_ERROR,
Packit 13e616
				"ERR 0A1E: Tried to route MLID 0x%X through "
Packit 13e616
				"disconnected switch 0x%" PRIx64 " port %d\n",
Packit 13e616
				mlid_ho, cl_ntoh64(node_guid), i);
Packit 13e616
Packit 13e616
			/* Free memory */
Packit 13e616
			mcast_mgr_purge_list(sm, mlid_ho, p_port_list);
Packit 13e616
Packit 13e616
			/* Invalidate ucast cache */
Packit 13e616
			if (sm->ucast_mgr.p_subn->opt.use_ucast_cache &&
Packit 13e616
			    sm->ucast_mgr.cache_valid) {
Packit 13e616
				OSM_LOG(sm->p_log, OSM_LOG_INFO,
Packit 13e616
					"Unicast Cache will be invalidated due "
Packit 13e616
					"to multicast routing errors\n");
Packit 13e616
				osm_ucast_cache_invalidate(&sm->ucast_mgr);
Packit 13e616
				sm->p_subn->force_heavy_sweep = TRUE;
Packit 13e616
			}
Packit 13e616
Packit 13e616
			continue;
Packit 13e616
		}
Packit 13e616
Packit 13e616
		/*
Packit 13e616
		   This port routes frames for this mcast group.  Therefore,
Packit 13e616
		   set the appropriate bit in the multicast forwarding
Packit 13e616
		   table for this switch.
Packit 13e616
		 */
Packit 13e616
		osm_mcast_tbl_set(p_tbl, mlid_ho, i);
Packit 13e616
Packit 13e616
		if (osm_node_get_type(p_remote_node) == IB_NODE_TYPE_SWITCH) {
Packit 13e616
			/*
Packit 13e616
			   Acquire a pointer to the remote switch then recurse.
Packit 13e616
			 */
Packit 13e616
			CL_ASSERT(p_remote_node->sw);
Packit 13e616
Packit 13e616
			p_physp = osm_node_get_physp_ptr(p_node, i);
Packit 13e616
			CL_ASSERT(p_physp);
Packit 13e616
Packit 13e616
			p_remote_physp = osm_physp_get_remote(p_physp);
Packit 13e616
			CL_ASSERT(p_remote_physp);
Packit 13e616
Packit 13e616
			p_mtn->child_array[i] =
Packit 13e616
			    mcast_mgr_branch(sm, mlid_ho, p_remote_node->sw,
Packit 13e616
					     p_port_list, depth,
Packit 13e616
					     osm_physp_get_port_num
Packit 13e616
					     (p_remote_physp), p_max_depth);
Packit 13e616
		} else {
Packit 13e616
			/*
Packit 13e616
			   The neighbor node is not a switch, so this
Packit 13e616
			   must be a leaf.
Packit 13e616
			 */
Packit 13e616
			CL_ASSERT(count == 1);
Packit 13e616
Packit 13e616
			p_mtn->child_array[i] = OSM_MTREE_LEAF;
Packit 13e616
			p_wobj = (osm_mcast_work_obj_t *)
Packit 13e616
			    cl_qlist_remove_head(p_port_list);
Packit 13e616
Packit 13e616
			CL_ASSERT(cl_is_qlist_empty(p_port_list));
Packit 13e616
Packit 13e616
			OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
Packit 13e616
				"Found leaf for port 0x%016" PRIx64
Packit 13e616
				" on switch port %u\n",
Packit 13e616
				cl_ntoh64(osm_port_get_guid(p_wobj->p_port)),
Packit 13e616
				i);
Packit 13e616
			mcast_work_obj_delete(p_wobj);
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
	free(list_array);
Packit 13e616
Exit:
Packit 13e616
	OSM_LOG_EXIT(sm->p_log);
Packit 13e616
	return p_mtn;
Packit 13e616
}
Packit 13e616
Packit 13e616
static ib_api_status_t mcast_mgr_build_spanning_tree(osm_sm_t * sm,
Packit 13e616
						     osm_mgrp_box_t * mbox)
Packit 13e616
{
Packit 13e616
	cl_qlist_t port_list;
Packit 13e616
	cl_qmap_t port_map;
Packit 13e616
	uint32_t num_ports;
Packit 13e616
	osm_switch_t *p_sw;
Packit 13e616
	ib_api_status_t status = IB_SUCCESS;
Packit 13e616
	uint8_t max_depth = 0;
Packit 13e616
Packit 13e616
	OSM_LOG_ENTER(sm->p_log);
Packit 13e616
Packit 13e616
	/*
Packit 13e616
	   TO DO - for now, just blow away the old tree.
Packit 13e616
	   In the future we'll need to construct the tree based
Packit 13e616
	   on multicast forwarding table information if the user wants to
Packit 13e616
	   preserve existing multicast routes.
Packit 13e616
	 */
Packit 13e616
	osm_purge_mtree(sm, mbox);
Packit 13e616
Packit 13e616
	/* build the first "subset" containing all member ports */
Packit 13e616
	if (osm_mcast_make_port_list_and_map(&port_list, &port_map, mbox)) {
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A10: "
Packit 13e616
			"Insufficient memory to make port list\n");
Packit 13e616
		status = IB_ERROR;
Packit 13e616
		goto Exit;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	num_ports = cl_qlist_count(&port_list);
Packit 13e616
	if (num_ports < 2) {
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
Packit 13e616
			"MLID 0x%X has %u members - nothing to do\n",
Packit 13e616
			mbox->mlid, num_ports);
Packit 13e616
		osm_mcast_drop_port_list(&port_list);
Packit 13e616
		goto Exit;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/*
Packit 13e616
	   This function builds the single spanning tree recursively.
Packit 13e616
	   At each stage, the ports to be reached are divided into
Packit 13e616
	   non-overlapping subsets of member ports that can be reached through
Packit 13e616
	   a given switch port.  Construction then moves down each
Packit 13e616
	   branch, and the process starts again with each branch computing
Packit 13e616
	   for its own subset of the member ports.
Packit 13e616
Packit 13e616
	   The maximum recursion depth is at worst the maximum hop count in the
Packit 13e616
	   subnet, which is spec limited to 64.
Packit 13e616
	 */
Packit 13e616
Packit 13e616
	/*
Packit 13e616
	   Locate the switch around which to create the spanning
Packit 13e616
	   tree for this multicast group.
Packit 13e616
	 */
Packit 13e616
	p_sw = osm_mcast_mgr_find_root_switch(sm, &port_list);
Packit 13e616
	if (p_sw == NULL) {
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A08: "
Packit 13e616
			"Unable to locate a suitable switch for group 0x%X\n",
Packit 13e616
			mbox->mlid);
Packit 13e616
		osm_mcast_drop_port_list(&port_list);
Packit 13e616
		status = IB_ERROR;
Packit 13e616
		goto Exit;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	mbox->root = mcast_mgr_branch(sm, mbox->mlid, p_sw, &port_list, 0, 0,
Packit 13e616
				      &max_depth);
Packit 13e616
Packit 13e616
	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
Packit 13e616
		"Configured MLID 0x%X for %u ports, max tree depth = %u\n",
Packit 13e616
		mbox->mlid, num_ports, max_depth);
Packit 13e616
Exit:
Packit 13e616
	OSM_LOG_EXIT(sm->p_log);
Packit 13e616
	return status;
Packit 13e616
}
Packit 13e616
Packit 13e616
#if 0
Packit 13e616
/* unused */
Packit 13e616
/*
 * Program one switch's multicast table from a spanning tree node.
 *
 * The table entry for the group's MLID is cleared on the switch that
 * 'p_mtn' refers to, and then the bit of every child slot of the node
 * that holds a child is set again.
 */
void osm_mcast_mgr_set_table(osm_sm_t * sm, IN const osm_mgrp_t * p_mgrp,
			     IN const osm_mtree_node_t * p_mtn)
{
	osm_switch_t *p_sw;
	osm_mcast_tbl_t *p_tbl;
	uint16_t mlid_ho;
	uint8_t slot_count;
	uint8_t slot;

	OSM_LOG_ENTER(sm->p_log);

	mlid_ho = cl_ntoh16(osm_mgrp_get_mlid(p_mgrp));
	p_sw = osm_mtree_node_get_switch_ptr(p_mtn);

	CL_ASSERT(p_sw);

	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
		"Configuring MLID 0x%X on switch 0x%" PRIx64 "\n",
		mlid_ho, osm_node_get_node_guid(p_sw->p_node));

	p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);
	slot_count = osm_mtree_node_get_max_children(p_mtn);

	CL_ASSERT(slot_count <= osm_switch_get_num_ports(p_sw));

	/* Start from an empty entry, then mark each populated child slot. */
	osm_mcast_tbl_clear_mlid(p_tbl, mlid_ho);

	for (slot = 0; slot < slot_count; slot++) {
		if (osm_mtree_node_get_child(p_mtn, slot) != NULL)
			osm_mcast_tbl_set(p_tbl, mlid_ho, slot);
	}

	OSM_LOG_EXIT(sm->p_log);
}
Packit 13e616
#endif
Packit 13e616
Packit 13e616
/*
 * Clear the multicast table entry for 'mlid' on every switch found in
 * the subnet's switch GUID table.
 */
static void mcast_mgr_clear(osm_sm_t * sm, uint16_t mlid)
{
	cl_qmap_t *p_switches;
	cl_map_item_t *p_item;

	OSM_LOG_ENTER(sm->p_log);

	p_switches = &sm->p_subn->sw_guid_tbl;
	for (p_item = cl_qmap_head(p_switches);
	     p_item != cl_qmap_end(p_switches);
	     p_item = cl_qmap_next(p_item)) {
		osm_switch_t *p_sw = (osm_switch_t *) p_item;

		osm_mcast_tbl_clear_mlid(osm_switch_get_mcast_tbl_ptr(p_sw),
					 mlid);
	}

	OSM_LOG_EXIT(sm->p_log);
}
Packit 13e616
Packit 13e616
#if 0
Packit 13e616
/* TO DO - make this real -- at least update spanning tree */
Packit 13e616
/**********************************************************************
Packit 13e616
   Lock must be held on entry.
Packit 13e616
**********************************************************************/
Packit 13e616
ib_api_status_t osm_mcast_mgr_process_single(osm_sm_t * sm,
Packit 13e616
					     IN ib_net16_t const mlid,
Packit 13e616
					     IN ib_net64_t const port_guid,
Packit 13e616
					     IN uint8_t const join_state)
Packit 13e616
{
Packit 13e616
	uint8_t port_num;
Packit 13e616
	uint16_t mlid_ho;
Packit 13e616
	ib_net64_t sw_guid;
Packit 13e616
	osm_port_t *p_port;
Packit 13e616
	osm_physp_t *p_physp;
Packit 13e616
	osm_physp_t *p_remote_physp;
Packit 13e616
	osm_node_t *p_remote_node;
Packit 13e616
	osm_mcast_tbl_t *p_mcast_tbl;
Packit 13e616
	ib_api_status_t status = IB_SUCCESS;
Packit 13e616
Packit 13e616
	OSM_LOG_ENTER(sm->p_log);
Packit 13e616
Packit 13e616
	CL_ASSERT(mlid);
Packit 13e616
	CL_ASSERT(port_guid);
Packit 13e616
Packit 13e616
	mlid_ho = cl_ntoh16(mlid);
Packit 13e616
Packit 13e616
	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
Packit 13e616
		"Attempting to add port 0x%" PRIx64 " to MLID 0x%X, "
Packit 13e616
		"\n\t\t\t\tjoin state = 0x%X\n",
Packit 13e616
		cl_ntoh64(port_guid), mlid_ho, join_state);
Packit 13e616
Packit 13e616
	/*
Packit 13e616
	   Acquire the Port object.
Packit 13e616
	 */
Packit 13e616
	p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
Packit 13e616
	if (!p_port) {
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A01: "
Packit 13e616
			"Unable to acquire port object for 0x%" PRIx64 "\n",
Packit 13e616
			cl_ntoh64(port_guid));
Packit 13e616
		status = IB_ERROR;
Packit 13e616
		goto Exit;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	p_physp = p_port->p_physp;
Packit 13e616
	if (p_physp == NULL) {
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A05: "
Packit 13e616
			"Unable to acquire phsyical port object for 0x%" PRIx64
Packit 13e616
			"\n", cl_ntoh64(port_guid));
Packit 13e616
		status = IB_ERROR;
Packit 13e616
		goto Exit;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	p_remote_physp = osm_physp_get_remote(p_physp);
Packit 13e616
	if (p_remote_physp == NULL) {
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A11: "
Packit 13e616
			"Unable to acquire remote phsyical port object "
Packit 13e616
			"for 0x%" PRIx64 "\n", cl_ntoh64(port_guid));
Packit 13e616
		status = IB_ERROR;
Packit 13e616
		goto Exit;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	p_remote_node = osm_physp_get_node_ptr(p_remote_physp);
Packit 13e616
Packit 13e616
	CL_ASSERT(p_remote_node);
Packit 13e616
Packit 13e616
	sw_guid = osm_node_get_node_guid(p_remote_node);
Packit 13e616
Packit 13e616
	if (osm_node_get_type(p_remote_node) != IB_NODE_TYPE_SWITCH) {
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A22: "
Packit 13e616
			"Remote node not a switch node 0x%" PRIx64 "\n",
Packit 13e616
			cl_ntoh64(sw_guid));
Packit 13e616
		status = IB_ERROR;
Packit 13e616
		goto Exit;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	if (!p_remote_node->sw) {
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A12: "
Packit 13e616
			"No switch object 0x%" PRIx64 "\n", cl_ntoh64(sw_guid));
Packit 13e616
		status = IB_ERROR;
Packit 13e616
		goto Exit;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	if (osm_switch_is_in_mcast_tree(p_remote_node->sw, mlid_ho)) {
Packit 13e616
		/*
Packit 13e616
		   We're in luck. The switch attached to this port
Packit 13e616
		   is already in the multicast group, so we can just
Packit 13e616
		   add the specified port as a new leaf of the tree.
Packit 13e616
		 */
Packit 13e616
		if (join_state & (IB_JOIN_STATE_FULL | IB_JOIN_STATE_NON)) {
Packit 13e616
			/*
Packit 13e616
			   This node wants to receive multicast frames.
Packit 13e616
			   Get the switch port number to which the new member port
Packit 13e616
			   is attached, then configure this single mcast table.
Packit 13e616
			 */
Packit 13e616
			port_num = osm_physp_get_port_num(p_remote_physp);
Packit 13e616
			CL_ASSERT(port_num);
Packit 13e616
Packit 13e616
			p_mcast_tbl =
Packit 13e616
			    osm_switch_get_mcast_tbl_ptr(p_remote_node->sw);
Packit 13e616
			osm_mcast_tbl_set(p_mcast_tbl, mlid_ho, port_num);
Packit 13e616
		} else {
Packit 13e616
			if (join_state & IB_JOIN_STATE_SEND_ONLY)
Packit 13e616
				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
Packit 13e616
					"Success.  Nothing to do for send"
Packit 13e616
					"only member\n");
Packit 13e616
			else {
Packit 13e616
				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A13: "
Packit 13e616
					"Unknown join state 0x%X\n",
Packit 13e616
					join_state);
Packit 13e616
				status = IB_ERROR;
Packit 13e616
				goto Exit;
Packit 13e616
			}
Packit 13e616
		}
Packit 13e616
	} else
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Unable to add port\n");
Packit 13e616
Packit 13e616
Exit:
Packit 13e616
	OSM_LOG_EXIT(sm->p_log);
Packit 13e616
	return status;
Packit 13e616
}
Packit 13e616
#endif
Packit 13e616
Packit 13e616
/**********************************************************************
Packit 13e616
 Process the entire group.
Packit 13e616
 NOTE : The lock should be held externally!
Packit 13e616
 **********************************************************************/
Packit 13e616
/*
 * Rebuild the multicast routing for one MLID (host byte order).
 *
 * The MLID's entry is first cleared on all switches.  If a group box
 * exists for the MLID, its spanning tree is built, either through the
 * active routing engine's mcast_build_stree hook when one is provided or
 * through mcast_mgr_build_spanning_tree() otherwise.
 *
 * Returns the status of the tree build, or IB_SUCCESS when no group box
 * exists for the MLID.
 */
static ib_api_status_t mcast_mgr_process_mlid(osm_sm_t * sm, uint16_t mlid)
{
	struct osm_routing_engine *re = sm->p_subn->p_osm->routing_engine_used;
	ib_api_status_t rc = IB_SUCCESS;
	osm_mgrp_box_t *p_box;

	OSM_LOG_ENTER(sm->p_log);

	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
		"Processing multicast group with mlid 0x%X\n", mlid);

	/* Start from clean tables; building the tree sets the bits again. */
	mcast_mgr_clear(sm, mlid);

	p_box = osm_get_mbox_by_mlid(sm->p_subn, cl_hton16(mlid));
	if (p_box != NULL) {
		if (re != NULL && re->mcast_build_stree != NULL)
			rc = re->mcast_build_stree(re->context, p_box);
		else
			rc = mcast_mgr_build_spanning_tree(sm, p_box);

		if (rc != IB_SUCCESS) {
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A17: "
				"Unable to create spanning tree (%s) for mlid "
				"0x%x\n", ib_get_err_str(rc), mlid);
		}
	}

	OSM_LOG_EXIT(sm->p_log);
	return rc;
}
Packit 13e616
Packit 13e616
/*
 * Update the multicast forwarding table top of switch 'p_sw'.
 *
 * Nothing is sent unless the use_mfttop option is enabled and the
 * switch's port 0 advertises IB_PORT_CAP_HAS_MCAST_FDB_TOP.
 *
 * The new top value is:
 *   - sm->mlids_init_max during the first master sweep;
 *   - IB_LID_MCAST_START_HO - 1 when the table has no block in use
 *     (max_block_in_use == -1);
 *   - otherwise the last MLID covered by the highest block in use.
 *
 * A SwitchInfo Set is issued only when the value differs from the one in
 * the cached switch_info.  A failure to send is logged, not returned.
 *
 * The function entry/exit trace is emitted on every path.
 */
static void mcast_mgr_set_mfttop(IN osm_sm_t * sm, IN osm_switch_t * p_sw)
{
	osm_node_t *p_node;
	osm_dr_path_t *p_path;
	osm_physp_t *p_physp;
	osm_mcast_tbl_t *p_tbl;
	osm_madw_context_t context;
	ib_api_status_t status;
	ib_switch_info_t si;
	ib_net16_t mcast_top;

	OSM_LOG_ENTER(sm->p_log);

	CL_ASSERT(p_sw);

	p_node = p_sw->p_node;

	CL_ASSERT(p_node);

	p_physp = osm_node_get_physp_ptr(p_node, 0);
	p_path = osm_physp_get_dr_path_ptr(p_physp);
	p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);

	/* Feature disabled or not supported by this switch. */
	if (!sm->p_subn->opt.use_mfttop ||
	    !(p_physp->port_info.capability_mask &
	      IB_PORT_CAP_HAS_MCAST_FDB_TOP))
		goto Exit;

	/* Work on a copy of the cached SwitchInfo; only the top changes. */
	si = p_sw->switch_info;

	if (sm->p_subn->first_time_master_sweep == TRUE)
		mcast_top = cl_hton16(sm->mlids_init_max);
	else if (p_tbl->max_block_in_use == -1)
		mcast_top = cl_hton16(IB_LID_MCAST_START_HO - 1);
	else
		mcast_top = cl_hton16(IB_LID_MCAST_START_HO +
				      (p_tbl->max_block_in_use + 1) *
				      IB_MCAST_BLOCK_SIZE - 1);

	/* Already up to date - no MAD needed. */
	if (mcast_top == si.mcast_top)
		goto Exit;

	si.mcast_top = mcast_top;

	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
		"Setting switch MFT top to MLID 0x%x\n",
		cl_ntoh16(si.mcast_top));

	context.si_context.light_sweep = FALSE;
	context.si_context.node_guid = osm_node_get_node_guid(p_node);
	context.si_context.set_method = TRUE;
	context.si_context.lft_top_change = FALSE;

	status = osm_req_set(sm, p_path, (uint8_t *) & si,
			     sizeof(si), IB_MAD_ATTR_SWITCH_INFO,
			     0, FALSE,
			     ib_port_info_get_m_key(&p_physp->port_info),
			     0, CL_DISP_MSGID_NONE, &context);

	if (status != IB_SUCCESS)
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A1B: "
			"Sending SwitchInfo attribute failed (%s)\n",
			ib_get_err_str(status));

Exit:
	OSM_LOG_EXIT(sm->p_log);
}
Packit 13e616
Packit 13e616
/*
 * Send the multicast forwarding tables to all switches.
 *
 * First pass: reset every switch's send cursor (mft_block_num and
 * mft_position), find the highest block in use across all switches, and
 * update each switch's MFT top.
 *
 * Second pass: for each block number up to that maximum, keep sweeping
 * over the switches, sending one (block, position) pair per switch per
 * sweep, until no switch is still positioned on that block.
 *
 * Returns 0 when every mcast_mgr_set_mft_block() call reported success,
 * -1 if at least one of them reported failure.
 */
static int mcast_mgr_set_mftables(osm_sm_t * sm)
{
	cl_qmap_t *p_switches = &sm->p_subn->sw_guid_tbl;
	cl_map_item_t *p_item;
	osm_switch_t *p_sw;
	osm_mcast_tbl_t *p_tbl;
	int16_t blk, top_blk = -1;
	int pending, rc = 0;

	for (p_item = cl_qmap_head(p_switches);
	     p_item != cl_qmap_end(p_switches);
	     p_item = cl_qmap_next(p_item)) {
		p_sw = (osm_switch_t *) p_item;
		p_sw->mft_block_num = 0;
		p_sw->mft_position = 0;
		p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);
		if (osm_mcast_tbl_get_max_block_in_use(p_tbl) > top_blk)
			top_blk = osm_mcast_tbl_get_max_block_in_use(p_tbl);
		mcast_mgr_set_mfttop(sm, p_sw);
	}

	/* Stripe the MFT blocks across the switches */
	for (blk = 0; blk <= top_blk; blk++) {
		do {
			pending = 0;
			for (p_item = cl_qmap_head(p_switches);
			     p_item != cl_qmap_end(p_switches);
			     p_item = cl_qmap_next(p_item)) {
				p_sw = (osm_switch_t *) p_item;

				/* This switch is already past this block. */
				if (p_sw->mft_block_num != blk)
					continue;

				pending = 1;
				if (mcast_mgr_set_mft_block(sm, p_sw,
							    p_sw->mft_block_num,
							    p_sw->mft_position))
					rc = -1;

				/* Advance the cursor; wrap to next block. */
				p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);
				p_sw->mft_position++;
				if (p_sw->mft_position > p_tbl->max_position) {
					p_sw->mft_position = 0;
					p_sw->mft_block_num++;
				}
			}
		} while (pending);
	}

	return rc;
}
Packit 13e616
Packit 13e616
/*
 * (Re)allocate the multicast table of every switch.
 *
 * The subnet's mboxes array is scanned downwards from the index of
 * max_mcast_lid_ho to find the highest index that holds a group box;
 * that index is passed to osm_mcast_tbl_realloc() for each switch.
 *
 * Returns 0 on success or when no group box exists at all, -1 as soon
 * as a reallocation reports failure.
 */
static int alloc_mfts(osm_sm_t * sm)
{
	cl_qmap_t *p_switches = &sm->p_subn->sw_guid_tbl;
	osm_switch_t *p_sw;
	int top_idx;

	top_idx = sm->p_subn->max_mcast_lid_ho - IB_LID_MCAST_START_HO;
	while (top_idx >= 0 && !sm->p_subn->mboxes[top_idx])
		top_idx--;

	/* No multicast group in use - nothing to allocate. */
	if (top_idx < 0)
		return 0;

	/* Now, walk switches and (re)allocate multicast tables */
	p_sw = (osm_switch_t *) cl_qmap_head(p_switches);
	while (p_sw != (osm_switch_t *) cl_qmap_end(p_switches)) {
		if (osm_mcast_tbl_realloc(&p_sw->mcast_tbl, top_idx))
			return -1;
		p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
	}

	return 0;
}
Packit 13e616
Packit 13e616
/**********************************************************************
Packit 13e616
  This is the function that is invoked during idle time and sweep to
Packit 13e616
  handle the process request for mcast groups where join/leave/delete
Packit 13e616
  was required.
Packit 13e616
 **********************************************************************/
Packit 13e616
/*
 * Process pending multicast routing requests.
 *
 * Runs with sm->p_lock held exclusively for its whole duration.  After
 * (re)allocating the switch multicast tables, every MLID index that has
 * a pending request is reprocessed and its request flag is cleared.  If
 * 'config_all' is set, every index that holds a group box is processed
 * too, over the full MLID range rather than only up to
 * sm->mlids_req_max.  The forwarding tables are then sent to the
 * switches and the multicast routes are dumped.
 *
 * Returns 0 if the subnet has no switches, -1 if table allocation
 * fails, otherwise the result of mcast_mgr_set_mftables().  The status
 * of the individual per-MLID runs is not reflected in the return value.
 */
int osm_mcast_mgr_process(osm_sm_t * sm, boolean_t config_all)
{
	unsigned idx;
	unsigned last_idx;
	int rc = 0;

	OSM_LOG_ENTER(sm->p_log);

	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);

	/* Without any switch in the subnet there is nothing to route. */
	if (cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"No switches in subnet. Nothing to do\n");
		goto Unlock;
	}

	if (alloc_mfts(sm)) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR,
			"ERR 0A09: alloc_mfts failed\n");
		rc = -1;
		goto Unlock;
	}

	if (config_all)
		last_idx = sm->p_subn->max_mcast_lid_ho - IB_LID_MCAST_START_HO;
	else
		last_idx = sm->mlids_req_max;

	for (idx = 0; idx <= last_idx; idx++) {
		/* Skip entries that are neither requested nor forced. */
		if (!sm->mlids_req[idx] &&
		    !(config_all && sm->p_subn->mboxes[idx]))
			continue;

		sm->mlids_req[idx] = 0;
		mcast_mgr_process_mlid(sm, idx + IB_LID_MCAST_START_HO);
	}

	sm->mlids_req_max = 0;

	rc = mcast_mgr_set_mftables(sm);

	osm_dump_mcast_routes(sm->p_subn->p_osm);

Unlock:
	CL_PLOCK_RELEASE(sm->p_lock);
	OSM_LOG_EXIT(sm->p_log);
	return rc;
}