Blame opensm/osm_drop_mgr.c

Packit 13e616
/*
Packit 13e616
 * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
Packit 13e616
 * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved.
Packit 13e616
 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
Packit 13e616
 * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
Packit 13e616
 * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved.
Packit 13e616
 *
Packit 13e616
 * This software is available to you under a choice of one of two
Packit 13e616
 * licenses.  You may choose to be licensed under the terms of the GNU
Packit 13e616
 * General Public License (GPL) Version 2, available from the file
Packit 13e616
 * COPYING in the main directory of this source tree, or the
Packit 13e616
 * OpenIB.org BSD license below:
Packit 13e616
 *
Packit 13e616
 *     Redistribution and use in source and binary forms, with or
Packit 13e616
 *     without modification, are permitted provided that the following
Packit 13e616
 *     conditions are met:
Packit 13e616
 *
Packit 13e616
 *      - Redistributions of source code must retain the above
Packit 13e616
 *        copyright notice, this list of conditions and the following
Packit 13e616
 *        disclaimer.
Packit 13e616
 *
Packit 13e616
 *      - Redistributions in binary form must reproduce the above
Packit 13e616
 *        copyright notice, this list of conditions and the following
Packit 13e616
 *        disclaimer in the documentation and/or other materials
Packit 13e616
 *        provided with the distribution.
Packit 13e616
 *
Packit 13e616
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Packit 13e616
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Packit 13e616
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Packit 13e616
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
Packit 13e616
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
Packit 13e616
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
Packit 13e616
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
Packit 13e616
 * SOFTWARE.
Packit 13e616
 *
Packit 13e616
 */
Packit 13e616
Packit 13e616
/*
Packit 13e616
 * Abstract:
Packit 13e616
 *    Implementation of osm_drop_mgr_t.
Packit 13e616
 * This object represents the Drop Manager object.
Packit 13e616
 * This object is part of the opensm family of objects.
Packit 13e616
 */
Packit 13e616
Packit 13e616
#if HAVE_CONFIG_H
Packit 13e616
#  include <config.h>
Packit 13e616
#endif				/* HAVE_CONFIG_H */
Packit 13e616
Packit 13e616
#include <stdlib.h>
Packit 13e616
#include <string.h>
Packit 13e616
#include <iba/ib_types.h>
Packit 13e616
#include <complib/cl_qmap.h>
Packit 13e616
#include <complib/cl_passivelock.h>
Packit 13e616
#include <complib/cl_debug.h>
Packit 13e616
#include <complib/cl_ptr_vector.h>
Packit 13e616
#include <opensm/osm_file_ids.h>
Packit 13e616
#define FILE_ID OSM_FILE_DROP_MGR_C
Packit 13e616
#include <opensm/osm_sm.h>
Packit 13e616
#include <opensm/osm_router.h>
Packit 13e616
#include <opensm/osm_switch.h>
Packit 13e616
#include <opensm/osm_node.h>
Packit 13e616
#include <opensm/osm_guid.h>
Packit 13e616
#include <opensm/osm_helper.h>
Packit 13e616
#include <opensm/osm_multicast.h>
Packit 13e616
#include <opensm/osm_remote_sm.h>
Packit 13e616
#include <opensm/osm_inform.h>
Packit 13e616
#include <opensm/osm_ucast_mgr.h>
Packit 13e616
Packit 13e616
/*
 * Remove the router entry keyed by portguid from the subnet's router GUID
 * table and delete the router object.
 *
 * sm       - SM object; its subnet's rtr_guid_tbl is modified.
 * portguid - port GUID (network order) used as the table key.
 *
 * Nothing is logged or freed when the removal result equals the map's
 * end item, which this code treats as "no such router".
 */
static void drop_mgr_remove_router(osm_sm_t * sm, IN const ib_net64_t portguid)
{
	cl_qmap_t *p_tbl = &sm->p_subn->rtr_guid_tbl;
	osm_router_t *p_router =
	    (osm_router_t *) cl_qmap_remove(p_tbl, portguid);

	/* No router was registered under this port GUID */
	if (p_router == (osm_router_t *) cl_qmap_end(p_tbl))
		return;

	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
		"Cleaned router for port guid 0x%016" PRIx64 "\n",
		cl_ntoh64(portguid));
	osm_router_delete(&p_router);
}
Packit 13e616
Packit 13e616
/*
 * Detach a physical port from its link peer (if any), mark it as
 * undiscovered in its node, and destroy the physp object.
 *
 * sm      - SM object; provides the log, subnet and unicast manager.
 * p_physp - physical port to clean up. It is destroyed on return.
 *
 * Side effects on the peer, when one exists and has a port object:
 *  - a heavy sweep is forced if the peer port was discovered and is ACTIVE;
 *  - the peer port's discovery count is reset when the peer node has no
 *    switch object and the peer is not the SM's own port.
 */
static void drop_mgr_clean_physp(osm_sm_t * sm, IN osm_physp_t * p_physp)
{
	osm_physp_t *p_peer = osm_physp_get_remote(p_physp);
	osm_port_t *p_peer_port = NULL;

	if (p_peer)
		p_peer_port = osm_get_port_by_guid(sm->p_subn,
						   p_peer->port_guid);

	if (p_peer_port) {
		/*
		 * Distinguish a truly lost link (neither side recognized)
		 * from a subnet "hiccup" where the peer was recognized and
		 * is ACTIVE. For a hiccup, request a heavy sweep next time
		 * so that part of the subnet is not lost.
		 */
		if (p_peer_port->discovery_count &&
		    osm_physp_get_port_state(p_peer) == IB_LINK_ACTIVE) {
			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
				"Forcing new heavy sweep. Remote "
				"port 0x%016" PRIx64 " port num: %u "
				"was recognized in ACTIVE state\n",
				cl_ntoh64(p_peer->port_guid),
				p_peer->port_num);
			sm->p_subn->force_heavy_sweep = TRUE;
		}

		/*
		 * A CA or router peer is no longer reachable through this
		 * link; zeroing its discovery count lets it be removed.
		 * The SM's own port is never reset.
		 */
		if (!p_peer->p_node->sw &&
		    p_peer->port_guid != sm->p_subn->sm_port_guid) {
			p_peer_port->discovery_count = 0;
			OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
				"Resetting discovery count of node: "
				"0x%016" PRIx64 " port num:%u\n",
				cl_ntoh64(osm_node_get_node_guid
					  (p_peer->p_node)),
				p_peer->port_num);
		}
	}

	if (p_peer) {
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
			"Unlinking local node 0x%016" PRIx64 ", port %u"
			"\n\t\t\t\tand remote node 0x%016" PRIx64
			", port %u\n",
			cl_ntoh64(osm_node_get_node_guid(p_physp->p_node)),
			p_physp->port_num,
			cl_ntoh64(osm_node_get_node_guid(p_peer->p_node)),
			p_peer->port_num);

		/* Record the dropped link in the unicast cache before unlinking */
		if (sm->ucast_mgr.cache_valid)
			osm_ucast_cache_add_link(&sm->ucast_mgr, p_physp,
						 p_peer);

		osm_physp_unlink(p_physp, p_peer);
	}

	/* Mark the port as undiscovered in its node */
	p_physp->p_node->physp_discovered[p_physp->port_num] = 0;

	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
		"Clearing node 0x%016" PRIx64 " physical port number %u\n",
		cl_ntoh64(osm_node_get_node_guid(p_physp->p_node)),
		p_physp->port_num);

	osm_physp_destroy(p_physp);
}
Packit 13e616
Packit 13e616
/*
 * Remove an unreachable port from the subnet and release everything that
 * refers to it.
 *
 * sm     - SM object; provides the log and the subnet tables.
 * p_port - port to remove. It is deleted before this function returns,
 *          unless it is not the object registered in port_guid_tbl under
 *          its GUID, in which case ERR 0101 is logged and nothing is done.
 *
 * Steps, in order:
 *  1. Report trap 65 (SM_GID_OUT_OF_SERVICE_TRAP) for the port's GID.
 *  2. Drop pending alias-GUID work objects that reference the port.
 *  3. Remove the port from every multicast group on its mcm_list.
 *  4. Delete all alias GUIDs whose base port is this port.
 *  5. Remove the port from port_guid_tbl, and any remote SM / router
 *     entries keyed by the same GUID.
 *  6. Clear the port's LID range in port_lid_tbl.
 *  7. Clean up the port's physp, optionally drop event subscriptions,
 *     and delete the port object.
 */
static void drop_mgr_remove_port(osm_sm_t * sm, IN osm_port_t * p_port)
{
	ib_net64_t port_guid;
	osm_port_t *p_port_check;
	cl_qmap_t *p_alias_guid_tbl;
	cl_qmap_t *p_sm_guid_tbl;
	osm_mcm_port_t *mcm_port;
	cl_ptr_vector_t *p_port_lid_tbl;
	uint16_t min_lid_ho;
	uint16_t max_lid_ho;
	/* Wider than the LID bounds so that "<= max" cannot wrap at 0xFFFF */
	uint32_t lid_ho;
	osm_node_t *p_node;
	osm_remote_sm_t *p_sm;
	osm_alias_guid_t *p_alias_guid, *p_alias_guid_check;
	osm_guidinfo_work_obj_t *wobj;
	cl_list_item_t *item, *next_item;
	ib_gid_t port_gid;
	ib_mad_notice_attr_t notice;
	ib_api_status_t status;

	OSM_LOG_ENTER(sm->p_log);

	port_guid = osm_port_get_guid(p_port);
	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
		"Unreachable port 0x%016" PRIx64 "\n", cl_ntoh64(port_guid));

	/* Only proceed if this exact object is the one registered by GUID */
	p_port_check =
	    (osm_port_t *) cl_qmap_get(&sm->p_subn->port_guid_tbl, port_guid);
	if (p_port_check != p_port) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0101: "
			"Port 0x%016" PRIx64 " not in guid table\n",
			cl_ntoh64(port_guid));
		goto Exit;
	}

	/* issue a notice - trap 65 (SM_GID_OUT_OF_SERVICE_TRAP) */
	/*
	 * Only some fields of the notice are filled in below; zero the rest
	 * so no indeterminate stack bytes are handed to osm_report_notice.
	 */
	memset(&notice, 0, sizeof(notice));
	/* details of the notice */
	notice.generic_type = 0x80 | IB_NOTICE_TYPE_SUBN_MGMT;	/* is generic subn mgt type */
	ib_notice_set_prod_type_ho(&notice, 4);	/* A class manager generator */
	/* endport ceases to be reachable */
	notice.g_or_v.generic.trap_num = CL_HTON16(SM_GID_OUT_OF_SERVICE_TRAP); /* 65 */
	/* The sm_base_lid is saved in network order already. */
	notice.issuer_lid = sm->p_subn->sm_base_lid;
	/* following C14-72.1.2 and table 119 p725 */
	/* we need to provide the GID */
	port_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix;
	port_gid.unicast.interface_id = port_guid;
	memcpy(&(notice.data_details.ntc_64_67.gid),
	       &(port_gid), sizeof(ib_gid_t));

	/* According to page 653 - the issuer gid in this case of trap
	   is the SM gid, since the SM is the initiator of this trap. */
	notice.issuer_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix;
	notice.issuer_gid.unicast.interface_id = sm->p_subn->sm_port_guid;

	/* A failed report is logged but does not stop the removal */
	status = osm_report_notice(sm->p_log, sm->p_subn, &notice);
	if (status != IB_SUCCESS) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0103: "
			"Error sending trap reports (%s)\n",
			ib_get_err_str(status));
	}

	/*
	 * Drop queued alias-GUID work objects for this port. The successor
	 * is fetched before a possible removal so iteration stays valid.
	 */
	next_item = cl_qlist_head(&sm->p_subn->alias_guid_list);
	while (next_item != cl_qlist_end(&sm->p_subn->alias_guid_list)) {
		item = next_item;
		next_item = cl_qlist_next(item);
		wobj = cl_item_obj(item, wobj, list_item);
		if (wobj->p_port == p_port) {
			cl_qlist_remove_item(&sm->p_subn->alias_guid_list,
					     &wobj->list_item);
			osm_guid_work_obj_delete(wobj);
		}
	}

	/*
	 * Leave every multicast group. The loop relies on
	 * osm_mgrp_delete_port() taking the entry off p_port->mcm_list.
	 */
	while (!cl_is_qlist_empty(&p_port->mcm_list)) {
		mcm_port = cl_item_obj(cl_qlist_head(&p_port->mcm_list),
				       mcm_port, list_item);
		osm_mgrp_delete_port(sm->p_subn, sm->p_log, mcm_port->mgrp,
				     p_port);
	}

	/*
	 * Delete all alias GUIDs based on this port. Advance to the next
	 * map item before removing the current one.
	 */
	p_alias_guid_tbl = &sm->p_subn->alias_port_guid_tbl;
	p_alias_guid_check = (osm_alias_guid_t *) cl_qmap_head(p_alias_guid_tbl);
	while (p_alias_guid_check != (osm_alias_guid_t *) cl_qmap_end(p_alias_guid_tbl)) {
		if (p_alias_guid_check->p_base_port == p_port)
			p_alias_guid = p_alias_guid_check;
		else
			p_alias_guid = NULL;
		p_alias_guid_check = (osm_alias_guid_t *) cl_qmap_next(&p_alias_guid_check->map_item);
		if (p_alias_guid) {
			cl_qmap_remove_item(p_alias_guid_tbl,
					    &p_alias_guid->map_item);
			osm_alias_guid_delete(&p_alias_guid);
		}
	}

	cl_qmap_remove(&sm->p_subn->port_guid_tbl, port_guid);

	/* If a remote SM was recorded on this port, forget it */
	p_sm_guid_tbl = &sm->p_subn->sm_guid_tbl;
	p_sm = (osm_remote_sm_t *) cl_qmap_remove(p_sm_guid_tbl, port_guid);
	if (p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_guid_tbl)) {
		/* need to remove this item */
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
			"Cleaned SM for port guid 0x%016" PRIx64 "\n",
			cl_ntoh64(port_guid));
		free(p_sm);
	}

	drop_mgr_remove_router(sm, port_guid);

	osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);

	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
		"Clearing abandoned LID range [%u,%u]\n",
		min_lid_ho, max_lid_ho);

	p_port_lid_tbl = &sm->p_subn->port_lid_tbl;
	for (lid_ho = min_lid_ho; lid_ho <= max_lid_ho; lid_ho++)
		cl_ptr_vector_set(p_port_lid_tbl, lid_ho, NULL);

	drop_mgr_clean_physp(sm, p_port->p_physp);

	/* Delete event forwarding subscriptions */
	if (sm->p_subn->opt.drop_event_subscriptions) {
		if (osm_infr_remove_subscriptions(sm->p_subn, sm->p_log, port_guid)
		    == CL_SUCCESS)
			OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			    "Removed event subscriptions for port 0x%016" PRIx64 "\n",
			    cl_ntoh64(port_guid));
	}

	/* Save the node pointer: it is needed for the log after the port is gone */
	p_node = p_port->p_node;

	osm_port_delete(&p_port);

	OSM_LOG(sm->p_log, OSM_LOG_INFO,
		"Removed port with GUID:0x%016" PRIx64
		" LID range [%u, %u] of node:%s\n",
		cl_ntoh64(port_gid.unicast.interface_id),
		min_lid_ho, max_lid_ho,
		p_node ? p_node->print_desc : "UNKNOWN");

Exit:
	OSM_LOG_EXIT(sm->p_log);
}
Packit 13e616
Packit 13e616
/*
 * Remove the switch object belonging to p_node from the subnet's switch
 * GUID table and delete it.
 *
 * sm     - SM object; its subnet's sw_guid_tbl is modified.
 * p_node - node whose switch object is dropped; p_node->sw is cleared
 *          on success.
 *
 * Logs ERR 0102 and leaves p_node untouched when the node GUID has no
 * entry in the switch table.
 */
static void drop_mgr_remove_switch(osm_sm_t * sm, IN osm_node_t * p_node)
{
	cl_qmap_t *p_tbl = &sm->p_subn->sw_guid_tbl;
	osm_switch_t *p_switch;

	OSM_LOG_ENTER(sm->p_log);

	p_switch = (osm_switch_t *)
	    cl_qmap_remove(p_tbl, osm_node_get_node_guid(p_node));

	if (p_switch != (osm_switch_t *) cl_qmap_end(p_tbl)) {
		/* Clear the node's reference before the switch is deleted */
		p_node->sw = NULL;
		osm_switch_delete(&p_switch);
	} else {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0102: "
			"Node 0x%016" PRIx64 " not in switch table\n",
			cl_ntoh64(osm_node_get_node_guid(p_node)));
	}

	OSM_LOG_EXIT(sm->p_log);
}
Packit 13e616
Packit 13e616
/*
 * Drop an unreachable node: remove all of its logical and physical ports,
 * its switch object (if any), its entry in the node GUID table, and finally
 * the node object itself.
 *
 * sm     - SM object; provides the log, subnet and unicast manager.
 * p_node - node to drop. It is deleted before this function returns.
 *
 * Returns TRUE unconditionally.
 */
static boolean_t drop_mgr_process_node(osm_sm_t * sm, IN osm_node_t * p_node)
{
	osm_physp_t *p_cur_physp;
	osm_port_t *p_cur_port;
	osm_node_t *p_removed;
	uint32_t idx;
	uint32_t num_physp;

	OSM_LOG_ENTER(sm->p_log);

	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
		"Unreachable node 0x%016" PRIx64 "\n",
		cl_ntoh64(osm_node_get_node_guid(p_node)));

	/* Record the dropped node in the unicast cache while it is still valid */
	if (sm->ucast_mgr.cache_valid)
		osm_ucast_cache_add_node(&sm->ucast_mgr, p_node);

	/*
	 * Walk every physical port slot of the node. A physp that still has
	 * a port object registered under its GUID is removed as a whole
	 * port; otherwise only the physp itself is cleaned up.
	 */
	num_physp = osm_node_get_num_physp(p_node);
	for (idx = 0; idx < num_physp; idx++) {
		p_cur_physp = osm_node_get_physp_ptr(p_node, idx);
		if (!p_cur_physp)
			continue;

		p_cur_port = osm_get_port_by_guid(sm->p_subn,
						  osm_physp_get_port_guid
						  (p_cur_physp));
		if (p_cur_port)
			drop_mgr_remove_port(sm, p_cur_port);
		else
			drop_mgr_clean_physp(sm, p_cur_physp);
	}

	if (p_node->sw)
		drop_mgr_remove_switch(sm, p_node);

	/* The table entry is expected to be this very node object */
	p_removed =
	    (osm_node_t *) cl_qmap_remove(&sm->p_subn->node_guid_tbl,
					  osm_node_get_node_guid(p_node));
	if (p_removed != p_node) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0105: "
			"Node 0x%016" PRIx64 " not in guid table\n",
			cl_ntoh64(osm_node_get_node_guid(p_node)));
	}

	/* free memory allocated to node */
	osm_node_delete(&p_node);

	OSM_LOG_EXIT(sm->p_log);
	return TRUE;
}
Packit 13e616
Packit 13e616
/*
 * Verify that a switch node was fully discovered and act on what is missing.
 *
 * sm     - SM object; provides the log, subnet and unicast manager.
 * p_node - switch node to check. It may be deleted by this call.
 *
 * The whole node is dropped (via drop_mgr_process_node) when any of the
 * following holds:
 *  - the node has no switch object;
 *  - physical port 0 is missing;
 *  - no port object is registered under port 0's GUID;
 *  - port 0 was not discovered in the last sweep.
 *
 * Otherwise, every external port (1..physp_tbl_size-1) that was not
 * discovered but still has a link peer is unlinked from that peer.
 */
static void drop_mgr_check_switch_node(osm_sm_t * sm, IN osm_node_t * p_node)
{
	ib_net64_t node_guid;
	osm_physp_t *p_physp, *p_remote_physp;
	osm_node_t *p_remote_node;
	osm_port_t *p_port;
	ib_net64_t port_guid;
	/*
	 * The loop bound physp_tbl_size is compared against this counter;
	 * an 8-bit counter would wrap and never terminate if the table
	 * held 256 entries, so use the same width as drop_mgr_process_node.
	 */
	uint32_t port_num;
	uint8_t remote_port_num;

	OSM_LOG_ENTER(sm->p_log);

	node_guid = osm_node_get_node_guid(p_node);

	/* Make sure we have a switch object for this node */
	if (!p_node->sw) {
		/* We do not have switch info for this node */
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
			"Node 0x%016" PRIx64 " no switch in table\n",
			cl_ntoh64(node_guid));

		drop_mgr_process_node(sm, p_node);
		goto Exit;
	}

	/* Make sure we have a port object for port zero */
	p_physp = osm_node_get_physp_ptr(p_node, 0);
	if (!p_physp) {
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
			"Node 0x%016" PRIx64 " no valid physical port 0\n",
			cl_ntoh64(node_guid));

		drop_mgr_process_node(sm, p_node);
		goto Exit;
	}

	port_guid = osm_physp_get_port_guid(p_physp);

	p_port = osm_get_port_by_guid(sm->p_subn, port_guid);

	if (!p_port) {
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
			"Node 0x%016" PRIx64 " has no port object\n",
			cl_ntoh64(node_guid));

		drop_mgr_process_node(sm, p_node);
		goto Exit;
	}

	if (!p_node->physp_discovered[0]) {
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
			"Node 0x%016" PRIx64 " port has discovery count zero\n",
			cl_ntoh64(node_guid));

		drop_mgr_process_node(sm, p_node);
		goto Exit;
	}

	/*
	 * Unlink all ports that haven't been discovered during the last sweep.
	 * Optimization: Skip the check if discovered all the ports of the switch.
	 */
	if (p_port->discovery_count < p_node->physp_tbl_size) {
		for (port_num = 1; port_num < p_node->physp_tbl_size; port_num++) {
			if (!p_node->physp_discovered[port_num]) {
				p_physp = osm_node_get_physp_ptr(p_node, port_num);
				if (!p_physp)
					continue;
				/* Nothing to unlink when the port has no peer */
				p_remote_physp = osm_physp_get_remote(p_physp);
				if (!p_remote_physp)
					continue;

				p_remote_node =
				    osm_physp_get_node_ptr(p_remote_physp);
				remote_port_num =
				    osm_physp_get_port_num(p_remote_physp);

				OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
					"Unlinking local node 0x%" PRIx64
					", port %u"
					"\n\t\t\t\tand remote node 0x%" PRIx64
					", port %u due to missing PortInfo\n",
					cl_ntoh64(osm_node_get_node_guid
						  (p_node)), port_num,
					cl_ntoh64(osm_node_get_node_guid
						  (p_remote_node)),
					remote_port_num);

				/* Record the dropped link in the unicast cache first */
				if (sm->ucast_mgr.cache_valid)
					osm_ucast_cache_add_link(&sm->ucast_mgr,
								 p_physp,
								 p_remote_physp);

				osm_node_unlink(p_node, (uint8_t) port_num,
						p_remote_node,
						(uint8_t) remote_port_num);
			}
		}
	}
Exit:
	OSM_LOG_EXIT(sm->p_log);
	return;
}
Packit 13e616
Packit 13e616
/*
 * Drop-manager entry point: remove from the subnet everything that was not
 * (fully) discovered during the last sweep.
 *
 * sm - SM object. sm->p_lock is acquired exclusively for the whole call
 *      and released before returning.
 *
 * Three passes are made under the lock:
 *  1. Node table: drop nodes whose discovery count is zero. For the other
 *     nodes, any physp with a non-zero pkeys.rcv_blocks_cnt is flagged for
 *     update, marked undiscovered, and a subnet initialization error is
 *     raised (see the comment in the loop body).
 *  2. Node table again: run the full-discovery check on every switch.
 *  3. Port table: remove ports whose discovery count is zero.
 *
 * Each loop fetches the next map item before processing the current one,
 * because processing may remove the current item from the map.
 */
void osm_drop_mgr_process(osm_sm_t * sm)
{
	cl_qmap_t *p_node_guid_tbl, *p_port_guid_tbl;
	osm_port_t *p_port, *p_next_port;
	osm_node_t *p_node, *p_next_node;
	int max_ports, port_num;
	osm_physp_t *p_physp;
	ib_net64_t port_guid;

	CL_ASSERT(sm);

	OSM_LOG_ENTER(sm->p_log);

	p_node_guid_tbl = &sm->p_subn->node_guid_tbl;
	p_port_guid_tbl = &sm->p_subn->port_guid_tbl;

	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);

	p_next_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl);
	while (p_next_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl)) {
		p_node = p_next_node;
		p_next_node =
		    (osm_node_t *) cl_qmap_next(&p_next_node->map_item);

		CL_ASSERT(cl_qmap_key(&p_node->map_item) ==
			  osm_node_get_node_guid(p_node));

		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"Checking node 0x%016" PRIx64 "\n",
			cl_ntoh64(osm_node_get_node_guid(p_node)));

		/*
		   Check if this node was discovered during the last sweep.
		   If not, it is unreachable in the current subnet, and
		   should therefore be removed from the subnet object.
		 */
		if (p_node->discovery_count == 0)
			drop_mgr_process_node(sm, p_node);
		else {
			/*
			 * We want to preserve the configured pkey indexes,
			 * so if we don't receive GetResp P_KeyTable for some block,
			 * do the following:
			 *   1. Drop node if the node is sw and got timeout for port 0.
			 *   2. Drop node if node is HCA/RTR.
			 *   3. Drop only physp if got timeout for sw when the port isn't 0.
			 * We'll set error during initialization in order to
			 * cause an immediate heavy sweep and try to get the
			 * configured P_KeyTable again.
			 */
			if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH)
				port_num = 0;
			else
				port_num = 1;
			max_ports = osm_node_get_num_physp(p_node);
			for (; port_num < max_ports; port_num++) {
				p_physp = osm_node_get_physp_ptr(p_node, port_num);
				if (!p_physp || p_physp->pkeys.rcv_blocks_cnt == 0)
					continue;
				p_physp->pkeys.rcv_blocks_cnt = 0;
				p_physp->need_update = 2;
				sm->p_subn->subnet_initialization_error = TRUE;
				port_guid = osm_physp_get_port_guid(p_physp);
				p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
				CL_ASSERT(p_port);
				if (!p_node->physp_discovered[port_num])
					continue;

				p_node->physp_discovered[port_num] = 0;

				/*
				 * The assertion above may be compiled out, so
				 * check explicitly instead of dereferencing a
				 * NULL port. The later passes deal with
				 * switches that lack a port object.
				 */
				if (!p_port) {
					OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
						"Node 0x%016" PRIx64
						" port %d has no port object\n",
						cl_ntoh64(osm_node_get_node_guid
							  (p_node)), port_num);
					continue;
				}
				p_port->discovery_count--;
			}
		}
	}

	/*
	   Go over all the nodes. If the node is a switch - make sure
	   there is also a switch record for it, and a portInfo record for
	   port zero of the node.
	   If not - this means that there was some error in getting the data
	   of this node. Drop the node.
	 */
	p_next_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl);
	while (p_next_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl)) {
		p_node = p_next_node;
		p_next_node =
		    (osm_node_t *) cl_qmap_next(&p_next_node->map_item);

		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"Checking full discovery of node 0x%016" PRIx64 "\n",
			cl_ntoh64(osm_node_get_node_guid(p_node)));

		if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH)
			continue;

		/* We are handling a switch node */
		drop_mgr_check_switch_node(sm, p_node);
	}

	p_next_port = (osm_port_t *) cl_qmap_head(p_port_guid_tbl);
	while (p_next_port != (osm_port_t *) cl_qmap_end(p_port_guid_tbl)) {
		p_port = p_next_port;
		p_next_port =
		    (osm_port_t *) cl_qmap_next(&p_next_port->map_item);

		CL_ASSERT(cl_qmap_key(&p_port->map_item) ==
			  osm_port_get_guid(p_port));

		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"Checking port 0x%016" PRIx64 "\n",
			cl_ntoh64(osm_port_get_guid(p_port)));

		/*
		   If the port is unreachable, remove it from the guid table.
		 */
		if (p_port->discovery_count == 0)
			drop_mgr_remove_port(sm, p_port);
	}

	CL_PLOCK_RELEASE(sm->p_lock);
	OSM_LOG_EXIT(sm->p_log);
}