Blame opensm/osm_ucast_dfsssp.c

Packit 13e616
/*
Packit 13e616
 * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
Packit 13e616
 * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved.
Packit 13e616
 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
Packit 13e616
 * Copyright (c) 2009-2015 ZIH, TU Dresden, Federal Republic of Germany. All rights reserved.
Packit 13e616
 * Copyright (C) 2012-2017 Tokyo Institute of Technology. All rights reserved.
Packit 13e616
 *
Packit 13e616
 * This software is available to you under a choice of one of two
Packit 13e616
 * licenses.  You may choose to be licensed under the terms of the GNU
Packit 13e616
 * General Public License (GPL) Version 2, available from the file
Packit 13e616
 * COPYING in the main directory of this source tree, or the
Packit 13e616
 * OpenIB.org BSD license below:
Packit 13e616
 *
Packit 13e616
 *     Redistribution and use in source and binary forms, with or
Packit 13e616
 *     without modification, are permitted provided that the following
Packit 13e616
 *     conditions are met:
Packit 13e616
 *
Packit 13e616
 *      - Redistributions of source code must retain the above
Packit 13e616
 *        copyright notice, this list of conditions and the following
Packit 13e616
 *        disclaimer.
Packit 13e616
 *
Packit 13e616
 *      - Redistributions in binary form must reproduce the above
Packit 13e616
 *        copyright notice, this list of conditions and the following
Packit 13e616
 *        disclaimer in the documentation and/or other materials
Packit 13e616
 *        provided with the distribution.
Packit 13e616
 *
Packit 13e616
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
Packit 13e616
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
Packit 13e616
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
Packit 13e616
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
Packit 13e616
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
Packit 13e616
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
Packit 13e616
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
Packit 13e616
 * SOFTWARE.
Packit 13e616
 *
Packit 13e616
 */
Packit 13e616
Packit 13e616
/*
Packit 13e616
 * Abstract:
Packit 13e616
 *    Implementation of OpenSM (deadlock-free) single-source-shortest-path routing
Packit 13e616
 *    (with dijkstra algorithm)
Packit 13e616
 */
Packit 13e616
Packit 13e616
#if HAVE_CONFIG_H
Packit 13e616
#  include <config.h>
Packit 13e616
#endif				/* HAVE_CONFIG_H */
Packit 13e616
Packit 13e616
#include <stdio.h>
Packit 13e616
#include <stdlib.h>
Packit 13e616
#include <string.h>
Packit 13e616
#include <complib/cl_heap.h>
Packit 13e616
#include <opensm/osm_file_ids.h>
Packit 13e616
#define FILE_ID OSM_FILE_UCAST_DFSSSP_C
Packit 13e616
#include <opensm/osm_ucast_mgr.h>
Packit 13e616
#include <opensm/osm_opensm.h>
Packit 13e616
#include <opensm/osm_node.h>
Packit 13e616
#include <opensm/osm_multicast.h>
Packit 13e616
#include <opensm/osm_mcast_mgr.h>
Packit 13e616
Packit 13e616
/* "infinity" for dijkstra */
Packit 13e616
#define INF      0x7FFFFFFF
Packit 13e616
Packit 13e616
enum {
Packit 13e616
	UNDISCOVERED = 0,
Packit 13e616
	DISCOVERED
Packit 13e616
};
Packit 13e616
Packit 13e616
enum {
Packit 13e616
	UNKNOWN = 0,
Packit 13e616
	GRAY,
Packit 13e616
	BLACK,
Packit 13e616
};
Packit 13e616
Packit 13e616
typedef struct link {
Packit 13e616
	uint64_t guid;		/* guid of the neighbor behind the link */
Packit 13e616
	uint32_t from;		/* base_index in the adjazenz list (start of the link) */
Packit 13e616
	uint8_t from_port;	/* port on the base_side (needed for weight update to identify the correct link for multigraphs) */
Packit 13e616
	uint32_t to;		/* index of the neighbor in the adjazenz list (end of the link) */
Packit 13e616
	uint8_t to_port;	/* port on the side of the neighbor (needed for the LFT) */
Packit 13e616
	uint64_t weight;	/* link weight */
Packit 13e616
	struct link *next;
Packit 13e616
} link_t;
Packit 13e616
Packit 13e616
typedef struct vertex {
Packit 13e616
	/* informations of the fabric */
Packit 13e616
	uint64_t guid;
Packit 13e616
	uint16_t lid;		/* for lft filling */
Packit 13e616
	uint32_t num_hca;	/* numbers of Hca/LIDs on the switch, for weight calculation */
Packit 13e616
	link_t *links;
Packit 13e616
	uint8_t hops;
Packit 13e616
	/* for dijkstra routing */
Packit 13e616
	link_t *used_link;	/* link between the vertex discovered before and this vertex */
Packit 13e616
	uint64_t distance;	/* distance from source to this vertex */
Packit 13e616
	uint8_t state;
Packit 13e616
	/* for the d-ary heap */
Packit 13e616
	size_t heap_index;
Packit 13e616
	/* for LFT writing and debug */
Packit 13e616
	osm_switch_t *sw;	/* selfpointer */
Packit 13e616
	boolean_t dropped;	/* indicate dropped switches (w/ ucast cache) */
Packit 13e616
} vertex_t;
Packit 13e616
Packit 13e616
typedef struct vltable {
Packit 13e616
	uint64_t num_lids;	/* size of the lids array */
Packit 13e616
	uint16_t *lids;		/* sorted array of all lids in the subnet */
Packit 13e616
	uint8_t *vls;		/* matrix form assignment lid X lid -> virtual lane */
Packit 13e616
} vltable_t;
Packit 13e616
Packit 13e616
typedef struct cdg_link {
Packit 13e616
	struct cdg_node *node;
Packit 13e616
	uint32_t num_pairs;	/* number of src->dest pairs incremented in path adding step */
Packit 13e616
	uint32_t max_len;	/* length of the srcdest array */
Packit 13e616
	uint32_t removed;	/* number of pairs removed in path deletion step */
Packit 13e616
	uint32_t *srcdest_pairs;
Packit 13e616
	struct cdg_link *next;
Packit 13e616
} cdg_link_t;
Packit 13e616
Packit 13e616
/* struct for a node of a binary tree with additional parent pointer */
Packit 13e616
typedef struct cdg_node {
Packit 13e616
	uint64_t channelID;	/* unique key consist of src lid + port + dest lid + port */
Packit 13e616
	cdg_link_t *linklist;	/* edges to adjazent nodes */
Packit 13e616
	uint8_t status;		/* node status in cycle search to avoid recursive function */
Packit 13e616
	uint8_t visited;	/* needed to traverse the binary tree */
Packit 13e616
	struct cdg_node *pre;	/* to save the path in cycle detection algorithm */
Packit 13e616
	struct cdg_node *left, *right, *parent;
Packit 13e616
} cdg_node_t;
Packit 13e616
Packit 13e616
typedef struct dfsssp_context {
Packit 13e616
	osm_routing_engine_type_t routing_type;
Packit 13e616
	osm_ucast_mgr_t *p_mgr;
Packit 13e616
	vertex_t *adj_list;
Packit 13e616
	uint32_t adj_list_size;
Packit 13e616
	vltable_t *srcdest2vl_table;
Packit 13e616
	uint8_t *vl_split_count;
Packit 13e616
} dfsssp_context_t;
Packit 13e616
Packit 13e616
/**************** set initial values for structs **********************
Packit 13e616
 **********************************************************************/
Packit 13e616
static inline void set_default_link(link_t * link)
Packit 13e616
{
Packit 13e616
	link->guid = 0;
Packit 13e616
	link->from = 0;
Packit 13e616
	link->from_port = 0;
Packit 13e616
	link->to = 0;
Packit 13e616
	link->to_port = 0;
Packit 13e616
	link->weight = 0;
Packit 13e616
	link->next = NULL;
Packit 13e616
}
Packit 13e616
Packit 13e616
static inline void set_default_vertex(vertex_t * vertex)
Packit 13e616
{
Packit 13e616
	vertex->guid = 0;
Packit 13e616
	vertex->lid = 0;
Packit 13e616
	vertex->num_hca = 0;
Packit 13e616
	vertex->links = NULL;
Packit 13e616
	vertex->hops = 0;
Packit 13e616
	vertex->used_link = NULL;
Packit 13e616
	vertex->distance = 0;
Packit 13e616
	vertex->state = UNDISCOVERED;
Packit 13e616
	vertex->heap_index = 0;
Packit 13e616
	vertex->sw = NULL;
Packit 13e616
	vertex->dropped = FALSE;
Packit 13e616
}
Packit 13e616
Packit 13e616
static inline void set_default_cdg_node(cdg_node_t * node)
Packit 13e616
{
Packit 13e616
	node->channelID = 0;
Packit 13e616
	node->linklist = NULL;
Packit 13e616
	node->status = UNKNOWN;
Packit 13e616
	node->visited = 0;
Packit 13e616
	node->pre = NULL;
Packit 13e616
	node->left = NULL;
Packit 13e616
	node->right = NULL;
Packit 13e616
	node->parent = NULL;
Packit 13e616
}
Packit 13e616
Packit 13e616
/**********************************************************************
Packit 13e616
 **********************************************************************/
Packit 13e616
Packit 13e616
/************ helper functions to save src/dest X vl combination ******
Packit 13e616
 **********************************************************************/
Packit 13e616
/* compare function of two lids for stdlib qsort */
Packit 13e616
static int cmp_lids(const void *l1, const void *l2)
Packit 13e616
{
Packit 13e616
	ib_net16_t lid1 = *((ib_net16_t *) l1), lid2 = *((ib_net16_t *) l2);
Packit 13e616
Packit 13e616
	if (lid1 < lid2)
Packit 13e616
		return -1;
Packit 13e616
	else if (lid1 > lid2)
Packit 13e616
		return 1;
Packit 13e616
	else
Packit 13e616
		return 0;
Packit 13e616
}
Packit 13e616
Packit 13e616
/* use stdlib to sort the lid array */
Packit 13e616
static inline void vltable_sort_lids(vltable_t * vltable)
Packit 13e616
{
Packit 13e616
	qsort(vltable->lids, vltable->num_lids, sizeof(ib_net16_t), cmp_lids);
Packit 13e616
}
Packit 13e616
Packit 13e616
/* use stdlib to get index of key in lid array;
Packit 13e616
   return -1 if lid isn't found in lids array
Packit 13e616
*/
Packit 13e616
static inline int64_t vltable_get_lidindex(ib_net16_t * key, vltable_t * vltable)
Packit 13e616
{
Packit 13e616
	ib_net16_t *found_lid = NULL;
Packit 13e616
Packit 13e616
	found_lid =
Packit 13e616
	    (ib_net16_t *) bsearch(key, vltable->lids, vltable->num_lids,
Packit 13e616
				   sizeof(ib_net16_t), cmp_lids);
Packit 13e616
	if (found_lid)
Packit 13e616
		return found_lid - vltable->lids;
Packit 13e616
	else
Packit 13e616
		return -1;
Packit 13e616
}
Packit 13e616
Packit 13e616
/* get virtual lane from src lid X dest lid combination;
Packit 13e616
   return -1 for invalid lids
Packit 13e616
*/
Packit 13e616
static int32_t vltable_get_vl(vltable_t * vltable, ib_net16_t slid, ib_net16_t dlid)
Packit 13e616
{
Packit 13e616
	int64_t ind1 = vltable_get_lidindex(&slid, vltable);
Packit 13e616
	int64_t ind2 = vltable_get_lidindex(&dlid, vltable);
Packit 13e616
Packit 13e616
	if (ind1 > -1 && ind2 > -1)
Packit 13e616
		return (int32_t) (vltable->
Packit 13e616
				  vls[ind1 + ind2 * vltable->num_lids]);
Packit 13e616
	else
Packit 13e616
		return -1;
Packit 13e616
}
Packit 13e616
Packit 13e616
/* set a virtual lane in the matrix */
Packit 13e616
static inline void vltable_insert(vltable_t * vltable, ib_net16_t slid,
Packit 13e616
				  ib_net16_t dlid, uint8_t vl)
Packit 13e616
{
Packit 13e616
	int64_t ind1 = vltable_get_lidindex(&slid, vltable);
Packit 13e616
	int64_t ind2 = vltable_get_lidindex(&dlid, vltable);
Packit 13e616
Packit 13e616
	if (ind1 > -1 && ind2 > -1)
Packit 13e616
		vltable->vls[ind1 + ind2 * vltable->num_lids] = vl;
Packit 13e616
}
Packit 13e616
Packit 13e616
/* change a number of lanes from lane xy to lane yz */
Packit 13e616
static void vltable_change_vl(vltable_t * vltable, uint8_t from, uint8_t to,
Packit 13e616
			      uint64_t count)
Packit 13e616
{
Packit 13e616
	uint64_t set = 0, stop = 0;
Packit 13e616
	uint64_t ind1 = 0, ind2 = 0;
Packit 13e616
Packit 13e616
	for (ind1 = 0; ind1 < vltable->num_lids; ind1++) {
Packit 13e616
		for (ind2 = 0; ind2 < vltable->num_lids; ind2++) {
Packit 13e616
			if (set == count) {
Packit 13e616
				stop = 1;
Packit 13e616
				break;
Packit 13e616
			}
Packit 13e616
			if (ind1 != ind2) {
Packit 13e616
				if (vltable->
Packit 13e616
				    vls[ind1 + ind2 * vltable->num_lids] ==
Packit 13e616
				    from) {
Packit 13e616
					vltable->vls[ind1 +
Packit 13e616
						     ind2 * vltable->num_lids] =
Packit 13e616
					    to;
Packit 13e616
					set++;
Packit 13e616
				}
Packit 13e616
			}
Packit 13e616
		}
Packit 13e616
		if (stop)
Packit 13e616
			break;
Packit 13e616
	}
Packit 13e616
}
Packit 13e616
Packit 13e616
static void vltable_print(osm_ucast_mgr_t * p_mgr, vltable_t * vltable)
Packit 13e616
{
Packit 13e616
	uint64_t ind1 = 0, ind2 = 0;
Packit 13e616
Packit 13e616
	for (ind1 = 0; ind1 < vltable->num_lids; ind1++) {
Packit 13e616
		for (ind2 = 0; ind2 < vltable->num_lids; ind2++) {
Packit 13e616
			if (ind1 != ind2) {
Packit 13e616
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
Packit 13e616
					"   route from src_lid=%" PRIu16
Packit 13e616
					" to dest_lid=%" PRIu16 " on vl=%" PRIu8
Packit 13e616
					"\n", cl_ntoh16(vltable->lids[ind1]),
Packit 13e616
					cl_ntoh16(vltable->lids[ind2]),
Packit 13e616
					vltable->vls[ind1 +
Packit 13e616
						     ind2 * vltable->num_lids]);
Packit 13e616
			}
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
}
Packit 13e616
Packit 13e616
static void vltable_dealloc(vltable_t ** vltable)
Packit 13e616
{
Packit 13e616
	if (*vltable) {
Packit 13e616
		if ((*vltable)->lids)
Packit 13e616
			free((*vltable)->lids);
Packit 13e616
		if ((*vltable)->vls)
Packit 13e616
			free((*vltable)->vls);
Packit 13e616
		free(*vltable);
Packit 13e616
		*vltable = NULL;
Packit 13e616
	}
Packit 13e616
}
Packit 13e616
Packit 13e616
static int vltable_alloc(vltable_t ** vltable, uint64_t size)
Packit 13e616
{
Packit 13e616
	/* allocate VL table and indexing array */
Packit 13e616
	*vltable = (vltable_t *) malloc(sizeof(vltable_t));
Packit 13e616
	if (!(*vltable))
Packit 13e616
		goto ERROR;
Packit 13e616
	(*vltable)->num_lids = size;
Packit 13e616
	(*vltable)->lids = (ib_net16_t *) malloc(size * sizeof(ib_net16_t));
Packit 13e616
	if (!((*vltable)->lids))
Packit 13e616
		goto ERROR;
Packit 13e616
	(*vltable)->vls = (uint8_t *) malloc(size * size * sizeof(uint8_t));
Packit 13e616
	if (!((*vltable)->vls))
Packit 13e616
		goto ERROR;
Packit 13e616
	memset((*vltable)->vls, OSM_DEFAULT_SL, size * size);
Packit 13e616
Packit 13e616
	return 0;
Packit 13e616
Packit 13e616
ERROR:
Packit 13e616
	vltable_dealloc(vltable);
Packit 13e616
Packit 13e616
	return 1;
Packit 13e616
}
Packit 13e616
Packit 13e616
/**********************************************************************
Packit 13e616
 **********************************************************************/
Packit 13e616
Packit 13e616
/************ helper functions to save/manage the channel dep. graph **
Packit 13e616
 **********************************************************************/
Packit 13e616
/* update the srcdest array;
Packit 13e616
   realloc array (double the size) if size is not large enough
Packit 13e616
*/
Packit 13e616
static void set_next_srcdest_pair(cdg_link_t * link, uint32_t srcdest)
Packit 13e616
{
Packit 13e616
	uint32_t new_size = 0, start_size = 2;
Packit 13e616
	uint32_t *tmp = NULL, *tmp2 = NULL;
Packit 13e616
Packit 13e616
	if (link->num_pairs == 0) {
Packit 13e616
		link->srcdest_pairs =
Packit 13e616
		    (uint32_t *) malloc(start_size * sizeof(uint32_t));
Packit 13e616
		link->srcdest_pairs[link->num_pairs] = srcdest;
Packit 13e616
		link->max_len = start_size;
Packit 13e616
		link->removed = 0;
Packit 13e616
	} else if (link->num_pairs == link->max_len) {
Packit 13e616
		new_size = link->max_len << 1;
Packit 13e616
		tmp = (uint32_t *) malloc(new_size * sizeof(uint32_t));
Packit 13e616
		tmp =
Packit 13e616
		    memcpy(tmp, link->srcdest_pairs,
Packit 13e616
			   link->max_len * sizeof(uint32_t));
Packit 13e616
		tmp2 = link->srcdest_pairs;
Packit 13e616
		link->srcdest_pairs = tmp;
Packit 13e616
		link->srcdest_pairs[link->num_pairs] = srcdest;
Packit 13e616
		free(tmp2);
Packit 13e616
		link->max_len = new_size;
Packit 13e616
	} else {
Packit 13e616
		link->srcdest_pairs[link->num_pairs] = srcdest;
Packit 13e616
	}
Packit 13e616
	link->num_pairs++;
Packit 13e616
}
Packit 13e616
Packit 13e616
static inline uint32_t get_next_srcdest_pair(cdg_link_t * link, uint32_t index)
Packit 13e616
{
Packit 13e616
	return link->srcdest_pairs[index];
Packit 13e616
}
Packit 13e616
Packit 13e616
/* traverse binary tree to find a node */
Packit 13e616
static cdg_node_t *cdg_search(cdg_node_t * root, uint64_t channelID)
Packit 13e616
{
Packit 13e616
	while (root) {
Packit 13e616
		if (channelID < root->channelID)
Packit 13e616
			root = root->left;
Packit 13e616
		else if (channelID > root->channelID)
Packit 13e616
			root = root->right;
Packit 13e616
		else if (channelID == root->channelID)
Packit 13e616
			return root;
Packit 13e616
	}
Packit 13e616
	return NULL;
Packit 13e616
}
Packit 13e616
Packit 13e616
/* insert new node into the binary tree */
Packit 13e616
static void cdg_insert(cdg_node_t ** root, cdg_node_t * new_node)
Packit 13e616
{
Packit 13e616
	cdg_node_t *current = *root;
Packit 13e616
Packit 13e616
	if (!current) {
Packit 13e616
		current = new_node;
Packit 13e616
		*root = current;
Packit 13e616
		return;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	while (current) {
Packit 13e616
		if (new_node->channelID < current->channelID) {
Packit 13e616
			if (current->left) {
Packit 13e616
				current = current->left;
Packit 13e616
			} else {
Packit 13e616
				current->left = new_node;
Packit 13e616
				new_node->parent = current;
Packit 13e616
				break;
Packit 13e616
			}
Packit 13e616
		} else if (new_node->channelID > current->channelID) {
Packit 13e616
			if (current->right) {
Packit 13e616
				current = current->right;
Packit 13e616
			} else {
Packit 13e616
				current->right = new_node;
Packit 13e616
				new_node->parent = current;
Packit 13e616
				break;
Packit 13e616
			}
Packit 13e616
		} else if (new_node->channelID == current->channelID) {
Packit 13e616
			/* not really possible, maybe programming error */
Packit 13e616
			break;
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
}
Packit 13e616
Packit 13e616
static void cdg_node_dealloc(cdg_node_t * node)
Packit 13e616
{
Packit 13e616
	cdg_link_t *link = node->linklist, *tmp = NULL;
Packit 13e616
Packit 13e616
	/* dealloc linklist */
Packit 13e616
	while (link) {
Packit 13e616
		tmp = link;
Packit 13e616
		link = link->next;
Packit 13e616
Packit 13e616
		if (tmp->num_pairs)
Packit 13e616
			free(tmp->srcdest_pairs);
Packit 13e616
		free(tmp);
Packit 13e616
	}
Packit 13e616
	/* dealloc node */
Packit 13e616
	free(node);
Packit 13e616
}
Packit 13e616
Packit 13e616
static void cdg_dealloc(cdg_node_t ** root)
Packit 13e616
{
Packit 13e616
	cdg_node_t *current = *root;
Packit 13e616
Packit 13e616
	while (current) {
Packit 13e616
		if (current->left) {
Packit 13e616
			current = current->left;
Packit 13e616
		} else if (current->right) {
Packit 13e616
			current = current->right;
Packit 13e616
		} else {
Packit 13e616
			if (current->parent == NULL) {
Packit 13e616
				cdg_node_dealloc(current);
Packit 13e616
				*root = NULL;
Packit 13e616
				break;
Packit 13e616
			}
Packit 13e616
			if (current->parent->left == current) {
Packit 13e616
				current = current->parent;
Packit 13e616
				cdg_node_dealloc(current->left);
Packit 13e616
				current->left = NULL;
Packit 13e616
			} else if (current->parent->right == current) {
Packit 13e616
				current = current->parent;
Packit 13e616
				cdg_node_dealloc(current->right);
Packit 13e616
				current->right = NULL;
Packit 13e616
			}
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
}
Packit 13e616
Packit 13e616
/* search for a edge in the cdg which should be removed to break a cycle */
Packit 13e616
static cdg_link_t *get_weakest_link_in_cycle(cdg_node_t * cycle)
Packit 13e616
{
Packit 13e616
	cdg_node_t *current = cycle, *node_with_weakest_link = NULL;
Packit 13e616
	cdg_link_t *link = NULL, *weakest_link = NULL;
Packit 13e616
Packit 13e616
	link = current->linklist;
Packit 13e616
	while (link) {
Packit 13e616
		if (link->node->status == GRAY) {
Packit 13e616
			weakest_link = link;
Packit 13e616
			node_with_weakest_link = current;
Packit 13e616
			current = link->node;
Packit 13e616
			break;
Packit 13e616
		}
Packit 13e616
		link = link->next;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	while (1) {
Packit 13e616
		current->status = UNKNOWN;
Packit 13e616
		link = current->linklist;
Packit 13e616
		while (link) {
Packit 13e616
			if (link->node->status == GRAY) {
Packit 13e616
				if ((link->num_pairs - link->removed) <
Packit 13e616
				    (weakest_link->num_pairs -
Packit 13e616
				     weakest_link->removed)) {
Packit 13e616
					weakest_link = link;
Packit 13e616
					node_with_weakest_link = current;
Packit 13e616
				}
Packit 13e616
				current = link->node;
Packit 13e616
				break;
Packit 13e616
			}
Packit 13e616
			link = link->next;
Packit 13e616
		}
Packit 13e616
		/* if complete cycle is traversed */
Packit 13e616
		if (current == cycle) {
Packit 13e616
			current->status = UNKNOWN;
Packit 13e616
			break;
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
	if (node_with_weakest_link->linklist == weakest_link) {
Packit 13e616
		node_with_weakest_link->linklist = weakest_link->next;
Packit 13e616
	} else {
Packit 13e616
		link = node_with_weakest_link->linklist;
Packit 13e616
		while (link) {
Packit 13e616
			if (link->next == weakest_link) {
Packit 13e616
				link->next = weakest_link->next;
Packit 13e616
				break;
Packit 13e616
			}
Packit 13e616
			link = link->next;
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
	return weakest_link;
Packit 13e616
}
Packit 13e616
Packit 13e616
/* search for nodes in the cdg not yet reached in the cycle search process;
Packit 13e616
   (some nodes are unreachable, e.g. a node is a source or the cdg has not connected parts)
Packit 13e616
*/
Packit 13e616
static cdg_node_t *get_next_cdg_node(cdg_node_t * root)
Packit 13e616
{
Packit 13e616
	cdg_node_t *current = root, *res = NULL;
Packit 13e616
Packit 13e616
	while (current) {
Packit 13e616
		current->visited = 1;
Packit 13e616
		if (current->status == UNKNOWN) {
Packit 13e616
			res = current;
Packit 13e616
			break;
Packit 13e616
		}
Packit 13e616
		if (current->left && !current->left->visited) {
Packit 13e616
			current = current->left;
Packit 13e616
		} else if (current->right && !current->right->visited) {
Packit 13e616
			current = current->right;
Packit 13e616
		} else {
Packit 13e616
			if (current->left)
Packit 13e616
				current->left->visited = 0;
Packit 13e616
			if (current->right)
Packit 13e616
				current->right->visited = 0;
Packit 13e616
			if (current->parent == NULL)
Packit 13e616
				break;
Packit 13e616
			else
Packit 13e616
				current = current->parent;
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* Clean up */
Packit 13e616
	while (current) {
Packit 13e616
		current->visited = 0;
Packit 13e616
		if (current->left)
Packit 13e616
			current->left->visited = 0;
Packit 13e616
		if (current->right)
Packit 13e616
			current->right->visited = 0;
Packit 13e616
		current = current->parent;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	return res;
Packit 13e616
}
Packit 13e616
Packit 13e616
/* make a DFS on the cdg to check for a cycle */
Packit 13e616
static cdg_node_t *search_cycle_in_channel_dep_graph(cdg_node_t * cdg,
Packit 13e616
						     cdg_node_t * start_node)
Packit 13e616
{
Packit 13e616
	cdg_node_t *cycle = NULL;
Packit 13e616
	cdg_node_t *current = start_node, *next_node = NULL, *tmp = NULL;
Packit 13e616
	cdg_link_t *link = NULL;
Packit 13e616
Packit 13e616
	while (current) {
Packit 13e616
		current->status = GRAY;
Packit 13e616
		link = current->linklist;
Packit 13e616
		next_node = NULL;
Packit 13e616
		while (link) {
Packit 13e616
			if (link->node->status == UNKNOWN) {
Packit 13e616
				next_node = link->node;
Packit 13e616
				break;
Packit 13e616
			}
Packit 13e616
			if (link->node->status == GRAY) {
Packit 13e616
				cycle = link->node;
Packit 13e616
				goto Exit;
Packit 13e616
			}
Packit 13e616
			link = link->next;
Packit 13e616
		}
Packit 13e616
		if (next_node) {
Packit 13e616
			next_node->pre = current;
Packit 13e616
			current = next_node;
Packit 13e616
		} else {
Packit 13e616
			/* found a sink in the graph, go to last node */
Packit 13e616
			current->status = BLACK;
Packit 13e616
Packit 13e616
			/* srcdest_pairs of this node aren't relevant, free the allocated memory */
Packit 13e616
			link = current->linklist;
Packit 13e616
			while (link) {
Packit 13e616
				if (link->num_pairs)
Packit 13e616
					free(link->srcdest_pairs);
Packit 13e616
				link->srcdest_pairs = NULL;
Packit 13e616
				link->num_pairs = 0;
Packit 13e616
				link->removed = 0;
Packit 13e616
				link = link->next;
Packit 13e616
			}
Packit 13e616
Packit 13e616
			if (current->pre) {
Packit 13e616
				tmp = current;
Packit 13e616
				current = current->pre;
Packit 13e616
				tmp->pre = NULL;
Packit 13e616
			} else {
Packit 13e616
				/* search for other subgraphs in cdg */
Packit 13e616
				current = get_next_cdg_node(cdg);
Packit 13e616
				if (!current)
Packit 13e616
					break;	/* all relevant nodes traversed, no more cycles found */
Packit 13e616
			}
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
Exit:
Packit 13e616
	return cycle;
Packit 13e616
}
Packit 13e616
Packit 13e616
/* calculate the path from source to destination port;
Packit 13e616
   new channels are added directly to the cdg
Packit 13e616
*/
Packit 13e616
static int update_channel_dep_graph(cdg_node_t ** cdg_root,
Packit 13e616
				    osm_port_t * src_port, uint16_t slid,
Packit 13e616
				    osm_port_t * dest_port, uint16_t dlid)
Packit 13e616
{
Packit 13e616
	osm_node_t *local_node = NULL, *remote_node = NULL;
Packit 13e616
	uint16_t local_lid = 0, remote_lid = 0;
Packit 13e616
	uint32_t srcdest = 0;
Packit 13e616
	uint8_t local_port = 0, remote_port = 0;
Packit 13e616
	uint64_t channelID = 0;
Packit 13e616
Packit 13e616
	cdg_node_t *channel_head = NULL, *channel = NULL, *last_channel = NULL;
Packit 13e616
	cdg_link_t *linklist = NULL;
Packit 13e616
Packit 13e616
	/* set the identifier for the src/dest pair to save this on each edge of the cdg */
Packit 13e616
	srcdest = (((uint32_t) slid) << 16) + ((uint32_t) dlid);
Packit 13e616
Packit 13e616
	channel_head = (cdg_node_t *) malloc(sizeof(cdg_node_t));
Packit 13e616
	if (!channel_head)
Packit 13e616
		goto ERROR;
Packit 13e616
	set_default_cdg_node(channel_head);
Packit 13e616
	last_channel = channel_head;
Packit 13e616
Packit 13e616
	/* if src is a Hca, then the channel from Hca to switch would be a source in the graph
Packit 13e616
	   sources can't be part of a cycle -> skip this channel
Packit 13e616
	 */
Packit 13e616
	remote_node =
Packit 13e616
	    osm_node_get_remote_node(src_port->p_node,
Packit 13e616
				     src_port->p_physp->port_num, &remote_port);
Packit 13e616
Packit 13e616
	while (remote_node && remote_node->sw) {
Packit 13e616
		local_node = remote_node;
Packit 13e616
		local_port = local_node->sw->new_lft[dlid];
Packit 13e616
		/* sanity check: local_port must be set or routing is broken */
Packit 13e616
		if (local_port == OSM_NO_PATH)
Packit 13e616
			goto ERROR;
Packit 13e616
		local_lid = cl_ntoh16(osm_node_get_base_lid(local_node, 0));
Packit 13e616
		/* each port belonging to a switch has lmc==0 -> get_base_lid is fine
Packit 13e616
		   (local/remote port in this function are always part of a switch)
Packit 13e616
		 */
Packit 13e616
Packit 13e616
		remote_node =
Packit 13e616
		    osm_node_get_remote_node(local_node, local_port,
Packit 13e616
					     &remote_port);
Packit 13e616
		/* if remote_node is a Hca, then the last channel from switch to Hca would be a sink in the cdg -> skip */
Packit 13e616
		if (!remote_node || !remote_node->sw)
Packit 13e616
			break;
Packit 13e616
		remote_lid = cl_ntoh16(osm_node_get_base_lid(remote_node, 0));
Packit 13e616
Packit 13e616
		channelID =
Packit 13e616
		    (((uint64_t) local_lid) << 48) +
Packit 13e616
		    (((uint64_t) local_port) << 32) +
Packit 13e616
		    (((uint64_t) remote_lid) << 16) + ((uint64_t) remote_port);
Packit 13e616
		channel = cdg_search(*cdg_root, channelID);
Packit 13e616
		if (channel) {
Packit 13e616
			/* check whether last channel has connection to this channel, i.e. subpath already exists in cdg */
Packit 13e616
			linklist = last_channel->linklist;
Packit 13e616
			while (linklist && linklist->node != channel
Packit 13e616
			       && linklist->next)
Packit 13e616
				linklist = linklist->next;
Packit 13e616
			/* if there is no connection, add one */
Packit 13e616
			if (linklist) {
Packit 13e616
				if (linklist->node == channel) {
Packit 13e616
					set_next_srcdest_pair(linklist,
Packit 13e616
							      srcdest);
Packit 13e616
				} else {
Packit 13e616
					linklist->next =
Packit 13e616
					    (cdg_link_t *)
Packit 13e616
					    malloc(sizeof(cdg_link_t));
Packit 13e616
					if (!linklist->next)
Packit 13e616
						goto ERROR;
Packit 13e616
					linklist = linklist->next;
Packit 13e616
					linklist->node = channel;
Packit 13e616
					linklist->num_pairs = 0;
Packit 13e616
					linklist->srcdest_pairs = NULL;
Packit 13e616
					set_next_srcdest_pair(linklist,
Packit 13e616
							      srcdest);
Packit 13e616
					linklist->next = NULL;
Packit 13e616
				}
Packit 13e616
			} else {
Packit 13e616
				/* either this is the first channel of the path, or the last channel was a new channel, or last channel was a sink */
Packit 13e616
				last_channel->linklist =
Packit 13e616
				    (cdg_link_t *) malloc(sizeof(cdg_link_t));
Packit 13e616
				if (!last_channel->linklist)
Packit 13e616
					goto ERROR;
Packit 13e616
				last_channel->linklist->node = channel;
Packit 13e616
				last_channel->linklist->num_pairs = 0;
Packit 13e616
				last_channel->linklist->srcdest_pairs = NULL;
Packit 13e616
				set_next_srcdest_pair(last_channel->linklist,
Packit 13e616
						      srcdest);
Packit 13e616
				last_channel->linklist->next = NULL;
Packit 13e616
			}
Packit 13e616
		} else {
Packit 13e616
			/* create new channel */
Packit 13e616
			channel = (cdg_node_t *) malloc(sizeof(cdg_node_t));
Packit 13e616
			if (!channel)
Packit 13e616
				goto ERROR;
Packit 13e616
			set_default_cdg_node(channel);
Packit 13e616
			channel->channelID = channelID;
Packit 13e616
			cdg_insert(cdg_root, channel);
Packit 13e616
Packit 13e616
			/* go to end of link list of last channel */
Packit 13e616
			linklist = last_channel->linklist;
Packit 13e616
			while (linklist && linklist->next)
Packit 13e616
				linklist = linklist->next;
Packit 13e616
			if (linklist) {
Packit 13e616
				/* update last link of an existing channel */
Packit 13e616
				linklist->next =
Packit 13e616
				    (cdg_link_t *) malloc(sizeof(cdg_link_t));
Packit 13e616
				if (!linklist->next)
Packit 13e616
					goto ERROR;
Packit 13e616
				linklist = linklist->next;
Packit 13e616
				linklist->node = channel;
Packit 13e616
				linklist->num_pairs = 0;
Packit 13e616
				linklist->srcdest_pairs = NULL;
Packit 13e616
				set_next_srcdest_pair(linklist, srcdest);
Packit 13e616
				linklist->next = NULL;
Packit 13e616
			} else {
Packit 13e616
				/* either this is the first channel of the path, or the last channel was a new channel, or last channel was a sink */
Packit 13e616
				last_channel->linklist =
Packit 13e616
				    (cdg_link_t *) malloc(sizeof(cdg_link_t));
Packit 13e616
				if (!last_channel->linklist)
Packit 13e616
					goto ERROR;
Packit 13e616
				last_channel->linklist->node = channel;
Packit 13e616
				last_channel->linklist->num_pairs = 0;
Packit 13e616
				last_channel->linklist->srcdest_pairs = NULL;
Packit 13e616
				set_next_srcdest_pair(last_channel->linklist,
Packit 13e616
						      srcdest);
Packit 13e616
				last_channel->linklist->next = NULL;
Packit 13e616
			}
Packit 13e616
		}
Packit 13e616
		last_channel = channel;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	if (channel_head->linklist) {
Packit 13e616
		if (channel_head->linklist->srcdest_pairs)
Packit 13e616
			free(channel_head->linklist->srcdest_pairs);
Packit 13e616
		free(channel_head->linklist);
Packit 13e616
	}
Packit 13e616
	free(channel_head);
Packit 13e616
Packit 13e616
	return 0;
Packit 13e616
Packit 13e616
ERROR:
Packit 13e616
	/* cleanup data and exit */
Packit 13e616
	if (channel_head) {
Packit 13e616
		if (channel_head->linklist)
Packit 13e616
			free(channel_head->linklist);
Packit 13e616
		free(channel_head);
Packit 13e616
	}
Packit 13e616
Packit 13e616
	return 1;
Packit 13e616
}
Packit 13e616
Packit 13e616
/* calculate the path from source to destination port;
Packit 13e616
   the links in the cdg representing this path are decremented to simulate the removal
Packit 13e616
*/
Packit 13e616
static int remove_path_from_cdg(cdg_node_t ** cdg_root, osm_port_t * src_port,
Packit 13e616
				uint16_t slid, osm_port_t * dest_port,
Packit 13e616
				uint16_t dlid)
Packit 13e616
{
Packit 13e616
	osm_node_t *local_node = NULL, *remote_node = NULL;
Packit 13e616
	uint16_t local_lid = 0, remote_lid = 0;
Packit 13e616
	uint8_t local_port = 0, remote_port = 0;
Packit 13e616
	uint64_t channelID = 0;
Packit 13e616
Packit 13e616
	cdg_node_t *channel_head = NULL, *channel = NULL, *last_channel = NULL;
Packit 13e616
	cdg_link_t *linklist = NULL;
Packit 13e616
Packit 13e616
	channel_head = (cdg_node_t *) malloc(sizeof(cdg_node_t));
Packit 13e616
	if (!channel_head)
Packit 13e616
		goto ERROR;
Packit 13e616
	set_default_cdg_node(channel_head);
Packit 13e616
	last_channel = channel_head;
Packit 13e616
Packit 13e616
	/* if src is a Hca, then the channel from Hca to switch would be a source in the graph
Packit 13e616
	   sources can't be part of a cycle -> skip this channel
Packit 13e616
	 */
Packit 13e616
	remote_node =
Packit 13e616
	    osm_node_get_remote_node(src_port->p_node,
Packit 13e616
				     src_port->p_physp->port_num, &remote_port);
Packit 13e616
Packit 13e616
	while (remote_node && remote_node->sw) {
Packit 13e616
		local_node = remote_node;
Packit 13e616
		local_port = local_node->sw->new_lft[dlid];
Packit 13e616
		/* sanity check: local_port must be set or routing is broken */
Packit 13e616
		if (local_port == OSM_NO_PATH)
Packit 13e616
			goto ERROR;
Packit 13e616
		local_lid = cl_ntoh16(osm_node_get_base_lid(local_node, 0));
Packit 13e616
Packit 13e616
		remote_node =
Packit 13e616
		    osm_node_get_remote_node(local_node, local_port,
Packit 13e616
					     &remote_port);
Packit 13e616
		/* if remote_node is a Hca, then the last channel from switch to Hca would be a sink in the cdg -> skip */
Packit 13e616
		if (!remote_node || !remote_node->sw)
Packit 13e616
			break;
Packit 13e616
		remote_lid = cl_ntoh16(osm_node_get_base_lid(remote_node, 0));
Packit 13e616
Packit 13e616
		channelID =
Packit 13e616
		    (((uint64_t) local_lid) << 48) +
Packit 13e616
		    (((uint64_t) local_port) << 32) +
Packit 13e616
		    (((uint64_t) remote_lid) << 16) + ((uint64_t) remote_port);
Packit 13e616
		channel = cdg_search(*cdg_root, channelID);
Packit 13e616
		if (channel) {
Packit 13e616
			/* check whether last channel has connection to this channel, i.e. subpath already exists in cdg */
Packit 13e616
			linklist = last_channel->linklist;
Packit 13e616
			while (linklist && linklist->node != channel
Packit 13e616
			       && linklist->next)
Packit 13e616
				linklist = linklist->next;
Packit 13e616
			/* remove the srcdest from the link */
Packit 13e616
			if (linklist) {
Packit 13e616
				if (linklist->node == channel) {
Packit 13e616
					linklist->removed++;
Packit 13e616
				} else {
Packit 13e616
					/* may happen if the link is missing (thru cycle detect algorithm) */
Packit 13e616
				}
Packit 13e616
			} else {
Packit 13e616
				/* may happen if the link is missing (thru cycle detect algorithm or last_channel==channel_head (dummy channel)) */
Packit 13e616
			}
Packit 13e616
		} else {
Packit 13e616
			/* must be an error, channels for the path are added before, so a missing channel would be a corrupt data structure */
Packit 13e616
			goto ERROR;
Packit 13e616
		}
Packit 13e616
		last_channel = channel;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	if (channel_head->linklist)
Packit 13e616
		free(channel_head->linklist);
Packit 13e616
	free(channel_head);
Packit 13e616
Packit 13e616
	return 0;
Packit 13e616
Packit 13e616
ERROR:
Packit 13e616
	/* cleanup data and exit */
Packit 13e616
	if (channel_head) {
Packit 13e616
		if (channel_head->linklist)
Packit 13e616
			free(channel_head->linklist);
Packit 13e616
		free(channel_head);
Packit 13e616
	}
Packit 13e616
Packit 13e616
	return 1;
Packit 13e616
}
Packit 13e616
Packit 13e616
/**********************************************************************
Packit 13e616
 **********************************************************************/
Packit 13e616
Packit 13e616
/************ helper functions to generate an ordered list of ports ***
Packit 13e616
 ************ (functions copied from osm_ucast_mgr.c and modified) ****
Packit 13e616
 **********************************************************************/
Packit 13e616
static void add_sw_endports_to_order_list(osm_switch_t * sw,
Packit 13e616
					  osm_ucast_mgr_t * m,
Packit 13e616
					  cl_qmap_t * guid_tbl,
Packit 13e616
					  boolean_t add_guids)
Packit 13e616
{
Packit 13e616
	osm_port_t *port;
Packit 13e616
	ib_net64_t port_guid;
Packit 13e616
	uint64_t sw_guid;
Packit 13e616
	osm_physp_t *p;
Packit 13e616
	int i;
Packit 13e616
	boolean_t found;
Packit 13e616
Packit 13e616
	for (i = 1; i < sw->num_ports; i++) {
Packit 13e616
		p = osm_node_get_physp_ptr(sw->p_node, i);
Packit 13e616
		if (p && p->p_remote_physp && !p->p_remote_physp->p_node->sw) {
Packit 13e616
			port_guid = p->p_remote_physp->port_guid;
Packit 13e616
			/* check if link is healthy, otherwise ignore CA */
Packit 13e616
			if (!osm_link_is_healthy(p)) {
Packit 13e616
				sw_guid =
Packit 13e616
				    cl_ntoh64(osm_node_get_node_guid
Packit 13e616
					      (sw->p_node));
Packit 13e616
				OSM_LOG(m->p_log, OSM_LOG_INFO,
Packit 13e616
					"WRN AD40: ignoring CA due to unhealthy"
Packit 13e616
					" link from switch 0x%016" PRIx64
Packit 13e616
					" port %" PRIu8 " to CA 0x%016" PRIx64
Packit 13e616
					"\n", sw_guid, i, cl_ntoh64(port_guid));
Packit 13e616
			}
Packit 13e616
			port = osm_get_port_by_guid(m->p_subn, port_guid);
Packit 13e616
			if (!port)
Packit 13e616
				continue;
Packit 13e616
			if (!cl_is_qmap_empty(guid_tbl)) {
Packit 13e616
				found = (cl_qmap_get(guid_tbl, port_guid)
Packit 13e616
					 != cl_qmap_end(guid_tbl));
Packit 13e616
				if ((add_guids && !found)
Packit 13e616
				    || (!add_guids && found))
Packit 13e616
					continue;
Packit 13e616
			}
Packit 13e616
			if (!cl_is_item_in_qlist(&m->port_order_list,
Packit 13e616
						 &port->list_item))
Packit 13e616
				cl_qlist_insert_tail(&m->port_order_list,
Packit 13e616
						     &port->list_item);
Packit 13e616
			else
Packit 13e616
				OSM_LOG(m->p_log, OSM_LOG_INFO,
Packit 13e616
					"WRN AD37: guid 0x%016" PRIx64
Packit 13e616
					" already in list\n", port_guid);
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
}
Packit 13e616
Packit 13e616
static void add_guid_to_order_list(uint64_t guid, osm_ucast_mgr_t * m)
Packit 13e616
{
Packit 13e616
	osm_port_t *port = osm_get_port_by_guid(m->p_subn, cl_hton64(guid));
Packit 13e616
Packit 13e616
	if (!port) {
Packit 13e616
		 OSM_LOG(m->p_log, OSM_LOG_DEBUG,
Packit 13e616
			 "port guid not found: 0x%016" PRIx64 "\n", guid);
Packit 13e616
	}
Packit 13e616
Packit 13e616
	if (!cl_is_item_in_qlist(&m->port_order_list, &port->list_item))
Packit 13e616
		cl_qlist_insert_tail(&m->port_order_list, &port->list_item);
Packit 13e616
	else
Packit 13e616
		OSM_LOG(m->p_log, OSM_LOG_INFO,
Packit 13e616
			"WRN AD38: guid 0x%016" PRIx64 " already in list\n",
Packit 13e616
			guid);
Packit 13e616
}
Packit 13e616
Packit 13e616
/* compare function of #Hca attached to a switch for stdlib qsort */
Packit 13e616
static int cmp_num_hca(const void * l1, const void * l2)
Packit 13e616
{
Packit 13e616
	vertex_t *sw1 = *((vertex_t **) l1);
Packit 13e616
	vertex_t *sw2 = *((vertex_t **) l2);
Packit 13e616
	uint32_t num_hca1 = 0, num_hca2 = 0;
Packit 13e616
Packit 13e616
	if (sw1)
Packit 13e616
		num_hca1 = sw1->num_hca;
Packit 13e616
	if (sw2)
Packit 13e616
		num_hca2 = sw2->num_hca;
Packit 13e616
Packit 13e616
	if (num_hca1 > num_hca2)
Packit 13e616
		return -1;
Packit 13e616
	else if (num_hca1 < num_hca2)
Packit 13e616
		return 1;
Packit 13e616
	else
Packit 13e616
		return 0;
Packit 13e616
}
Packit 13e616
Packit 13e616
/* use stdlib to sort the switch array depending on num_hca */
Packit 13e616
static inline void sw_list_sort_by_num_hca(vertex_t ** sw_list,
Packit 13e616
					   uint32_t sw_list_size)
Packit 13e616
{
Packit 13e616
	qsort(sw_list, sw_list_size, sizeof(vertex_t *), cmp_num_hca);
Packit 13e616
}
Packit 13e616
Packit 13e616
/**********************************************************************
Packit 13e616
 **********************************************************************/
Packit 13e616
Packit 13e616
/************ helper functions to manage a map of CN and I/O guids ****
Packit 13e616
 **********************************************************************/
Packit 13e616
static int add_guid_to_map(void * cxt, uint64_t guid, char * p)
Packit 13e616
{
Packit 13e616
	cl_qmap_t *map = cxt;
Packit 13e616
	name_map_item_t *item;
Packit 13e616
	name_map_item_t *inserted_item;
Packit 13e616
Packit 13e616
	item = malloc(sizeof(*item));
Packit 13e616
	if (!item)
Packit 13e616
		return -1;
Packit 13e616
Packit 13e616
	item->guid = cl_hton64(guid);	/* internal: network byte order */
Packit 13e616
	item->name = NULL;		/* name isn't needed */
Packit 13e616
	inserted_item = (name_map_item_t *) cl_qmap_insert(map, item->guid, &item->item);
Packit 13e616
	if (inserted_item != item)
Packit 13e616
                free(item);
Packit 13e616
Packit 13e616
	return 0;
Packit 13e616
}
Packit 13e616
Packit 13e616
static void destroy_guid_map(cl_qmap_t * guid_tbl)
Packit 13e616
{
Packit 13e616
	name_map_item_t *p_guid = NULL, *p_next_guid = NULL;
Packit 13e616
Packit 13e616
	p_next_guid = (name_map_item_t *) cl_qmap_head(guid_tbl);
Packit 13e616
	while (p_next_guid != (name_map_item_t *) cl_qmap_end(guid_tbl)) {
Packit 13e616
		p_guid = p_next_guid;
Packit 13e616
		p_next_guid = (name_map_item_t *) cl_qmap_next(&p_guid->item);
Packit 13e616
		free(p_guid);
Packit 13e616
	}
Packit 13e616
	cl_qmap_remove_all(guid_tbl);
Packit 13e616
}
Packit 13e616
Packit 13e616
/**********************************************************************
Packit 13e616
 **********************************************************************/
Packit 13e616
Packit 13e616
static void dfsssp_print_graph(osm_ucast_mgr_t * p_mgr, vertex_t * adj_list,
Packit 13e616
			       uint32_t size)
Packit 13e616
{
Packit 13e616
	uint32_t i = 0, c = 0;
Packit 13e616
	link_t *link = NULL;
Packit 13e616
Packit 13e616
	/* index 0 is for the source in dijkstra -> ignore */
Packit 13e616
	for (i = 1; i < size; i++) {
Packit 13e616
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "adj_list[%" PRIu32 "]:\n",
Packit 13e616
			i);
Packit 13e616
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
Packit 13e616
			"   guid = 0x%" PRIx64 " lid = %" PRIu16 " (%s)\n",
Packit 13e616
			adj_list[i].guid, adj_list[i].lid,
Packit 13e616
			adj_list[i].sw->p_node->print_desc);
Packit 13e616
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
Packit 13e616
			"   num_hca = %" PRIu32 "\n", adj_list[i].num_hca);
Packit 13e616
Packit 13e616
		c = 1;
Packit 13e616
		for (link = adj_list[i].links; link != NULL;
Packit 13e616
		     link = link->next, c++) {
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
Packit 13e616
				"   link[%" PRIu32 "]:\n", c);
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
Packit 13e616
				"      to guid = 0x%" PRIx64 " (%s) port %"
Packit 13e616
				PRIu8 "\n", link->guid,
Packit 13e616
				adj_list[link->to].sw->p_node->print_desc,
Packit 13e616
				link->to_port);
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
Packit 13e616
				"      weight on this link = %" PRIu64 "\n",
Packit 13e616
				link->weight);
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
}
Packit 13e616
Packit 13e616
/* predefine, to use this in next function */
Packit 13e616
static void dfsssp_context_destroy(void *context);
Packit 13e616
static int dijkstra(osm_ucast_mgr_t * p_mgr, cl_heap_t * p_heap,
Packit 13e616
		    vertex_t * adj_list, uint32_t adj_list_size,
Packit 13e616
		    osm_port_t * port, uint16_t lid);
Packit 13e616
Packit 13e616
/* traverse subnet to gather information about the connected switches */
Packit 13e616
static int dfsssp_build_graph(void *context)
Packit 13e616
{
Packit 13e616
	dfsssp_context_t *dfsssp_ctx = (dfsssp_context_t *) context;
Packit 13e616
	osm_ucast_mgr_t *p_mgr = (osm_ucast_mgr_t *) (dfsssp_ctx->p_mgr);
Packit 13e616
	boolean_t has_fdr10 = (1 == p_mgr->p_subn->opt.fdr10) ? TRUE : FALSE;
Packit 13e616
	cl_qmap_t *port_tbl = &p_mgr->p_subn->port_guid_tbl;	/* 1 management port per switch + 1 or 2 ports for each Hca */
Packit 13e616
	osm_port_t *p_port = NULL;
Packit 13e616
	cl_qmap_t *sw_tbl = &p_mgr->p_subn->sw_guid_tbl;
Packit 13e616
	cl_map_item_t *item = NULL;
Packit 13e616
	osm_switch_t *sw = NULL;
Packit 13e616
	osm_node_t *remote_node = NULL;
Packit 13e616
	uint8_t port = 0, remote_port = 0;
Packit 13e616
	uint32_t i = 0, j = 0, err = 0, undiscov = 0, max_num_undiscov = 0;
Packit 13e616
	uint64_t total_num_hca = 0;
Packit 13e616
	vertex_t *adj_list = NULL;
Packit 13e616
	osm_physp_t *p_physp = NULL;
Packit 13e616
	link_t *link = NULL, *head = NULL;
Packit 13e616
	uint32_t num_sw = 0, adj_list_size = 0;
Packit 13e616
	uint8_t lmc = 0;
Packit 13e616
	uint16_t sm_lid = 0;
Packit 13e616
	cl_heap_t heap;
Packit 13e616
Packit 13e616
	OSM_LOG_ENTER(p_mgr->p_log);
Packit 13e616
	OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE,
Packit 13e616
		"Building graph for df-/sssp routing\n");
Packit 13e616
Packit 13e616
	/* if this pointer isn't NULL, this is a reroute step;
Packit 13e616
	   old context will be destroyed (adj_list and srcdest2vl_table)
Packit 13e616
	 */
Packit 13e616
	if (dfsssp_ctx->adj_list)
Packit 13e616
		dfsssp_context_destroy(context);
Packit 13e616
Packit 13e616
	/* construct the generic heap opject to use it in dijkstra */
Packit 13e616
	cl_heap_construct(&heap;;
Packit 13e616
Packit 13e616
	num_sw = cl_qmap_count(sw_tbl);
Packit 13e616
	adj_list_size = num_sw + 1;
Packit 13e616
	/* allocate an adjazenz list (array), 0. element is reserved for the source (Hca) in the routing algo, others are switches */
Packit 13e616
	adj_list = (vertex_t *) malloc(adj_list_size * sizeof(vertex_t));
Packit 13e616
	if (!adj_list) {
Packit 13e616
		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
			"ERR AD02: cannot allocate memory for adj_list\n");
Packit 13e616
		goto ERROR;
Packit 13e616
	}
Packit 13e616
	for (i = 0; i < adj_list_size; i++)
Packit 13e616
		set_default_vertex(&adj_list[i]);
Packit 13e616
Packit 13e616
	dfsssp_ctx->adj_list = adj_list;
Packit 13e616
	dfsssp_ctx->adj_list_size = adj_list_size;
Packit 13e616
Packit 13e616
	/* count the total number of Hca / LIDs (for lmc>0) in the fabric;
Packit 13e616
	   even include base/enhanced switch port 0; base SP0 will have lmc=0
Packit 13e616
	 */
Packit 13e616
	for (item = cl_qmap_head(port_tbl); item != cl_qmap_end(port_tbl);
Packit 13e616
	     item = cl_qmap_next(item)) {
Packit 13e616
		p_port = (osm_port_t *) item;
Packit 13e616
		if (osm_node_get_type(p_port->p_node) == IB_NODE_TYPE_CA ||
Packit 13e616
		    osm_node_get_type(p_port->p_node) == IB_NODE_TYPE_SWITCH) {
Packit 13e616
			lmc = osm_port_get_lmc(p_port);
Packit 13e616
			total_num_hca += (1 << lmc);
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
	i = 1;			/* fill adj_list -> start with index 1 */
Packit 13e616
	for (item = cl_qmap_head(sw_tbl); item != cl_qmap_end(sw_tbl);
Packit 13e616
	     item = cl_qmap_next(item), i++) {
Packit 13e616
		sw = (osm_switch_t *) item;
Packit 13e616
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
Packit 13e616
			"Processing switch with GUID 0x%" PRIx64 "\n",
Packit 13e616
			cl_ntoh64(osm_node_get_node_guid(sw->p_node)));
Packit 13e616
Packit 13e616
		adj_list[i].guid =
Packit 13e616
		    cl_ntoh64(osm_node_get_node_guid(sw->p_node));
Packit 13e616
		adj_list[i].lid =
Packit 13e616
		    cl_ntoh16(osm_node_get_base_lid(sw->p_node, 0));
Packit 13e616
		adj_list[i].sw = sw;
Packit 13e616
Packit 13e616
		link = (link_t *) malloc(sizeof(link_t));
Packit 13e616
		if (!link) {
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
				"ERR AD03: cannot allocate memory for a link\n");
Packit 13e616
			goto ERROR;
Packit 13e616
		}
Packit 13e616
		head = link;
Packit 13e616
		head->next = NULL;
Packit 13e616
Packit 13e616
		/* add SP0 to number of CA connected to a switch */
Packit 13e616
		lmc = osm_node_get_lmc(sw->p_node, 0);
Packit 13e616
		adj_list[i].num_hca += (1 << lmc);
Packit 13e616
Packit 13e616
		/* iterate over all ports in the switch, start with port 1 (port 0 is a management port) */
Packit 13e616
		for (port = 1; port < sw->num_ports; port++) {
Packit 13e616
			/* get the node behind the port */
Packit 13e616
			remote_node =
Packit 13e616
			    osm_node_get_remote_node(sw->p_node, port,
Packit 13e616
						     &remote_port);
Packit 13e616
			/* if there is no remote node on this port or it's the same switch -> try next port */
Packit 13e616
			if (!remote_node || remote_node->sw == sw)
Packit 13e616
				continue;
Packit 13e616
			/* make sure the link is healthy */
Packit 13e616
			p_physp = osm_node_get_physp_ptr(sw->p_node, port);
Packit 13e616
			if (!p_physp || !osm_link_is_healthy(p_physp))
Packit 13e616
				continue;
Packit 13e616
			/* if there is a Hca connected -> count and cycle */
Packit 13e616
			if (!remote_node->sw) {
Packit 13e616
				lmc = osm_node_get_lmc(remote_node, (uint32_t)remote_port);
Packit 13e616
				adj_list[i].num_hca += (1 << lmc);
Packit 13e616
				continue;
Packit 13e616
			}
Packit 13e616
			/* filter out throttled links to improve performance */
Packit 13e616
			if (p_mgr->p_subn->opt.avoid_throttled_links &&
Packit 13e616
			    osm_link_is_throttled(p_physp, has_fdr10)) {
Packit 13e616
				OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
Packit 13e616
					"Detected and ignoring throttled link:"
Packit 13e616
					" 0x%" PRIx64 "/P%" PRIu8
Packit 13e616
					" <--> 0x%" PRIx64 "/P%" PRIu8 "\n",
Packit 13e616
					cl_ntoh64(osm_node_get_node_guid(sw->p_node)),
Packit 13e616
					port,
Packit 13e616
					cl_ntoh64(osm_node_get_node_guid(remote_node)),
Packit 13e616
					remote_port);
Packit 13e616
				continue;
Packit 13e616
			}
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
Packit 13e616
				"Node 0x%" PRIx64 ", remote node 0x%" PRIx64
Packit 13e616
				", port %" PRIu8 ", remote port %" PRIu8 "\n",
Packit 13e616
				cl_ntoh64(osm_node_get_node_guid(sw->p_node)),
Packit 13e616
				cl_ntoh64(osm_node_get_node_guid(remote_node)),
Packit 13e616
				port, remote_port);
Packit 13e616
Packit 13e616
			link->next = (link_t *) malloc(sizeof(link_t));
Packit 13e616
			if (!link->next) {
Packit 13e616
				OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
					"ERR AD08: cannot allocate memory for a link\n");
Packit 13e616
				while (head) {
Packit 13e616
					link = head;
Packit 13e616
					head = head->next;
Packit 13e616
					free(link);
Packit 13e616
				}
Packit 13e616
				goto ERROR;
Packit 13e616
			}
Packit 13e616
			link = link->next;
Packit 13e616
			set_default_link(link);
Packit 13e616
			link->guid =
Packit 13e616
			    cl_ntoh64(osm_node_get_node_guid(remote_node));
Packit 13e616
			link->from = i;
Packit 13e616
			link->from_port = port;
Packit 13e616
			link->to_port = remote_port;
Packit 13e616
			link->weight = total_num_hca * total_num_hca;	/* initialize with P^2 to force shortest paths */
Packit 13e616
		}
Packit 13e616
Packit 13e616
		adj_list[i].links = head->next;
Packit 13e616
		free(head);
Packit 13e616
	}
Packit 13e616
	/* connect the links with it's second adjacent node in the list */
Packit 13e616
	for (i = 1; i < adj_list_size; i++) {
Packit 13e616
		link = adj_list[i].links;
Packit 13e616
		while (link) {
Packit 13e616
			for (j = 1; j < adj_list_size; j++) {
Packit 13e616
				if (link->guid == adj_list[j].guid) {
Packit 13e616
					link->to = j;
Packit 13e616
					break;
Packit 13e616
				}
Packit 13e616
			}
Packit 13e616
			link = link->next;
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* do one dry run to determine connectivity issues */
Packit 13e616
	sm_lid = p_mgr->p_subn->master_sm_base_lid;
Packit 13e616
	p_port = osm_get_port_by_lid(p_mgr->p_subn, sm_lid);
Packit 13e616
	err = dijkstra(p_mgr, &heap, adj_list, adj_list_size, p_port, sm_lid);
Packit 13e616
	if (err) {
Packit 13e616
		goto ERROR;
Packit 13e616
	} else {
Packit 13e616
		/* if sm is running on a switch, then dijkstra doesn't
Packit 13e616
		   initialize the used_link for this switch
Packit 13e616
		 */
Packit 13e616
		if (osm_node_get_type(p_port->p_node) != IB_NODE_TYPE_CA)
Packit 13e616
			max_num_undiscov = 1;
Packit 13e616
		for (i = 1; i < adj_list_size; i++)
Packit 13e616
			undiscov += (adj_list[i].used_link) ? 0 : 1;
Packit 13e616
		if (max_num_undiscov < undiscov) {
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
				"ERR AD0C: unsupported network state (detached"
Packit 13e616
				" and inaccessible switches found; gracefully"
Packit 13e616
				" shutdown this routing engine)\n");
Packit 13e616
			goto ERROR;
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
	/* delete the heap which is not needed anymore */
Packit 13e616
	cl_heap_destroy(&heap;;
Packit 13e616
Packit 13e616
	/* print the discovered graph */
Packit 13e616
	if (OSM_LOG_IS_ACTIVE_V2(p_mgr->p_log, OSM_LOG_DEBUG))
Packit 13e616
		dfsssp_print_graph(p_mgr, adj_list, adj_list_size);
Packit 13e616
Packit 13e616
	OSM_LOG_EXIT(p_mgr->p_log);
Packit 13e616
	return 0;
Packit 13e616
Packit 13e616
ERROR:
Packit 13e616
	if (cl_is_heap_inited(&heap))
Packit 13e616
		cl_heap_destroy(&heap;;
Packit 13e616
	dfsssp_context_destroy(context);
Packit 13e616
	return -1;
Packit 13e616
}
Packit 13e616
Packit 13e616
static void print_routes(osm_ucast_mgr_t * p_mgr, vertex_t * adj_list,
Packit 13e616
			 uint32_t adj_list_size, osm_port_t * port)
Packit 13e616
{
Packit 13e616
	uint32_t i = 0, j = 0;
Packit 13e616
Packit 13e616
	for (i = 1; i < adj_list_size; i++) {
Packit 13e616
		if (adj_list[i].state == DISCOVERED) {
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
Packit 13e616
				"Route from 0x%" PRIx64 " (%s) to 0x%" PRIx64
Packit 13e616
				" (%s):\n", adj_list[i].guid,
Packit 13e616
				adj_list[i].sw->p_node->print_desc,
Packit 13e616
				cl_ntoh64(osm_node_get_node_guid(port->p_node)),
Packit 13e616
				port->p_node->print_desc);
Packit 13e616
			j = i;
Packit 13e616
			while (adj_list[j].used_link) {
Packit 13e616
				if (j > 0) {
Packit 13e616
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
Packit 13e616
						"   0x%" PRIx64
Packit 13e616
						" (%s) routes thru port %" PRIu8
Packit 13e616
						"\n", adj_list[j].guid,
Packit 13e616
						adj_list[j].sw->p_node->
Packit 13e616
						print_desc,
Packit 13e616
						adj_list[j].used_link->to_port);
Packit 13e616
				} else {
Packit 13e616
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
Packit 13e616
						"   0x%" PRIx64
Packit 13e616
						" (%s) routes thru port %" PRIu8
Packit 13e616
						"\n", adj_list[j].guid,
Packit 13e616
						port->p_node->print_desc,
Packit 13e616
						adj_list[j].used_link->to_port);
Packit 13e616
				}
Packit 13e616
				j = adj_list[j].used_link->from;
Packit 13e616
			}
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
}
Packit 13e616
Packit 13e616
/* callback function for the cl_heap to update the index */
Packit 13e616
static void apply_index_update(const void * context, const size_t new_index)
Packit 13e616
{
Packit 13e616
	vertex_t *heap_elem = (vertex_t *) context;
Packit 13e616
	if (heap_elem)
Packit 13e616
		heap_elem->heap_index = new_index;
Packit 13e616
}
Packit 13e616
Packit 13e616
/* dijkstra step from one source to all switches in the df-/sssp graph */
Packit 13e616
static int dijkstra(osm_ucast_mgr_t * p_mgr, cl_heap_t * p_heap,
Packit 13e616
		    vertex_t * adj_list, uint32_t adj_list_size,
Packit 13e616
		    osm_port_t * port, uint16_t lid)
Packit 13e616
{
Packit 13e616
	uint32_t i = 0, j = 0, index = 0;
Packit 13e616
	osm_node_t *remote_node = NULL;
Packit 13e616
	uint8_t remote_port = 0;
Packit 13e616
	vertex_t *current = NULL;
Packit 13e616
	link_t *link = NULL;
Packit 13e616
	uint64_t guid = 0;
Packit 13e616
	cl_status_t ret = CL_SUCCESS;
Packit 13e616
Packit 13e616
	OSM_LOG_ENTER(p_mgr->p_log);
Packit 13e616
Packit 13e616
	/* build an 4-ary heap to find the node with minimum distance */
Packit 13e616
	if (!cl_is_heap_inited(p_heap))
Packit 13e616
		ret = cl_heap_init(p_heap, adj_list_size, 4,
Packit 13e616
				   &apply_index_update, NULL);
Packit 13e616
	else
Packit 13e616
		ret = cl_heap_resize(p_heap, adj_list_size);
Packit 13e616
	if (ret != CL_SUCCESS) {
Packit 13e616
		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
			"ERR AD09: cannot allocate memory or resize heap\n");
Packit 13e616
		return ret;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* reset all switches for new round with a new source for dijkstra */
Packit 13e616
	for (i = 1; i < adj_list_size; i++) {
Packit 13e616
		adj_list[i].hops = 0;
Packit 13e616
		adj_list[i].used_link = NULL;
Packit 13e616
		adj_list[i].distance = INF;
Packit 13e616
		adj_list[i].state = UNDISCOVERED;
Packit 13e616
		ret = cl_heap_insert(p_heap, INF, &adj_list[i]);
Packit 13e616
		if (ret != CL_SUCCESS) {
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
				"ERR AD11: cl_heap_insert failed\n");
Packit 13e616
			return ret;
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* if behind port is a Hca -> set adj_list[0] */
Packit 13e616
	if (osm_node_get_type(port->p_node) == IB_NODE_TYPE_CA) {
Packit 13e616
		/* save old link to prevent many mallocs after set_default_... */
Packit 13e616
		link = adj_list[0].links;
Packit 13e616
		/* initialize adj_list[0] (the source for the routing, a Hca) */
Packit 13e616
		set_default_vertex(&adj_list[0]);
Packit 13e616
		adj_list[0].guid =
Packit 13e616
		    cl_ntoh64(osm_node_get_node_guid(port->p_node));
Packit 13e616
		adj_list[0].lid = lid;
Packit 13e616
		index = 0;
Packit 13e616
		/* write saved link back to new adj_list[0] */
Packit 13e616
		adj_list[0].links = link;
Packit 13e616
Packit 13e616
		/* initialize link to neighbor for adj_list[0];
Packit 13e616
		   make sure the link is healthy
Packit 13e616
		 */
Packit 13e616
		if (port->p_physp && osm_link_is_healthy(port->p_physp)) {
Packit 13e616
			remote_node =
Packit 13e616
			    osm_node_get_remote_node(port->p_node,
Packit 13e616
						     port->p_physp->port_num,
Packit 13e616
						     &remote_port);
Packit 13e616
			/* if there is no remote node on this port or it's the same Hca -> ignore */
Packit 13e616
			if (remote_node
Packit 13e616
			    && (osm_node_get_type(remote_node) ==
Packit 13e616
				IB_NODE_TYPE_SWITCH)) {
Packit 13e616
				if (!(adj_list[0].links)) {
Packit 13e616
					adj_list[0].links =
Packit 13e616
					    (link_t *) malloc(sizeof(link_t));
Packit 13e616
					if (!(adj_list[0].links)) {
Packit 13e616
						OSM_LOG(p_mgr->p_log,
Packit 13e616
							OSM_LOG_ERROR,
Packit 13e616
							"ERR AD07: cannot allocate memory for a link\n");
Packit 13e616
						return 1;
Packit 13e616
					}
Packit 13e616
				}
Packit 13e616
				set_default_link(adj_list[0].links);
Packit 13e616
				adj_list[0].links->guid =
Packit 13e616
				    cl_ntoh64(osm_node_get_node_guid
Packit 13e616
					      (remote_node));
Packit 13e616
				adj_list[0].links->from_port =
Packit 13e616
				    port->p_physp->port_num;
Packit 13e616
				adj_list[0].links->to_port = remote_port;
Packit 13e616
				adj_list[0].links->weight = 1;
Packit 13e616
				for (j = 1; j < adj_list_size; j++) {
Packit 13e616
					if (adj_list[0].links->guid ==
Packit 13e616
					    adj_list[j].guid) {
Packit 13e616
						adj_list[0].links->to = j;
Packit 13e616
						break;
Packit 13e616
					}
Packit 13e616
				}
Packit 13e616
			}
Packit 13e616
		} else {
Packit 13e616
			/* if link is unhealthy then there's a severe issue */
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
				"ERR AD0B: unsupported network state (CA with"
Packit 13e616
				" unhealthy link state discovered; should have"
Packit 13e616
				" been filtered out before already; gracefully"
Packit 13e616
				" shutdown this routing engine)\n");
Packit 13e616
			return 1;
Packit 13e616
		}
Packit 13e616
		ret = cl_heap_insert(p_heap, INF, &adj_list[0]);
Packit 13e616
		if (ret != CL_SUCCESS) {
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
				"ERR AD13: cl_heap_insert failed\n");
Packit 13e616
			return ret;
Packit 13e616
		}
Packit 13e616
		/* if behind port is a switch -> search switch in adj_list */
Packit 13e616
	} else {
Packit 13e616
		/* reset adj_list[0], if links=NULL reset was done before, then skip */
Packit 13e616
		if (adj_list[0].links) {
Packit 13e616
			free(adj_list[0].links);
Packit 13e616
			set_default_vertex(&adj_list[0]);
Packit 13e616
		}
Packit 13e616
		/* search for the switch which is the source in this round */
Packit 13e616
		guid = cl_ntoh64(osm_node_get_node_guid(port->p_node));
Packit 13e616
		for (i = 1; i < adj_list_size; i++) {
Packit 13e616
			if (guid == adj_list[i].guid) {
Packit 13e616
				index = i;
Packit 13e616
				break;
Packit 13e616
			}
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* source in dijkstra */
Packit 13e616
	adj_list[index].distance = 0;
Packit 13e616
	adj_list[index].state = DISCOVERED;
Packit 13e616
	adj_list[index].hops = 0;	/* the source has hop count = 0 */
Packit 13e616
	ret = cl_heap_modify_key(p_heap, adj_list[index].distance,
Packit 13e616
				 adj_list[index].heap_index);
Packit 13e616
	if (ret != CL_SUCCESS) {
Packit 13e616
		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
			"ERR AD10: index out of bounds in cl_heap_modify_key\n");
Packit 13e616
		return ret;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	current = (vertex_t *) cl_heap_extract_root(p_heap);
Packit 13e616
	while (current) {
Packit 13e616
		current->state = DISCOVERED;
Packit 13e616
		if (current->used_link)	/* increment the number of hops to the source for each new node */
Packit 13e616
			current->hops =
Packit 13e616
			    adj_list[current->used_link->from].hops + 1;
Packit 13e616
Packit 13e616
		/* add/update nodes which aren't discovered but accessible */
Packit 13e616
		for (link = current->links; link != NULL; link = link->next) {
Packit 13e616
			if ((adj_list[link->to].state != DISCOVERED)
Packit 13e616
			    && (current->distance + link->weight <
Packit 13e616
				adj_list[link->to].distance)) {
Packit 13e616
				adj_list[link->to].used_link = link;
Packit 13e616
				adj_list[link->to].distance =
Packit 13e616
				    current->distance + link->weight;
Packit 13e616
				ret = cl_heap_modify_key(p_heap,
Packit 13e616
							 adj_list[link->to].distance,
Packit 13e616
							 adj_list[link->to].heap_index);
Packit 13e616
				if (ret != CL_SUCCESS) {
Packit 13e616
					OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
						"ERR AD12: index out of bounds in cl_heap_modify_key\n");
Packit 13e616
					return ret;
Packit 13e616
				}
Packit 13e616
			}
Packit 13e616
		}
Packit 13e616
Packit 13e616
		current = (vertex_t *) cl_heap_extract_root(p_heap);
Packit 13e616
	}
Packit 13e616
Packit 13e616
	OSM_LOG_EXIT(p_mgr->p_log);
Packit 13e616
	return 0;
Packit 13e616
}
Packit 13e616
Packit 13e616
/* update the linear forwarding tables of all switches with the informations
Packit 13e616
   from the last dijsktra step
Packit 13e616
*/
Packit 13e616
static int update_lft(osm_ucast_mgr_t * p_mgr, vertex_t * adj_list,
Packit 13e616
		      uint32_t adj_list_size, osm_port_t * p_port, uint16_t lid)
Packit 13e616
{
Packit 13e616
	uint32_t i = 0;
Packit 13e616
	uint8_t port = 0;
Packit 13e616
	uint8_t hops = 0;
Packit 13e616
	osm_switch_t *p_sw = NULL;
Packit 13e616
	boolean_t is_ignored_by_port_prof = FALSE;
Packit 13e616
	osm_physp_t *p = NULL;
Packit 13e616
	cl_status_t ret;
Packit 13e616
Packit 13e616
	OSM_LOG_ENTER(p_mgr->p_log);
Packit 13e616
Packit 13e616
	for (i = 1; i < adj_list_size; i++) {
Packit 13e616
		/* if no route goes thru this switch -> cycle */
Packit 13e616
		if (!(adj_list[i].used_link))
Packit 13e616
			continue;
Packit 13e616
Packit 13e616
		p_sw = adj_list[i].sw;
Packit 13e616
		hops = adj_list[i].hops;
Packit 13e616
		port = adj_list[i].used_link->to_port;
Packit 13e616
		/* the used_link is the link that was used in dijkstra to reach this node,
Packit 13e616
		   so the to_port is the local port on this node
Packit 13e616
		 */
Packit 13e616
Packit 13e616
		if (port == OSM_NO_PATH) {	/* if clause shouldn't be possible in this routing, but who cares */
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
				"ERR AD06: No path to get to LID %" PRIu16
Packit 13e616
				" from switch 0x%" PRIx64 "\n", lid,
Packit 13e616
				cl_ntoh64(osm_node_get_node_guid
Packit 13e616
					  (p_sw->p_node)));
Packit 13e616
Packit 13e616
			/* do not try to overwrite the ppro of non existing port ... */
Packit 13e616
			is_ignored_by_port_prof = TRUE;
Packit 13e616
			return 1;
Packit 13e616
		} else {
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
Packit 13e616
				"Routing LID %" PRIu16 " to port %" PRIu8
Packit 13e616
				" for switch 0x%" PRIx64 "\n", lid, port,
Packit 13e616
				cl_ntoh64(osm_node_get_node_guid
Packit 13e616
					  (p_sw->p_node)));
Packit 13e616
Packit 13e616
			p = osm_node_get_physp_ptr(p_sw->p_node, port);
Packit 13e616
			if (!p) {
Packit 13e616
				OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
					"ERR AD0A: Physical port %d of Node GUID 0x%"
Packit 13e616
					PRIx64 "not found\n", port,
Packit 13e616
					cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)));
Packit 13e616
				return 1;
Packit 13e616
			}
Packit 13e616
Packit 13e616
			/* we would like to optionally ignore this port in equalization
Packit 13e616
			   as in the case of the Mellanox Anafa Internal PCI TCA port
Packit 13e616
			 */
Packit 13e616
			is_ignored_by_port_prof = p->is_prof_ignored;
Packit 13e616
Packit 13e616
			/* We also would ignore this route if the target lid is of
Packit 13e616
			   a switch and the port_profile_switch_node is not TRUE
Packit 13e616
			 */
Packit 13e616
			if (!p_mgr->p_subn->opt.port_profile_switch_nodes)
Packit 13e616
				is_ignored_by_port_prof |=
Packit 13e616
				    (osm_node_get_type(p_port->p_node) ==
Packit 13e616
				     IB_NODE_TYPE_SWITCH);
Packit 13e616
		}
Packit 13e616
Packit 13e616
		/* to support lmc > 0 the functions alloc_ports_priv, free_ports_priv, find_and_add_remote_sys
Packit 13e616
		   from minhop aren't needed cause osm_switch_recommend_path is implicitly calculated
Packit 13e616
		   for each LID pair thru dijkstra;
Packit 13e616
		   for each port the dijkstra algorithm calculates (max_lid_ho - min_lid_ho)-times maybe
Packit 13e616
		   disjoint routes to spread the bandwidth -> diffent routes for one port and lmc>0
Packit 13e616
		 */
Packit 13e616
Packit 13e616
		/* set port in LFT */
Packit 13e616
		p_sw->new_lft[lid] = port;
Packit 13e616
		if (!is_ignored_by_port_prof) {
Packit 13e616
			/* update the number of path routing thru this port */
Packit 13e616
			osm_switch_count_path(p_sw, port);
Packit 13e616
		}
Packit 13e616
		/* set the hop count from this switch to the lid */
Packit 13e616
		ret = osm_switch_set_hops(p_sw, lid, port, hops);
Packit 13e616
		if (ret != CL_SUCCESS)
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
				"ERR AD05: cannot set hops for LID %" PRIu16
Packit 13e616
				" at switch 0x%" PRIx64 "\n", lid,
Packit 13e616
				cl_ntoh64(osm_node_get_node_guid
Packit 13e616
					  (p_sw->p_node)));
Packit 13e616
	}
Packit 13e616
Packit 13e616
	OSM_LOG_EXIT(p_mgr->p_log);
Packit 13e616
	return 0;
Packit 13e616
}
Packit 13e616
Packit 13e616
/* the function updates the multicast group membership information
Packit 13e616
   similar to create_mgrp_switch_map (osm_mcast_mgr.c)
Packit 13e616
   => with it we can identify if a switch needs to be processed
Packit 13e616
   or not in update_mcft
Packit 13e616
*/
Packit 13e616
static void update_mgrp_membership(cl_qlist_t * port_list)
Packit 13e616
{
Packit 13e616
	osm_mcast_work_obj_t *wobj = NULL;
Packit 13e616
	osm_port_t *port = NULL;
Packit 13e616
	osm_switch_t *sw = NULL;
Packit 13e616
	cl_list_item_t *i = NULL;
Packit 13e616
Packit 13e616
	for (i = cl_qlist_head(port_list); i != cl_qlist_end(port_list);
Packit 13e616
	     i = cl_qlist_next(i)) {
Packit 13e616
		wobj = cl_item_obj(i, wobj, list_item);
Packit 13e616
		port = wobj->p_port;
Packit 13e616
		if (port->p_node->sw) {
Packit 13e616
			sw = port->p_node->sw;
Packit 13e616
			sw->is_mc_member = 1;
Packit 13e616
		} else {
Packit 13e616
			sw = port->p_physp->p_remote_physp->p_node->sw;
Packit 13e616
			sw->num_of_mcm++;
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
}
Packit 13e616
Packit 13e616
/* reset is_mc_member and num_of_mcm for future computations */
Packit 13e616
static void reset_mgrp_membership(vertex_t * adj_list, uint32_t adj_list_size)
Packit 13e616
{
Packit 13e616
	uint32_t i = 0;
Packit 13e616
Packit 13e616
	for (i = 1; i < adj_list_size; i++) {
Packit 13e616
		if (adj_list[i].dropped)
Packit 13e616
			continue;
Packit 13e616
Packit 13e616
		adj_list[i].sw->is_mc_member = 0;
Packit 13e616
		adj_list[i].sw->num_of_mcm = 0;
Packit 13e616
	}
Packit 13e616
}
Packit 13e616
Packit 13e616
/* update the multicast forwarding tables of all switches with the informations
Packit 13e616
   from the previous dijsktra step for the current mlid
Packit 13e616
*/
Packit 13e616
static int update_mcft(osm_sm_t * p_sm, vertex_t * adj_list,
Packit 13e616
		       uint32_t adj_list_size, uint16_t mlid_ho,
Packit 13e616
		       cl_qmap_t * port_map, osm_switch_t * root_sw)
Packit 13e616
{
Packit 13e616
	uint32_t i = 0;
Packit 13e616
	uint8_t port = 0, remote_port = 0;
Packit 13e616
	uint8_t upstream_port = 0, downstream_port = 0;
Packit 13e616
	ib_net64_t guid = 0;
Packit 13e616
	osm_switch_t *p_sw = NULL;
Packit 13e616
	osm_node_t *remote_node = NULL;
Packit 13e616
	osm_physp_t *p_physp = NULL;
Packit 13e616
	osm_mcast_tbl_t *p_tbl = NULL;
Packit 13e616
	vertex_t *curr_adj = NULL;
Packit 13e616
Packit 13e616
	OSM_LOG_ENTER(p_sm->p_log);
Packit 13e616
Packit 13e616
	for (i = 1; i < adj_list_size; i++) {
Packit 13e616
		if (adj_list[i].dropped)
Packit 13e616
			continue;
Packit 13e616
Packit 13e616
		p_sw = adj_list[i].sw;
Packit 13e616
		OSM_LOG(p_sm->p_log, OSM_LOG_VERBOSE,
Packit 13e616
			"Processing switch 0x%016" PRIx64
Packit 13e616
			" (%s) for MLID 0x%X\n", cl_ntoh64(adj_list[i].guid),
Packit 13e616
			p_sw->p_node->print_desc, mlid_ho);
Packit 13e616
Packit 13e616
		/* if a) the switch does not support mcast  or
Packit 13e616
		      b) no ports of this switch are part or the mcast group
Packit 13e616
		   then cycle
Packit 13e616
		 */
Packit 13e616
		if (osm_switch_supports_mcast(p_sw) == FALSE ||
Packit 13e616
		    (p_sw->num_of_mcm == 0 && !(p_sw->is_mc_member)))
Packit 13e616
			continue;
Packit 13e616
Packit 13e616
		p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);
Packit 13e616
Packit 13e616
		/* add all ports of this sw to the mcast table,
Packit 13e616
		   if they are part of the mcast grp
Packit 13e616
		 */
Packit 13e616
		if (p_sw->is_mc_member)
Packit 13e616
			osm_mcast_tbl_set(p_tbl, mlid_ho, 0);
Packit 13e616
		for (port = 1; port < p_sw->num_ports; port++) {
Packit 13e616
			/* get the node behind the port */
Packit 13e616
			remote_node =
Packit 13e616
				osm_node_get_remote_node(p_sw->p_node, port,
Packit 13e616
							 &remote_port);
Packit 13e616
			/* check if connected and its not the same switch */
Packit 13e616
			if (!remote_node || remote_node->sw == p_sw)
Packit 13e616
				continue;
Packit 13e616
			/* make sure the link is healthy */
Packit 13e616
			p_physp = osm_node_get_physp_ptr(p_sw->p_node, port);
Packit 13e616
			if (!p_physp || !osm_link_is_healthy(p_physp))
Packit 13e616
				continue;
Packit 13e616
			/* we don't add upstream ports in this step */
Packit 13e616
			if (osm_node_get_type(remote_node) != IB_NODE_TYPE_CA)
Packit 13e616
				continue;
Packit 13e616
Packit 13e616
			guid = osm_physp_get_port_guid(osm_node_get_physp_ptr(
Packit 13e616
						       remote_node,
Packit 13e616
						       remote_port));
Packit 13e616
			if (cl_qmap_get(port_map, guid)
Packit 13e616
			    != cl_qmap_end(port_map))
Packit 13e616
				osm_mcast_tbl_set(p_tbl, mlid_ho, port);
Packit 13e616
		}
Packit 13e616
Packit 13e616
		/* now we have to add the upstream port of 'this' switch and
Packit 13e616
		   the downstream port of the next switch to the mcast table
Packit 13e616
		   until we reach the root_sw
Packit 13e616
		 */
Packit 13e616
		curr_adj = &adj_list[i];
Packit 13e616
		while (curr_adj->sw != root_sw) {
Packit 13e616
			/* the used_link is the link that was used in dijkstra to reach this node,
Packit 13e616
			   so the to_port is the local (upstream) port on curr_adj->sw
Packit 13e616
			 */
Packit 13e616
			upstream_port = curr_adj->used_link->to_port;
Packit 13e616
			osm_mcast_tbl_set(p_tbl, mlid_ho, upstream_port);
Packit 13e616
Packit 13e616
			/* now we go one step in direction root_sw and add the
Packit 13e616
			   downstream port for the spanning tree
Packit 13e616
			 */
Packit 13e616
			downstream_port = curr_adj->used_link->from_port;
Packit 13e616
			p_tbl = osm_switch_get_mcast_tbl_ptr(
Packit 13e616
				adj_list[curr_adj->used_link->from].sw);
Packit 13e616
			osm_mcast_tbl_set(p_tbl, mlid_ho, downstream_port);
Packit 13e616
Packit 13e616
			curr_adj = &adj_list[curr_adj->used_link->from];
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
	OSM_LOG_EXIT(p_sm->p_log);
Packit 13e616
	return 0;
Packit 13e616
}
Packit 13e616
Packit 13e616
/* increment the edge weights of the df-/sssp graph which represent the number
Packit 13e616
   of paths on this link
Packit 13e616
*/
Packit 13e616
static void update_weights(osm_ucast_mgr_t * p_mgr, vertex_t * adj_list,
Packit 13e616
			   uint32_t adj_list_size)
Packit 13e616
{
Packit 13e616
	uint32_t i = 0, j = 0;
Packit 13e616
	uint32_t additional_weight = 0;
Packit 13e616
Packit 13e616
	OSM_LOG_ENTER(p_mgr->p_log);
Packit 13e616
Packit 13e616
	for (i = 1; i < adj_list_size; i++) {
Packit 13e616
		/* if no route goes thru this switch -> cycle */
Packit 13e616
		if (!(adj_list[i].used_link))
Packit 13e616
			continue;
Packit 13e616
		additional_weight = adj_list[i].num_hca;
Packit 13e616
Packit 13e616
		j = i;
Packit 13e616
		while (adj_list[j].used_link) {
Packit 13e616
			/* update the link from pre to this node */
Packit 13e616
			adj_list[j].used_link->weight += additional_weight;
Packit 13e616
Packit 13e616
			j = adj_list[j].used_link->from;
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
	OSM_LOG_EXIT(p_mgr->p_log);
Packit 13e616
}
Packit 13e616
Packit 13e616
/* get the largest number of virtual lanes which is supported by all switches
Packit 13e616
   in the subnet
Packit 13e616
*/
Packit 13e616
static uint8_t get_avail_vl_in_subn(osm_ucast_mgr_t * p_mgr)
Packit 13e616
{
Packit 13e616
	uint32_t i = 0;
Packit 13e616
	uint8_t vls_avail = 0xFF, port_vls_avail = 0;
Packit 13e616
	cl_qmap_t *sw_tbl = &p_mgr->p_subn->sw_guid_tbl;
Packit 13e616
	cl_map_item_t *item = NULL;
Packit 13e616
	osm_switch_t *sw = NULL;
Packit 13e616
Packit 13e616
	/* traverse all switches to get the number of available virtual lanes in the subnet */
Packit 13e616
	for (item = cl_qmap_head(sw_tbl); item != cl_qmap_end(sw_tbl);
Packit 13e616
	     item = cl_qmap_next(item)) {
Packit 13e616
		sw = (osm_switch_t *) item;
Packit 13e616
Packit 13e616
		/* ignore management port 0 */
Packit 13e616
		for (i = 1; i < osm_node_get_num_physp(sw->p_node); i++) {
Packit 13e616
			osm_physp_t *p_physp =
Packit 13e616
			    osm_node_get_physp_ptr(sw->p_node, i);
Packit 13e616
Packit 13e616
			if (p_physp && p_physp->p_remote_physp) {
Packit 13e616
				port_vls_avail =
Packit 13e616
				    ib_port_info_get_op_vls(&p_physp->
Packit 13e616
							    port_info);
Packit 13e616
				if (port_vls_avail
Packit 13e616
				    && port_vls_avail < vls_avail)
Packit 13e616
					vls_avail = port_vls_avail;
Packit 13e616
			}
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* ib_port_info_get_op_vls gives values 1 ... 5 (s. IBAS 14.2.5.6) */
Packit 13e616
	vls_avail = 1 << (vls_avail - 1);
Packit 13e616
Packit 13e616
	/* set boundaries (s. IBAS 3.5.7) */
Packit 13e616
	if (vls_avail > 15)
Packit 13e616
		vls_avail = 15;
Packit 13e616
	if (vls_avail < 1)
Packit 13e616
		vls_avail = 1;
Packit 13e616
Packit 13e616
	return vls_avail;
Packit 13e616
}
Packit 13e616
Packit 13e616
/* search for cycles in the channel dependency graph to identify possible
Packit 13e616
   deadlocks in the network;
Packit 13e616
   assign new virtual lanes to some paths to break the deadlocks
Packit 13e616
*/
Packit 13e616
static int dfsssp_remove_deadlocks(dfsssp_context_t * dfsssp_ctx)
Packit 13e616
{
Packit 13e616
	osm_ucast_mgr_t *p_mgr = (osm_ucast_mgr_t *) dfsssp_ctx->p_mgr;
Packit 13e616
Packit 13e616
	cl_qlist_t *port_tbl = &p_mgr->port_order_list;	/* 1 management port per switch + 1 or 2 ports for each Hca */
Packit 13e616
	cl_list_item_t *item1 = NULL, *item2 = NULL;
Packit 13e616
	osm_port_t *src_port = NULL, *dest_port = NULL;
Packit 13e616
Packit 13e616
	uint32_t i = 0, j = 0, err = 0;
Packit 13e616
	uint8_t vl = 0, test_vl = 0, vl_avail = 0, vl_needed = 1;
Packit 13e616
	double most_avg_paths = 0.0;
Packit 13e616
	cdg_node_t **cdg = NULL, *start_here = NULL, *cycle = NULL;
Packit 13e616
	cdg_link_t *weakest_link = NULL;
Packit 13e616
	uint32_t srcdest = 0;
Packit 13e616
Packit 13e616
	vltable_t *srcdest2vl_table = NULL;
Packit 13e616
	uint8_t lmc = 0;
Packit 13e616
	uint16_t slid = 0, dlid = 0, min_lid_ho = 0, max_lid_ho =
Packit 13e616
	    0, min_lid_ho2 = 0, max_lid_ho2 = 0;;
Packit 13e616
	uint64_t *paths_per_vl = NULL;
Packit 13e616
	uint64_t from = 0, to = 0, count = 0;
Packit 13e616
	uint8_t *split_count = NULL;
Packit 13e616
	uint8_t ntype = 0;
Packit 13e616
Packit 13e616
	OSM_LOG_ENTER(p_mgr->p_log);
Packit 13e616
	OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE,
Packit 13e616
		"Assign each src/dest pair a Virtual Lanes, to remove deadlocks in the routing\n");
Packit 13e616
Packit 13e616
	vl_avail = get_avail_vl_in_subn(p_mgr);
Packit 13e616
	OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
Packit 13e616
		"Virtual Lanes available: %" PRIu8 "\n", vl_avail);
Packit 13e616
Packit 13e616
	paths_per_vl = (uint64_t *) malloc(vl_avail * sizeof(uint64_t));
Packit 13e616
	if (!paths_per_vl) {
Packit 13e616
		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
			"ERR AD22: cannot allocate memory for paths_per_vl\n");
Packit 13e616
		return 1;
Packit 13e616
	}
Packit 13e616
	memset(paths_per_vl, 0, vl_avail * sizeof(uint64_t));
Packit 13e616
Packit 13e616
	cdg = (cdg_node_t **) malloc(vl_avail * sizeof(cdg_node_t *));
Packit 13e616
	if (!cdg) {
Packit 13e616
		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
			"ERR AD23: cannot allocate memory for cdg\n");
Packit 13e616
		free(paths_per_vl);
Packit 13e616
		return 1;
Packit 13e616
	}
Packit 13e616
	for (i = 0; i < vl_avail; i++)
Packit 13e616
		cdg[i] = NULL;
Packit 13e616
Packit 13e616
	count = 0;
Packit 13e616
	/* count all ports (also multiple LIDs) of type CA or SP0 for size of VL table */
Packit 13e616
	for (item1 = cl_qlist_head(port_tbl); item1 != cl_qlist_end(port_tbl);
Packit 13e616
	     item1 = cl_qlist_next(item1)) {
Packit 13e616
		dest_port = (osm_port_t *)cl_item_obj(item1, dest_port,
Packit 13e616
						      list_item);
Packit 13e616
		ntype = osm_node_get_type(dest_port->p_node);
Packit 13e616
		if (ntype == IB_NODE_TYPE_CA || ntype == IB_NODE_TYPE_SWITCH) {
Packit 13e616
			/* only SP0 with SLtoVLMapping support will be processed */
Packit 13e616
			if (ntype == IB_NODE_TYPE_SWITCH
Packit 13e616
			    && !(dest_port->p_physp->port_info.capability_mask
Packit 13e616
			    & IB_PORT_CAP_HAS_SL_MAP))
Packit 13e616
				continue;
Packit 13e616
Packit 13e616
			lmc = osm_port_get_lmc(dest_port);
Packit 13e616
			count += (1 << lmc);
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
	/* allocate VL table and indexing array */
Packit 13e616
	err = vltable_alloc(&srcdest2vl_table, count);
Packit 13e616
	if (err) {
Packit 13e616
		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
			"ERR AD26: cannot allocate memory for srcdest2vl_table\n");
Packit 13e616
		goto ERROR;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	i = 0;
Packit 13e616
	/* fill lids into indexing array */
Packit 13e616
	for (item1 = cl_qlist_head(port_tbl); item1 != cl_qlist_end(port_tbl);
Packit 13e616
	     item1 = cl_qlist_next(item1)) {
Packit 13e616
		dest_port = (osm_port_t *)cl_item_obj(item1, dest_port,
Packit 13e616
						      list_item);
Packit 13e616
		ntype = osm_node_get_type(dest_port->p_node);
Packit 13e616
		if (ntype == IB_NODE_TYPE_CA || ntype == IB_NODE_TYPE_SWITCH) {
Packit 13e616
			/* only SP0 with SLtoVLMapping support will be processed */
Packit 13e616
			if (ntype == IB_NODE_TYPE_SWITCH
Packit 13e616
			    && !(dest_port->p_physp->port_info.capability_mask
Packit 13e616
			    & IB_PORT_CAP_HAS_SL_MAP))
Packit 13e616
				continue;
Packit 13e616
Packit 13e616
			osm_port_get_lid_range_ho(dest_port, &min_lid_ho,
Packit 13e616
						  &max_lid_ho);
Packit 13e616
			for (dlid = min_lid_ho; dlid <= max_lid_ho; dlid++, i++)
Packit 13e616
				srcdest2vl_table->lids[i] = cl_hton16(dlid);
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
	/* sort lids */
Packit 13e616
	vltable_sort_lids(srcdest2vl_table);
Packit 13e616
Packit 13e616
	test_vl = 0;
Packit 13e616
	/* fill cdg[0] with routes from each src/dest port combination for all Hca/SP0 in the subnet */
Packit 13e616
	for (item1 = cl_qlist_head(port_tbl); item1 != cl_qlist_end(port_tbl);
Packit 13e616
	     item1 = cl_qlist_next(item1)) {
Packit 13e616
		dest_port = (osm_port_t *)cl_item_obj(item1, dest_port,
Packit 13e616
						      list_item);
Packit 13e616
		ntype = osm_node_get_type(dest_port->p_node);
Packit 13e616
		if ((ntype != IB_NODE_TYPE_CA && ntype != IB_NODE_TYPE_SWITCH)
Packit 13e616
		    || !(dest_port->p_physp->port_info.capability_mask
Packit 13e616
		    & IB_PORT_CAP_HAS_SL_MAP))
Packit 13e616
			continue;
Packit 13e616
Packit 13e616
		for (item2 = cl_qlist_head(port_tbl);
Packit 13e616
		     item2 != cl_qlist_end(port_tbl);
Packit 13e616
		     item2 = cl_qlist_next(item2)) {
Packit 13e616
			src_port = (osm_port_t *)cl_item_obj(item2, src_port,
Packit 13e616
							     list_item);
Packit 13e616
			ntype = osm_node_get_type(src_port->p_node);
Packit 13e616
			if ((ntype != IB_NODE_TYPE_CA
Packit 13e616
			    && ntype != IB_NODE_TYPE_SWITCH)
Packit 13e616
			    || !(src_port->p_physp->port_info.capability_mask
Packit 13e616
			    & IB_PORT_CAP_HAS_SL_MAP))
Packit 13e616
				continue;
Packit 13e616
Packit 13e616
			if (src_port != dest_port) {
Packit 13e616
				/* iterate over LIDs of src and dest port */
Packit 13e616
				osm_port_get_lid_range_ho(src_port, &min_lid_ho,
Packit 13e616
							  &max_lid_ho);
Packit 13e616
				for (slid = min_lid_ho; slid <= max_lid_ho;
Packit 13e616
				     slid++) {
Packit 13e616
					osm_port_get_lid_range_ho
Packit 13e616
					    (dest_port, &min_lid_ho2,
Packit 13e616
					     &max_lid_ho2);
Packit 13e616
					for (dlid = min_lid_ho2;
Packit 13e616
					     dlid <= max_lid_ho2;
Packit 13e616
					     dlid++) {
Packit 13e616
Packit 13e616
						/* try to add the path to cdg[0] */
Packit 13e616
						err =
Packit 13e616
						    update_channel_dep_graph
Packit 13e616
						    (&(cdg[test_vl]),
Packit 13e616
						     src_port, slid,
Packit 13e616
						     dest_port, dlid);
Packit 13e616
						if (err) {
Packit 13e616
							OSM_LOG(p_mgr->
Packit 13e616
								p_log,
Packit 13e616
								OSM_LOG_ERROR,
Packit 13e616
								"ERR AD14: cannot allocate memory for cdg node or link in update_channel_dep_graph(...)\n");
Packit 13e616
							goto ERROR;
Packit 13e616
						}
Packit 13e616
						/* add the <s,d> combination / corresponding virtual lane to the VL table */
Packit 13e616
						vltable_insert
Packit 13e616
						    (srcdest2vl_table,
Packit 13e616
						     cl_hton16(slid),
Packit 13e616
						     cl_hton16(dlid),
Packit 13e616
						     test_vl);
Packit 13e616
						paths_per_vl[test_vl]++;
Packit 13e616
Packit 13e616
					}
Packit 13e616
Packit 13e616
				}
Packit 13e616
			}
Packit 13e616
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
	dfsssp_ctx->srcdest2vl_table = srcdest2vl_table;
Packit 13e616
Packit 13e616
	/* test all cdg for cycles and break the cycles by moving paths on the weakest link to the next cdg */
Packit 13e616
	for (test_vl = 0; test_vl < vl_avail - 1; test_vl++) {
Packit 13e616
		start_here = cdg[test_vl];
Packit 13e616
		while (start_here) {
Packit 13e616
			cycle =
Packit 13e616
			    search_cycle_in_channel_dep_graph(cdg[test_vl],
Packit 13e616
							      start_here);
Packit 13e616
Packit 13e616
			if (cycle) {
Packit 13e616
				vl_needed = test_vl + 2;
Packit 13e616
Packit 13e616
				/* calc weakest link n cycle */
Packit 13e616
				weakest_link = get_weakest_link_in_cycle(cycle);
Packit 13e616
				if (!weakest_link) {
Packit 13e616
					OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
						"ERR AD27: something went wrong in get_weakest_link_in_cycle(...)\n");
Packit 13e616
					err = 1;
Packit 13e616
					goto ERROR;
Packit 13e616
				}
Packit 13e616
Packit 13e616
				paths_per_vl[test_vl] -=
Packit 13e616
				    weakest_link->num_pairs;
Packit 13e616
				paths_per_vl[test_vl + 1] +=
Packit 13e616
				    weakest_link->num_pairs;
Packit 13e616
Packit 13e616
				/* move all <s,d> paths on this link to the next cdg */
Packit 13e616
				for (i = 0; i < weakest_link->num_pairs; i++) {
Packit 13e616
					srcdest =
Packit 13e616
					    get_next_srcdest_pair(weakest_link,
Packit 13e616
								  i);
Packit 13e616
					slid = (uint16_t) (srcdest >> 16);
Packit 13e616
					dlid =
Packit 13e616
					    (uint16_t) ((srcdest << 16) >> 16);
Packit 13e616
Packit 13e616
					/* only move if not moved in a previous step */
Packit 13e616
					if (test_vl !=
Packit 13e616
					    (uint8_t)
Packit 13e616
					    vltable_get_vl(srcdest2vl_table,
Packit 13e616
							   cl_hton16(slid),
Packit 13e616
							   cl_hton16(dlid))) {
Packit 13e616
						/* this path has been moved
Packit 13e616
						   before -> don't count
Packit 13e616
						 */
Packit 13e616
						paths_per_vl[test_vl]++;
Packit 13e616
						paths_per_vl[test_vl + 1]--;
Packit 13e616
						continue;
Packit 13e616
					}
Packit 13e616
Packit 13e616
					src_port =
Packit 13e616
					    osm_get_port_by_lid(p_mgr->p_subn,
Packit 13e616
								cl_hton16
Packit 13e616
								(slid));
Packit 13e616
					dest_port =
Packit 13e616
					    osm_get_port_by_lid(p_mgr->p_subn,
Packit 13e616
								cl_hton16
Packit 13e616
								(dlid));
Packit 13e616
Packit 13e616
					/* remove path from current cdg / vl */
Packit 13e616
					err =
Packit 13e616
					    remove_path_from_cdg(&
Packit 13e616
								 (cdg[test_vl]),
Packit 13e616
								 src_port, slid,
Packit 13e616
								 dest_port,
Packit 13e616
								 dlid);
Packit 13e616
					if (err) {
Packit 13e616
						OSM_LOG(p_mgr->p_log,
Packit 13e616
							OSM_LOG_ERROR,
Packit 13e616
							"ERR AD44: something went wrong in remove_path_from_cdg(...)\n");
Packit 13e616
						goto ERROR;
Packit 13e616
					}
Packit 13e616
Packit 13e616
					/* add path to next cdg / vl */
Packit 13e616
					err =
Packit 13e616
					    update_channel_dep_graph(&
Packit 13e616
								     (cdg
Packit 13e616
								      [test_vl +
Packit 13e616
								       1]),
Packit 13e616
								     src_port,
Packit 13e616
								     slid,
Packit 13e616
								     dest_port,
Packit 13e616
								     dlid);
Packit 13e616
					if (err) {
Packit 13e616
						OSM_LOG(p_mgr->p_log,
Packit 13e616
							OSM_LOG_ERROR,
Packit 13e616
							"ERR AD14: cannot allocate memory for cdg node or link in update_channel_dep_graph(...)\n");
Packit 13e616
						goto ERROR;
Packit 13e616
					}
Packit 13e616
					vltable_insert(srcdest2vl_table,
Packit 13e616
						       cl_hton16(slid),
Packit 13e616
						       cl_hton16(dlid),
Packit 13e616
						       test_vl + 1);
Packit 13e616
				}
Packit 13e616
Packit 13e616
				if (weakest_link->num_pairs)
Packit 13e616
					free(weakest_link->srcdest_pairs);
Packit 13e616
				if (weakest_link)
Packit 13e616
					free(weakest_link);
Packit 13e616
			}
Packit 13e616
Packit 13e616
			start_here = cycle;
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* test the last avail cdg for a cycle;
Packit 13e616
	   if there is one, than vl_needed > vl_avail
Packit 13e616
	 */
Packit 13e616
	start_here = cdg[vl_avail - 1];
Packit 13e616
	if (start_here) {
Packit 13e616
		cycle =
Packit 13e616
		    search_cycle_in_channel_dep_graph(cdg[vl_avail - 1],
Packit 13e616
						      start_here);
Packit 13e616
		if (cycle) {
Packit 13e616
			vl_needed = vl_avail + 1;
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
	OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
Packit 13e616
		"Virtual Lanes needed: %" PRIu8 "\n", vl_needed);
Packit 13e616
	if (OSM_LOG_IS_ACTIVE_V2(p_mgr->p_log, OSM_LOG_INFO)) {
Packit 13e616
		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
Packit 13e616
			"Paths per VL (before balancing):\n");
Packit 13e616
		for (i = 0; i < vl_avail; i++)
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
Packit 13e616
				"   %" PRIu32 ". lane: %" PRIu64 "\n", i,
Packit 13e616
				paths_per_vl[i]);
Packit 13e616
	}
Packit 13e616
Packit 13e616
	OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE,
Packit 13e616
		"Balancing the paths on the available Virtual Lanes\n");
Packit 13e616
Packit 13e616
	/* optimal balancing virtual lanes, under condition: no additional cycle checks;
Packit 13e616
	   sl/vl != 0 might be assigned to loopback packets (i.e. slid/dlid on the
Packit 13e616
	   same port for lmc>0), but thats no problem, see IBAS 10.2.2.3
Packit 13e616
	 */
Packit 13e616
	split_count = (uint8_t *) calloc(vl_avail, sizeof(uint8_t));
Packit 13e616
	if (!split_count) {
Packit 13e616
		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
			"ERR AD24: cannot allocate memory for split_count, skip balancing\n");
Packit 13e616
		err = 1;
Packit 13e616
		goto ERROR;
Packit 13e616
	}
Packit 13e616
	/* initial state: paths for VLs won't be separated */
Packit 13e616
	for (i = 0; i < ((vl_needed < vl_avail) ? vl_needed : vl_avail); i++)
Packit 13e616
		split_count[i] = 1;
Packit 13e616
	dfsssp_ctx->vl_split_count = split_count;
Packit 13e616
	/* balancing is necessary if we have empty VLs */
Packit 13e616
	if (vl_needed < vl_avail) {
Packit 13e616
		/* split paths of VLs until we find an equal distribution */
Packit 13e616
		for (i = vl_needed; i < vl_avail; i++) {
Packit 13e616
			/* find VL with most paths in it */
Packit 13e616
			vl = 0;
Packit 13e616
			most_avg_paths = 0.0;
Packit 13e616
			for (test_vl = 0; test_vl < vl_needed; test_vl++) {
Packit 13e616
				if (most_avg_paths <
Packit 13e616
				    ((double)paths_per_vl[test_vl] /
Packit 13e616
				    split_count[test_vl])) {
Packit 13e616
					vl = test_vl;
Packit 13e616
					most_avg_paths =
Packit 13e616
						(double)paths_per_vl[test_vl] /
Packit 13e616
						split_count[test_vl];
Packit 13e616
				}
Packit 13e616
			}
Packit 13e616
			split_count[vl]++;
Packit 13e616
		}
Packit 13e616
		/* change the VL assignment depending on split_count for
Packit 13e616
		   all VLs except VL 0
Packit 13e616
		 */
Packit 13e616
		for (from = vl_needed - 1; from > 0; from--) {
Packit 13e616
			/* how much space needed for others? */
Packit 13e616
			to = 0;
Packit 13e616
			for (i = 0; i < from; i++)
Packit 13e616
				to += split_count[i];
Packit 13e616
			count = paths_per_vl[from];
Packit 13e616
			vltable_change_vl(srcdest2vl_table, from, to, count);
Packit 13e616
			/* change also the information within the split_count
Packit 13e616
			   array; this is important for fast calculation later
Packit 13e616
			 */
Packit 13e616
			split_count[to] = split_count[from];
Packit 13e616
			split_count[from] = 0;
Packit 13e616
			paths_per_vl[to] = paths_per_vl[from];
Packit 13e616
			paths_per_vl[from] = 0;
Packit 13e616
		}
Packit 13e616
	} else if (vl_needed > vl_avail) {
Packit 13e616
		/* routing not possible, a further development would be the LASH-TOR approach (update: LASH-TOR isn't possible, there is a mistake in the theory) */
Packit 13e616
		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
			"ERR AD25: Not enough VLs available (avail=%d, needed=%d); Stopping dfsssp routing!\n",
Packit 13e616
			vl_avail, vl_needed);
Packit 13e616
		err = 1;
Packit 13e616
		goto ERROR;
Packit 13e616
	}
Packit 13e616
	/* else { no balancing } */
Packit 13e616
Packit 13e616
	if (OSM_LOG_IS_ACTIVE_V2(p_mgr->p_log, OSM_LOG_DEBUG)) {
Packit 13e616
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
Packit 13e616
			"Virtual Lanes per src/dest combination after balancing:\n");
Packit 13e616
		vltable_print(p_mgr, srcdest2vl_table);
Packit 13e616
	}
Packit 13e616
	if (OSM_LOG_IS_ACTIVE_V2(p_mgr->p_log, OSM_LOG_INFO)) {
Packit 13e616
		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
Packit 13e616
			"Approx. #paths per VL (after balancing):\n");
Packit 13e616
		j = 0;
Packit 13e616
		count = 1; /* to prevent div. by 0 */
Packit 13e616
		for (i = 0; i < vl_avail; i++) {
Packit 13e616
			if (split_count[i] > 0) {
Packit 13e616
				j = i;
Packit 13e616
				count = split_count[i];
Packit 13e616
			}
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
Packit 13e616
				"   %" PRIu32 ". lane: %" PRIu64 "\n", i,
Packit 13e616
				paths_per_vl[j] / count);
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
	free(paths_per_vl);
Packit 13e616
Packit 13e616
	/* deallocate channel dependency graphs */
Packit 13e616
	for (i = 0; i < vl_avail; i++)
Packit 13e616
		cdg_dealloc(&cdg[i]);
Packit 13e616
	free(cdg);
Packit 13e616
Packit 13e616
	OSM_LOG_EXIT(p_mgr->p_log);
Packit 13e616
	return 0;
Packit 13e616
Packit 13e616
ERROR:
Packit 13e616
	free(paths_per_vl);
Packit 13e616
Packit 13e616
	for (i = 0; i < vl_avail; i++)
Packit 13e616
		cdg_dealloc(&cdg[i]);
Packit 13e616
	free(cdg);
Packit 13e616
Packit 13e616
	vltable_dealloc(&srcdest2vl_table);
Packit 13e616
	dfsssp_ctx->srcdest2vl_table = NULL;
Packit 13e616
Packit 13e616
	return err;
Packit 13e616
}
Packit 13e616
Packit 13e616
/* meta function which calls subfunctions for dijkstra, update lft and weights,
Packit 13e616
   (and remove deadlocks) to calculate the routing for the subnet
Packit 13e616
*/
Packit 13e616
static int dfsssp_do_dijkstra_routing(void *context)
Packit 13e616
{
Packit 13e616
	dfsssp_context_t *dfsssp_ctx = (dfsssp_context_t *) context;
Packit 13e616
	osm_ucast_mgr_t *p_mgr = (osm_ucast_mgr_t *) dfsssp_ctx->p_mgr;
Packit 13e616
	vertex_t *adj_list = (vertex_t *) dfsssp_ctx->adj_list;
Packit 13e616
	uint32_t adj_list_size = dfsssp_ctx->adj_list_size;
Packit 13e616
	cl_heap_t heap;
Packit 13e616
Packit 13e616
	vertex_t **sw_list = NULL;
Packit 13e616
	uint32_t sw_list_size = 0;
Packit 13e616
	uint64_t guid = 0;
Packit 13e616
	cl_qlist_t *qlist = NULL;
Packit 13e616
	cl_list_item_t *qlist_item = NULL;
Packit 13e616
Packit 13e616
	cl_qmap_t *sw_tbl = &p_mgr->p_subn->sw_guid_tbl;
Packit 13e616
	cl_qmap_t cn_tbl, io_tbl, *p_mixed_tbl = NULL;
Packit 13e616
	cl_map_item_t *item = NULL;
Packit 13e616
	osm_switch_t *sw = NULL;
Packit 13e616
	osm_port_t *port = NULL;
Packit 13e616
	uint32_t i = 0, err = 0;
Packit 13e616
	uint16_t lid = 0, min_lid_ho = 0, max_lid_ho = 0;
Packit 13e616
	uint8_t lmc = 0;
Packit 13e616
	boolean_t cn_nodes_provided = FALSE, io_nodes_provided = FALSE;
Packit 13e616
Packit 13e616
	OSM_LOG_ENTER(p_mgr->p_log);
Packit 13e616
	OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE,
Packit 13e616
		"Calculating shortest path from all Hca/switches to all\n");
Packit 13e616
Packit 13e616
	cl_qmap_init(&cn_tbl);
Packit 13e616
	cl_qmap_init(&io_tbl);
Packit 13e616
	p_mixed_tbl = &cn_tbl;
Packit 13e616
Packit 13e616
	cl_qlist_init(&p_mgr->port_order_list);
Packit 13e616
Packit 13e616
	/* reset the new_lft for each switch */
Packit 13e616
	for (item = cl_qmap_head(sw_tbl); item != cl_qmap_end(sw_tbl);
Packit 13e616
	     item = cl_qmap_next(item)) {
Packit 13e616
		sw = (osm_switch_t *) item;
Packit 13e616
		/* initialize LIDs in buffer to invalid port number */
Packit 13e616
		memset(sw->new_lft, OSM_NO_PATH, sw->max_lid_ho + 1);
Packit 13e616
		/* initialize LFT and hop count for bsp0/esp0 of the switch */
Packit 13e616
		min_lid_ho = cl_ntoh16(osm_node_get_base_lid(sw->p_node, 0));
Packit 13e616
		lmc = osm_node_get_lmc(sw->p_node, 0);
Packit 13e616
		for (i = min_lid_ho; i < min_lid_ho + (1 << lmc); i++) {
Packit 13e616
			/* for each switch the port to the 'self'lid is the management port 0 */
Packit 13e616
			sw->new_lft[i] = 0;
Packit 13e616
			/* the hop count to the 'self'lid is 0 for each switch */
Packit 13e616
			osm_switch_set_hops(sw, i, 0, 0);
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* construct the generic heap opject to use it in dijkstra */
Packit 13e616
	cl_heap_construct(&heap;;
Packit 13e616
Packit 13e616
	/* we need an intermediate array of pointers to switches in adj_list;
Packit 13e616
	   this array will be sorted in respect to num_hca (descending)
Packit 13e616
	 */
Packit 13e616
	sw_list_size = adj_list_size - 1;
Packit 13e616
	sw_list = (vertex_t **)malloc(sw_list_size * sizeof(vertex_t *));
Packit 13e616
	if (!sw_list) {
Packit 13e616
		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
			"ERR AD29: cannot allocate memory for sw_list in dfsssp_do_dijkstra_routing\n");
Packit 13e616
		goto ERROR;
Packit 13e616
	}
Packit 13e616
	memset(sw_list, 0, sw_list_size * sizeof(vertex_t *));
Packit 13e616
Packit 13e616
	/* fill the array with references to the 'real' sw in adj_list */
Packit 13e616
	for (i = 0; i < sw_list_size; i++)
Packit 13e616
		sw_list[i] = &(adj_list[i + 1]);
Packit 13e616
Packit 13e616
	/* sort the sw_list in descending order */
Packit 13e616
	sw_list_sort_by_num_hca(sw_list, sw_list_size);
Packit 13e616
Packit 13e616
	/* parse compute node guid file, if provided by the user */
Packit 13e616
	if (p_mgr->p_subn->opt.cn_guid_file) {
Packit 13e616
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
Packit 13e616
			"Parsing compute nodes from file %s\n",
Packit 13e616
			p_mgr->p_subn->opt.cn_guid_file);
Packit 13e616
Packit 13e616
		if (parse_node_map(p_mgr->p_subn->opt.cn_guid_file,
Packit 13e616
				   add_guid_to_map, &cn_tbl)) {
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
				"ERR AD33: Problem parsing compute node guid file\n");
Packit 13e616
			goto ERROR;
Packit 13e616
		}
Packit 13e616
Packit 13e616
		if (cl_is_qmap_empty(&cn_tbl))
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
Packit 13e616
				"WRN AD34: compute node guids file contains no valid guids\n");
Packit 13e616
		else
Packit 13e616
			cn_nodes_provided = TRUE;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* parse I/O guid file, if provided by the user */
Packit 13e616
	if (p_mgr->p_subn->opt.io_guid_file) {
Packit 13e616
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
Packit 13e616
			"Parsing I/O nodes from file %s\n",
Packit 13e616
			p_mgr->p_subn->opt.io_guid_file);
Packit 13e616
Packit 13e616
		if (parse_node_map(p_mgr->p_subn->opt.io_guid_file,
Packit 13e616
				   add_guid_to_map, &io_tbl)) {
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
				"ERR AD35: Problem parsing I/O guid file\n");
Packit 13e616
			goto ERROR;
Packit 13e616
		}
Packit 13e616
Packit 13e616
		if (cl_is_qmap_empty(&io_tbl))
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
Packit 13e616
				"WRN AD36: I/O node guids file contains no valid guids\n");
Packit 13e616
		else
Packit 13e616
			io_nodes_provided = TRUE;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* if we mix Hca/Tca/SP0 during the dijkstra routing, we might end up
Packit 13e616
	   in rare cases with a bad balancing for Hca<->Hca connections, i.e.
Packit 13e616
	   some inter-switch links get oversubscribed with paths;
Packit 13e616
	   therefore: add Hca ports first to ensure good Hca<->Hca balancing
Packit 13e616
	 */
Packit 13e616
	if (cn_nodes_provided) {
Packit 13e616
		for (i = 0; i < adj_list_size - 1; i++) {
Packit 13e616
			if (sw_list[i] && sw_list[i]->sw) {
Packit 13e616
				sw = (osm_switch_t *)(sw_list[i]->sw);
Packit 13e616
				add_sw_endports_to_order_list(sw, p_mgr,
Packit 13e616
							      &cn_tbl, TRUE);
Packit 13e616
			} else {
Packit 13e616
				OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
					"ERR AD30: corrupted sw_list array in dfsssp_do_dijkstra_routing\n");
Packit 13e616
				goto ERROR;
Packit 13e616
			}
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
	/* then: add Tca ports to ensure good Hca->Tca balancing and separate
Packit 13e616
	   paths towards I/O nodes on the same switch (if possible)
Packit 13e616
	 */
Packit 13e616
	if (io_nodes_provided) {
Packit 13e616
		for (i = 0; i < adj_list_size - 1; i++) {
Packit 13e616
			if (sw_list[i] && sw_list[i]->sw) {
Packit 13e616
				sw = (osm_switch_t *)(sw_list[i]->sw);
Packit 13e616
				add_sw_endports_to_order_list(sw, p_mgr,
Packit 13e616
							      &io_tbl, TRUE);
Packit 13e616
			} else {
Packit 13e616
				OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
					"ERR AD32: corrupted sw_list array in dfsssp_do_dijkstra_routing\n");
Packit 13e616
				goto ERROR;
Packit 13e616
			}
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
	/* then: add anything else, such as administration nodes, ... */
Packit 13e616
	if (cn_nodes_provided && io_nodes_provided) {
Packit 13e616
		cl_qmap_merge(&cn_tbl, &io_tbl);
Packit 13e616
	} else if (io_nodes_provided) {
Packit 13e616
		p_mixed_tbl = &io_tbl;
Packit 13e616
	}
Packit 13e616
	for (i = 0; i < adj_list_size - 1; i++) {
Packit 13e616
		if (sw_list[i] && sw_list[i]->sw) {
Packit 13e616
			sw = (osm_switch_t *)(sw_list[i]->sw);
Packit 13e616
			add_sw_endports_to_order_list(sw, p_mgr, p_mixed_tbl,
Packit 13e616
						      FALSE);
Packit 13e616
		} else {
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
				"ERR AD39: corrupted sw_list array in dfsssp_do_dijkstra_routing\n");
Packit 13e616
			goto ERROR;
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
	/* last: add SP0 afterwards which have lower priority for balancing */
Packit 13e616
	for (i = 0; i < sw_list_size; i++) {
Packit 13e616
		if (sw_list[i] && sw_list[i]->sw) {
Packit 13e616
			sw = (osm_switch_t *)(sw_list[i]->sw);
Packit 13e616
			guid = cl_ntoh64(osm_node_get_node_guid(sw->p_node));
Packit 13e616
			add_guid_to_order_list(guid, p_mgr);
Packit 13e616
		} else {
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
				"ERR AD31: corrupted sw_list array in dfsssp_do_dijkstra_routing\n");
Packit 13e616
			goto ERROR;
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* the intermediate array lived long enough */
Packit 13e616
	free(sw_list);
Packit 13e616
	sw_list = NULL;
Packit 13e616
	/* same is true for the compute node and I/O guid map */
Packit 13e616
	destroy_guid_map(&cn_tbl);
Packit 13e616
	cn_nodes_provided = FALSE;
Packit 13e616
	destroy_guid_map(&io_tbl);
Packit 13e616
	io_nodes_provided = FALSE;
Packit 13e616
Packit 13e616
	/* do the routing for the each Hca in the subnet and each switch
Packit 13e616
	   in the subnet (to add the routes to base/enhanced SP0)
Packit 13e616
	 */
Packit 13e616
	qlist = &p_mgr->port_order_list;
Packit 13e616
	for (qlist_item = cl_qlist_head(qlist);
Packit 13e616
	     qlist_item != cl_qlist_end(qlist);
Packit 13e616
	     qlist_item = cl_qlist_next(qlist_item)) {
Packit 13e616
		port = (osm_port_t *)cl_item_obj(qlist_item, port, list_item);
Packit 13e616
Packit 13e616
		/* calculate shortest path with dijkstra from node to all switches/Hca */
Packit 13e616
		if (osm_node_get_type(port->p_node) == IB_NODE_TYPE_CA) {
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
Packit 13e616
				"Processing Hca with GUID 0x%" PRIx64 "\n",
Packit 13e616
				cl_ntoh64(osm_node_get_node_guid
Packit 13e616
					  (port->p_node)));
Packit 13e616
		} else if (osm_node_get_type(port->p_node) == IB_NODE_TYPE_SWITCH) {
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
Packit 13e616
				"Processing switch with GUID 0x%" PRIx64 "\n",
Packit 13e616
				cl_ntoh64(osm_node_get_node_guid
Packit 13e616
					  (port->p_node)));
Packit 13e616
		} else {
Packit 13e616
			/* we don't handle routers, in case they show up */
Packit 13e616
			continue;
Packit 13e616
		}
Packit 13e616
Packit 13e616
		/* distribute the LID range across the ports that can reach those LIDs
Packit 13e616
		   to have disjoint paths for one destination port with lmc>0;
Packit 13e616
		   for switches with bsp0: min=max; with esp0: max>min if lmc>0
Packit 13e616
		 */
Packit 13e616
		osm_port_get_lid_range_ho(port, &min_lid_ho,
Packit 13e616
					  &max_lid_ho);
Packit 13e616
		for (lid = min_lid_ho; lid <= max_lid_ho; lid++) {
Packit 13e616
			/* do dijkstra from this Hca/LID/SP0 to each switch */
Packit 13e616
			err =
Packit 13e616
			    dijkstra(p_mgr, &heap, adj_list, adj_list_size,
Packit 13e616
				     port, lid);
Packit 13e616
			if (err)
Packit 13e616
				goto ERROR;
Packit 13e616
			if (OSM_LOG_IS_ACTIVE_V2(p_mgr->p_log, OSM_LOG_DEBUG))
Packit 13e616
				print_routes(p_mgr, adj_list, adj_list_size,
Packit 13e616
					     port);
Packit 13e616
Packit 13e616
			/* make an update for the linear forwarding tables of the switches */
Packit 13e616
			err =
Packit 13e616
			    update_lft(p_mgr, adj_list, adj_list_size, port, lid);
Packit 13e616
			if (err)
Packit 13e616
				goto ERROR;
Packit 13e616
Packit 13e616
			/* add weights for calculated routes to adjust the weights for the next cycle */
Packit 13e616
			update_weights(p_mgr, adj_list, adj_list_size);
Packit 13e616
Packit 13e616
			if (OSM_LOG_IS_ACTIVE_V2(p_mgr->p_log, OSM_LOG_DEBUG))
Packit 13e616
				dfsssp_print_graph(p_mgr, adj_list,
Packit 13e616
						   adj_list_size);
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* try deadlock removal only for the dfsssp routing (not for the sssp case, which is a subset of the dfsssp algorithm) */
Packit 13e616
	if (dfsssp_ctx->routing_type == OSM_ROUTING_ENGINE_TYPE_DFSSSP) {
Packit 13e616
		/* remove potential deadlocks by assigning different virtual lanes to src/dest paths and balance the lanes */
Packit 13e616
		err = dfsssp_remove_deadlocks(dfsssp_ctx);
Packit 13e616
		if (err)
Packit 13e616
			goto ERROR;
Packit 13e616
	} else if (dfsssp_ctx->routing_type == OSM_ROUTING_ENGINE_TYPE_SSSP) {
Packit 13e616
		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
Packit 13e616
			"SSSP routing specified -> skipping deadlock removal thru dfsssp_remove_deadlocks(...)\n");
Packit 13e616
	} else {
Packit 13e616
		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
Packit 13e616
			"ERR AD28: wrong routing engine specified in dfsssp_ctx\n");
Packit 13e616
		goto ERROR;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* list not needed after the dijkstra steps and deadlock removal */
Packit 13e616
	cl_qlist_remove_all(&p_mgr->port_order_list);
Packit 13e616
Packit 13e616
	/* delete the heap which is not needed anymore */
Packit 13e616
	cl_heap_destroy(&heap;;
Packit 13e616
Packit 13e616
	/* print the new_lft for each switch after routing is done */
Packit 13e616
	if (OSM_LOG_IS_ACTIVE_V2(p_mgr->p_log, OSM_LOG_DEBUG)) {
Packit 13e616
		for (item = cl_qmap_head(sw_tbl); item != cl_qmap_end(sw_tbl);
Packit 13e616
		     item = cl_qmap_next(item)) {
Packit 13e616
			sw = (osm_switch_t *) item;
Packit 13e616
			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
Packit 13e616
				"Summary of the (new) LFT for switch 0x%" PRIx64
Packit 13e616
				" (%s):\n",
Packit 13e616
				cl_ntoh64(osm_node_get_node_guid(sw->p_node)),
Packit 13e616
				sw->p_node->print_desc);
Packit 13e616
			for (i = 0; i < sw->max_lid_ho + 1; i++)
Packit 13e616
				if (sw->new_lft[i] != OSM_NO_PATH) {
Packit 13e616
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
Packit 13e616
						"   for LID=%" PRIu32
Packit 13e616
						" use port=%" PRIu8 "\n", i,
Packit 13e616
						sw->new_lft[i]);
Packit 13e616
				}
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
	OSM_LOG_EXIT(p_mgr->p_log);
Packit 13e616
	return 0;
Packit 13e616
Packit 13e616
ERROR:
Packit 13e616
	if (!cl_is_qlist_empty(&p_mgr->port_order_list))
Packit 13e616
		cl_qlist_remove_all(&p_mgr->port_order_list);
Packit 13e616
	if (cn_nodes_provided)
Packit 13e616
		destroy_guid_map(&cn_tbl);
Packit 13e616
	if (io_nodes_provided)
Packit 13e616
		destroy_guid_map(&io_tbl);
Packit 13e616
	if (sw_list)
Packit 13e616
		free(sw_list);
Packit 13e616
	if (cl_is_heap_inited(&heap))
Packit 13e616
		cl_heap_destroy(&heap;;
Packit 13e616
	return -1;
Packit 13e616
}
Packit 13e616
Packit 13e616
/* meta function which calls subfunctions for finding the optimal switch
Packit 13e616
   for the spanning tree, performing a dijkstra step with this sw as root,
Packit 13e616
   and calculating the mcast table for MLID
Packit 13e616
*/
Packit 13e616
static ib_api_status_t dfsssp_do_mcast_routing(void * context,
Packit 13e616
					       osm_mgrp_box_t * mbox)
Packit 13e616
{
Packit 13e616
	dfsssp_context_t *dfsssp_ctx = (dfsssp_context_t *) context;
Packit 13e616
	osm_ucast_mgr_t *p_mgr = (osm_ucast_mgr_t *) dfsssp_ctx->p_mgr;
Packit 13e616
	osm_sm_t *sm = (osm_sm_t *) p_mgr->sm;
Packit 13e616
	vertex_t *adj_list = (vertex_t *) dfsssp_ctx->adj_list;
Packit 13e616
	uint32_t adj_list_size = dfsssp_ctx->adj_list_size;
Packit 13e616
	cl_qlist_t mcastgrp_port_list;
Packit 13e616
	cl_qmap_t mcastgrp_port_map;
Packit 13e616
	osm_switch_t *root_sw = NULL, *p_sw = NULL;
Packit 13e616
	osm_port_t *port = NULL;
Packit 13e616
	ib_net16_t lid = 0;
Packit 13e616
	uint32_t err = 0, num_ports = 0, i = 0;
Packit 13e616
	ib_net64_t guid = 0;
Packit 13e616
	ib_api_status_t status = IB_SUCCESS;
Packit 13e616
	cl_heap_t heap;
Packit 13e616
Packit 13e616
	OSM_LOG_ENTER(sm->p_log);
Packit 13e616
Packit 13e616
	/* using the ucast cache feature with dfsssp might mean that a leaf sw
Packit 13e616
	   got removed (and got back) without calling dfsssp_build_graph
Packit 13e616
	   and therefore the adj_list (and pointers to osm's internal switches)
Packit 13e616
	   could be outdated (here we have no knowledge if it has happened, so
Packit 13e616
	   unfortunately a check is necessary... still better than rebuilding
Packit 13e616
	   adj_list every time we arrive here)
Packit 13e616
	 */
Packit 13e616
	if (p_mgr->p_subn->opt.use_ucast_cache && p_mgr->cache_valid) {
Packit 13e616
		for (i = 1; i < adj_list_size; i++) {
Packit 13e616
			guid = cl_hton64(adj_list[i].guid);
Packit 13e616
			p_sw = osm_get_switch_by_guid(p_mgr->p_subn, guid);
Packit 13e616
			if (p_sw) {
Packit 13e616
				/* check if switch came back from the dead */
Packit 13e616
				if (adj_list[i].dropped)
Packit 13e616
					adj_list[i].dropped = FALSE;
Packit 13e616
Packit 13e616
				/* verify that sw object has not been moved
Packit 13e616
				   (this can happen for a leaf switch, if it
Packit 13e616
				   was dropped and came back later without a
Packit 13e616
				   rerouting), otherwise we have to update
Packit 13e616
				   dfsssp's internal switch list with the new
Packit 13e616
				   sw pointer
Packit 13e616
				 */
Packit 13e616
				if (p_sw == adj_list[i].sw)
Packit 13e616
					continue;
Packit 13e616
				else
Packit 13e616
					adj_list[i].sw = p_sw;
Packit 13e616
			} else {
Packit 13e616
				/* if a switch from adj_list is not in the
Packit 13e616
				   sw_guid_tbl anymore, then the only reason is
Packit 13e616
				   that it was a leaf switch and opensm dropped
Packit 13e616
				   it without calling a rerouting
Packit 13e616
				   -> calling dijkstra is no problem, since it
Packit 13e616
				      is a leaf and different from root_sw
Packit 13e616
				   -> only update_mcft and reset_mgrp_membership
Packit 13e616
				      need to be aware of these dropped switches
Packit 13e616
				 */
Packit 13e616
				if (!adj_list[i].dropped)
Packit 13e616
					adj_list[i].dropped = TRUE;
Packit 13e616
			}
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* construct the generic heap opject to use it in dijkstra */
Packit 13e616
	cl_heap_construct(&heap;;
Packit 13e616
Packit 13e616
	/* create a map and a list of all ports which are member in the mcast
Packit 13e616
	   group; map for searching elements and list for iteration
Packit 13e616
	 */
Packit 13e616
	if (osm_mcast_make_port_list_and_map(&mcastgrp_port_list,
Packit 13e616
					     &mcastgrp_port_map, mbox)) {
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR AD50: "
Packit 13e616
			"Insufficient memory to make port list\n");
Packit 13e616
		status = IB_ERROR;
Packit 13e616
		goto Exit;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	num_ports = cl_qlist_count(&mcastgrp_port_list);
Packit 13e616
	if (num_ports < 2) {
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
Packit 13e616
			"MLID 0x%X has %u members - nothing to do\n",
Packit 13e616
			mbox->mlid, num_ports);
Packit 13e616
		goto Exit;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* find the root switch for the spanning tree, which has the smallest
Packit 13e616
	   hops count to all LIDs in the mcast group
Packit 13e616
	 */
Packit 13e616
	root_sw = osm_mcast_mgr_find_root_switch(sm, &mcastgrp_port_list);
Packit 13e616
	if (!root_sw) {
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR AD51: "
Packit 13e616
			"Unable to locate a suitable switch for group 0x%X\n",
Packit 13e616
			mbox->mlid);
Packit 13e616
		status = IB_ERROR;
Packit 13e616
		goto Exit;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* a) start one dijkstra step from the root switch to generate a
Packit 13e616
	   spanning tree
Packit 13e616
	   b) this might be a bit of an overkill to span the whole
Packit 13e616
	   network, if there are only a few ports in the mcast group, but
Packit 13e616
	   its only one dijkstra step for each mcast group and we did many
Packit 13e616
	   steps before in the ucast routing for each LID in the subnet;
Packit 13e616
	   c) we can use the subnet structure from the ucast routing, and
Packit 13e616
	   don't even have to reset the link weights (=> therefore the mcast
Packit 13e616
	   spanning tree will use less 'growded' links in the network)
Packit 13e616
	   d) the mcast dfsssp algorithm will not change the link weights
Packit 13e616
	 */
Packit 13e616
	lid = osm_node_get_base_lid(root_sw->p_node, 0);
Packit 13e616
	port = osm_get_port_by_lid(sm->p_subn, lid);
Packit 13e616
	err = dijkstra(p_mgr, &heap, adj_list, adj_list_size, port, lid);
Packit 13e616
	if (err) {
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR AD52: "
Packit 13e616
			"Dijkstra step for mcast failed for group 0x%X\n",
Packit 13e616
			mbox->mlid);
Packit 13e616
		status = IB_ERROR;
Packit 13e616
		goto Exit;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* set mcast group membership again for update_mcft
Packit 13e616
	   (unfortunately: osm_mcast_mgr_find_root_switch resets it)
Packit 13e616
	 */
Packit 13e616
	update_mgrp_membership(&mcastgrp_port_list);
Packit 13e616
Packit 13e616
	/* update the mcast forwarding tables of the switches */
Packit 13e616
	err = update_mcft(sm, adj_list, adj_list_size, mbox->mlid,
Packit 13e616
			  &mcastgrp_port_map, root_sw);
Packit 13e616
	if (err) {
Packit 13e616
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR AD53: "
Packit 13e616
			"Update of mcast forwarding tables failed for group 0x%X\n",
Packit 13e616
			mbox->mlid);
Packit 13e616
		status = IB_ERROR;
Packit 13e616
		goto Exit;
Packit 13e616
	}
Packit 13e616
Packit 13e616
Exit:
Packit 13e616
	if (cl_is_heap_inited(&heap))
Packit 13e616
		cl_heap_destroy(&heap;;
Packit 13e616
	reset_mgrp_membership(adj_list, adj_list_size);
Packit 13e616
	osm_mcast_drop_port_list(&mcastgrp_port_list);
Packit 13e616
	OSM_LOG_EXIT(sm->p_log);
Packit 13e616
	return status;
Packit 13e616
}
Packit 13e616
Packit 13e616
/* called from extern in QP creation process to gain the the service level and
Packit 13e616
   the virtual lane respectively for a <s,d> pair
Packit 13e616
*/
Packit 13e616
static uint8_t get_dfsssp_sl(void *context, uint8_t hint_for_default_sl,
Packit 13e616
			     const ib_net16_t slid, const ib_net16_t dlid)
Packit 13e616
{
Packit 13e616
	dfsssp_context_t *dfsssp_ctx = (dfsssp_context_t *) context;
Packit 13e616
	osm_port_t *src_port, *dest_port;
Packit 13e616
	vltable_t *srcdest2vl_table = NULL;
Packit 13e616
	uint8_t *vl_split_count = NULL;
Packit 13e616
	osm_ucast_mgr_t *p_mgr = NULL;
Packit 13e616
	int32_t res = 0;
Packit 13e616
Packit 13e616
	if (dfsssp_ctx
Packit 13e616
	    && dfsssp_ctx->routing_type == OSM_ROUTING_ENGINE_TYPE_DFSSSP) {
Packit 13e616
		p_mgr = (osm_ucast_mgr_t *) dfsssp_ctx->p_mgr;
Packit 13e616
		srcdest2vl_table = (vltable_t *) (dfsssp_ctx->srcdest2vl_table);
Packit 13e616
		vl_split_count = (uint8_t *) (dfsssp_ctx->vl_split_count);
Packit 13e616
	}
Packit 13e616
	else
Packit 13e616
		return hint_for_default_sl;
Packit 13e616
Packit 13e616
	src_port = osm_get_port_by_lid(p_mgr->p_subn, slid);
Packit 13e616
	if (!src_port)
Packit 13e616
		return hint_for_default_sl;
Packit 13e616
Packit 13e616
	dest_port = osm_get_port_by_lid(p_mgr->p_subn, dlid);
Packit 13e616
	if (!dest_port)
Packit 13e616
		return hint_for_default_sl;
Packit 13e616
Packit 13e616
	if (!srcdest2vl_table)
Packit 13e616
		return hint_for_default_sl;
Packit 13e616
Packit 13e616
	res = vltable_get_vl(srcdest2vl_table, slid, dlid);
Packit 13e616
Packit 13e616
	/* we will randomly distribute the traffic over multiple VLs if
Packit 13e616
	   necessary for good balancing; therefore vl_split_count provides
Packit 13e616
	   the number of VLs to use for certain traffic
Packit 13e616
	 */
Packit 13e616
	if (res > -1) {
Packit 13e616
		if (vl_split_count[res] > 1)
Packit 13e616
			return (uint8_t) (res + rand()%(vl_split_count[res]));
Packit 13e616
		else
Packit 13e616
			return (uint8_t) res;
Packit 13e616
	} else
Packit 13e616
		return hint_for_default_sl;
Packit 13e616
}
Packit 13e616
Packit 13e616
static dfsssp_context_t *dfsssp_context_create(osm_opensm_t * p_osm,
Packit 13e616
					       osm_routing_engine_type_t
Packit 13e616
					       routing_type)
Packit 13e616
{
Packit 13e616
	dfsssp_context_t *dfsssp_ctx = NULL;
Packit 13e616
Packit 13e616
	/* allocate memory */
Packit 13e616
	dfsssp_ctx = (dfsssp_context_t *) malloc(sizeof(dfsssp_context_t));
Packit 13e616
	if (dfsssp_ctx) {
Packit 13e616
		/* set initial values */
Packit 13e616
		dfsssp_ctx->routing_type = routing_type;
Packit 13e616
		dfsssp_ctx->p_mgr = (osm_ucast_mgr_t *) & (p_osm->sm.ucast_mgr);
Packit 13e616
		dfsssp_ctx->adj_list = NULL;
Packit 13e616
		dfsssp_ctx->adj_list_size = 0;
Packit 13e616
		dfsssp_ctx->srcdest2vl_table = NULL;
Packit 13e616
		dfsssp_ctx->vl_split_count = NULL;
Packit 13e616
	} else {
Packit 13e616
		OSM_LOG(p_osm->sm.ucast_mgr.p_log, OSM_LOG_ERROR,
Packit 13e616
			"ERR AD04: cannot allocate memory for dfsssp_ctx in dfsssp_context_create\n");
Packit 13e616
		return NULL;
Packit 13e616
	}
Packit 13e616
Packit 13e616
	return dfsssp_ctx;
Packit 13e616
}
Packit 13e616
Packit 13e616
static void dfsssp_context_destroy(void *context)
Packit 13e616
{
Packit 13e616
	dfsssp_context_t *dfsssp_ctx = (dfsssp_context_t *) context;
Packit 13e616
	vertex_t *adj_list = (vertex_t *) (dfsssp_ctx->adj_list);
Packit 13e616
	uint32_t i = 0;
Packit 13e616
	link_t *link = NULL, *tmp = NULL;
Packit 13e616
Packit 13e616
	/* free adj_list */
Packit 13e616
	for (i = 0; i < dfsssp_ctx->adj_list_size; i++) {
Packit 13e616
		link = adj_list[i].links;
Packit 13e616
		while (link) {
Packit 13e616
			tmp = link;
Packit 13e616
			link = link->next;
Packit 13e616
			free(tmp);
Packit 13e616
		}
Packit 13e616
	}
Packit 13e616
	free(adj_list);
Packit 13e616
	dfsssp_ctx->adj_list = NULL;
Packit 13e616
	dfsssp_ctx->adj_list_size = 0;
Packit 13e616
Packit 13e616
	/* free srcdest2vl table and the split count information table
Packit 13e616
	   (can be done, because dfsssp_context_destroy is called after
Packit 13e616
	    osm_get_dfsssp_sl)
Packit 13e616
	 */
Packit 13e616
	vltable_dealloc(&(dfsssp_ctx->srcdest2vl_table));
Packit 13e616
	dfsssp_ctx->srcdest2vl_table = NULL;
Packit 13e616
Packit 13e616
	if (dfsssp_ctx->vl_split_count) {
Packit 13e616
		free(dfsssp_ctx->vl_split_count);
Packit 13e616
		dfsssp_ctx->vl_split_count = NULL;
Packit 13e616
	}
Packit 13e616
}
Packit 13e616
Packit 13e616
static void delete(void *context)
Packit 13e616
{
Packit 13e616
	if (!context)
Packit 13e616
		return;
Packit 13e616
	dfsssp_context_destroy(context);
Packit 13e616
Packit 13e616
	free(context);
Packit 13e616
}
Packit 13e616
Packit 13e616
int osm_ucast_dfsssp_setup(struct osm_routing_engine *r, osm_opensm_t * p_osm)
Packit 13e616
{
Packit 13e616
	/* create context container and add ucast management object */
Packit 13e616
	dfsssp_context_t *dfsssp_context =
Packit 13e616
	    dfsssp_context_create(p_osm, OSM_ROUTING_ENGINE_TYPE_DFSSSP);
Packit 13e616
	if (!dfsssp_context) {
Packit 13e616
		return 1;	/* alloc failed -> skip this routing */
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* reset function pointers to dfsssp routines */
Packit 13e616
	r->context = (void *)dfsssp_context;
Packit 13e616
	r->build_lid_matrices = dfsssp_build_graph;
Packit 13e616
	r->ucast_build_fwd_tables = dfsssp_do_dijkstra_routing;
Packit 13e616
	r->mcast_build_stree = dfsssp_do_mcast_routing;
Packit 13e616
	r->path_sl = get_dfsssp_sl;
Packit 13e616
	r->destroy = delete;
Packit 13e616
Packit 13e616
	/* we initialize with the current time to achieve a 'good' randomized
Packit 13e616
	   assignment in get_dfsssp_sl(...)
Packit 13e616
	 */
Packit 13e616
	srand(time(NULL));
Packit 13e616
Packit 13e616
	return 0;
Packit 13e616
}
Packit 13e616
Packit 13e616
int osm_ucast_sssp_setup(struct osm_routing_engine *r, osm_opensm_t * p_osm)
Packit 13e616
{
Packit 13e616
	/* create context container and add ucast management object */
Packit 13e616
	dfsssp_context_t *dfsssp_context =
Packit 13e616
	    dfsssp_context_create(p_osm, OSM_ROUTING_ENGINE_TYPE_SSSP);
Packit 13e616
	if (!dfsssp_context) {
Packit 13e616
		return 1;	/* alloc failed -> skip this routing */
Packit 13e616
	}
Packit 13e616
Packit 13e616
	/* reset function pointers to sssp routines */
Packit 13e616
	r->context = (void *)dfsssp_context;
Packit 13e616
	r->build_lid_matrices = dfsssp_build_graph;
Packit 13e616
	r->ucast_build_fwd_tables = dfsssp_do_dijkstra_routing;
Packit 13e616
	r->mcast_build_stree = dfsssp_do_mcast_routing;
Packit 13e616
	r->destroy = delete;
Packit 13e616
Packit 13e616
	return 0;
Packit 13e616
}