From 54dbc3bdc0d5e3b1dd20874ba38a9b5190b017a2 Mon Sep 17 00:00:00 2001 From: Packit Service Date: Dec 10 2020 00:13:11 +0000 Subject: opensm-3.3.23 base --- diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bbee270 --- /dev/null +++ b/.gitignore @@ -0,0 +1,44 @@ +*.la +*.lo +*.o +.deps +.libs +aclocal.m4 +autom4te.cache/ +config.log +config.status +config/compile +config/config.guess +config/config.sub +config/depcomp +config/install-sh +config/libtool.m4 +config/ltmain.sh +config/ltoptions.m4 +config/ltsugar.m4 +config/ltversion.m4 +config/lt~obsolete.m4 +config/missing +config/ylwrap +configure +include/config.h +include/config.h.in +include/opensm/osm_config.h +include/opensm/osm_version.h +include/opensm/stamp-h2 +include/stamp-h1 +libtool +Makefile +Makefile.in +man/opensm.8 +man/torus-2QoS.8 +man/torus-2QoS.conf.5 +opensm.spec +opensm/opensm +opensm/osm_qos_parser_l.c +opensm/osm_qos_parser_y.c +opensm/osm_qos_parser_y.h +osmtest/osmtest +scripts/opensm.init +scripts/redhat-opensm.init +scripts/sldd.sh diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..a05dcfe --- /dev/null +++ b/.travis.yml @@ -0,0 +1,20 @@ +language: c +dist: xenial +addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - bison + - flex + - autoconf + - automake + - gcc-8 + - libtool-bin + - make + - sparse + - wget + - libibumad-dev +script: + - scripts/travis-checkpatch + - scripts/travis-build diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..0106a07 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,9 @@ + +By the chronological order of involvement: +Steve King, Intel +Anil Keshavamurthy, Intel +Eitan Zahavi, Mellanox Technologies, eitan@mellanox.co.il +Yael Kalka, Mellanox Technologies, yael@mellanox.co.il +Shahar Frank, Voltaire +Hal Rosenstock, Voltaire, halr@voltaire.com +Sasha Khapyorsky, Voltaire, sashak@voltaire.com diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..07e5ab0 --- /dev/null +++ 
b/COPYING @@ -0,0 +1,32 @@ + Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + + This software is available to you under a choice of one of two + licenses. You may choose to be licensed under the terms of the GNU + General Public License (GPL) Version 2, available from the file + COPYING in the main directory of this source tree, or the + OpenIB.org BSD license below: + + Redistribution and use in source and binary forms, with or + without modification, are permitted provided that the following + conditions are met: + + - Redistributions of source code must retain the above + copyright notice, this list of conditions and the following + disclaimer. + + - Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials + provided with the distribution. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
+ diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..cb67d2b --- /dev/null +++ b/ChangeLog @@ -0,0 +1,14 @@ +2005-09-12 Hal Rosenstock + + * Improved SA MCMemberRecord error messages + +2005-08-22 Yael Kalka + + * Merge of OpenSM 1.8.0 previously available only on Gen1 + +2005-08-14 Eitan Zahavi + + * Provided a top level auto tools project so there is no need to + cd into each of the sub directories and do: + ./autogen.sh && configure && make && make install + diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000..095b1eb --- /dev/null +++ b/INSTALL @@ -0,0 +1,231 @@ +Installation Instructions +************************* + +Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004 Free +Software Foundation, Inc. + +This file is free documentation; the Free Software Foundation gives +unlimited permission to copy, distribute and modify it. + +Basic Installation +================== + +These are generic installation instructions. + + The `configure' shell script attempts to guess correct values for +various system-dependent variables used during compilation. It uses +those values to create a `Makefile' in each directory of the package. +It may also create one or more `.h' files containing system-dependent +definitions. Finally, it creates a shell script `config.status' that +you can run in the future to recreate the current configuration, and a +file `config.log' containing compiler output (useful mainly for +debugging `configure'). + + It can also use an optional file (typically called `config.cache' +and enabled with `--cache-file=config.cache' or simply `-C') that saves +the results of its tests to speed up reconfiguring. (Caching is +disabled by default to prevent problems with accidental use of stale +cache files.) 
+ + If you need to do unusual things to compile the package, please try +to figure out how `configure' could check whether to do them, and mail +diffs or instructions to the address given in the `README' so they can +be considered for the next release. If you are using the cache, and at +some point `config.cache' contains results you don't want to keep, you +may remove or edit it. + + The file `configure.ac' (or `configure.in') is used to create +`configure' by a program called `autoconf'. You only need +`configure.ac' if you want to change it or regenerate `configure' using +a newer version of `autoconf'. + +The simplest way to compile this package is: + + 1. `cd' to the directory containing the package's source code and type + `./configure' to configure the package for your system. If you're + using `csh' on an old version of System V, you might need to type + `sh ./configure' instead to prevent `csh' from trying to execute + `configure' itself. + + Running `configure' takes awhile. While running, it prints some + messages telling which features it is checking for. + + 2. Type `make' to compile the package. + + 3. Optionally, type `make check' to run any self-tests that come with + the package. + + 4. Type `make install' to install the programs and any data files and + documentation. + + 5. You can remove the program binaries and object files from the + source code directory by typing `make clean'. To also remove the + files that `configure' created (so you can compile the package for + a different kind of computer), type `make distclean'. There is + also a `make maintainer-clean' target, but that is intended mainly + for the package's developers. If you use it, you may have to get + all sorts of other programs in order to regenerate files that came + with the distribution. + +Compilers and Options +===================== + +Some systems require unusual options for compilation or linking that the +`configure' script does not know about. 
Run `./configure --help' for +details on some of the pertinent environment variables. + + You can give `configure' initial values for configuration parameters +by setting variables in the command line or in the environment. Here +is an example: + + ./configure CC=c89 CFLAGS=-O2 LIBS=-lposix + + *Note Defining Variables::, for more details. + +Compiling For Multiple Architectures +==================================== + +You can compile the package for more than one kind of computer at the +same time, by placing the object files for each architecture in their +own directory. To do this, you must use a version of `make' that +supports the `VPATH' variable, such as GNU `make'. `cd' to the +directory where you want the object files and executables to go and run +the `configure' script. `configure' automatically checks for the +source code in the directory that `configure' is in and in `..'. + + If you have to use a `make' that does not support the `VPATH' +variable, you have to compile the package for one architecture at a +time in the source code directory. After you have installed the +package for one architecture, use `make distclean' before reconfiguring +for another architecture. + +Installation Names +================== + +By default, `make install' will install the package's files in +`/usr/local/bin', `/usr/local/man', etc. You can specify an +installation prefix other than `/usr/local' by giving `configure' the +option `--prefix=PREFIX'. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. If you +give `configure' the option `--exec-prefix=PREFIX', the package will +use PREFIX as the prefix for installing programs and libraries. +Documentation and other data files will still use the regular prefix. + + In addition, if you use an unusual directory layout you can give +options like `--bindir=DIR' to specify different values for particular +kinds of files. 
Run `configure --help' for a list of the directories +you can set and what kinds of files go in them. + + If the package supports it, you can cause programs to be installed +with an extra prefix or suffix on their names by giving `configure' the +option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. + +Optional Features +================= + +Some packages pay attention to `--enable-FEATURE' options to +`configure', where FEATURE indicates an optional part of the package. +They may also pay attention to `--with-PACKAGE' options, where PACKAGE +is something like `gnu-as' or `x' (for the X Window System). The +`README' should mention any `--enable-' and `--with-' options that the +package recognizes. + + For packages that use the X Window System, `configure' can usually +find the X include and library files automatically, but if it doesn't, +you can use the `configure' options `--x-includes=DIR' and +`--x-libraries=DIR' to specify their locations. + +Specifying the System Type +========================== + +There may be some features `configure' cannot figure out automatically, +but needs to determine by the type of machine the package will run on. +Usually, assuming the package is built to be run on the _same_ +architectures, `configure' can figure that out, but if it prints a +message saying it cannot guess the machine type, give it the +`--build=TYPE' option. TYPE can either be a short name for the system +type, such as `sun4', or a canonical name which has the form: + + CPU-COMPANY-SYSTEM + +where SYSTEM can have one of these forms: + + OS KERNEL-OS + + See the file `config.sub' for the possible values of each field. If +`config.sub' isn't included in this package, then this package doesn't +need to know the machine type. + + If you are _building_ compiler tools for cross-compiling, you should +use the `--target=TYPE' option to select the type of system they will +produce code for. 
+ + If you want to _use_ a cross compiler, that generates code for a +platform different from the build platform, you should specify the +"host" platform (i.e., that on which the generated programs will +eventually be run) with `--host=TYPE'. + +Sharing Defaults +================ + +If you want to set default values for `configure' scripts to share, you +can create a site shell script called `config.site' that gives default +values for variables like `CC', `cache_file', and `prefix'. +`configure' looks for `PREFIX/share/config.site' if it exists, then +`PREFIX/etc/config.site' if it exists. Or, you can set the +`CONFIG_SITE' environment variable to the location of the site script. +A warning: not all `configure' scripts look for a site script. + +Defining Variables +================== + +Variables not defined in a site shell script can be set in the +environment passed to `configure'. However, some packages may run +configure again during the build, and the customized values of these +variables may be lost. In order to avoid this problem, you should set +them in the `configure' command line, using `VAR=value'. For example: + + ./configure CC=/usr/local2/bin/gcc + +will cause the specified gcc to be used as the C compiler (unless it is +overridden in the site shell script). + +`configure' Invocation +====================== + +`configure' recognizes the following options to control how it operates. + +`--help' +`-h' + Print a summary of the options to `configure', and exit. + +`--version' +`-V' + Print the version of Autoconf used to generate the `configure' + script, and exit. + +`--cache-file=FILE' + Enable the cache: use and save the results of the tests in FILE, + traditionally `config.cache'. FILE defaults to `/dev/null' to + disable caching. + +`--config-cache' +`-C' + Alias for `--cache-file=config.cache'. + +`--quiet' +`--silent' +`-q' + Do not print messages saying which checks are being made. 
To + suppress all normal output, redirect it to `/dev/null' (any error + messages will still be shown). + +`--srcdir=DIR' + Look for the package's source code in directory DIR. Usually + `configure' can determine that directory automatically. + +`configure' also accepts some other, not widely useful, options. Run +`configure --help' for more details. + diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..0bd48b4 --- /dev/null +++ b/Makefile.am @@ -0,0 +1,27 @@ + +# note that order matters: make the libs first then use them +SUBDIRS = complib libopensm libvendor opensm osmtest include $(DEFAULT_EVENT_PLUGIN) +DIST_SUBDIRS = complib libopensm libvendor opensm osmtest include osmeventplugin osmroutingplugin + +ACLOCAL_AMFLAGS = -I config + +# we should provide a hint for other apps about the build mode of this project +install-exec-hook: + $(top_srcdir)/config/install-sh -m 755 -d $(DESTDIR)/$(sysconfdir)/init.d + cp $(top_builddir)/scripts/opensm.init $(DESTDIR)/$(sysconfdir)/init.d/opensmd + chmod 755 $(DESTDIR)/$(sysconfdir)/init.d/opensmd + + +man_MANS = man/opensm.8 man/osmtest.8 man/torus-2QoS.8 man/torus-2QoS.conf.5 + +various_scripts = $(wildcard scripts/*) +docs = doc/performance-manager-HOWTO.txt doc/QoS_management_in_OpenSM.txt \ + doc/partition-config.txt doc/opensm-sriov.txt \ + doc/current-routing.txt doc/opensm_release_notes-3.3.txt + +EXTRA_DIST = autogen.sh opensm.spec $(various_scripts) $(man_MANS) $(docs) + +dist-hook: $(EXTRA_DIST) + if [ -x $(top_srcdir)/gen_chlog.sh ] ; then \ + cd $(top_srcdir); ./gen_chlog.sh > $(distdir)/ChangeLog ; cd - ; \ + fi diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..8887552 --- /dev/null +++ b/NEWS @@ -0,0 +1,2 @@ + +This file will hold news about the OpenSM project. 
diff --git a/README b/README
new file mode 100644
index 0000000..40556ba
--- /dev/null
+++ b/README
@@ -0,0 +1,25 @@
+OpenSM README:
+--------------
+
+OpenSM provides an implementation for an InfiniBand Subnet Manager and
+Administrator. Such a software entity is required to run for in order
+to initialize the InfiniBand hardware (at least one per each
+InfiniBand subnet).
+
+The full list of OpenSM features is described in the user manual
+provided in the doc sub directory.
+
+The installation of OpenSM includes:
+
+sbin/
+ opensm - the SM/SA executable
+ osmtest - a test program for the SM/SA
+lib/
+ libosmcomp.{a,so} - component library with generic services and containers
+ libopensm.{a,so} - opensm services for logs and mad buffer pool
+ libosmvendor.{a,so} - interface to the user mad service of the driver
+include/
+ iba/ib_types.h - IBA types header file
+ complib/ - component library includes
+ vendor/ - vendor library includes
+ opensm/ - public opensm library includes
diff --git a/autogen.sh b/autogen.sh
new file mode 100755
index 0000000..fee8800
--- /dev/null
+++ b/autogen.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+#
+# Regenerate the autotools build files (aclocal, libtoolize, autoheader,
+# automake, autoconf) after checking that the installed tools are recent
+# enough.  Usage: ./autogen.sh
+
+# Print a message on stderr and abort.
+die() {
+	printf '%s\n' "$*" >&2
+	exit 1
+}
+
+# version_ge HAVE NEED: succeed when dotted version HAVE is >= NEED.
+# Only as many components as NEED has are compared; each component of HAVE
+# is reduced to its leading digits (missing or non-numeric counts as 0).
+version_ge() {
+	local -a have need
+	local i h
+	IFS=. read -r -a have <<< "$1"
+	IFS=. read -r -a need <<< "$2"
+	for ((i = 0; i < ${#need[@]}; i++)); do
+		h=${have[i]-}
+		h=${h%%[!0-9]*}
+		# 10# forces base 10 so that "08" is not parsed as octal
+		((10#${h:-0} > 10#${need[i]})) && return 0
+		((10#${h:-0} < 10#${need[i]})) && return 1
+	done
+	return 0
+}
+
+# require_version TOOL FIELD MIN: abort unless TOOL is installed and reports
+# at least version MIN.  The version is word number FIELD (0 = last word) of
+# the first line printed by "TOOL --version".
+require_version() {
+	local tool=$1 field=$2 min=$3 ver
+	command -v "$tool" > /dev/null || die "$tool not found (required >= $min)"
+	ver=$("$tool" --version |
+		awk -v n="$field" 'NR == 1 { print (n > 0 ? $n : $NF) }')
+	[[ -n $ver ]] || die "cannot determine the version of $tool"
+	version_ge "$ver" "$min" ||
+		die "$tool version is too old: $ver < required $min"
+}
+
+# We change dir since the later utilities assume to work in the project dir.
+# Abort if that fails: the cleanup below deletes files recursively from ".".
+cd -- "$(dirname -- "$0")" || die "cannot change to the directory of $0"
+
+# make sure autoconf, automake and libtool are up-to-date
+require_version autoconf 0 2.57
+require_version automake 0 1.6.3
+require_version libtool 4 1.4.2
+
+# cleanup: remove previously generated files; -prune keeps find from
+# descending into a directory that is about to be removed
+find . \( -name Makefile.in -o -name aclocal.m4 -o -name autom4te.cache \
+	-o -name configure \) -prune -exec rm -rf -- {} +
+
+aclocal -I config &&
+	libtoolize --force --copy &&
+	autoheader &&
+	automake --foreign --add-missing --copy &&
+	autoconf
diff --git a/complib/ChangeLog b/complib/ChangeLog
new file mode 100644
index 0000000..2b13147
--- /dev/null
+++ b/complib/ChangeLog
@@ -0,0 +1,96 @@
+2007-07=11 Hal Rosenstock
+
+	* configure.in: to version 2.2.1
+
+2007-06-25 Hal Rosenstock
+
+	* cl_event_wheel.c: Fix some typos in printfs when
+	__CL_EVENT_WHEEL_TEST__ defined
+
+2007-06-20 Hal Rosenstock
+
+	* libosmcomp.map: Add get_next map functions as global
+
+2007-06-20 Todd Rimmer
+
+	* include/complib/cl_map.h, include/complib/cl_qmap.h,
+	include/complib/cl_fleximap.h, cl_map.c:
+	Add get_next functions to the various maps
+
+	* include/complib/cl_fleximap.h: In cl_fmap_remove_all, make
+	sure the count field is properly maintained.
+
+2007-06-19 Todd Rimmer
+
+	* include/complib/cl_qmap.h: In cl_qmap_remove_all, make
+	sure the count field is properly maintained.
+ +2007-06-19 Hal Rosenstock + + * include/complib/cl_threadpool.h: Eliminate compile warning + with cl_threadpool.c introduced by previous change + +2007-06-13 Sasha Khapyorsky + + * include/complib/cl_threadpool.h, complib/cl_threadpool.c, + complib/cl_dispatcher.c, complib/libosmcomp.map: Thread + pool rework + +2007-06-13 Hal Rosenstock + + * configure.in: Bump to version 2.2.0 + + * libosmcomp.ver, libosmcomp.map: Update version info for + previous API removals + + * include/complib/cl_memory.h, include/complib/cl_memtrack.h, + complib/cl_memory.c, complib/cl_memtrack.c, include/Makefile.am: + Remove deprecated memory allocation related routines + +2007-06-13 Yevgeny Kliteynik + + * include/complib/cl_perf.h, include/complib/cl_async_proc.h, + complib/cl_perf.c, complib/cl_async_proc.c, Makefile.am, + libosmcomp.map: Remove unused cl_perf and cl_async_proc + +2007-05-09 Hal Rosenstock + + * configure.in: Bump to version 2.1.2 + +2007-03-29 Hal Rosenstock + + * configure.in: Bump to version 2.1.1 + +2007-01-08 Sasha Khapyorsky + + * cl_log.c: SIGUSR1 fixes + +2007-01-08 Ira Weiny + + * cl_log.c: Add SIGUSR1 handling to reopen osm.log + +2006-10-31 Hal Rosenstock + + * configure.in: Bumped to version version 2.1.0 + +2006-09-05 Sasha Khapyorsky + + * cl_event_wheel.c: Changes to support new osm_log + initializer osm_log_init_v2() + +2006-08-29 Sasha Khapyorsky + + * cl_event_wheel.c: Support option to limit size of OpenSM + log file + +2006-07-20 Sasha Khapyorsky + + * cl_pool.c: Fix memory corruption in cl_qcpool_init + +2006-07-19 Hal Rosenstock + + * Makefile.am: Eliminate deprecated warnings + +2006-06-11 Hal Rosenstock + + * configure.in: Released version 1.2.1 (OFED 1.1) diff --git a/complib/Makefile.am b/complib/Makefile.am new file mode 100644 index 0000000..21e496a --- /dev/null +++ b/complib/Makefile.am @@ -0,0 +1,77 @@ + +AM_CPPFLAGS = -I$(srcdir)/../include + +lib_LTLIBRARIES = libosmcomp.la + +if DEBUG +DBGFLAGS = -ggdb -D_DEBUG_ +else +DBGFLAGS = 
-g +endif + +libosmcomp_la_CFLAGS = -Wall -Wwrite-strings $(DBGFLAGS) -D_XOPEN_SOURCE=600 -D_GNU_SOURCE=1 + +if HAVE_LD_VERSION_SCRIPT + libosmcomp_version_script = -Wl,--version-script=$(srcdir)/libosmcomp.map +else + libosmcomp_version_script = +endif + +complib_api_version=$(shell grep LIBVERSION= $(srcdir)/libosmcomp.ver | sed 's/LIBVERSION=//') + +libosmcomp_la_SOURCES = cl_complib.c cl_dispatcher.c \ + cl_event.c cl_event_wheel.c \ + cl_list.c cl_log.c cl_map.c \ + cl_pool.c cl_ptr_vector.c \ + cl_spinlock.c cl_statustext.c \ + cl_thread.c cl_threadpool.c \ + cl_timer.c cl_vector.c \ + cl_heap.c ib_statustext.c \ + cl_nodenamemap.c + +libosmcomp_la_LDFLAGS = -version-info $(complib_api_version) \ + -export-dynamic $(libosmcomp_version_script) +libosmcomp_la_DEPENDENCIES = $(srcdir)/libosmcomp.map + +libosmcompincludedir = $(includedir)/infiniband/complib + +libosmcompinclude_HEADERS = $(srcdir)/../include/complib/cl_atomic.h \ + $(srcdir)/../include/complib/cl_atomic_osd.h \ + $(srcdir)/../include/complib/cl_byteswap.h \ + $(srcdir)/../include/complib/cl_byteswap_osd.h \ + $(srcdir)/../include/complib/cl_comppool.h \ + $(srcdir)/../include/complib/cl_debug.h \ + $(srcdir)/../include/complib/cl_debug_osd.h \ + $(srcdir)/../include/complib/cl_dispatcher.h \ + $(srcdir)/../include/complib/cl_event.h \ + $(srcdir)/../include/complib/cl_event_wheel.h \ + $(srcdir)/../include/complib/cl_event_osd.h \ + $(srcdir)/../include/complib/cl_fleximap.h \ + $(srcdir)/../include/complib/cl_list.h \ + $(srcdir)/../include/complib/cl_log.h \ + $(srcdir)/../include/complib/cl_map.h \ + $(srcdir)/../include/complib/cl_math.h \ + $(srcdir)/../include/complib/cl_nodenamemap.h \ + $(srcdir)/../include/complib/cl_packoff.h \ + $(srcdir)/../include/complib/cl_packon.h \ + $(srcdir)/../include/complib/cl_passivelock.h \ + $(srcdir)/../include/complib/cl_pool.h \ + $(srcdir)/../include/complib/cl_ptr_vector.h \ + $(srcdir)/../include/complib/cl_qcomppool.h \ + 
$(srcdir)/../include/complib/cl_qlist.h \ + $(srcdir)/../include/complib/cl_qmap.h \ + $(srcdir)/../include/complib/cl_qpool.h \ + $(srcdir)/../include/complib/cl_spinlock.h \ + $(srcdir)/../include/complib/cl_spinlock_osd.h \ + $(srcdir)/../include/complib/cl_thread.h \ + $(srcdir)/../include/complib/cl_thread_osd.h \ + $(srcdir)/../include/complib/cl_threadpool.h \ + $(srcdir)/../include/complib/cl_timer.h \ + $(srcdir)/../include/complib/cl_timer_osd.h \ + $(srcdir)/../include/complib/cl_types.h \ + $(srcdir)/../include/complib/cl_types_osd.h \ + $(srcdir)/../include/complib/cl_vector.h \ + $(srcdir)/../include/complib/cl_heap.h + +# headers are distributed as part of the include dir +EXTRA_DIST = $(srcdir)/libosmcomp.map $(srcdir)/libosmcomp.ver diff --git a/complib/cl_complib.c b/complib/cl_complib.c new file mode 100644 index 0000000..cfea01e --- /dev/null +++ b/complib/cl_complib.c @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#if HAVE_CONFIG_H
+# include
+#endif /* HAVE_CONFIG_H */
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+/*
+ * Prototypes of the timer provider hooks used by init/exit below
+ */
+
+extern cl_status_t __cl_timer_prov_create(void);
+
+extern void __cl_timer_prov_destroy(void);
+
+cl_spinlock_t cl_atomic_spinlock;
+
+void complib_init(void)	/* legacy entry point: exits the process on failure */
+{
+	cl_status_t status = CL_SUCCESS;
+
+	status = cl_spinlock_init(&cl_atomic_spinlock);
+	if (status != CL_SUCCESS)
+		goto _error;
+
+	status = __cl_timer_prov_create();
+	if (status != CL_SUCCESS)
+		goto _error;
+	return;
+
+_error:
+	cl_msg_out("__init: failed to create complib (%s)\n",
+		   CL_STATUS_MSG(status));
+	exit(1);
+}
+
+cl_status_t complib_init_v2(void)	/* like complib_init, but returns the status */
+{
+	cl_status_t status = CL_SUCCESS;
+
+	status = cl_spinlock_init(&cl_atomic_spinlock);
+	if (status != CL_SUCCESS)
+		goto _error;
+
+	status = __cl_timer_prov_create();
+	if (status == CL_SUCCESS)
+		return status;
+	cl_spinlock_destroy(&cl_atomic_spinlock);	/* undo the partial init */
+
+_error:
+	cl_msg_out("__init_v2: failed to create complib (%s)\n",
+		   CL_STATUS_MSG(status));
+	return status;
+}
+
+void complib_exit(void)
+{
+	/* tear down in the reverse order of complib_init */
+	__cl_timer_prov_destroy();
+	cl_spinlock_destroy(&cl_atomic_spinlock);
+}
+
+boolean_t cl_is_debug(void)
+{
+#if defined( _DEBUG_ )
+	return TRUE;
+#else
+	return FALSE;
+#endif /* defined( _DEBUG_ ) */
+}
diff --git a/complib/cl_dispatcher.c b/complib/cl_dispatcher.c
new file mode 100644
index 0000000..5e8c85a
--- /dev/null
+++ b/complib/cl_dispatcher.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 2004-2006 Voltaire, Inc.
All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of Dispatcher abstraction. 
+ * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include + +/* give some guidance when we build our cl_pool of messages */ +#define CL_DISP_INITIAL_MSG_COUNT 256 +#define CL_DISP_MSG_GROW_SIZE 64 + +/* give some guidance when we build our cl_pool of registration elements */ +#define CL_DISP_INITIAL_REG_COUNT 16 +#define CL_DISP_REG_GROW_SIZE 16 + +/******************************************************************** + __cl_disp_worker + + Description: + This function takes messages off the FIFO and calls Processmsg() + This function executes as passive level. + + Inputs: + p_disp - Pointer to Dispatcher object + + Outputs: + None + + Returns: + None +********************************************************************/ +void __cl_disp_worker(IN void *context) +{ + cl_disp_msg_t *p_msg; + cl_dispatcher_t *p_disp = (cl_dispatcher_t *) context; + + cl_spinlock_acquire(&p_disp->lock); + + /* Process the FIFO until we drain it dry. */ + while (cl_qlist_count(&p_disp->msg_fifo)) { + /* Pop the message at the head from the FIFO. */ + p_msg = + (cl_disp_msg_t *) cl_qlist_remove_head(&p_disp->msg_fifo); + + /* we track the time the last message spent in the queue */ + p_disp->last_msg_queue_time_us = + cl_get_time_stamp() - p_msg->in_time; + + /* + * Release the spinlock while the message is processed. + * The user's callback may reenter the dispatcher + * and cause the lock to be reaquired. + */ + cl_spinlock_release(&p_disp->lock); + p_msg->p_dest_reg->pfn_rcv_callback((void *)p_msg->p_dest_reg-> + context, + (void *)p_msg->p_data); + + cl_atomic_dec(&p_msg->p_dest_reg->ref_cnt); + + /* The client has seen the data. Notify the sender as appropriate. */ + if (p_msg->pfn_xmt_callback) { + p_msg->pfn_xmt_callback((void *)p_msg->context, + (void *)p_msg->p_data); + cl_atomic_dec(&p_msg->p_src_reg->ref_cnt); + } + + /* Grab the lock for the next iteration through the list. 
*/ + cl_spinlock_acquire(&p_disp->lock); + + /* Return this message to the pool. */ + cl_qpool_put(&p_disp->msg_pool, (cl_pool_item_t *) p_msg); + } + + cl_spinlock_release(&p_disp->lock); +} + +void cl_disp_construct(IN cl_dispatcher_t * const p_disp) +{ + CL_ASSERT(p_disp); + + cl_qlist_init(&p_disp->reg_list); + cl_ptr_vector_construct(&p_disp->reg_vec); + cl_qlist_init(&p_disp->msg_fifo); + cl_spinlock_construct(&p_disp->lock); + cl_qpool_construct(&p_disp->msg_pool); +} + +void cl_disp_shutdown(IN cl_dispatcher_t * const p_disp) +{ + CL_ASSERT(p_disp); + + /* Stop the thread pool. */ + cl_thread_pool_destroy(&p_disp->worker_threads); + + /* Process all outstanding callbacks. */ + __cl_disp_worker(p_disp); + + /* Free all registration info. */ + while (!cl_is_qlist_empty(&p_disp->reg_list)) + free(cl_qlist_remove_head(&p_disp->reg_list)); +} + +void cl_disp_destroy(IN cl_dispatcher_t * const p_disp) +{ + CL_ASSERT(p_disp); + + cl_spinlock_destroy(&p_disp->lock); + /* Destroy the message pool */ + cl_qpool_destroy(&p_disp->msg_pool); + /* Destroy the pointer vector of registrants. 
*/ + cl_ptr_vector_destroy(&p_disp->reg_vec); +} + +cl_status_t cl_disp_init(IN cl_dispatcher_t * const p_disp, + IN const uint32_t thread_count, + IN const char *const name) +{ + cl_status_t status; + + CL_ASSERT(p_disp); + + cl_disp_construct(p_disp); + + status = cl_spinlock_init(&p_disp->lock); + if (status != CL_SUCCESS) { + cl_disp_destroy(p_disp); + return (status); + } + + /* Specify no upper limit to the number of messages in the pool */ + status = cl_qpool_init(&p_disp->msg_pool, CL_DISP_INITIAL_MSG_COUNT, + 0, CL_DISP_MSG_GROW_SIZE, sizeof(cl_disp_msg_t), + NULL, NULL, NULL); + if (status != CL_SUCCESS) { + cl_disp_destroy(p_disp); + return (status); + } + + status = cl_ptr_vector_init(&p_disp->reg_vec, CL_DISP_INITIAL_REG_COUNT, + CL_DISP_REG_GROW_SIZE); + if (status != CL_SUCCESS) { + cl_disp_destroy(p_disp); + return (status); + } + + status = cl_thread_pool_init(&p_disp->worker_threads, thread_count, + __cl_disp_worker, p_disp, name); + if (status != CL_SUCCESS) + cl_disp_destroy(p_disp); + + return (status); +} + +cl_disp_reg_handle_t cl_disp_register(IN cl_dispatcher_t * const p_disp, + IN const cl_disp_msgid_t msg_id, + IN cl_pfn_msgrcv_cb_t pfn_callback + OPTIONAL, + IN const void *const context OPTIONAL) +{ + cl_disp_reg_info_t *p_reg; + cl_status_t status; + + CL_ASSERT(p_disp); + + /* Check that the requested registrant ID is available. */ + cl_spinlock_acquire(&p_disp->lock); + if ((msg_id != CL_DISP_MSGID_NONE) && + (msg_id < cl_ptr_vector_get_size(&p_disp->reg_vec)) && + (cl_ptr_vector_get(&p_disp->reg_vec, msg_id))) { + cl_spinlock_release(&p_disp->lock); + return (NULL); + } + + /* Get a registration info from the pool. 
*/ + p_reg = (cl_disp_reg_info_t *) malloc(sizeof(cl_disp_reg_info_t)); + if (!p_reg) { + cl_spinlock_release(&p_disp->lock); + return (NULL); + } else { + memset(p_reg, 0, sizeof(cl_disp_reg_info_t)); + } + + p_reg->p_disp = p_disp; + p_reg->ref_cnt = 0; + p_reg->pfn_rcv_callback = pfn_callback; + p_reg->context = context; + p_reg->msg_id = msg_id; + + /* Insert the registration in the list. */ + cl_qlist_insert_tail(&p_disp->reg_list, (cl_list_item_t *) p_reg); + + /* Set the array entry to the registrant. */ + /* The ptr_vector grow automatically as necessary. */ + if (msg_id != CL_DISP_MSGID_NONE) { + status = cl_ptr_vector_set(&p_disp->reg_vec, msg_id, p_reg); + if (status != CL_SUCCESS) { + /* + * The registration is already linked into reg_list; + * unlink it before freeing so the list is not left + * holding a pointer to freed memory. + */ + cl_qlist_remove_item(&p_disp->reg_list, + (cl_list_item_t *) p_reg); + free(p_reg); + cl_spinlock_release(&p_disp->lock); + return (NULL); + } + } + + cl_spinlock_release(&p_disp->lock); + + return (p_reg); +} + +void cl_disp_unregister(IN const cl_disp_reg_handle_t handle) +{ + cl_disp_reg_info_t *p_reg; + cl_dispatcher_t *p_disp; + + if (handle == CL_DISP_INVALID_HANDLE) + return; + + p_reg = (cl_disp_reg_info_t *) handle; + p_disp = p_reg->p_disp; + CL_ASSERT(p_disp); + + cl_spinlock_acquire(&p_disp->lock); + /* + * Clear the registrant vector entry. This will cause any further + * post calls to fail. + */ + if (p_reg->msg_id != CL_DISP_MSGID_NONE) { + CL_ASSERT(p_reg->msg_id < + cl_ptr_vector_get_size(&p_disp->reg_vec)); + cl_ptr_vector_set(&p_disp->reg_vec, p_reg->msg_id, NULL); + } + cl_spinlock_release(&p_disp->lock); + + while (p_reg->ref_cnt > 0) + cl_thread_suspend(1); + + cl_spinlock_acquire(&p_disp->lock); + /* Remove the registrant from the list.
*/ + cl_qlist_remove_item(&p_disp->reg_list, (cl_list_item_t *) p_reg); + free(p_reg); + + cl_spinlock_release(&p_disp->lock); +} + +cl_status_t cl_disp_post(IN const cl_disp_reg_handle_t handle, + IN const cl_disp_msgid_t msg_id, + IN const void *const p_data, + IN cl_pfn_msgdone_cb_t pfn_callback OPTIONAL, + IN const void *const context OPTIONAL) +{ + cl_disp_reg_info_t *p_src_reg = (cl_disp_reg_info_t *) handle; + cl_disp_reg_info_t *p_dest_reg; + cl_dispatcher_t *p_disp; + cl_disp_msg_t *p_msg; + + p_disp = handle->p_disp; + CL_ASSERT(p_disp); + CL_ASSERT(msg_id != CL_DISP_MSGID_NONE); + + cl_spinlock_acquire(&p_disp->lock); + /* Check that the recipient exists. */ + if (cl_ptr_vector_get_size(&p_disp->reg_vec) <= msg_id) { + cl_spinlock_release(&p_disp->lock); + return (CL_NOT_FOUND); + } + + p_dest_reg = cl_ptr_vector_get(&p_disp->reg_vec, msg_id); + if (!p_dest_reg) { + cl_spinlock_release(&p_disp->lock); + return (CL_NOT_FOUND); + } + + /* Get a free message from the pool. */ + p_msg = (cl_disp_msg_t *) cl_qpool_get(&p_disp->msg_pool); + if (!p_msg) { + cl_spinlock_release(&p_disp->lock); + return (CL_INSUFFICIENT_MEMORY); + } + + /* Initialize the message */ + p_msg->p_src_reg = p_src_reg; + p_msg->p_dest_reg = p_dest_reg; + p_msg->p_data = p_data; + p_msg->pfn_xmt_callback = pfn_callback; + p_msg->context = context; + p_msg->in_time = cl_get_time_stamp(); + + /* + * Increment the sender's reference count if they request a completion + * notification. + */ + if (pfn_callback) + cl_atomic_inc(&p_src_reg->ref_cnt); + + /* Increment the recipient's reference count. */ + cl_atomic_inc(&p_dest_reg->ref_cnt); + + /* Queue the message in the FIFO. */ + cl_qlist_insert_tail(&p_disp->msg_fifo, (cl_list_item_t *) p_msg); + cl_spinlock_release(&p_disp->lock); + + /* Signal the thread pool that there is work to be done. 
*/ + cl_thread_pool_signal(&p_disp->worker_threads); + return (CL_SUCCESS); +} + +void cl_disp_get_queue_status(IN const cl_disp_reg_handle_t handle, + OUT uint32_t * p_num_queued_msgs, + OUT uint64_t * p_last_msg_queue_time_ms) +{ + cl_dispatcher_t *p_disp = ((cl_disp_reg_info_t *) handle)->p_disp; + + cl_spinlock_acquire(&p_disp->lock); + + if (p_last_msg_queue_time_ms) + *p_last_msg_queue_time_ms = + p_disp->last_msg_queue_time_us / 1000; + + if (p_num_queued_msgs) + *p_num_queued_msgs = cl_qlist_count(&p_disp->msg_fifo); + + cl_spinlock_release(&p_disp->lock); +} diff --git a/complib/cl_event.c b/complib/cl_event.c new file mode 100644 index 0000000..27b4ae5 --- /dev/null +++ b/complib/cl_event.c @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include + +void cl_event_construct(IN cl_event_t * p_event) +{ + CL_ASSERT(p_event); + + p_event->state = CL_UNINITIALIZED; +} + +/* + * Initialize an event for use. Returns CL_SUCCESS, or CL_ERROR when the + * underlying condition variable or mutex cannot be created. + */ +cl_status_t cl_event_init(IN cl_event_t * const p_event, + IN const boolean_t manual_reset) +{ + CL_ASSERT(p_event); + + cl_event_construct(p_event); + + /* + * Check both pthread initializers: on failure the event stays + * CL_UNINITIALIZED (set by cl_event_construct) so that + * cl_event_destroy() will not touch half-built objects. + */ + if (pthread_cond_init(&p_event->condvar, NULL)) + return CL_ERROR; + if (pthread_mutex_init(&p_event->mutex, NULL)) { + pthread_cond_destroy(&p_event->condvar); + return CL_ERROR; + } + p_event->signaled = FALSE; + p_event->manual_reset = manual_reset; + p_event->state = CL_INITIALIZED; + + return CL_SUCCESS; +} + +void cl_event_destroy(IN cl_event_t * const p_event) +{ + CL_ASSERT(cl_is_state_valid(p_event->state)); + + /* Destroy only if the event was constructed */ + if (p_event->state == CL_INITIALIZED) { + pthread_cond_broadcast(&p_event->condvar); + pthread_cond_destroy(&p_event->condvar); + pthread_mutex_destroy(&p_event->mutex); + } + + p_event->state = CL_UNINITIALIZED; +} + +cl_status_t cl_event_signal(IN cl_event_t * const p_event) +{ + /* Make sure that the event was started */ + CL_ASSERT(p_event->state == CL_INITIALIZED); + + pthread_mutex_lock(&p_event->mutex); + p_event->signaled = TRUE; + /* Wake up one or all depending on whether the event is auto-resetting.
*/ + if (p_event->manual_reset) + pthread_cond_broadcast(&p_event->condvar); + else + pthread_cond_signal(&p_event->condvar); + + pthread_mutex_unlock(&p_event->mutex); + + return CL_SUCCESS; +} + +cl_status_t cl_event_reset(IN cl_event_t * const p_event) +{ + /* Make sure that the event was started */ + CL_ASSERT(p_event->state == CL_INITIALIZED); + + pthread_mutex_lock(&p_event->mutex); + p_event->signaled = FALSE; + pthread_mutex_unlock(&p_event->mutex); + + return CL_SUCCESS; +} + +cl_status_t cl_event_wait_on(IN cl_event_t * const p_event, + IN const uint32_t wait_us, + IN const boolean_t interruptible) +{ + cl_status_t status; + int wait_ret; + struct timespec timeout; + struct timeval curtime; + + /* Make sure that the event was Started */ + CL_ASSERT(p_event->state == CL_INITIALIZED); + + pthread_mutex_lock(&p_event->mutex); + + /* Return immediately if the event is signalled. */ + if (p_event->signaled) { + if (!p_event->manual_reset) + p_event->signaled = FALSE; + + pthread_mutex_unlock(&p_event->mutex); + return CL_SUCCESS; + } + + /* If just testing the state, return CL_TIMEOUT. */ + if (wait_us == 0) { + pthread_mutex_unlock(&p_event->mutex); + return CL_TIMEOUT; + } + + if (wait_us == EVENT_NO_TIMEOUT) { + /* Wait for condition variable to be signaled or broadcast. */ + if (pthread_cond_wait(&p_event->condvar, &p_event->mutex)) + status = CL_NOT_DONE; + else + status = CL_SUCCESS; + } else { + /* Get the current time */ + if (gettimeofday(&curtime, NULL) == 0) { + unsigned long n_sec = + (curtime.tv_usec + (wait_us % 1000000)) * 1000; + timeout.tv_sec = curtime.tv_sec + (wait_us / 1000000) + + (n_sec / 1000000000); + timeout.tv_nsec = n_sec % 1000000000; + + wait_ret = pthread_cond_timedwait(&p_event->condvar, + &p_event->mutex, + &timeout); + if (wait_ret == 0) + status = + (p_event-> + signaled ? 
CL_SUCCESS : CL_NOT_DONE); + else if (wait_ret == ETIMEDOUT) + status = CL_TIMEOUT; + else + status = CL_NOT_DONE; + } else + status = CL_ERROR; + } + if (!p_event->manual_reset) + p_event->signaled = FALSE; + + pthread_mutex_unlock(&p_event->mutex); + return status; +} diff --git a/complib/cl_event_wheel.c b/complib/cl_event_wheel.c new file mode 100644 index 0000000..8950410 --- /dev/null +++ b/complib/cl_event_wheel.c @@ -0,0 +1,578 @@ +/* + * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#ifdef __CL_EVENT_WHEEL_TEST__ +#include /* sleep() */ +#endif +#include +#include + +#define CL_DBG(fmt, ...) + +static cl_status_t __event_will_age_before(IN const cl_list_item_t * + const p_list_item, IN void *context) +{ + uint64_t aging_time = *((uint64_t *) context); + cl_event_wheel_reg_info_t *p_event; + + p_event = + PARENT_STRUCT(p_list_item, cl_event_wheel_reg_info_t, list_item); + + if (p_event->aging_time < aging_time) + return CL_SUCCESS; + else + return CL_NOT_FOUND; +} + +static void __cl_event_wheel_callback(IN void *context) +{ + cl_event_wheel_t *p_event_wheel = (cl_event_wheel_t *) context; + cl_list_item_t *p_list_item, *p_prev_event_list_item; + cl_list_item_t *p_list_next_item; + cl_event_wheel_reg_info_t *p_event; + uint64_t current_time; + uint64_t next_aging_time; + uint32_t new_timeout; + cl_status_t cl_status; + + /* might be during closing ... */ + if (p_event_wheel->closing) + return; + + current_time = cl_get_time_stamp(); + + if (NULL != p_event_wheel->p_external_lock) + + /* Take care of the order of acquiring locks to avoid the deadlock! + * The external lock goes first. + */ + cl_spinlock_acquire(p_event_wheel->p_external_lock); + + cl_spinlock_acquire(&p_event_wheel->lock); + + p_list_item = cl_qlist_head(&p_event_wheel->events_wheel); + if (p_list_item == cl_qlist_end(&p_event_wheel->events_wheel)) + /* the list is empty - nothing to do */ + goto Exit; + + /* we found such an item. 
get the p_event */ + p_event = + PARENT_STRUCT(p_list_item, cl_event_wheel_reg_info_t, list_item); + + while (p_event->aging_time <= current_time) { + /* this object has aged - invoke it's callback */ + if (p_event->pfn_aged_callback) + next_aging_time = + p_event->pfn_aged_callback(p_event->key, + p_event->num_regs, + p_event->context); + else + next_aging_time = 0; + + /* point to the next object in the wheel */ + p_list_next_item = cl_qlist_next(p_list_item); + + /* We need to retire the event if the next aging time passed */ + if (next_aging_time < current_time) { + /* remove it from the map */ + cl_qmap_remove_item(&p_event_wheel->events_map, + &(p_event->map_item)); + + /* pop p_event from the wheel */ + cl_qlist_remove_head(&p_event_wheel->events_wheel); + + /* delete the event info object - allocated by cl_event_wheel_reg */ + free(p_event); + } else { + /* update the required aging time */ + p_event->aging_time = next_aging_time; + p_event->num_regs++; + + /* do not remove from the map - but remove from the list head and + place in the correct position */ + + /* pop p_event from the wheel */ + cl_qlist_remove_head(&p_event_wheel->events_wheel); + + /* find the event that ages just before */ + p_prev_event_list_item = + cl_qlist_find_from_tail(&p_event_wheel-> + events_wheel, + __event_will_age_before, + &p_event->aging_time); + + /* insert just after */ + cl_qlist_insert_next(&p_event_wheel->events_wheel, + p_prev_event_list_item, + &p_event->list_item); + + /* as we have modified the list - restart from first item: */ + p_list_next_item = + cl_qlist_head(&p_event_wheel->events_wheel); + } + + /* advance to next event */ + p_list_item = p_list_next_item; + if (p_list_item == cl_qlist_end(&p_event_wheel->events_wheel)) + /* the list is empty - nothing to do */ + break; + + /* get the p_event */ + p_event = + PARENT_STRUCT(p_list_item, cl_event_wheel_reg_info_t, + list_item); + } + + /* We need to restart the timer only if the list is not empty now */ + if 
(p_list_item != cl_qlist_end(&p_event_wheel->events_wheel)) { + /* get the p_event */ + p_event = + PARENT_STRUCT(p_list_item, cl_event_wheel_reg_info_t, + list_item); + + /* start the timer to the timeout [msec] */ + new_timeout = + (uint32_t) ((p_event->aging_time - current_time + 500) / 1000); + CL_DBG("__cl_event_wheel_callback: Restart timer in: " + "%u [msec]\n", new_timeout); + cl_status = cl_timer_start(&p_event_wheel->timer, new_timeout); + if (cl_status != CL_SUCCESS) { + CL_DBG("__cl_event_wheel_callback: ERR 6200: " + "Failed to start timer\n"); + } + } + + /* release the lock */ +Exit: + cl_spinlock_release(&p_event_wheel->lock); + if (NULL != p_event_wheel->p_external_lock) + cl_spinlock_release(p_event_wheel->p_external_lock); +} + +/* + * Construct and Initialize + */ +void cl_event_wheel_construct(IN cl_event_wheel_t * const p_event_wheel) +{ + cl_spinlock_construct(&(p_event_wheel->lock)); + cl_timer_construct(&(p_event_wheel->timer)); +} + +cl_status_t cl_event_wheel_init(IN cl_event_wheel_t * const p_event_wheel) +{ + cl_status_t cl_status = CL_SUCCESS; + + /* initialize */ + p_event_wheel->p_external_lock = NULL; + p_event_wheel->closing = FALSE; + cl_status = cl_spinlock_init(&(p_event_wheel->lock)); + if (cl_status != CL_SUCCESS) + return cl_status; + cl_qlist_init(&p_event_wheel->events_wheel); + cl_qmap_init(&p_event_wheel->events_map); + + /* init the timer with timeout */ + cl_status = cl_timer_init(&p_event_wheel->timer, __cl_event_wheel_callback, p_event_wheel); /* cb context */ + + return cl_status; +} + +cl_status_t cl_event_wheel_init_ex(IN cl_event_wheel_t * const p_event_wheel, + IN cl_spinlock_t * p_external_lock) +{ + cl_status_t cl_status; + + cl_status = cl_event_wheel_init(p_event_wheel); + if (CL_SUCCESS != cl_status) + return cl_status; + + p_event_wheel->p_external_lock = p_external_lock; + return cl_status; +} + +void cl_event_wheel_dump(IN cl_event_wheel_t * const p_event_wheel) +{ + cl_list_item_t *p_list_item; + 
cl_event_wheel_reg_info_t __attribute__((__unused__)) *p_event; + + p_list_item = cl_qlist_head(&p_event_wheel->events_wheel); + + while (p_list_item != cl_qlist_end(&p_event_wheel->events_wheel)) { + p_event = + PARENT_STRUCT(p_list_item, cl_event_wheel_reg_info_t, + list_item); + CL_DBG("cl_event_wheel_dump: Found event key:<0x%" + PRIx64 ">, num_regs:%d, aging time:%" PRIu64 "\n", + p_event->key, p_event->num_regs, p_event->aging_time); + p_list_item = cl_qlist_next(p_list_item); + } +} + +void cl_event_wheel_destroy(IN cl_event_wheel_t * const p_event_wheel) +{ + cl_list_item_t *p_list_item; + cl_map_item_t *p_map_item; + cl_event_wheel_reg_info_t *p_event; + + /* we need to get a lock */ + cl_spinlock_acquire(&p_event_wheel->lock); + + cl_event_wheel_dump(p_event_wheel); + + /* go over all the items in the list and remove them */ + p_list_item = cl_qlist_remove_head(&p_event_wheel->events_wheel); + while (p_list_item != cl_qlist_end(&p_event_wheel->events_wheel)) { + p_event = + PARENT_STRUCT(p_list_item, cl_event_wheel_reg_info_t, + list_item); + + CL_DBG("cl_event_wheel_destroy: Found outstanding event" + " key:<0x%" PRIx64 ">\n", p_event->key); + + /* remove it from the map */ + p_map_item = &(p_event->map_item); + cl_qmap_remove_item(&p_event_wheel->events_map, p_map_item); + free(p_event); /* allocated by cl_event_wheel_reg */ + p_list_item = + cl_qlist_remove_head(&p_event_wheel->events_wheel); + } + + /* destroy the timer */ + cl_timer_destroy(&p_event_wheel->timer); + + /* destroy the lock (this should be done without releasing - we don't want + any other run to grab the lock at this point. 
*/ + cl_spinlock_release(&p_event_wheel->lock); + cl_spinlock_destroy(&(p_event_wheel->lock)); +} + +cl_status_t cl_event_wheel_reg(IN cl_event_wheel_t * const p_event_wheel, + IN const uint64_t key, + IN const uint64_t aging_time_usec, + IN cl_pfn_event_aged_cb_t pfn_callback, + IN void *const context) +{ + cl_event_wheel_reg_info_t *p_event; + uint64_t timeout; + uint32_t to; + cl_status_t cl_status = CL_SUCCESS; + cl_list_item_t *prev_event_list_item; + cl_map_item_t *p_map_item; + + /* Get the lock on the manager */ + cl_spinlock_acquire(&(p_event_wheel->lock)); + + cl_event_wheel_dump(p_event_wheel); + + /* Make sure such a key does not exists */ + p_map_item = cl_qmap_get(&p_event_wheel->events_map, key); + if (p_map_item != cl_qmap_end(&p_event_wheel->events_map)) { + CL_DBG("cl_event_wheel_reg: Already existing key:0x%" + PRIx64 "\n", key); + + /* already there - remove it from the list as it is getting a new time */ + p_event = + PARENT_STRUCT(p_map_item, cl_event_wheel_reg_info_t, + map_item); + + /* remove the item from the qlist */ + cl_qlist_remove_item(&p_event_wheel->events_wheel, + &p_event->list_item); + /* and the qmap */ + cl_qmap_remove_item(&p_event_wheel->events_map, + &p_event->map_item); + } else { + /* make a new one */ + p_event = (cl_event_wheel_reg_info_t *) + malloc(sizeof(cl_event_wheel_reg_info_t)); + if (!p_event) { + cl_status = CL_ERROR; + goto Exit; + } + p_event->num_regs = 0; + } + + p_event->key = key; + p_event->aging_time = aging_time_usec; + p_event->pfn_aged_callback = pfn_callback; + p_event->context = context; + p_event->num_regs++; + + CL_DBG("cl_event_wheel_reg: Registering event key:0x%" PRIx64 + " aging in %u [msec]\n", p_event->key, + (uint32_t) ((p_event->aging_time - cl_get_time_stamp()) / 1000)); + + /* If the list is empty - need to start the timer */ + if (cl_is_qlist_empty(&p_event_wheel->events_wheel)) { + /* Edward Bortnikov 03/29/2003 + * ++TBD Consider moving the timer manipulation behind the list 
manipulation. + */ + + /* calculate the new timeout */ + timeout = + (p_event->aging_time - cl_get_time_stamp() + 500) / 1000; + + /* stop the timer if it is running */ + + /* Edward Bortnikov 03/29/2003 + * Don't call cl_timer_stop() because it spins forever. + * cl_timer_start() will invoke cl_timer_stop() by itself. + * + * The problematic scenario is when __cl_event_wheel_callback() + * is in race condition with this code. It sets timer.in_timer_cb + * to TRUE and then blocks on p_event_wheel->lock. Following this, + * the call to cl_timer_stop() hangs. Following this, the whole system + * enters into a deadlock. + * + * cl_timer_stop(&p_event_wheel->timer); + */ + + /* The timeout for the cl_timer_start should be given as uint32_t. + if there is an overflow - warn about it. */ + to = (uint32_t) timeout; + if (timeout > (uint32_t) timeout) { + to = 0xffffffff; /* max 32 bit timer */ + CL_DBG("cl_event_wheel_reg: timeout requested is " + "too large. Using timeout: %u\n", to); + } + + /* start the timer to the timeout [msec] */ + cl_status = cl_timer_start(&p_event_wheel->timer, to); + if (cl_status != CL_SUCCESS) { + CL_DBG("cl_event_wheel_reg : ERR 6203: " + "Failed to start timer\n"); + /* + * p_event is linked into neither the wheel list nor + * the map here (it was just allocated or unlinked + * above), so release it instead of leaking it. + */ + free(p_event); + goto Exit; + } + } + + /* insert the object to the qlist and the qmap */ + + /* BUT WE MUST INSERT IT IN A SORTED MANNER */ + prev_event_list_item = + cl_qlist_find_from_tail(&p_event_wheel->events_wheel, + __event_will_age_before, + &p_event->aging_time); + + cl_qlist_insert_next(&p_event_wheel->events_wheel, + prev_event_list_item, &p_event->list_item); + + cl_qmap_insert(&p_event_wheel->events_map, key, &(p_event->map_item)); + +Exit: + cl_spinlock_release(&p_event_wheel->lock); + + return cl_status; +} + +void cl_event_wheel_unreg(IN cl_event_wheel_t * const p_event_wheel, + IN uint64_t key) +{ + cl_event_wheel_reg_info_t *p_event; + cl_map_item_t *p_map_item; + + CL_DBG("cl_event_wheel_unreg: " "Removing key:0x%" PRIx64 "\n", key); + +
cl_spinlock_acquire(&p_event_wheel->lock); + p_map_item = cl_qmap_get(&p_event_wheel->events_map, key); + if (p_map_item != cl_qmap_end(&p_event_wheel->events_map)) { + /* we found such an item. */ + p_event = + PARENT_STRUCT(p_map_item, cl_event_wheel_reg_info_t, + map_item); + + /* remove the item from the qlist */ + cl_qlist_remove_item(&p_event_wheel->events_wheel, + &(p_event->list_item)); + /* remove the item from the qmap */ + cl_qmap_remove_item(&p_event_wheel->events_map, + &(p_event->map_item)); + + CL_DBG("cl_event_wheel_unreg: Removed key:0x%" PRIx64 "\n", + key); + + /* free the item */ + free(p_event); + } else { + CL_DBG("cl_event_wheel_unreg: did not find key:0x%" PRIx64 + "\n", key); + } + + cl_spinlock_release(&p_event_wheel->lock); +} + +uint32_t cl_event_wheel_num_regs(IN cl_event_wheel_t * const p_event_wheel, + IN uint64_t key) +{ + + cl_event_wheel_reg_info_t *p_event; + cl_map_item_t *p_map_item; + uint32_t num_regs = 0; + + /* try to find the key in the map */ + CL_DBG("cl_event_wheel_num_regs: Looking for key:0x%" PRIx64 "\n", key); + + cl_spinlock_acquire(&p_event_wheel->lock); + p_map_item = cl_qmap_get(&p_event_wheel->events_map, key); + if (p_map_item != cl_qmap_end(&p_event_wheel->events_map)) { + /* ok so we can simply return it's num_regs */ + p_event = + PARENT_STRUCT(p_map_item, cl_event_wheel_reg_info_t, + map_item); + num_regs = p_event->num_regs; + } + + cl_spinlock_release(&p_event_wheel->lock); + return (num_regs); +} + +#ifdef __CL_EVENT_WHEEL_TEST__ + +/* Dump out the complete state of the event wheel */ +void __cl_event_wheel_dump(IN cl_event_wheel_t * const p_event_wheel) +{ + cl_list_item_t *p_list_item; + cl_map_item_t *p_map_item; + cl_event_wheel_reg_info_t *p_event; + + printf("************** Event Wheel Dump ***********************\n"); + printf("Event Wheel List has %u items:\n", + cl_qlist_count(&p_event_wheel->events_wheel)); + + p_list_item = cl_qlist_head(&p_event_wheel->events_wheel); + while (p_list_item != 
cl_qlist_end(&p_event_wheel->events_wheel)) { + p_event = + PARENT_STRUCT(p_list_item, cl_event_wheel_reg_info_t, + list_item); + printf("Event key:0x%" PRIx64 " Context:%s NumRegs:%u\n", + p_event->key, (char *)p_event->context, + p_event->num_regs); + + /* next */ + p_list_item = cl_qlist_next(p_list_item); + } + + printf("Event Map has %u items:\n", + cl_qmap_count(&p_event_wheel->events_map)); + + p_map_item = cl_qmap_head(&p_event_wheel->events_map); + while (p_map_item != cl_qmap_end(&p_event_wheel->events_map)) { + p_event = + PARENT_STRUCT(p_map_item, cl_event_wheel_reg_info_t, + map_item); + printf("Event key:0x%" PRIx64 " Context:%s NumRegs:%u\n", + p_event->key, (char *)p_event->context, + p_event->num_regs); + + /* next */ + p_map_item = cl_qmap_next(p_map_item); + } + +} + +/* The callback for aging event */ +/* We assume we pass a text context */ +static uint64_t __test_event_aging(uint64_t key, uint32_t num_regs, void *context) +{ + printf("*****************************************************\n"); + printf("Aged key: 0x%" PRIx64 " Context:%s\n", key, (char *) context); + /* + * The wheel uses the return value as the event's next aging time; + * 0 is already in the past, so the event is retired rather than + * re-armed. Falling off the end of a non-void function would hand + * back an indeterminate value instead. + */ + return 0; +} + +int main() +{ + cl_event_wheel_t event_wheel; + + /* init complib */ + if (complib_init_v2() != CL_SUCCESS) { + printf("complib_init_v2 failed\n"); + exit(1); + } + /* construct */ + cl_event_wheel_construct(&event_wheel); + + /* init */ + cl_event_wheel_init(&event_wheel); + + /* Start Playing */ + cl_event_wheel_reg(&event_wheel, 1, /* key */ + cl_get_time_stamp() + 3000000, /* 3 sec lifetime */ + __test_event_aging, /* cb */ + "The First Aging Event"); + + cl_event_wheel_reg(&event_wheel, 2, /* key */ + cl_get_time_stamp() + 3000000, /* 3 sec lifetime */ + __test_event_aging, /* cb */ + "The Second Aging Event"); + + cl_event_wheel_reg(&event_wheel, 3, /* key */ + cl_get_time_stamp() + 3500000, /* 3.5 sec lifetime */ + __test_event_aging, /* cb */ + "The Third Aging Event"); + + __cl_event_wheel_dump(&event_wheel); + + sleep(2); + cl_event_wheel_reg(&event_wheel, 2, /* key */
+ cl_get_time_stamp() + 8000000, /* 8 sec lifetime */ + __test_event_aging, /* cb */ + "The Second Aging Event Moved"); + + __cl_event_wheel_dump(&event_wheel); + + sleep(1); + /* remove the third event */ + cl_event_wheel_unreg(&event_wheel, 3); /* key */ + + /* get the number of registrations for the keys */ + printf("Event 1 Registered: %u\n", + cl_event_wheel_num_regs(&event_wheel, 1)); + printf("Event 2 Registered: %u\n", + cl_event_wheel_num_regs(&event_wheel, 2)); + + sleep(5); + /* destroy */ + cl_event_wheel_destroy(&event_wheel); + + complib_exit(); + + return (0); +} + +#endif /* __CL_EVENT_WHEEL_TEST__ */ diff --git a/complib/cl_heap.c b/complib/cl_heap.c new file mode 100644 index 0000000..b7715f9 --- /dev/null +++ b/complib/cl_heap.c @@ -0,0 +1,364 @@ +/* + * Copyright (c) 2009-2015 ZIH, TU Dresden, Federal Republic of Germany. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * This file contains a d-ary heap implementation. + * The default is a minimum heap, however the caller can overwrite + * the compare function for the keys of the heap. + * + */ + +#if HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include + +typedef struct _cl_heap_elem { + uint64_t key; + void *context; +} cl_heap_elem_t; + +static int compare_keys(IN const void *p_key_1, IN const void *p_key_2) +{ + uint64_t key1, key2; + + CL_ASSERT(p_key_1); + CL_ASSERT(p_key_2); + + key1 = *((uint64_t *) p_key_1); + key2 = *((uint64_t *) p_key_2); + + return ((key1 < key2) ? -1 : ((key1 > key2) ? 1 : 0)); +} + +void cl_heap_construct(IN cl_heap_t * const p_heap) +{ + CL_ASSERT(p_heap); + + memset(p_heap, 0, sizeof(cl_heap_t)); + + p_heap->state = CL_UNINITIALIZED; +} + +cl_status_t cl_heap_init(IN cl_heap_t * const p_heap, IN const size_t max_size, + IN const uint8_t d, + IN cl_pfn_heap_apply_index_update_t pfn_index_update, + IN cl_pfn_heap_compare_keys_t pfn_compare OPTIONAL) +{ + CL_ASSERT(p_heap); + + if (!cl_is_state_valid(p_heap->state)) + cl_heap_construct(p_heap); + + if (max_size <= 0 || !d || !pfn_index_update) + return (CL_INVALID_PARAMETER); + + if (cl_is_heap_inited(p_heap)) + cl_heap_destroy(p_heap); + + p_heap->branching_factor = d; + p_heap->size = 0; + p_heap->capacity = max_size; + p_heap->pfn_index_update = pfn_index_update; + + if (pfn_compare) + p_heap->pfn_compare = pfn_compare; + else + p_heap->pfn_compare = &compare_keys; + + p_heap->element_array = + (cl_heap_elem_t *) malloc(max_size * sizeof(cl_heap_elem_t)); + if (!p_heap->element_array) + return (CL_INSUFFICIENT_MEMORY); + memset(p_heap->element_array, 0, max_size * 
sizeof(cl_heap_elem_t)); + + p_heap->state = CL_INITIALIZED; + + return (CL_SUCCESS); +} + +void cl_heap_destroy(IN cl_heap_t * const p_heap) +{ + CL_ASSERT(p_heap); + CL_ASSERT(cl_is_state_valid(p_heap->state)); + + if (p_heap->element_array) + free(p_heap->element_array); + + cl_heap_construct(p_heap); +} + +cl_status_t cl_heap_resize(IN cl_heap_t * const p_heap, + IN const size_t new_size) +{ + cl_heap_elem_t *realloc_element_array = NULL; + + CL_ASSERT(p_heap); + CL_ASSERT(cl_is_heap_inited(p_heap)); + + if (new_size <= 0 || new_size < p_heap->size) + return (CL_INVALID_PARAMETER); + + if (new_size == p_heap->capacity) + return (CL_SUCCESS); + + realloc_element_array = + (cl_heap_elem_t *) realloc(p_heap->element_array, + new_size * sizeof(cl_heap_elem_t)); + if (!realloc_element_array) + return (CL_INSUFFICIENT_MEMORY); + + p_heap->element_array = realloc_element_array; + memset(p_heap->element_array + p_heap->size, 0, + (new_size - p_heap->size) * sizeof(cl_heap_elem_t)); + + p_heap->capacity = new_size; + + return (CL_SUCCESS); +} + +static void heap_down(IN cl_heap_t * const p_heap, IN const size_t index) +{ + int64_t first_child, swap_child, child, parent, d; + cl_heap_elem_t tmp = p_heap->element_array[index]; + boolean_t swapped = FALSE; + + d = (int64_t) p_heap->branching_factor; + parent = index; + + while (parent * d + 1 < p_heap->size) { + swap_child = first_child = parent * d + 1; + /* find the min (or max) child among the children */ + for (child = first_child + 1; + child < first_child + d && child < p_heap->size; child++) + if (p_heap-> + pfn_compare(&(p_heap->element_array[child].key), + &(p_heap->element_array[swap_child]. + key)) <= 0) + swap_child = child; + + /* exchange parent and one child */ + if (p_heap-> + pfn_compare(&(tmp.key), + &(p_heap->element_array[swap_child].key)) > 0) { + p_heap->element_array[parent] = + p_heap->element_array[swap_child]; + p_heap->pfn_index_update(p_heap->element_array[parent]. 
+ context, parent); + parent = swap_child; + swapped = TRUE; + } else + break; + } + + /* move the original element down in the heap */ + if (swapped) { + p_heap->element_array[parent] = tmp; + p_heap->pfn_index_update(p_heap->element_array[parent].context, + parent); + } +} + +static void heap_up(IN cl_heap_t * const p_heap, IN const size_t index) +{ + int64_t parent, child, swap_child = 0, d; + boolean_t swapped = FALSE; + + if (!index) + return; + + cl_heap_elem_t tmp = p_heap->element_array[index]; + + d = (int64_t) p_heap->branching_factor; + parent = index; + do { + child = parent; + parent = (child - 1) / d; + if (p_heap-> + pfn_compare(&(tmp.key), + &(p_heap->element_array[parent].key)) < 0) { + /* move the parent down and notify the user context about the change */ + p_heap->element_array[child] = + p_heap->element_array[parent]; + p_heap->pfn_index_update(p_heap->element_array[child]. + context, child); + swap_child = parent; + swapped = TRUE; + } else + break; + } while (parent > 0); + + /* write original heap element to the correct position */ + if (swapped) { + p_heap->element_array[swap_child] = tmp; + p_heap->pfn_index_update(p_heap->element_array[swap_child]. 
+ context, swap_child); + } +} + +cl_status_t cl_heap_modify_key(IN cl_heap_t * const p_heap, + IN const uint64_t key, IN const size_t index) +{ + uint64_t old_key; + int compare_result; + + CL_ASSERT(p_heap); + CL_ASSERT(cl_is_heap_inited(p_heap)); + + if (index < 0 || index >= p_heap->size) + return (CL_INVALID_PARAMETER); + + old_key = p_heap->element_array[index].key; + p_heap->element_array[index].key = key; + + compare_result = p_heap->pfn_compare(&key, &old_key); + if (compare_result < 0) + heap_up(p_heap, index); + else if (compare_result > 0) + heap_down(p_heap, index); + + return (CL_SUCCESS); +} + +cl_status_t cl_heap_insert(IN cl_heap_t * const p_heap, IN const uint64_t key, + IN const void *const context) +{ + CL_ASSERT(p_heap); + CL_ASSERT(cl_is_heap_inited(p_heap)); + + if (!context) + return (CL_INVALID_PARAMETER); + + if (p_heap->size == p_heap->capacity) + return (CL_INSUFFICIENT_RESOURCES); + + p_heap->element_array[p_heap->size].key = key; + p_heap->element_array[p_heap->size].context = (void *) context; + p_heap->pfn_index_update(context, p_heap->size); + + heap_up(p_heap, p_heap->size++); + + return (CL_SUCCESS); +} + +void *cl_heap_delete(IN cl_heap_t * const p_heap, IN const size_t index) +{ + int64_t parent, d; + int compare_result; + cl_heap_elem_t tmp; + + CL_ASSERT(p_heap); + CL_ASSERT(cl_is_heap_inited(p_heap)); + + if (!p_heap->size) + return NULL; + if (index < 0 || index >= p_heap->size) + return NULL; + if (p_heap->size == 1) + return p_heap->element_array[--(p_heap->size)].context; + + tmp = p_heap->element_array[--(p_heap->size)]; + + p_heap->element_array[p_heap->size] = p_heap->element_array[index]; + p_heap->pfn_index_update(p_heap->element_array[p_heap->size].context, + p_heap->size); + + p_heap->element_array[index] = tmp; + p_heap->pfn_index_update(p_heap->element_array[index].context, index); + + if (0 == index) + heap_down(p_heap, index); + else { + d = (int64_t) p_heap->branching_factor; + parent = (index - 1) / d; + 
compare_result = + p_heap->pfn_compare(&(p_heap->element_array[parent].key), + &(p_heap->element_array[index].key)); + + /* if the parent is smaller than tmp (which we moved within + * the head), then we have to attempt a heap_down + */ + if (compare_result < 0) + heap_down(p_heap, index); + /* otherwise heap_up is needed to restore the heap property */ + else if (compare_result > 0) + heap_up(p_heap, index); + } + + return p_heap->element_array[p_heap->size].context; +} + +void *cl_heap_extract_root(IN cl_heap_t * const p_heap) +{ + return cl_heap_delete(p_heap, 0); +} + +boolean_t cl_is_stored_in_heap(IN const cl_heap_t * const p_heap, + IN const void *const ctx, IN const size_t index) +{ + CL_ASSERT(p_heap); + CL_ASSERT(cl_is_heap_inited(p_heap)); + + return ((index < 0 || index >= p_heap->size || + p_heap->element_array[index].context != ctx) ? FALSE : TRUE); +} + +boolean_t cl_verify_heap_property(IN const cl_heap_t * const p_heap) +{ + int64_t first_child, child, parent, d; + + CL_ASSERT(p_heap); + CL_ASSERT(cl_is_heap_inited(p_heap)); + + d = (int64_t) p_heap->branching_factor; + parent = 0; + + while (parent < p_heap->size) { + first_child = parent * d + 1; + /* find the min (or max) child among the children */ + for (child = first_child; + child < first_child + d && child < p_heap->size; child++) + if (p_heap-> + pfn_compare(&(p_heap->element_array[parent].key), + &(p_heap->element_array[child].key)) > + 0) + return FALSE; + parent++; + } + + return TRUE; +} diff --git a/complib/cl_list.c b/complib/cl_list.c new file mode 100644 index 0000000..87ebc71 --- /dev/null +++ b/complib/cl_list.c @@ -0,0 +1,564 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005,2009 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of quick list, and list. 
+ * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include + +#define FREE_ITEM_GROW_SIZE 10 + +/****************************************************************************** + IMPLEMENTATION OF QUICK LIST +******************************************************************************/ +void cl_qlist_insert_array_head(IN cl_qlist_t * const p_list, + IN cl_list_item_t * const p_array, + IN uint32_t item_count, + IN const uint32_t item_size) +{ + cl_list_item_t *p_item; + + CL_ASSERT(p_list); + CL_ASSERT(p_list->state == CL_INITIALIZED); + CL_ASSERT(p_array); + CL_ASSERT(item_size >= sizeof(cl_list_item_t)); + CL_ASSERT(item_count); + + /* + * To add items from the array to the list in the same order as + * the elements appear in the array, we add them starting with + * the last one first. Locate the last item. + */ + p_item = (cl_list_item_t *) ((uint8_t *) p_array + + (item_size * (item_count - 1))); + + /* Continue to add all items to the list. */ + while (item_count--) { + cl_qlist_insert_head(p_list, p_item); + + /* Get the next object to add to the list. */ + p_item = (cl_list_item_t *) ((uint8_t *) p_item - item_size); + } +} + +void cl_qlist_insert_array_tail(IN cl_qlist_t * const p_list, + IN cl_list_item_t * const p_array, + IN uint32_t item_count, + IN const uint32_t item_size) +{ + cl_list_item_t *p_item; + + CL_ASSERT(p_list); + CL_ASSERT(p_list->state == CL_INITIALIZED); + CL_ASSERT(p_array); + CL_ASSERT(item_size >= sizeof(cl_list_item_t)); + CL_ASSERT(item_count); + + /* Set the first item to add to the list. */ + p_item = p_array; + + /* Continue to add all items to the list. */ + while (item_count--) { + cl_qlist_insert_tail(p_list, p_item); + + /* Get the next object to add to the list. 
*/ + p_item = (cl_list_item_t *) ((uint8_t *) p_item + item_size); + } +} + +void cl_qlist_insert_list_head(IN cl_qlist_t * const p_dest_list, + IN cl_qlist_t * const p_src_list) +{ +#if defined( _DEBUG_ ) + cl_list_item_t *p_item; +#endif + + CL_ASSERT(p_dest_list); + CL_ASSERT(p_src_list); + CL_ASSERT(p_dest_list->state == CL_INITIALIZED); + CL_ASSERT(p_src_list->state == CL_INITIALIZED); + + /* + * Is the src list empty? + * We must have this check here for code below to work. + */ + if (cl_is_qlist_empty(p_src_list)) + return; + +#if defined( _DEBUG_ ) + /* Check that all items in the source list belong there. */ + p_item = cl_qlist_head(p_src_list); + while (p_item != cl_qlist_end(p_src_list)) { + /* All list items in the source list must point to it. */ + CL_ASSERT(p_item->p_list == p_src_list); + /* Point them all to the destination list. */ + p_item->p_list = p_dest_list; + p_item = cl_qlist_next(p_item); + } +#endif + + /* Chain the destination list to the tail of the source list. */ + cl_qlist_tail(p_src_list)->p_next = cl_qlist_head(p_dest_list); + cl_qlist_head(p_dest_list)->p_prev = cl_qlist_tail(p_src_list); + + /* + * Update the head of the destination list to the head of + * the source list. + */ + p_dest_list->end.p_next = cl_qlist_head(p_src_list); + cl_qlist_head(p_src_list)->p_prev = &p_dest_list->end; + + /* + * Update the count of the destination to reflect the source items having + * been added. + */ + p_dest_list->count += p_src_list->count; + + /* Update source list to reflect being empty. */ + __cl_qlist_reset(p_src_list); +} + +void cl_qlist_insert_list_tail(IN cl_qlist_t * const p_dest_list, + IN cl_qlist_t * const p_src_list) +{ +#if defined( _DEBUG_ ) + cl_list_item_t *p_item; +#endif + + CL_ASSERT(p_dest_list); + CL_ASSERT(p_src_list); + CL_ASSERT(p_dest_list->state == CL_INITIALIZED); + CL_ASSERT(p_src_list->state == CL_INITIALIZED); + + /* + * Is the src list empty? + * We must have this check here for code below to work. 
+ */ + if (cl_is_qlist_empty(p_src_list)) + return; + +#if defined( _DEBUG_ ) + /* Check that all items in the source list belong there. */ + p_item = cl_qlist_head(p_src_list); + while (p_item != cl_qlist_end(p_src_list)) { + /* All list items in the source list must point to it. */ + CL_ASSERT(p_item->p_list == p_src_list); + /* Point them all to the destination list. */ + p_item->p_list = p_dest_list; + p_item = cl_qlist_next(p_item); + } +#endif + + /* Chain the source list to the tail of the destination list. */ + cl_qlist_tail(p_dest_list)->p_next = cl_qlist_head(p_src_list); + cl_qlist_head(p_src_list)->p_prev = cl_qlist_tail(p_dest_list); + + /* + * Update the tail of the destination list to the tail of + * the source list. + */ + p_dest_list->end.p_prev = cl_qlist_tail(p_src_list); + cl_qlist_tail(p_src_list)->p_next = &p_dest_list->end; + + /* + * Update the count of the destination to reflect the source items having + * been added. + */ + p_dest_list->count += p_src_list->count; + + /* Update source list to reflect being empty. 
*/ + __cl_qlist_reset(p_src_list); +} + +boolean_t cl_is_item_in_qlist(IN const cl_qlist_t * const p_list, + IN const cl_list_item_t * const p_list_item) +{ + const cl_list_item_t *p_temp; + + CL_ASSERT(p_list); + CL_ASSERT(p_list_item); + CL_ASSERT(p_list->state == CL_INITIALIZED); + + /* Traverse looking for a match */ + p_temp = cl_qlist_head(p_list); + while (p_temp != cl_qlist_end(p_list)) { + if (p_temp == p_list_item) { + CL_ASSERT(p_list_item->p_list == p_list); + return (TRUE); + } + + p_temp = cl_qlist_next(p_temp); + } + + return (FALSE); +} + +cl_list_item_t *cl_qlist_find_next(IN const cl_qlist_t * const p_list, + IN const cl_list_item_t * const p_list_item, + IN cl_pfn_qlist_find_t pfn_func, + IN const void *const context) +{ + cl_list_item_t *p_found_item; + + CL_ASSERT(p_list); + CL_ASSERT(p_list->state == CL_INITIALIZED); + CL_ASSERT(p_list_item); + CL_ASSERT(p_list_item->p_list == p_list); + CL_ASSERT(pfn_func); + + p_found_item = cl_qlist_next(p_list_item); + + /* The user provided a compare function */ + while (p_found_item != cl_qlist_end(p_list)) { + CL_ASSERT(p_found_item->p_list == p_list); + + if (pfn_func(p_found_item, (void *)context) == CL_SUCCESS) + break; + + p_found_item = cl_qlist_next(p_found_item); + } + + /* No match */ + return (p_found_item); +} + +cl_list_item_t *cl_qlist_find_prev(IN const cl_qlist_t * const p_list, + IN const cl_list_item_t * const p_list_item, + IN cl_pfn_qlist_find_t pfn_func, + IN const void *const context) +{ + cl_list_item_t *p_found_item; + + CL_ASSERT(p_list); + CL_ASSERT(p_list->state == CL_INITIALIZED); + CL_ASSERT(p_list_item); + CL_ASSERT(p_list_item->p_list == p_list); + CL_ASSERT(pfn_func); + + p_found_item = cl_qlist_prev(p_list_item); + + /* The user provided a compare function */ + while (p_found_item != cl_qlist_end(p_list)) { + CL_ASSERT(p_found_item->p_list == p_list); + + if (pfn_func(p_found_item, (void *)context) == CL_SUCCESS) + break; + + p_found_item = cl_qlist_prev(p_found_item); + } 
+ + /* No match */ + return (p_found_item); +} + +void cl_qlist_apply_func(IN const cl_qlist_t * const p_list, + IN cl_pfn_qlist_apply_t pfn_func, + IN const void *const context) +{ + cl_list_item_t *p_list_item; + + /* Note that context can have any arbitrary value. */ + CL_ASSERT(p_list); + CL_ASSERT(p_list->state == CL_INITIALIZED); + CL_ASSERT(pfn_func); + + p_list_item = cl_qlist_head(p_list); + while (p_list_item != cl_qlist_end(p_list)) { + pfn_func(p_list_item, (void *)context); + p_list_item = cl_qlist_next(p_list_item); + } +} + +void cl_qlist_move_items(IN cl_qlist_t * const p_src_list, + IN cl_qlist_t * const p_dest_list, + IN cl_pfn_qlist_find_t pfn_func, + IN const void *const context) +{ + cl_list_item_t *p_current_item, *p_next; + + CL_ASSERT(p_src_list); + CL_ASSERT(p_dest_list); + CL_ASSERT(p_src_list->state == CL_INITIALIZED); + CL_ASSERT(p_dest_list->state == CL_INITIALIZED); + CL_ASSERT(pfn_func); + + p_current_item = cl_qlist_head(p_src_list); + + while (p_current_item != cl_qlist_end(p_src_list)) { + /* Before we do anything, get a pointer to the next item. */ + p_next = cl_qlist_next(p_current_item); + + if (pfn_func(p_current_item, (void *)context) == CL_SUCCESS) { + /* Move the item from one list to the other. */ + cl_qlist_remove_item(p_src_list, p_current_item); + cl_qlist_insert_tail(p_dest_list, p_current_item); + } + p_current_item = p_next; + } +} + +/****************************************************************************** + IMPLEMENTATION OF LIST +******************************************************************************/ +void cl_list_construct(IN cl_list_t * const p_list) +{ + CL_ASSERT(p_list); + + cl_qpool_construct(&p_list->list_item_pool); +} + +cl_status_t cl_list_init(IN cl_list_t * const p_list, IN const size_t min_items) +{ + uint32_t grow_size; + + CL_ASSERT(p_list); + cl_qlist_init(&p_list->list); + + /* + * We will grow by min_items/8 items at a time, with a minimum of + * FREE_ITEM_GROW_SIZE. 
+ */ + grow_size = (uint32_t) min_items >> 3; + if (grow_size < FREE_ITEM_GROW_SIZE) + grow_size = FREE_ITEM_GROW_SIZE; + + /* Initialize the pool of list items. */ + return (cl_qpool_init(&p_list->list_item_pool, min_items, 0, grow_size, + sizeof(cl_pool_obj_t), NULL, NULL, NULL)); +} + +void cl_list_destroy(IN cl_list_t * const p_list) +{ + CL_ASSERT(p_list); + + cl_qpool_destroy(&p_list->list_item_pool); +} + +static cl_status_t cl_list_find_cb(IN const cl_list_item_t * const p_list_item, + IN void *const context) +{ + CL_ASSERT(p_list_item); + + if (cl_list_obj(p_list_item) == context) + return (CL_SUCCESS); + + return (CL_NOT_FOUND); +} + +cl_status_t cl_list_remove_object(IN cl_list_t * const p_list, + IN const void *const p_object) +{ + cl_list_item_t *p_list_item; + + CL_ASSERT(p_list); + CL_ASSERT(cl_is_qpool_inited(&p_list->list_item_pool)); + + /* find the item in question */ + p_list_item = + cl_qlist_find_from_head(&p_list->list, cl_list_find_cb, p_object); + if (p_list_item != cl_qlist_end(&p_list->list)) { + /* remove this item */ + cl_qlist_remove_item(&p_list->list, p_list_item); + cl_qpool_put(&p_list->list_item_pool, + (cl_pool_item_t *) p_list_item); + return (CL_SUCCESS); + } + return (CL_NOT_FOUND); +} + +boolean_t cl_is_object_in_list(IN const cl_list_t * const p_list, + IN const void *const p_object) +{ + CL_ASSERT(p_list); + CL_ASSERT(cl_is_qpool_inited(&p_list->list_item_pool)); + + return (cl_qlist_find_from_head + (&p_list->list, cl_list_find_cb, p_object) + != cl_qlist_end(&p_list->list)); +} + +cl_status_t cl_list_insert_array_head(IN cl_list_t * const p_list, + IN const void *const p_array, + IN uint32_t item_count, + IN const uint32_t item_size) +{ + cl_status_t status; + void *p_object; + uint32_t items_remain = item_count; + + CL_ASSERT(p_list); + CL_ASSERT(cl_is_qpool_inited(&p_list->list_item_pool)); + CL_ASSERT(p_array); + CL_ASSERT(item_size); + CL_ASSERT(item_count); + + /* + * To add items from the array to the list in the 
same order as + * the elements appear in the array, we add them starting with + * the last one first. Locate the last item. + */ + p_object = ((uint8_t *) p_array + (item_size * (item_count - 1))); + + /* Continue to add all items to the list. */ + while (items_remain--) { + status = cl_list_insert_head(p_list, p_object); + if (status != CL_SUCCESS) { + /* Remove all items that have been inserted. */ + while (items_remain++ < (item_count - 1)) + cl_list_remove_head(p_list); + return (status); + } + + /* Get the next object to add to the list. */ + p_object = ((uint8_t *) p_object - item_size); + } + + return (CL_SUCCESS); +} + +cl_status_t cl_list_insert_array_tail(IN cl_list_t * const p_list, + IN const void *const p_array, + IN uint32_t item_count, + IN const uint32_t item_size) +{ + cl_status_t status; + void *p_object; + uint32_t items_remain = item_count; + + CL_ASSERT(p_list); + CL_ASSERT(cl_is_qpool_inited(&p_list->list_item_pool)); + CL_ASSERT(p_array); + CL_ASSERT(item_size); + CL_ASSERT(item_count); + + /* Set the first item to add to the list. */ + p_object = (void *)p_array; + + /* Continue to add all items to the list. */ + while (items_remain--) { + status = cl_list_insert_tail(p_list, p_object); + if (status != CL_SUCCESS) { + /* Remove all items that have been inserted. */ + while (items_remain++ < (item_count - 1)) + cl_list_remove_tail(p_list); + return (status); + } + + /* Get the next object to add to the list. */ + p_object = ((uint8_t *) p_object + item_size); + } + + return (CL_SUCCESS); +} + +cl_list_iterator_t cl_list_find_from_head(IN const cl_list_t * const p_list, + IN cl_pfn_list_find_t pfn_func, + IN const void *const context) +{ + cl_status_t status; + cl_list_iterator_t itor; + + /* Note that context can have any arbitrary value. 
*/ + CL_ASSERT(p_list); + CL_ASSERT(cl_is_qpool_inited(&p_list->list_item_pool)); + CL_ASSERT(pfn_func); + + itor = cl_list_head(p_list); + + while (itor != cl_list_end(p_list)) { + status = pfn_func(cl_list_obj(itor), (void *)context); + if (status == CL_SUCCESS) + break; + + itor = cl_list_next(itor); + } + + /* no match */ + return (itor); +} + +cl_list_iterator_t cl_list_find_from_tail(IN const cl_list_t * const p_list, + IN cl_pfn_list_find_t pfn_func, + IN const void *const context) +{ + cl_status_t status; + cl_list_iterator_t itor; + + /* Note that context can have any arbitrary value. */ + CL_ASSERT(p_list); + CL_ASSERT(cl_is_qpool_inited(&p_list->list_item_pool)); + CL_ASSERT(pfn_func); + + itor = cl_list_tail(p_list); + + while (itor != cl_list_end(p_list)) { + status = pfn_func(cl_list_obj(itor), (void *)context); + if (status == CL_SUCCESS) + break; + + itor = cl_list_prev(itor); + } + + /* no match */ + return (itor); +} + +void cl_list_apply_func(IN const cl_list_t * const p_list, + IN cl_pfn_list_apply_t pfn_func, + IN const void *const context) +{ + cl_list_iterator_t itor; + + /* Note that context can have any arbitrary value. */ + CL_ASSERT(p_list); + CL_ASSERT(cl_is_qpool_inited(&p_list->list_item_pool)); + CL_ASSERT(pfn_func); + + itor = cl_list_head(p_list); + + while (itor != cl_list_end(p_list)) { + pfn_func(cl_list_obj(itor), (void *)context); + + itor = cl_list_next(itor); + } +} diff --git a/complib/cl_log.c b/complib/cl_log.c new file mode 100644 index 0000000..c63cf74 --- /dev/null +++ b/complib/cl_log.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifdef __WIN__ +#pragma warning(disable : 4996) +#endif + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include + +/* Maximum number of bytes that can be logged. */ +#define CL_MAX_LOG_DATA (256) + +/* + * Size of the character buffer to allow logging the above + * number of bytes. A space is added after every DWORD, and + * a new line is added after 8 DWORDS (for a line length less than 80). 
+ */
+/* Each byte takes two hex digits; one more character holds the NUL. */
+#define CL_LOG_DATA_SIZE ((CL_MAX_LOG_DATA * 2) + (CL_MAX_LOG_DATA / 4) + 1)
+
+/*
+ * Write one event to the system log, optionally followed by a hex dump.
+ *
+ * name     - identity string handed to openlog().
+ * type     - CL_LOG_ERROR and CL_LOG_WARN are logged as LOG_ERR and
+ *            LOG_WARNING; every other value is logged as LOG_INFO.
+ * message  - text of the event.
+ * p_data   - optional bytes to dump after the message; NULL for none.
+ * data_len - number of bytes at p_data.  A dump of CL_MAX_LOG_DATA bytes
+ *            or more is not formatted: a warning is sent to cl_msg_out()
+ *            and only the message is logged.
+ *
+ * The dump is written as two hex digits per byte, with a space after every
+ * DWORD (4 bytes) and a line break after every 8 DWORDS (32 bytes).
+ */
+void cl_log_event(IN const char *const name, IN const cl_log_type_t type,
+		  IN const char *const message,
+		  IN const void *const p_data OPTIONAL,
+		  IN const uint32_t data_len)
+{
+	int priority;
+	uint32_t i;
+	char data[CL_LOG_DATA_SIZE];
+	char *p_buf;
+	const uint8_t *p_int_data = (const uint8_t *)p_data;
+
+	CL_ASSERT(name);
+	CL_ASSERT(message);
+
+	openlog(name, LOG_NDELAY | LOG_PID, LOG_USER);
+	switch (type) {
+	case CL_LOG_ERROR:
+		priority = LOG_ERR;
+		break;
+
+	case CL_LOG_WARN:
+		priority = LOG_WARNING;
+		break;
+
+	case CL_LOG_INFO:
+	default:
+		priority = LOG_INFO;
+		break;
+	}
+
+	if (p_data) {
+		CL_ASSERT(data_len);
+		if (data_len < CL_MAX_LOG_DATA) {
+			p_buf = data;
+			/* Keep the buffer a valid string even if nothing is formatted. */
+			*p_buf = '\0';
+
+			/* Format the data into ASCII. */
+			for (i = 0; i < data_len; i++) {
+				sprintf(p_buf, "%02x", *p_int_data++);
+				p_buf += 2;
+
+				/* No separator is needed after the final byte. */
+				if (i + 1 == data_len)
+					break;
+
+				/* Add line break after 8 DWORDS. */
+				if ((i + 1) % 32 == 0) {
+					sprintf(p_buf++, "\n");
+					continue;
+				}
+
+				/* Add a space between DWORDS. */
+				if ((i + 1) % 4 == 0)
+					sprintf(p_buf++, " ");
+			}
+			/* Log from the start of the buffer, not the write cursor. */
+			syslog(priority, "%s data:\n%s\n", message, data);
+		} else {
+			/* The data portion is too large to log. */
+			cl_msg_out
+			    ("cl_log() - WARNING: data too large to log.\n");
+			syslog(priority, "%s\n", message);
+		}
+	} else {
+		syslog(priority, "%s\n", message);
+	}
+	closelog();
+}
diff --git a/complib/cl_map.c b/complib/cl_map.c
new file mode 100644
index 0000000..81426ee
--- /dev/null
+++ b/complib/cl_map.c
@@ -0,0 +1,1622 @@
+/*
+ * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of quick map, a binary tree where the caller always + * provides all necessary storage. + * + */ + +/***************************************************************************** +* +* Map +* +* Map is an associative array. By providing a key, the caller can retrieve +* an object from the map. All objects in the map have an associated key, +* as specified by the caller when the object was inserted into the map. +* In addition to random access, the caller can traverse the map much like +* a linked list, either forwards from the first object or backwards from +* the last object. The objects in the map are always traversed in +* order since the nodes are stored sorted. 
+* +* This implementation of Map uses a red black tree verified against +* Cormen-Leiserson-Rivest text, McGraw-Hill Edition, fourteenth +* printing, 1994. +* +*****************************************************************************/ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include + +/****************************************************************************** + IMPLEMENTATION OF QUICK MAP +******************************************************************************/ + +/* + * Get the root. + */ +static inline cl_map_item_t *__cl_map_root(IN const cl_qmap_t * const p_map) +{ + CL_ASSERT(p_map); + return (p_map->root.p_left); +} + +/* + * Returns whether a given item is on the left of its parent. + */ +static boolean_t __cl_map_is_left_child(IN const cl_map_item_t * const p_item) +{ + CL_ASSERT(p_item); + CL_ASSERT(p_item->p_up); + CL_ASSERT(p_item->p_up != p_item); + + return (p_item->p_up->p_left == p_item); +} + +/* + * Retrieve the pointer to the parent's pointer to an item. + */ +static cl_map_item_t **__cl_map_get_parent_ptr_to_item(IN cl_map_item_t * + const p_item) +{ + CL_ASSERT(p_item); + CL_ASSERT(p_item->p_up); + CL_ASSERT(p_item->p_up != p_item); + + if (__cl_map_is_left_child(p_item)) + return (&p_item->p_up->p_left); + + CL_ASSERT(p_item->p_up->p_right == p_item); + return (&p_item->p_up->p_right); +} + +/* + * Rotate a node to the left. This rotation affects the least number of links + * between nodes and brings the level of C up by one while increasing the depth + * of A one. Note that the links to/from W, X, Y, and Z are not affected. 
+ * + * R R + * | | + * A C + * / \ / \ + * W C A Z + * / \ / \ + * B Z W B + * / \ / \ + * X Y X Y + */ +static void __cl_map_rot_left(IN cl_qmap_t * const p_map, + IN cl_map_item_t * const p_item) +{ + cl_map_item_t **pp_root; + + CL_ASSERT(p_map); + CL_ASSERT(p_item); + CL_ASSERT(p_item->p_right != &p_map->nil); + + pp_root = __cl_map_get_parent_ptr_to_item(p_item); + + /* Point R to C instead of A. */ + *pp_root = p_item->p_right; + /* Set C's parent to R. */ + (*pp_root)->p_up = p_item->p_up; + + /* Set A's right to B */ + p_item->p_right = (*pp_root)->p_left; + /* + * Set B's parent to A. We trap for B being NIL since the + * caller may depend on NIL not changing. + */ + if ((*pp_root)->p_left != &p_map->nil) + (*pp_root)->p_left->p_up = p_item; + + /* Set C's left to A. */ + (*pp_root)->p_left = p_item; + /* Set A's parent to C. */ + p_item->p_up = *pp_root; +} + +/* + * Rotate a node to the right. This rotation affects the least number of links + * between nodes and brings the level of A up by one while increasing the depth + * of C one. Note that the links to/from W, X, Y, and Z are not affected. + * + * R R + * | | + * C A + * / \ / \ + * A Z W C + * / \ / \ + * W B B Z + * / \ / \ + * X Y X Y + */ +static void __cl_map_rot_right(IN cl_qmap_t * const p_map, + IN cl_map_item_t * const p_item) +{ + cl_map_item_t **pp_root; + + CL_ASSERT(p_map); + CL_ASSERT(p_item); + CL_ASSERT(p_item->p_left != &p_map->nil); + + /* Point R to A instead of C. */ + pp_root = __cl_map_get_parent_ptr_to_item(p_item); + (*pp_root) = p_item->p_left; + /* Set A's parent to R. */ + (*pp_root)->p_up = p_item->p_up; + + /* Set C's left to B */ + p_item->p_left = (*pp_root)->p_right; + /* + * Set B's parent to C. We trap for B being NIL since the + * caller may depend on NIL not changing. + */ + if ((*pp_root)->p_right != &p_map->nil) + (*pp_root)->p_right->p_up = p_item; + + /* Set A's right to C. */ + (*pp_root)->p_right = p_item; + /* Set C's parent to A. 
*/ + p_item->p_up = *pp_root; +} + +void cl_qmap_init(IN cl_qmap_t * const p_map) +{ + CL_ASSERT(p_map); + + memset(p_map, 0, sizeof(cl_qmap_t)); + + /* special setup for the root node */ + p_map->root.p_up = &p_map->root; + p_map->root.p_left = &p_map->nil; + p_map->root.p_right = &p_map->nil; + p_map->root.color = CL_MAP_BLACK; + + /* Setup the node used as terminator for all leaves. */ + p_map->nil.p_up = &p_map->nil; + p_map->nil.p_left = &p_map->nil; + p_map->nil.p_right = &p_map->nil; + p_map->nil.color = CL_MAP_BLACK; + + p_map->state = CL_INITIALIZED; + + cl_qmap_remove_all(p_map); +} + +cl_map_item_t *cl_qmap_get(IN const cl_qmap_t * const p_map, + IN const uint64_t key) +{ + cl_map_item_t *p_item; + + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + + p_item = __cl_map_root(p_map); + + while (p_item != &p_map->nil) { + if (key == p_item->key) + break; /* just right */ + + if (key < p_item->key) + p_item = p_item->p_left; /* too small */ + else + p_item = p_item->p_right; /* too big */ + } + + return (p_item); +} + +cl_map_item_t *cl_qmap_get_next(IN const cl_qmap_t * const p_map, + IN const uint64_t key) +{ + cl_map_item_t *p_item; + cl_map_item_t *p_item_found; + + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + + p_item = __cl_map_root(p_map); + p_item_found = (cl_map_item_t *) & p_map->nil; + + while (p_item != &p_map->nil) { + if (key < p_item->key) { + p_item_found = p_item; + p_item = p_item->p_left; + } else { + p_item = p_item->p_right; + } + } + + return (p_item_found); +} + +void cl_qmap_apply_func(IN const cl_qmap_t * const p_map, + IN cl_pfn_qmap_apply_t pfn_func, + IN const void *const context) +{ + cl_map_item_t *p_map_item; + + /* Note that context can have any arbitrary value. 
*/ + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + CL_ASSERT(pfn_func); + + p_map_item = cl_qmap_head(p_map); + while (p_map_item != cl_qmap_end(p_map)) { + pfn_func(p_map_item, (void *)context); + p_map_item = cl_qmap_next(p_map_item); + } +} + +/* + * Balance a tree starting at a given item back to the root. + */ +static void __cl_map_ins_bal(IN cl_qmap_t * const p_map, + IN cl_map_item_t * p_item) +{ + cl_map_item_t *p_grand_uncle; + + CL_ASSERT(p_map); + CL_ASSERT(p_item); + CL_ASSERT(p_item != &p_map->root); + + while (p_item->p_up->color == CL_MAP_RED) { + if (__cl_map_is_left_child(p_item->p_up)) { + p_grand_uncle = p_item->p_up->p_up->p_right; + CL_ASSERT(p_grand_uncle); + if (p_grand_uncle->color == CL_MAP_RED) { + p_grand_uncle->color = CL_MAP_BLACK; + p_item->p_up->color = CL_MAP_BLACK; + p_item->p_up->p_up->color = CL_MAP_RED; + p_item = p_item->p_up->p_up; + continue; + } + + if (!__cl_map_is_left_child(p_item)) { + p_item = p_item->p_up; + __cl_map_rot_left(p_map, p_item); + } + p_item->p_up->color = CL_MAP_BLACK; + p_item->p_up->p_up->color = CL_MAP_RED; + __cl_map_rot_right(p_map, p_item->p_up->p_up); + } else { + p_grand_uncle = p_item->p_up->p_up->p_left; + CL_ASSERT(p_grand_uncle); + if (p_grand_uncle->color == CL_MAP_RED) { + p_grand_uncle->color = CL_MAP_BLACK; + p_item->p_up->color = CL_MAP_BLACK; + p_item->p_up->p_up->color = CL_MAP_RED; + p_item = p_item->p_up->p_up; + continue; + } + + if (__cl_map_is_left_child(p_item)) { + p_item = p_item->p_up; + __cl_map_rot_right(p_map, p_item); + } + p_item->p_up->color = CL_MAP_BLACK; + p_item->p_up->p_up->color = CL_MAP_RED; + __cl_map_rot_left(p_map, p_item->p_up->p_up); + } + } +} + +cl_map_item_t *cl_qmap_insert(IN cl_qmap_t * const p_map, + IN const uint64_t key, + IN cl_map_item_t * const p_item) +{ + cl_map_item_t *p_insert_at, *p_comp_item; + + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + CL_ASSERT(p_item); + CL_ASSERT(p_map->root.p_up == &p_map->root); 
+ CL_ASSERT(p_map->root.color != CL_MAP_RED); + CL_ASSERT(p_map->nil.color != CL_MAP_RED); + + p_item->p_left = &p_map->nil; + p_item->p_right = &p_map->nil; + p_item->key = key; + p_item->color = CL_MAP_RED; + + /* Find the insertion location. */ + p_insert_at = &p_map->root; + p_comp_item = __cl_map_root(p_map); + + while (p_comp_item != &p_map->nil) { + p_insert_at = p_comp_item; + + if (key == p_insert_at->key) + return (p_insert_at); + + /* Traverse the tree until the correct insertion point is found. */ + if (key < p_insert_at->key) + p_comp_item = p_insert_at->p_left; + else + p_comp_item = p_insert_at->p_right; + } + + CL_ASSERT(p_insert_at != &p_map->nil); + CL_ASSERT(p_comp_item == &p_map->nil); + /* Insert the item. */ + if (p_insert_at == &p_map->root) { + p_insert_at->p_left = p_item; + /* + * Primitive insert places the new item in front of + * the existing item. + */ + __cl_primitive_insert(&p_map->nil.pool_item.list_item, + &p_item->pool_item.list_item); + } else if (key < p_insert_at->key) { + p_insert_at->p_left = p_item; + /* + * Primitive insert places the new item in front of + * the existing item. + */ + __cl_primitive_insert(&p_insert_at->pool_item.list_item, + &p_item->pool_item.list_item); + } else { + p_insert_at->p_right = p_item; + /* + * Primitive insert places the new item in front of + * the existing item. + */ + __cl_primitive_insert(p_insert_at->pool_item.list_item.p_next, + &p_item->pool_item.list_item); + } + /* Increase the count. */ + p_map->count++; + + p_item->p_up = p_insert_at; + + /* + * We have added depth to this section of the tree. + * Rebalance as necessary as we retrace our path through the tree + * and update colors. + */ + __cl_map_ins_bal(p_map, p_item); + + __cl_map_root(p_map)->color = CL_MAP_BLACK; + + /* + * Note that it is not necessary to re-color the nil node black because all + * red color assignments are made via the p_up pointer, and nil is never + * set as the value of a p_up pointer. 
+ */ + +#ifdef _DEBUG_ + /* Set the pointer to the map in the map item for consistency checking. */ + p_item->p_map = p_map; +#endif + + return (p_item); +} + +static void __cl_map_del_bal(IN cl_qmap_t * const p_map, + IN cl_map_item_t * p_item) +{ + cl_map_item_t *p_uncle; + + while ((p_item->color != CL_MAP_RED) && (p_item->p_up != &p_map->root)) { + if (__cl_map_is_left_child(p_item)) { + p_uncle = p_item->p_up->p_right; + + if (p_uncle->color == CL_MAP_RED) { + p_uncle->color = CL_MAP_BLACK; + p_item->p_up->color = CL_MAP_RED; + __cl_map_rot_left(p_map, p_item->p_up); + p_uncle = p_item->p_up->p_right; + } + + if (p_uncle->p_right->color != CL_MAP_RED) { + if (p_uncle->p_left->color != CL_MAP_RED) { + p_uncle->color = CL_MAP_RED; + p_item = p_item->p_up; + continue; + } + + p_uncle->p_left->color = CL_MAP_BLACK; + p_uncle->color = CL_MAP_RED; + __cl_map_rot_right(p_map, p_uncle); + p_uncle = p_item->p_up->p_right; + } + p_uncle->color = p_item->p_up->color; + p_item->p_up->color = CL_MAP_BLACK; + p_uncle->p_right->color = CL_MAP_BLACK; + __cl_map_rot_left(p_map, p_item->p_up); + break; + } else { + p_uncle = p_item->p_up->p_left; + + if (p_uncle->color == CL_MAP_RED) { + p_uncle->color = CL_MAP_BLACK; + p_item->p_up->color = CL_MAP_RED; + __cl_map_rot_right(p_map, p_item->p_up); + p_uncle = p_item->p_up->p_left; + } + + if (p_uncle->p_left->color != CL_MAP_RED) { + if (p_uncle->p_right->color != CL_MAP_RED) { + p_uncle->color = CL_MAP_RED; + p_item = p_item->p_up; + continue; + } + + p_uncle->p_right->color = CL_MAP_BLACK; + p_uncle->color = CL_MAP_RED; + __cl_map_rot_left(p_map, p_uncle); + p_uncle = p_item->p_up->p_left; + } + p_uncle->color = p_item->p_up->color; + p_item->p_up->color = CL_MAP_BLACK; + p_uncle->p_left->color = CL_MAP_BLACK; + __cl_map_rot_right(p_map, p_item->p_up); + break; + } + } + p_item->color = CL_MAP_BLACK; +} + +void cl_qmap_remove_item(IN cl_qmap_t * const p_map, + IN cl_map_item_t * const p_item) +{ + cl_map_item_t *p_child, 
*p_del_item; + + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + CL_ASSERT(p_item); + + if (p_item == cl_qmap_end(p_map)) + return; + + /* must be checked after comparing to cl_qmap_end, since + the end is not a valid item. */ + CL_ASSERT(p_item->p_map == p_map); + + if ((p_item->p_right == &p_map->nil) || (p_item->p_left == &p_map->nil)) { + /* The item being removed has children on at most one side. */ + p_del_item = p_item; + } else { + /* + * The item being removed has children on both sides. + * We select the item that will replace it. After removing + * the substitute item and rebalancing, the tree will have the + * correct topology. Exchanging the substitute for the item + * will finalize the removal. + */ + p_del_item = cl_qmap_next(p_item); + CL_ASSERT(p_del_item != &p_map->nil); + } + + /* Remove the item from the list. */ + __cl_primitive_remove(&p_item->pool_item.list_item); + /* Decrement the item count. */ + p_map->count--; + + /* Get the child of the node being unlinked (nil if it has none). */ + if (p_del_item->p_left != &p_map->nil) + p_child = p_del_item->p_left; + else + p_child = p_del_item->p_right; + + /* + * This assignment may modify the parent pointer of the nil node. + * This is inconsequential. + */ + p_child->p_up = p_del_item->p_up; + (*__cl_map_get_parent_ptr_to_item(p_del_item)) = p_child; + + if (p_del_item->color != CL_MAP_RED) + __cl_map_del_bal(p_map, p_child); + + /* + * Note that the splicing done below does not need to occur before + * the tree is balanced, since the actual topology changes are made by the + * preceding code. The topology is preserved by the color assignment made + * below (reader should be reminded that p_del_item == p_item in some cases). + */ + if (p_del_item != p_item) { + /* + * Finalize the removal of the specified item by exchanging it with + * the substitute which we removed above.
+ */ + p_del_item->p_up = p_item->p_up; + p_del_item->p_left = p_item->p_left; + p_del_item->p_right = p_item->p_right; + (*__cl_map_get_parent_ptr_to_item(p_item)) = p_del_item; + p_item->p_right->p_up = p_del_item; + p_item->p_left->p_up = p_del_item; + p_del_item->color = p_item->color; + } + + CL_ASSERT(p_map->nil.color != CL_MAP_RED); + +#ifdef _DEBUG_ + /* Clear the pointer to the map since the item has been removed. */ + p_item->p_map = NULL; +#endif +} + +cl_map_item_t *cl_qmap_remove(IN cl_qmap_t * const p_map, IN const uint64_t key) +{ + cl_map_item_t *p_item; + + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + + /* Seek the node with the specified key */ + p_item = cl_qmap_get(p_map, key); + + cl_qmap_remove_item(p_map, p_item); + + return (p_item); +} + +void cl_qmap_merge(OUT cl_qmap_t * const p_dest_map, + IN OUT cl_qmap_t * const p_src_map) +{ + cl_map_item_t *p_item, *p_item2, *p_next; + + CL_ASSERT(p_dest_map); + CL_ASSERT(p_src_map); + + p_item = cl_qmap_head(p_src_map); + + while (p_item != cl_qmap_end(p_src_map)) { + p_next = cl_qmap_next(p_item); + + /* Remove the item from its current map. */ + cl_qmap_remove_item(p_src_map, p_item); + /* Insert the item into the destination map. */ + p_item2 = + cl_qmap_insert(p_dest_map, cl_qmap_key(p_item), p_item); + /* Check that the item was successfully inserted. */ + if (p_item2 != p_item) { + /* Key already in the destination: put the item back in the source map. */ + p_item2 = + cl_qmap_insert(p_src_map, cl_qmap_key(p_item), + p_item); + CL_ASSERT(p_item2 == p_item); + } + p_item = p_next; + } +} + +static void __cl_qmap_delta_move(IN OUT cl_qmap_t * const p_dest, + IN OUT cl_qmap_t * const p_src, + IN OUT cl_map_item_t ** const pp_item) +{ + cl_map_item_t __attribute__((__unused__)) *p_temp; + cl_map_item_t *p_next; + + /* + * Get the next item so that we can ensure that pp_item points to + * a valid item upon return from the function.
+ */ + p_next = cl_qmap_next(*pp_item); + /* Move the item from its current map to the destination map. */ + cl_qmap_remove_item(p_src, *pp_item); + p_temp = cl_qmap_insert(p_dest, cl_qmap_key(*pp_item), *pp_item); + /* We should never have duplicates. */ + CL_ASSERT(p_temp == *pp_item); + /* Point pp_item to a valid item in the source map. */ + (*pp_item) = p_next; +} + +void cl_qmap_delta(IN OUT cl_qmap_t * const p_map1, + IN OUT cl_qmap_t * const p_map2, + OUT cl_qmap_t * const p_new, OUT cl_qmap_t * const p_old) +{ + cl_map_item_t *p_item1, *p_item2; + uint64_t key1, key2; + + CL_ASSERT(p_map1); + CL_ASSERT(p_map2); + CL_ASSERT(p_new); + CL_ASSERT(p_old); + CL_ASSERT(cl_is_qmap_empty(p_new)); + CL_ASSERT(cl_is_qmap_empty(p_old)); + + p_item1 = cl_qmap_head(p_map1); + p_item2 = cl_qmap_head(p_map2); + + while (p_item1 != cl_qmap_end(p_map1) && p_item2 != cl_qmap_end(p_map2)) { + key1 = cl_qmap_key(p_item1); + key2 = cl_qmap_key(p_item2); + if (key1 < key2) { + /* Key only in map1: we found an old item. */ + __cl_qmap_delta_move(p_old, p_map1, &p_item1); + } else if (key1 > key2) { + /* Key only in map2: we found a new item. */ + __cl_qmap_delta_move(p_new, p_map2, &p_item2); + } else { + /* Move both forward since they have the same key. */ + p_item1 = cl_qmap_next(p_item1); + p_item2 = cl_qmap_next(p_item2); + } + } + + /* Process the remainder if the end of either source map was reached.
*/ + while (p_item2 != cl_qmap_end(p_map2)) + __cl_qmap_delta_move(p_new, p_map2, &p_item2); + + while (p_item1 != cl_qmap_end(p_map1)) + __cl_qmap_delta_move(p_old, p_map1, &p_item1); +} + +/****************************************************************************** + IMPLEMENTATION OF MAP +******************************************************************************/ + +#define MAP_GROW_SIZE 32 + +void cl_map_construct(IN cl_map_t * const p_map) +{ + CL_ASSERT(p_map); + + cl_qpool_construct(&p_map->pool); +} + +cl_status_t cl_map_init(IN cl_map_t * const p_map, IN const uint32_t min_items) +{ + uint32_t grow_size; + + CL_ASSERT(p_map); + + cl_qmap_init(&p_map->qmap); + + /* + * We will grow by min_items/8 items at a time, with a minimum of + * MAP_GROW_SIZE. + */ + grow_size = min_items >> 3; + if (grow_size < MAP_GROW_SIZE) + grow_size = MAP_GROW_SIZE; + + return (cl_qpool_init(&p_map->pool, min_items, 0, grow_size, + sizeof(cl_map_obj_t), NULL, NULL, NULL)); +} + +void cl_map_destroy(IN cl_map_t * const p_map) +{ + CL_ASSERT(p_map); + + cl_qpool_destroy(&p_map->pool); +} + +void *cl_map_insert(IN cl_map_t * const p_map, + IN const uint64_t key, IN const void *const p_object) +{ + cl_map_obj_t *p_map_obj, *p_obj_at_key; + + CL_ASSERT(p_map); + + p_map_obj = (cl_map_obj_t *) cl_qpool_get(&p_map->pool); + + if (!p_map_obj) + return (NULL); + + cl_qmap_set_obj(p_map_obj, p_object); + + p_obj_at_key = + (cl_map_obj_t *) cl_qmap_insert(&p_map->qmap, key, + &p_map_obj->item); + + /* Return the item to the pool if insertion failed. 
*/ + if (p_obj_at_key != p_map_obj) + cl_qpool_put(&p_map->pool, &p_map_obj->item.pool_item); + + return (cl_qmap_obj(p_obj_at_key)); +} + +void *cl_map_get(IN const cl_map_t * const p_map, IN const uint64_t key) +{ + cl_map_item_t *p_item; + + CL_ASSERT(p_map); + + p_item = cl_qmap_get(&p_map->qmap, key); + + if (p_item == cl_qmap_end(&p_map->qmap)) + return (NULL); + + return (cl_qmap_obj(PARENT_STRUCT(p_item, cl_map_obj_t, item))); +} + +void *cl_map_get_next(IN const cl_map_t * const p_map, IN const uint64_t key) +{ + cl_map_item_t *p_item; + + CL_ASSERT(p_map); + + p_item = cl_qmap_get_next(&p_map->qmap, key); + + if (p_item == cl_qmap_end(&p_map->qmap)) + return (NULL); + + return (cl_qmap_obj(PARENT_STRUCT(p_item, cl_map_obj_t, item))); +} + +void cl_map_remove_item(IN cl_map_t * const p_map, + IN const cl_map_iterator_t itor) +{ + if (itor == cl_map_end(p_map)) + return; + + CL_ASSERT(itor->p_map == &p_map->qmap); /* end is not a valid item */ + + cl_qmap_remove_item(&p_map->qmap, (cl_map_item_t *) itor); + cl_qpool_put(&p_map->pool, &((cl_map_item_t *) itor)->pool_item); +} + +void *cl_map_remove(IN cl_map_t * const p_map, IN const uint64_t key) +{ + cl_map_item_t *p_item; + void *p_obj; + + CL_ASSERT(p_map); + + p_item = cl_qmap_remove(&p_map->qmap, key); + + if (p_item == cl_qmap_end(&p_map->qmap)) + return (NULL); + + p_obj = cl_qmap_obj((cl_map_obj_t *) p_item); + cl_qpool_put(&p_map->pool, &p_item->pool_item); + + return (p_obj); +} + +void cl_map_remove_all(IN cl_map_t * const p_map) +{ + cl_map_item_t *p_item; + + CL_ASSERT(p_map); + + /* Return all map items to the pool.
*/ + while (!cl_is_qmap_empty(&p_map->qmap)) { + p_item = cl_qmap_head(&p_map->qmap); + cl_qmap_remove_item(&p_map->qmap, p_item); + cl_qpool_put(&p_map->pool, &p_item->pool_item); + + if (!cl_is_qmap_empty(&p_map->qmap)) { + p_item = cl_qmap_tail(&p_map->qmap); + cl_qmap_remove_item(&p_map->qmap, p_item); + cl_qpool_put(&p_map->pool, &p_item->pool_item); + } + } +} + +cl_status_t cl_map_merge(OUT cl_map_t * const p_dest_map, + IN OUT cl_map_t * const p_src_map) +{ + cl_status_t status = CL_SUCCESS; + cl_map_iterator_t itor, next; + uint64_t key; + void *p_obj, *p_obj2; + + CL_ASSERT(p_dest_map); + CL_ASSERT(p_src_map); + + itor = cl_map_head(p_src_map); + while (itor != cl_map_end(p_src_map)) { + next = cl_map_next(itor); + + p_obj = cl_map_obj(itor); + key = cl_map_key(itor); + + cl_map_remove_item(p_src_map, itor); + + /* Insert the object into the destination map. */ + p_obj2 = cl_map_insert(p_dest_map, key, p_obj); + /* Trap for failure. */ + if (p_obj != p_obj2) { + if (!p_obj2) + status = CL_INSUFFICIENT_MEMORY; + /* Put the object back in the source map. This must succeed. */ + p_obj2 = cl_map_insert(p_src_map, key, p_obj); + CL_ASSERT(p_obj == p_obj2); + /* If the failure was due to insufficient memory, return. */ + if (status != CL_SUCCESS) + return (status); + } + itor = next; + } + + return (CL_SUCCESS); +} + +static void __cl_map_revert(IN OUT cl_map_t * const p_map1, + IN OUT cl_map_t * const p_map2, + IN OUT cl_map_t * const p_new, + IN OUT cl_map_t * const p_old) +{ + cl_status_t __attribute__((__unused__)) status; + + /* Restore the initial state. 
*/ + status = cl_map_merge(p_map1, p_old); + CL_ASSERT(status == CL_SUCCESS); + status = cl_map_merge(p_map2, p_new); + CL_ASSERT(status == CL_SUCCESS); +} + +static cl_status_t __cl_map_delta_move(OUT cl_map_t * const p_dest, + IN OUT cl_map_t * const p_src, + IN OUT cl_map_iterator_t * const p_itor) +{ + cl_map_iterator_t next; + void *p_obj, *p_obj2; + uint64_t key; + + /* Get a valid iterator so we can continue the loop. */ + next = cl_map_next(*p_itor); + /* Get the pointer to the object for insertion. */ + p_obj = cl_map_obj(*p_itor); + /* Get the key for the object. */ + key = cl_map_key(*p_itor); + /* Move the object. */ + cl_map_remove_item(p_src, *p_itor); + p_obj2 = cl_map_insert(p_dest, key, p_obj); + /* Check for failure. We should never get a duplicate. */ + if (!p_obj2) { + p_obj2 = cl_map_insert(p_src, key, p_obj); + CL_ASSERT(p_obj2 == p_obj); + return (CL_INSUFFICIENT_MEMORY); + } + + /* We should never get a duplicate */ + CL_ASSERT(p_obj == p_obj2); + /* Update the iterator so that it is valid. */ + (*p_itor) = next; + + return (CL_SUCCESS); +} + +cl_status_t cl_map_delta(IN OUT cl_map_t * const p_map1, + IN OUT cl_map_t * const p_map2, + OUT cl_map_t * const p_new, OUT cl_map_t * const p_old) +{ + cl_map_iterator_t itor1, itor2; + uint64_t key1, key2; + cl_status_t status; + + CL_ASSERT(p_map1); + CL_ASSERT(p_map2); + CL_ASSERT(p_new); + CL_ASSERT(p_old); + CL_ASSERT(cl_is_map_empty(p_new)); + CL_ASSERT(cl_is_map_empty(p_old)); + + itor1 = cl_map_head(p_map1); + itor2 = cl_map_head(p_map2); + + /* + * Note that the check is for the end, since duplicate items will remain + * in their respective maps. + */ + while (itor1 != cl_map_end(p_map1) && itor2 != cl_map_end(p_map2)) { + key1 = cl_map_key(itor1); + key2 = cl_map_key(itor2); + if (key1 < key2) { + status = __cl_map_delta_move(p_old, p_map1, &itor1); + /* Check for failure. */ + if (status != CL_SUCCESS) { + /* Restore the initial state. 
*/ + __cl_map_revert(p_map1, p_map2, p_new, p_old); + /* Return the failure status. */ + return (status); + } + } else if (key1 > key2) { + status = __cl_map_delta_move(p_new, p_map2, &itor2); + if (status != CL_SUCCESS) { + /* Restore the initial state. */ + __cl_map_revert(p_map1, p_map2, p_new, p_old); + /* Return the failure status. */ + return (status); + } + } else { + /* Move both forward since they have the same key. */ + itor1 = cl_map_next(itor1); + itor2 = cl_map_next(itor2); + } + } + + /* Process the remainder if the end of either source map was reached. */ + while (itor2 != cl_map_end(p_map2)) { + status = __cl_map_delta_move(p_new, p_map2, &itor2); + if (status != CL_SUCCESS) { + /* Restore the initial state. */ + __cl_map_revert(p_map1, p_map2, p_new, p_old); + /* Return the failure status. */ + return (status); + } + } + + while (itor1 != cl_map_end(p_map1)) { + status = __cl_map_delta_move(p_old, p_map1, &itor1); + if (status != CL_SUCCESS) { + /* Restore the initial state. */ + __cl_map_revert(p_map1, p_map2, p_new, p_old); + /* Return the failure status. */ + return (status); + } + } + + return (CL_SUCCESS); +} + +/****************************************************************************** + IMPLEMENTATION OF FLEXI MAP +******************************************************************************/ + +/* + * Get the root. + */ +static inline cl_fmap_item_t *__cl_fmap_root(IN const cl_fmap_t * const p_map) +{ + CL_ASSERT(p_map); + return (p_map->root.p_left); +} + +/* + * Returns whether a given item is on the left of its parent. + */ +static boolean_t __cl_fmap_is_left_child(IN const cl_fmap_item_t * const p_item) +{ + CL_ASSERT(p_item); + CL_ASSERT(p_item->p_up); + CL_ASSERT(p_item->p_up != p_item); + + return (p_item->p_up->p_left == p_item); +} + +/* + * Retrieve the pointer to the parent's pointer to an item.
+ */ +static cl_fmap_item_t **__cl_fmap_get_parent_ptr_to_item(IN cl_fmap_item_t * + const p_item) +{ + CL_ASSERT(p_item); + CL_ASSERT(p_item->p_up); + CL_ASSERT(p_item->p_up != p_item); + + if (__cl_fmap_is_left_child(p_item)) + return (&p_item->p_up->p_left); + + CL_ASSERT(p_item->p_up->p_right == p_item); + return (&p_item->p_up->p_right); +} + +/* + * Rotate a node to the left. This rotation affects the least number of links + * between nodes and brings the level of C up by one while increasing the depth + * of A one. Note that the links to/from W, X, Y, and Z are not affected. + * + * R R + * | | + * A C + * / \ / \ + * W C A Z + * / \ / \ + * B Z W B + * / \ / \ + * X Y X Y + */ +static void __cl_fmap_rot_left(IN cl_fmap_t * const p_map, + IN cl_fmap_item_t * const p_item) +{ + cl_fmap_item_t **pp_root; + + CL_ASSERT(p_map); + CL_ASSERT(p_item); + CL_ASSERT(p_item->p_right != &p_map->nil); + + pp_root = __cl_fmap_get_parent_ptr_to_item(p_item); + + /* Point R to C instead of A. */ + *pp_root = p_item->p_right; + /* Set C's parent to R. */ + (*pp_root)->p_up = p_item->p_up; + + /* Set A's right to B */ + p_item->p_right = (*pp_root)->p_left; + /* + * Set B's parent to A. We trap for B being NIL since the + * caller may depend on NIL not changing. + */ + if ((*pp_root)->p_left != &p_map->nil) + (*pp_root)->p_left->p_up = p_item; + + /* Set C's left to A. */ + (*pp_root)->p_left = p_item; + /* Set A's parent to C. */ + p_item->p_up = *pp_root; +} + +/* + * Rotate a node to the right. This rotation affects the least number of links + * between nodes and brings the level of A up by one while increasing the depth + * of C one. Note that the links to/from W, X, Y, and Z are not affected. 
+ * + * R R + * | | + * C A + * / \ / \ + * A Z W C + * / \ / \ + * W B B Z + * / \ / \ + * X Y X Y + */ +static void __cl_fmap_rot_right(IN cl_fmap_t * const p_map, + IN cl_fmap_item_t * const p_item) +{ + cl_fmap_item_t **pp_root; + + CL_ASSERT(p_map); + CL_ASSERT(p_item); + CL_ASSERT(p_item->p_left != &p_map->nil); + + /* Point R to A instead of C. */ + pp_root = __cl_fmap_get_parent_ptr_to_item(p_item); + (*pp_root) = p_item->p_left; + /* Set A's parent to R. */ + (*pp_root)->p_up = p_item->p_up; + + /* Set C's left to B */ + p_item->p_left = (*pp_root)->p_right; + /* + * Set B's parent to C. We trap for B being NIL since the + * caller may depend on NIL not changing. + */ + if ((*pp_root)->p_right != &p_map->nil) + (*pp_root)->p_right->p_up = p_item; + + /* Set A's right to C. */ + (*pp_root)->p_right = p_item; + /* Set C's parent to A. */ + p_item->p_up = *pp_root; +} + +void cl_fmap_init(IN cl_fmap_t * const p_map, IN cl_pfn_fmap_cmp_t pfn_compare) +{ + CL_ASSERT(p_map); + CL_ASSERT(pfn_compare); + + memset(p_map, 0, sizeof(cl_fmap_t)); + + /* special setup for the root node */ + p_map->root.p_up = &p_map->root; + p_map->root.p_left = &p_map->nil; + p_map->root.p_right = &p_map->nil; + p_map->root.color = CL_MAP_BLACK; + + /* Setup the node used as terminator for all leaves. */ + p_map->nil.p_up = &p_map->nil; + p_map->nil.p_left = &p_map->nil; + p_map->nil.p_right = &p_map->nil; + p_map->nil.color = CL_MAP_BLACK; + + /* Store the compare function pointer. */ + p_map->pfn_compare = pfn_compare; + + p_map->state = CL_INITIALIZED; + + cl_fmap_remove_all(p_map); +} + +cl_fmap_item_t *cl_fmap_match(IN const cl_fmap_t * const p_map, + IN const void *const p_key, + IN cl_pfn_fmap_cmp_t pfn_compare) +{ + cl_fmap_item_t *p_item; + int cmp; + + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + + p_item = __cl_fmap_root(p_map); + + while (p_item != &p_map->nil) { + cmp = pfn_compare ? 
pfn_compare(p_key, p_item->p_key) : + p_map->pfn_compare(p_key, p_item->p_key); + + if (!cmp) + break; /* just right */ + + if (cmp < 0) + p_item = p_item->p_left; /* too small */ + else + p_item = p_item->p_right; /* too big */ + } + + return (p_item); +} + +cl_fmap_item_t *cl_fmap_get(IN const cl_fmap_t * const p_map, + IN const void *const p_key) +{ + return cl_fmap_match(p_map, p_key, p_map->pfn_compare); +} + +cl_fmap_item_t *cl_fmap_get_next(IN const cl_fmap_t * const p_map, + IN const void *const p_key) +{ + cl_fmap_item_t *p_item; + cl_fmap_item_t *p_item_found; + int cmp; + + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + + p_item = __cl_fmap_root(p_map); + p_item_found = (cl_fmap_item_t *) & p_map->nil; + + while (p_item != &p_map->nil) { + cmp = p_map->pfn_compare(p_key, p_item->p_key); + + if (cmp < 0) { + p_item_found = p_item; + p_item = p_item->p_left; /* too small */ + } else { + p_item = p_item->p_right; /* too big or match */ + } + } + + return (p_item_found); +} + +void cl_fmap_apply_func(IN const cl_fmap_t * const p_map, + IN cl_pfn_fmap_apply_t pfn_func, + IN const void *const context) +{ + cl_fmap_item_t *p_fmap_item; + + /* Note that context can have any arbitrary value. */ + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + CL_ASSERT(pfn_func); + + p_fmap_item = cl_fmap_head(p_map); + while (p_fmap_item != cl_fmap_end(p_map)) { + pfn_func(p_fmap_item, (void *)context); + p_fmap_item = cl_fmap_next(p_fmap_item); + } +} + +/* + * Balance a tree starting at a given item back to the root. 
+ */ +static void __cl_fmap_ins_bal(IN cl_fmap_t * const p_map, + IN cl_fmap_item_t * p_item) +{ + cl_fmap_item_t *p_grand_uncle; + + CL_ASSERT(p_map); + CL_ASSERT(p_item); + CL_ASSERT(p_item != &p_map->root); + + while (p_item->p_up->color == CL_MAP_RED) { + if (__cl_fmap_is_left_child(p_item->p_up)) { + p_grand_uncle = p_item->p_up->p_up->p_right; + CL_ASSERT(p_grand_uncle); + if (p_grand_uncle->color == CL_MAP_RED) { + p_grand_uncle->color = CL_MAP_BLACK; + p_item->p_up->color = CL_MAP_BLACK; + p_item->p_up->p_up->color = CL_MAP_RED; + p_item = p_item->p_up->p_up; + continue; + } + + if (!__cl_fmap_is_left_child(p_item)) { + p_item = p_item->p_up; + __cl_fmap_rot_left(p_map, p_item); + } + p_item->p_up->color = CL_MAP_BLACK; + p_item->p_up->p_up->color = CL_MAP_RED; + __cl_fmap_rot_right(p_map, p_item->p_up->p_up); + } else { + p_grand_uncle = p_item->p_up->p_up->p_left; + CL_ASSERT(p_grand_uncle); + if (p_grand_uncle->color == CL_MAP_RED) { + p_grand_uncle->color = CL_MAP_BLACK; + p_item->p_up->color = CL_MAP_BLACK; + p_item->p_up->p_up->color = CL_MAP_RED; + p_item = p_item->p_up->p_up; + continue; + } + + if (__cl_fmap_is_left_child(p_item)) { + p_item = p_item->p_up; + __cl_fmap_rot_right(p_map, p_item); + } + p_item->p_up->color = CL_MAP_BLACK; + p_item->p_up->p_up->color = CL_MAP_RED; + __cl_fmap_rot_left(p_map, p_item->p_up->p_up); + } + } +} + +cl_fmap_item_t *cl_fmap_insert(IN cl_fmap_t * const p_map, + IN const void *const p_key, + IN cl_fmap_item_t * const p_item) +{ + cl_fmap_item_t *p_insert_at, *p_comp_item; + int cmp = 0; + + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + CL_ASSERT(p_item); + CL_ASSERT(p_map->root.p_up == &p_map->root); + CL_ASSERT(p_map->root.color != CL_MAP_RED); + CL_ASSERT(p_map->nil.color != CL_MAP_RED); + + p_item->p_left = &p_map->nil; + p_item->p_right = &p_map->nil; + p_item->p_key = p_key; + p_item->color = CL_MAP_RED; + + /* Find the insertion location. 
*/ + p_insert_at = &p_map->root; + p_comp_item = __cl_fmap_root(p_map); + + while (p_comp_item != &p_map->nil) { + p_insert_at = p_comp_item; + + cmp = p_map->pfn_compare(p_key, p_insert_at->p_key); + + if (!cmp) + return (p_insert_at); + + /* Traverse the tree until the correct insertion point is found. */ + if (cmp < 0) + p_comp_item = p_insert_at->p_left; + else + p_comp_item = p_insert_at->p_right; + } + + CL_ASSERT(p_insert_at != &p_map->nil); + CL_ASSERT(p_comp_item == &p_map->nil); + /* Insert the item. */ + if (p_insert_at == &p_map->root) { + p_insert_at->p_left = p_item; + /* + * Primitive insert places the new item in front of + * the existing item. + */ + __cl_primitive_insert(&p_map->nil.pool_item.list_item, + &p_item->pool_item.list_item); + } else if (cmp < 0) { + p_insert_at->p_left = p_item; + /* + * Primitive insert places the new item in front of + * the existing item. + */ + __cl_primitive_insert(&p_insert_at->pool_item.list_item, + &p_item->pool_item.list_item); + } else { + p_insert_at->p_right = p_item; + /* + * Primitive insert places the new item in front of + * the existing item. + */ + __cl_primitive_insert(p_insert_at->pool_item.list_item.p_next, + &p_item->pool_item.list_item); + } + /* Increase the count. */ + p_map->count++; + + p_item->p_up = p_insert_at; + + /* + * We have added depth to this section of the tree. + * Rebalance as necessary as we retrace our path through the tree + * and update colors. + */ + __cl_fmap_ins_bal(p_map, p_item); + + __cl_fmap_root(p_map)->color = CL_MAP_BLACK; + + /* + * Note that it is not necessary to re-color the nil node black because all + * red color assignments are made via the p_up pointer, and nil is never + * set as the value of a p_up pointer. + */ + +#ifdef _DEBUG_ + /* Set the pointer to the map in the map item for consistency checking. 
*/ + p_item->p_map = p_map; +#endif + + return (p_item); +} + +static void __cl_fmap_del_bal(IN cl_fmap_t * const p_map, + IN cl_fmap_item_t * p_item) +{ + cl_fmap_item_t *p_uncle; + + while ((p_item->color != CL_MAP_RED) && (p_item->p_up != &p_map->root)) { + if (__cl_fmap_is_left_child(p_item)) { + p_uncle = p_item->p_up->p_right; + + if (p_uncle->color == CL_MAP_RED) { + p_uncle->color = CL_MAP_BLACK; + p_item->p_up->color = CL_MAP_RED; + __cl_fmap_rot_left(p_map, p_item->p_up); + p_uncle = p_item->p_up->p_right; + } + + if (p_uncle->p_right->color != CL_MAP_RED) { + if (p_uncle->p_left->color != CL_MAP_RED) { + p_uncle->color = CL_MAP_RED; + p_item = p_item->p_up; + continue; + } + + p_uncle->p_left->color = CL_MAP_BLACK; + p_uncle->color = CL_MAP_RED; + __cl_fmap_rot_right(p_map, p_uncle); + p_uncle = p_item->p_up->p_right; + } + p_uncle->color = p_item->p_up->color; + p_item->p_up->color = CL_MAP_BLACK; + p_uncle->p_right->color = CL_MAP_BLACK; + __cl_fmap_rot_left(p_map, p_item->p_up); + break; + } else { + p_uncle = p_item->p_up->p_left; + + if (p_uncle->color == CL_MAP_RED) { + p_uncle->color = CL_MAP_BLACK; + p_item->p_up->color = CL_MAP_RED; + __cl_fmap_rot_right(p_map, p_item->p_up); + p_uncle = p_item->p_up->p_left; + } + + if (p_uncle->p_left->color != CL_MAP_RED) { + if (p_uncle->p_right->color != CL_MAP_RED) { + p_uncle->color = CL_MAP_RED; + p_item = p_item->p_up; + continue; + } + + p_uncle->p_right->color = CL_MAP_BLACK; + p_uncle->color = CL_MAP_RED; + __cl_fmap_rot_left(p_map, p_uncle); + p_uncle = p_item->p_up->p_left; + } + p_uncle->color = p_item->p_up->color; + p_item->p_up->color = CL_MAP_BLACK; + p_uncle->p_left->color = CL_MAP_BLACK; + __cl_fmap_rot_right(p_map, p_item->p_up); + break; + } + } + p_item->color = CL_MAP_BLACK; +} + +void cl_fmap_remove_item(IN cl_fmap_t * const p_map, + IN cl_fmap_item_t * const p_item) +{ + cl_fmap_item_t *p_child, *p_del_item; + + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + 
CL_ASSERT(p_item); + + if (p_item == cl_fmap_end(p_map)) + return; + + CL_ASSERT(p_item->p_map == p_map); /* end is not a valid item */ + if ((p_item->p_right == &p_map->nil) || (p_item->p_left == &p_map->nil)) { + /* The item being removed has children on at most one side. */ + p_del_item = p_item; + } else { + /* + * The item being removed has children on both sides. + * We select the item that will replace it. After removing + * the substitute item and rebalancing, the tree will have the + * correct topology. Exchanging the substitute for the item + * will finalize the removal. + */ + p_del_item = cl_fmap_next(p_item); + CL_ASSERT(p_del_item != &p_map->nil); + } + + /* Remove the item from the list. */ + __cl_primitive_remove(&p_item->pool_item.list_item); + /* Decrement the item count. */ + p_map->count--; + + /* Get the child of the node being unlinked (nil if it has none). */ + if (p_del_item->p_left != &p_map->nil) + p_child = p_del_item->p_left; + else + p_child = p_del_item->p_right; + + /* + * This assignment may modify the parent pointer of the nil node. + * This is inconsequential. + */ + p_child->p_up = p_del_item->p_up; + (*__cl_fmap_get_parent_ptr_to_item(p_del_item)) = p_child; + + if (p_del_item->color != CL_MAP_RED) + __cl_fmap_del_bal(p_map, p_child); + + /* + * Note that the splicing done below does not need to occur before + * the tree is balanced, since the actual topology changes are made by the + * preceding code. The topology is preserved by the color assignment made + * below (reader should be reminded that p_del_item == p_item in some cases). + */ + if (p_del_item != p_item) { + /* + * Finalize the removal of the specified item by exchanging it with + * the substitute which we removed above.
+ */ + p_del_item->p_up = p_item->p_up; + p_del_item->p_left = p_item->p_left; + p_del_item->p_right = p_item->p_right; + (*__cl_fmap_get_parent_ptr_to_item(p_item)) = p_del_item; + p_item->p_right->p_up = p_del_item; + p_item->p_left->p_up = p_del_item; + p_del_item->color = p_item->color; + } + + CL_ASSERT(p_map->nil.color != CL_MAP_RED); + +#ifdef _DEBUG_ + /* Clear the pointer to the map since the item has been removed. */ + p_item->p_map = NULL; +#endif +} + +cl_fmap_item_t *cl_fmap_remove(IN cl_fmap_t * const p_map, + IN const void *const p_key) +{ + cl_fmap_item_t *p_item; + + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + + /* Seek the node with the specified key */ + p_item = cl_fmap_get(p_map, p_key); + + cl_fmap_remove_item(p_map, p_item); + + return (p_item); +} + +void cl_fmap_merge(OUT cl_fmap_t * const p_dest_map, + IN OUT cl_fmap_t * const p_src_map) +{ + cl_fmap_item_t *p_item, *p_item2, *p_next; + + CL_ASSERT(p_dest_map); + CL_ASSERT(p_src_map); + + p_item = cl_fmap_head(p_src_map); + + while (p_item != cl_fmap_end(p_src_map)) { + p_next = cl_fmap_next(p_item); + + /* Remove the item from its current map. */ + cl_fmap_remove_item(p_src_map, p_item); + /* Insert the item into the destination map. */ + p_item2 = + cl_fmap_insert(p_dest_map, cl_fmap_key(p_item), p_item); + /* Check that the item was successfully inserted. */ + if (p_item2 != p_item) { + /* Key already in the destination: put the item back in the source map. */ + p_item2 = + cl_fmap_insert(p_src_map, cl_fmap_key(p_item), + p_item); + CL_ASSERT(p_item2 == p_item); + } + p_item = p_next; + } +} + +static void __cl_fmap_delta_move(IN OUT cl_fmap_t * const p_dest, + IN OUT cl_fmap_t * const p_src, + IN OUT cl_fmap_item_t ** const pp_item) +{ + cl_fmap_item_t __attribute__((__unused__)) *p_temp; + cl_fmap_item_t *p_next; + + /* + * Get the next item so that we can ensure that pp_item points to + * a valid item upon return from the function.
+ */ + p_next = cl_fmap_next(*pp_item); + /* Move the old item from its current map the the old map. */ + cl_fmap_remove_item(p_src, *pp_item); + p_temp = cl_fmap_insert(p_dest, cl_fmap_key(*pp_item), *pp_item); + /* We should never have duplicates. */ + CL_ASSERT(p_temp == *pp_item); + /* Point pp_item to a valid item in the source map. */ + (*pp_item) = p_next; +} + +void cl_fmap_delta(IN OUT cl_fmap_t * const p_map1, + IN OUT cl_fmap_t * const p_map2, + OUT cl_fmap_t * const p_new, OUT cl_fmap_t * const p_old) +{ + cl_fmap_item_t *p_item1, *p_item2; + int cmp; + + CL_ASSERT(p_map1); + CL_ASSERT(p_map2); + CL_ASSERT(p_new); + CL_ASSERT(p_old); + CL_ASSERT(cl_is_fmap_empty(p_new)); + CL_ASSERT(cl_is_fmap_empty(p_old)); + + p_item1 = cl_fmap_head(p_map1); + p_item2 = cl_fmap_head(p_map2); + + while (p_item1 != cl_fmap_end(p_map1) && p_item2 != cl_fmap_end(p_map2)) { + cmp = p_map1->pfn_compare(cl_fmap_key(p_item1), + cl_fmap_key(p_item2)); + if (cmp < 0) { + /* We found an old item. */ + __cl_fmap_delta_move(p_old, p_map1, &p_item1); + } else if (cmp > 0) { + /* We found a new item. */ + __cl_fmap_delta_move(p_new, p_map2, &p_item2); + } else { + /* Move both forward since they have the same key. */ + p_item1 = cl_fmap_next(p_item1); + p_item2 = cl_fmap_next(p_item2); + } + } + + /* Process the remainder if the end of either source map was reached. */ + while (p_item2 != cl_fmap_end(p_map2)) + __cl_fmap_delta_move(p_new, p_map2, &p_item2); + + while (p_item1 != cl_fmap_end(p_map1)) + __cl_fmap_delta_move(p_old, p_map1, &p_item1); +} diff --git a/complib/cl_nodenamemap.c b/complib/cl_nodenamemap.c new file mode 100644 index 0000000..353f58c --- /dev/null +++ b/complib/cl_nodenamemap.c @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2007 Lawrence Livermore National Lab + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define PARSE_NODE_MAP_BUFLEN 256 + +static int parse_node_map_wrap(const char *file_name, + int (*create)(void *, uint64_t, char *), + void *cxt, + char *linebuf, + unsigned int linebuflen); + +static int map_name(void *cxt, uint64_t guid, char *p) +{ + cl_qmap_t *map = cxt; + name_map_item_t *item; + + p = strtok(p, "\"#"); + if (!p) + return 0; + + item = malloc(sizeof(*item)); + if (!item) + return -1; + item->guid = guid; + item->name = strdup(p); + cl_qmap_insert(map, item->guid, (cl_map_item_t *) item); + return 0; +} + +nn_map_t *open_node_name_map(const char *node_name_map) +{ + nn_map_t *map; + char linebuf[PARSE_NODE_MAP_BUFLEN + 1]; + + if (!node_name_map) { +#ifdef HAVE_DEFAULT_NODENAME_MAP + struct stat buf; + node_name_map = HAVE_DEFAULT_NODENAME_MAP; + if (stat(node_name_map, &buf)) + return NULL; +#else + return NULL; +#endif /* HAVE_DEFAULT_NODENAME_MAP */ + } + + map = malloc(sizeof(*map)); + if (!map) + return NULL; + cl_qmap_init(map); + + memset(linebuf, '\0', PARSE_NODE_MAP_BUFLEN + 1); + if (parse_node_map_wrap(node_name_map, map_name, map, + linebuf, PARSE_NODE_MAP_BUFLEN)) { + if (errno == EIO) { + fprintf(stderr, + "WARNING failed to parse node name map " + "\"%s\"\n", + node_name_map); + fprintf(stderr, + "WARNING failed line: \"%s\"\n", + linebuf); + } + else + fprintf(stderr, + "WARNING failed to open node name map " + "\"%s\" (%s)\n", + node_name_map, strerror(errno)); + close_node_name_map(map); + return NULL; + } + + return map; +} + +void close_node_name_map(nn_map_t * map) +{ + name_map_item_t *item = NULL; + + if (!map) + return; + + item = (name_map_item_t *) cl_qmap_head(map); + while (item != (name_map_item_t *) cl_qmap_end(map)) { + item = (name_map_item_t *) cl_qmap_remove(map, item->guid); + free(item->name); + free(item); + item = (name_map_item_t *) cl_qmap_head(map); + } + 
free(map); +} + +char *remap_node_name(nn_map_t * map, uint64_t target_guid, char *nodedesc) +{ + char *rc = NULL; + name_map_item_t *item = NULL; + + if (!map) + goto done; + + item = (name_map_item_t *) cl_qmap_get(map, target_guid); + if (item != (name_map_item_t *) cl_qmap_end(map)) + rc = strdup(item->name); + +done: + if (rc == NULL) + rc = strdup(clean_nodedesc(nodedesc)); + return (rc); +} + +char *clean_nodedesc(char *nodedesc) +{ + int i = 0; + + nodedesc[63] = '\0'; + while (nodedesc[i]) { + if (!isprint(nodedesc[i])) + nodedesc[i] = ' '; + i++; + } + + return (nodedesc); +} + +static int parse_node_map_wrap(const char *file_name, + int (*create) (void *, uint64_t, char *), + void *cxt, + char *linebuf, + unsigned int linebuflen) +{ + char line[PARSE_NODE_MAP_BUFLEN]; + FILE *f; + + if (!(f = fopen(file_name, "r"))) + return -1; + + while (fgets(line, sizeof(line), f)) { + uint64_t guid; + char *p, *e; + + p = line; + while (isspace(*p)) + p++; + if (*p == '\0' || *p == '\n' || *p == '#') + continue; + + guid = strtoull(p, &e, 0); + if (e == p || (!isspace(*e) && *e != '#' && *e != '\0')) { + fclose(f); + errno = EIO; + if (linebuf) { + memcpy(linebuf, line, + MIN(PARSE_NODE_MAP_BUFLEN, linebuflen)); + e = strpbrk(linebuf, "\n"); + if (e) + *e = '\0'; + } + return -1; + } + + p = e; + while (isspace(*p)) + p++; + + e = strpbrk(p, "\n"); + if (e) + *e = '\0'; + + if (create(cxt, guid, p)) { + fclose(f); + return -1; + } + } + + fclose(f); + return 0; +} + +int parse_node_map(const char *file_name, + int (*create) (void *, uint64_t, char *), void *cxt) +{ + return parse_node_map_wrap(file_name, create, cxt, NULL, 0); +} diff --git a/complib/cl_pool.c b/complib/cl_pool.c new file mode 100644 index 0000000..6248688 --- /dev/null +++ b/complib/cl_pool.c @@ -0,0 +1,656 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. 
All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of the grow pools. The grow pools manage a pool of objects. + * The pools can grow to meet demand, limited only by system memory. 
+ * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include + +/* + * IMPLEMENTATION OF QUICK COMPOSITE POOL + */ +void cl_qcpool_construct(IN cl_qcpool_t * const p_pool) +{ + CL_ASSERT(p_pool); + + memset(p_pool, 0, sizeof(cl_qcpool_t)); + + p_pool->state = CL_UNINITIALIZED; +} + +cl_status_t cl_qcpool_init(IN cl_qcpool_t * const p_pool, + IN const size_t min_size, IN const size_t max_size, + IN const size_t grow_size, + IN const size_t * const component_sizes, + IN const uint32_t num_components, + IN cl_pfn_qcpool_init_t pfn_initializer OPTIONAL, + IN cl_pfn_qcpool_dtor_t pfn_destructor OPTIONAL, + IN const void *const context) +{ + cl_status_t status; + uint32_t i; + + CL_ASSERT(p_pool); + /* Must have a minimum of 1 component. */ + CL_ASSERT(num_components); + /* A component size array is required. */ + CL_ASSERT(component_sizes); + /* + * If no initializer is provided, the first component must be large + * enough to hold a pool item. + */ + CL_ASSERT(pfn_initializer || + (component_sizes[0] >= sizeof(cl_pool_item_t))); + + cl_qcpool_construct(p_pool); + + if (num_components > 1 && !pfn_initializer) + return (CL_INVALID_SETTING); + + if (max_size && max_size < min_size) + return (CL_INVALID_SETTING); + + /* + * Allocate the array of component sizes and component pointers all + * in one allocation. + */ + p_pool->component_sizes = (size_t *) malloc((sizeof(size_t) + + sizeof(void *)) * + num_components); + + if (!p_pool->component_sizes) + return (CL_INSUFFICIENT_MEMORY); + else + memset(p_pool->component_sizes, 0, + (sizeof(size_t) + sizeof(void *)) * num_components); + + /* Calculate the pointer to the array of pointers, used for callbacks. */ + p_pool->p_components = + (void **)(p_pool->component_sizes + num_components); + + /* Copy the user's sizes into our array for future use. 
*/ + memcpy(p_pool->component_sizes, component_sizes, + sizeof(component_sizes[0]) * num_components); + + /* Store the number of components per object. */ + p_pool->num_components = num_components; + + /* Round up and store the size of the components. */ + for (i = 0; i < num_components; i++) { + /* + * We roundup each component size so that all components + * are aligned on a natural boundary. + */ + p_pool->component_sizes[i] = + ROUNDUP(p_pool->component_sizes[i], sizeof(uintptr_t)); + } + + p_pool->max_objects = max_size ? max_size : ~(size_t) 0; + p_pool->grow_size = grow_size; + + /* Store callback function pointers. */ + p_pool->pfn_init = pfn_initializer; /* may be NULL */ + p_pool->pfn_dtor = pfn_destructor; /* may be NULL */ + p_pool->context = context; + + cl_qlist_init(&p_pool->alloc_list); + + cl_qlist_init(&p_pool->free_list); + + /* + * We are now initialized. We change the initialized flag before + * growing since the grow function asserts that we are initialized. + */ + p_pool->state = CL_INITIALIZED; + + /* Allocate the minimum number of objects as requested. */ + if (!min_size) + return (CL_SUCCESS); + + status = cl_qcpool_grow(p_pool, min_size); + /* Trap for error and cleanup if necessary. */ + if (status != CL_SUCCESS) + cl_qcpool_destroy(p_pool); + + return (status); +} + +void cl_qcpool_destroy(IN cl_qcpool_t * const p_pool) +{ + /* CL_ASSERT that a non-NULL pointer was provided. */ + CL_ASSERT(p_pool); + /* CL_ASSERT that we are in a valid state (not uninitialized memory). */ + CL_ASSERT(cl_is_state_valid(p_pool->state)); + + if (p_pool->state == CL_INITIALIZED) { + /* + * Assert if the user hasn't put everything back in the pool + * before destroying it + * if they haven't, then most likely they are still using memory + * that will be freed, and the destructor will not be called! 
+ */ +#ifdef _DEBUG_ + /* but we do not want "free" version to assert on this one */ + CL_ASSERT(cl_qcpool_count(p_pool) == p_pool->num_objects); +#endif + /* call the user's destructor for each object in the pool */ + if (p_pool->pfn_dtor) { + while (!cl_is_qlist_empty(&p_pool->free_list)) { + p_pool->pfn_dtor((cl_pool_item_t *) + cl_qlist_remove_head(&p_pool-> + free_list), + (void *)p_pool->context); + } + } else { + cl_qlist_remove_all(&p_pool->free_list); + } + + /* Free all allocated memory blocks. */ + while (!cl_is_qlist_empty(&p_pool->alloc_list)) + free(cl_qlist_remove_head(&p_pool->alloc_list)); + + if (p_pool->component_sizes) { + free(p_pool->component_sizes); + p_pool->component_sizes = NULL; + } + } + + p_pool->state = CL_UNINITIALIZED; +} + +cl_status_t cl_qcpool_grow(IN cl_qcpool_t * const p_pool, IN size_t obj_count) +{ + cl_status_t status = CL_SUCCESS; + uint8_t *p_objects; + cl_pool_item_t *p_pool_item; + uint32_t i; + size_t obj_size; + + CL_ASSERT(p_pool); + CL_ASSERT(p_pool->state == CL_INITIALIZED); + CL_ASSERT(obj_count); + + /* Validate that growth is possible. */ + if (p_pool->num_objects == p_pool->max_objects) + return (CL_INSUFFICIENT_MEMORY); + + /* Cap the growth to the desired maximum. */ + if (obj_count > (p_pool->max_objects - p_pool->num_objects)) + obj_count = p_pool->max_objects - p_pool->num_objects; + + /* Calculate the size of an object. */ + obj_size = 0; + for (i = 0; i < p_pool->num_components; i++) + obj_size += p_pool->component_sizes[i]; + + /* Allocate the buffer for the new objects. */ + p_objects = (uint8_t *) + malloc(sizeof(cl_list_item_t) + (obj_size * obj_count)); + + /* Make sure the allocation succeeded. */ + if (!p_objects) + return (CL_INSUFFICIENT_MEMORY); + else + memset(p_objects, 0, + sizeof(cl_list_item_t) + (obj_size * obj_count)); + + /* Insert the allocation in our list. 
*/ + cl_qlist_insert_tail(&p_pool->alloc_list, (cl_list_item_t *) p_objects); + p_objects += sizeof(cl_list_item_t); + + /* initialize the new elements and add them to the free list */ + while (obj_count--) { + /* Setup the array of components for the current object. */ + p_pool->p_components[0] = p_objects; + for (i = 1; i < p_pool->num_components; i++) { + /* Calculate the pointer to the next component. */ + p_pool->p_components[i] = + (uint8_t *) p_pool->p_components[i - 1] + + p_pool->component_sizes[i - 1]; + } + + /* + * call the user's initializer + * this can fail! + */ + if (p_pool->pfn_init) { + p_pool_item = NULL; + status = p_pool->pfn_init(p_pool->p_components, + p_pool->num_components, + (void *)p_pool->context, + &p_pool_item); + if (status != CL_SUCCESS) { + /* + * User initialization failed + * we may have only grown the pool by some partial amount + * Invoke the destructor for the object that failed + * initialization. + */ + if (p_pool->pfn_dtor) + p_pool->pfn_dtor(p_pool_item, + (void *)p_pool-> + context); + + /* Return the user's status. */ + return (status); + } + CL_ASSERT(p_pool_item); + } else { + /* + * If no initializer is provided, assume that the pool item + * is stored at the beginning of the first component. + */ + p_pool_item = + (cl_pool_item_t *) p_pool->p_components[0]; + } + +#ifdef _DEBUG_ + /* + * Set the pool item's pool pointer to this pool so that we can + * check that items get returned to the correct pool. + */ + p_pool_item->p_pool = p_pool; +#endif + + /* Insert the new item in the free list, traping for failure. 
*/ + cl_qlist_insert_head(&p_pool->free_list, + &p_pool_item->list_item); + + p_pool->num_objects++; + + /* move the pointer to the next item */ + p_objects += obj_size; + } + + return (status); +} + +cl_pool_item_t *cl_qcpool_get(IN cl_qcpool_t * const p_pool) +{ + cl_list_item_t *p_list_item; + + CL_ASSERT(p_pool); + CL_ASSERT(p_pool->state == CL_INITIALIZED); + + if (cl_is_qlist_empty(&p_pool->free_list)) { + /* + * No object is available. + * Return NULL if the user does not want automatic growth. + */ + if (!p_pool->grow_size) + return (NULL); + + /* We ran out of elements. Get more */ + cl_qcpool_grow(p_pool, p_pool->grow_size); + /* + * We may not have gotten everything we wanted but we might have + * gotten something. + */ + if (cl_is_qlist_empty(&p_pool->free_list)) + return (NULL); + } + + p_list_item = cl_qlist_remove_head(&p_pool->free_list); + /* OK, at this point we have an object */ + CL_ASSERT(p_list_item != cl_qlist_end(&p_pool->free_list)); + return ((cl_pool_item_t *) p_list_item); +} + +cl_pool_item_t *cl_qcpool_get_tail(IN cl_qcpool_t * const p_pool) +{ + cl_list_item_t *p_list_item; + + CL_ASSERT(p_pool); + CL_ASSERT(p_pool->state == CL_INITIALIZED); + + if (cl_is_qlist_empty(&p_pool->free_list)) { + /* + * No object is available. + * Return NULL if the user does not want automatic growth. + */ + if (!p_pool->grow_size) + return (NULL); + + /* We ran out of elements. Get more */ + cl_qcpool_grow(p_pool, p_pool->grow_size); + /* + * We may not have gotten everything we wanted but we might have + * gotten something. + */ + if (cl_is_qlist_empty(&p_pool->free_list)) + return (NULL); + } + + p_list_item = cl_qlist_remove_tail(&p_pool->free_list); + /* OK, at this point we have an object */ + CL_ASSERT(p_list_item != cl_qlist_end(&p_pool->free_list)); + return ((cl_pool_item_t *) p_list_item); +} + +/* + * IMPLEMENTATION OF QUICK GROW POOL + */ + +/* + * Callback to translate quick composite to quick grow pool + * initializer callback. 
+ */ +static cl_status_t __cl_qpool_init_cb(IN void **const p_comp_array, + IN const uint32_t num_components, + IN void *const context, + OUT cl_pool_item_t ** const pp_pool_item) +{ + cl_qpool_t *p_pool = (cl_qpool_t *) context; + + CL_ASSERT(p_pool); + CL_ASSERT(p_pool->pfn_init); + CL_ASSERT(num_components == 1); + + UNUSED_PARAM(num_components); + + return (p_pool->pfn_init(p_comp_array[0], (void *)p_pool->context, + pp_pool_item)); +} + +/* + * Callback to translate quick composite to quick grow pool + * destructor callback. + */ +static void __cl_qpool_dtor_cb(IN const cl_pool_item_t * const p_pool_item, + IN void *const context) +{ + cl_qpool_t *p_pool = (cl_qpool_t *) context; + + CL_ASSERT(p_pool); + CL_ASSERT(p_pool->pfn_dtor); + + p_pool->pfn_dtor(p_pool_item, (void *)p_pool->context); +} + +void cl_qpool_construct(IN cl_qpool_t * const p_pool) +{ + memset(p_pool, 0, sizeof(cl_qpool_t)); + + cl_qcpool_construct(&p_pool->qcpool); +} + +cl_status_t cl_qpool_init(IN cl_qpool_t * const p_pool, + IN const size_t min_size, IN const size_t max_size, + IN const size_t grow_size, + IN const size_t object_size, + IN cl_pfn_qpool_init_t pfn_initializer OPTIONAL, + IN cl_pfn_qpool_dtor_t pfn_destructor OPTIONAL, + IN const void *const context) +{ + cl_status_t status; + + CL_ASSERT(p_pool); + + p_pool->pfn_init = pfn_initializer; /* may be NULL */ + p_pool->pfn_dtor = pfn_destructor; /* may be NULL */ + p_pool->context = context; + + status = cl_qcpool_init(&p_pool->qcpool, min_size, max_size, grow_size, + &object_size, 1, + pfn_initializer ? __cl_qpool_init_cb : NULL, + pfn_destructor ? __cl_qpool_dtor_cb : NULL, + p_pool); + + return (status); +} + +/* + * IMPLEMENTATION OF COMPOSITE POOL + */ + +/* + * Callback to translate quick composite to compsite pool + * initializer callback. 
+ */ +static cl_status_t __cl_cpool_init_cb(IN void **const p_comp_array, + IN const uint32_t num_components, + IN void *const context, + OUT cl_pool_item_t ** const pp_pool_item) +{ + cl_cpool_t *p_pool = (cl_cpool_t *) context; + cl_pool_obj_t *p_pool_obj; + cl_status_t status = CL_SUCCESS; + + CL_ASSERT(p_pool); + + /* + * Set our pointer to the list item, which is stored at the beginning of + * the first component. + */ + p_pool_obj = (cl_pool_obj_t *) p_comp_array[0]; + /* Set the pool item pointer for the caller. */ + *pp_pool_item = &p_pool_obj->pool_item; + + /* Calculate the pointer to the user's first component. */ + p_comp_array[0] = ((uint8_t *) p_comp_array[0]) + sizeof(cl_pool_obj_t); + + /* + * Set the object pointer in the pool object to point to the first of the + * user's components. + */ + p_pool_obj->p_object = p_comp_array[0]; + + /* Invoke the user's constructor callback. */ + if (p_pool->pfn_init) { + status = p_pool->pfn_init(p_comp_array, num_components, + (void *)p_pool->context); + } + + return (status); +} + +/* + * Callback to translate quick composite to composite pool + * destructor callback. + */ +static void __cl_cpool_dtor_cb(IN const cl_pool_item_t * const p_pool_item, + IN void *const context) +{ + cl_cpool_t *p_pool = (cl_cpool_t *) context; + + CL_ASSERT(p_pool); + CL_ASSERT(p_pool->pfn_dtor); + CL_ASSERT(((cl_pool_obj_t *) p_pool_item)->p_object); + + /* Invoke the user's destructor callback. 
*/ + p_pool->pfn_dtor((void *)((cl_pool_obj_t *) p_pool_item)->p_object, + (void *)p_pool->context); +} + +void cl_cpool_construct(IN cl_cpool_t * const p_pool) +{ + CL_ASSERT(p_pool); + + memset(p_pool, 0, sizeof(cl_cpool_t)); + + cl_qcpool_construct(&p_pool->qcpool); +} + +cl_status_t cl_cpool_init(IN cl_cpool_t * const p_pool, + IN const size_t min_size, IN const size_t max_size, + IN const size_t grow_size, + IN size_t * const component_sizes, + IN const uint32_t num_components, + IN cl_pfn_cpool_init_t pfn_initializer OPTIONAL, + IN cl_pfn_cpool_dtor_t pfn_destructor OPTIONAL, + IN const void *const context) +{ + cl_status_t status; + + CL_ASSERT(p_pool); + CL_ASSERT(num_components); + CL_ASSERT(component_sizes); + + /* Add the size of the pool object to the first component. */ + component_sizes[0] += sizeof(cl_pool_obj_t); + + /* Store callback function pointers. */ + p_pool->pfn_init = pfn_initializer; /* may be NULL */ + p_pool->pfn_dtor = pfn_destructor; /* may be NULL */ + p_pool->context = context; + + status = cl_qcpool_init(&p_pool->qcpool, min_size, max_size, grow_size, + component_sizes, num_components, + __cl_cpool_init_cb, + pfn_destructor ? __cl_cpool_dtor_cb : NULL, + p_pool); + + /* Restore the original value of the first component. */ + component_sizes[0] -= sizeof(cl_pool_obj_t); + + return (status); +} + +/* + * IMPLEMENTATION OF GROW POOL + */ + +/* + * Callback to translate quick composite to grow pool constructor callback. + */ +static cl_status_t __cl_pool_init_cb(IN void **const pp_obj, + IN const uint32_t count, + IN void *const context, + OUT cl_pool_item_t ** const pp_pool_item) +{ + cl_pool_t *p_pool = (cl_pool_t *) context; + cl_pool_obj_t *p_pool_obj; + cl_status_t status = CL_SUCCESS; + + CL_ASSERT(p_pool); + CL_ASSERT(pp_obj); + CL_ASSERT(count == 1); + + UNUSED_PARAM(count); + + /* + * Set our pointer to the list item, which is stored at the beginning of + * the first component. 
+ */ + p_pool_obj = (cl_pool_obj_t *) * pp_obj; + *pp_pool_item = &p_pool_obj->pool_item; + + /* Calculate the pointer to the user's first component. */ + *pp_obj = ((uint8_t *) * pp_obj) + sizeof(cl_pool_obj_t); + + /* + * Set the object pointer in the pool item to point to the first of the + * user's components. + */ + p_pool_obj->p_object = *pp_obj; + + /* Invoke the user's constructor callback. */ + if (p_pool->pfn_init) + status = p_pool->pfn_init(*pp_obj, (void *)p_pool->context); + + return (status); +} + +/* + * Callback to translate quick composite to grow pool destructor callback. + */ +static void __cl_pool_dtor_cb(IN const cl_pool_item_t * const p_pool_item, + IN void *const context) +{ + cl_pool_t *p_pool = (cl_pool_t *) context; + + CL_ASSERT(p_pool); + CL_ASSERT(p_pool->pfn_dtor); + CL_ASSERT(((cl_pool_obj_t *) p_pool_item)->p_object); + + /* Invoke the user's destructor callback. */ + p_pool->pfn_dtor((void *)((cl_pool_obj_t *) p_pool_item)->p_object, + (void *)p_pool->context); +} + +void cl_pool_construct(IN cl_pool_t * const p_pool) +{ + CL_ASSERT(p_pool); + + memset(p_pool, 0, sizeof(cl_pool_t)); + + cl_qcpool_construct(&p_pool->qcpool); +} + +cl_status_t cl_pool_init(IN cl_pool_t * const p_pool, IN const size_t min_size, + IN const size_t max_size, IN const size_t grow_size, + IN const size_t object_size, + IN cl_pfn_pool_init_t pfn_initializer OPTIONAL, + IN cl_pfn_pool_dtor_t pfn_destructor OPTIONAL, + IN const void *const context) +{ + cl_status_t status; + size_t total_size; + + CL_ASSERT(p_pool); + + /* Add the size of the list item to the first component. */ + total_size = object_size + sizeof(cl_pool_obj_t); + + /* Store callback function pointers. 
*/ + p_pool->pfn_init = pfn_initializer; /* may be NULL */ + p_pool->pfn_dtor = pfn_destructor; /* may be NULL */ + p_pool->context = context; + + /* + * We need an initializer in all cases for quick composite pool, since + * the user pointer must be manipulated to hide the prefixed cl_pool_obj_t. + */ + status = cl_qcpool_init(&p_pool->qcpool, min_size, max_size, grow_size, + &total_size, 1, __cl_pool_init_cb, + pfn_destructor ? __cl_pool_dtor_cb : NULL, + p_pool); + + return (status); +} diff --git a/complib/cl_ptr_vector.c b/complib/cl_ptr_vector.c new file mode 100644 index 0000000..36db4db --- /dev/null +++ b/complib/cl_ptr_vector.c @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * This file contains ivector and isvector implementations. + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include + +void cl_ptr_vector_construct(IN cl_ptr_vector_t * const p_vector) +{ + CL_ASSERT(p_vector); + + memset(p_vector, 0, sizeof(cl_ptr_vector_t)); + + p_vector->state = CL_UNINITIALIZED; +} + +cl_status_t cl_ptr_vector_init(IN cl_ptr_vector_t * const p_vector, + IN const size_t min_size, + IN const size_t grow_size) +{ + cl_status_t status = CL_SUCCESS; + + CL_ASSERT(p_vector); + + cl_ptr_vector_construct(p_vector); + + p_vector->grow_size = grow_size; + + /* + * Set the state to initialized so that the call to set_size + * doesn't assert. + */ + p_vector->state = CL_INITIALIZED; + + /* get the storage needed by the user */ + if (min_size) { + status = cl_ptr_vector_set_size(p_vector, min_size); + if (status != CL_SUCCESS) + cl_ptr_vector_destroy(p_vector); + } + + return (status); +} + +void cl_ptr_vector_destroy(IN cl_ptr_vector_t * const p_vector) +{ + CL_ASSERT(p_vector); + CL_ASSERT(cl_is_state_valid(p_vector->state)); + + /* Call the user's destructor for each element in the array. */ + if (p_vector->state == CL_INITIALIZED) { + /* Destroy the page vector. 
*/ + if (p_vector->p_ptr_array) { + free((void *)p_vector->p_ptr_array); + p_vector->p_ptr_array = NULL; + } + } + + p_vector->state = CL_UNINITIALIZED; +} + +cl_status_t cl_ptr_vector_at(IN const cl_ptr_vector_t * const p_vector, + IN const size_t index, OUT void **const p_element) +{ + CL_ASSERT(p_vector); + CL_ASSERT(p_vector->state == CL_INITIALIZED); + + /* Range check */ + if (index >= p_vector->size) + return (CL_INVALID_PARAMETER); + + *p_element = cl_ptr_vector_get(p_vector, index); + return (CL_SUCCESS); +} + +cl_status_t cl_ptr_vector_set(IN cl_ptr_vector_t * const p_vector, + IN const size_t index, + IN const void *const element) +{ + cl_status_t status; + + CL_ASSERT(p_vector); + CL_ASSERT(p_vector->state == CL_INITIALIZED); + + /* Determine if the vector has room for this element. */ + if (index >= p_vector->size) { + /* Resize to accomodate the given index. */ + status = cl_ptr_vector_set_size(p_vector, index + 1); + + /* Check for failure on or before the given index. */ + if ((status != CL_SUCCESS) && (p_vector->size < index)) + return (status); + } + + /* At this point, the array is guaranteed to be big enough */ + p_vector->p_ptr_array[index] = element; + + return (CL_SUCCESS); +} + +void *cl_ptr_vector_remove(IN cl_ptr_vector_t * const p_vector, + IN const size_t index) +{ + size_t src; + const void *element; + + CL_ASSERT(p_vector); + CL_ASSERT(p_vector->state == CL_INITIALIZED); + CL_ASSERT(p_vector->size > index); + + /* Store a copy of the element to return. */ + element = p_vector->p_ptr_array[index]; + /* Shift all items above the removed item down. */ + if (index < --p_vector->size) { + for (src = index; src < p_vector->size; src++) + p_vector->p_ptr_array[src] = + p_vector->p_ptr_array[src + 1]; + } + /* Clear the entry for the element just outside of the new upper bound. 
*/ + p_vector->p_ptr_array[p_vector->size] = NULL; + + return ((void *)element); +} + +cl_status_t cl_ptr_vector_set_capacity(IN cl_ptr_vector_t * const p_vector, + IN const size_t new_capacity) +{ + void *p_new_ptr_array; + + CL_ASSERT(p_vector); + CL_ASSERT(p_vector->state == CL_INITIALIZED); + + /* Do we have to do anything here? */ + if (new_capacity <= p_vector->capacity) { + /* Nope */ + return (CL_SUCCESS); + } + + /* Allocate our pointer array. */ + p_new_ptr_array = malloc(new_capacity * sizeof(void *)); + if (!p_new_ptr_array) + return (CL_INSUFFICIENT_MEMORY); + else + memset(p_new_ptr_array, 0, new_capacity * sizeof(void *)); + + if (p_vector->p_ptr_array) { + /* Copy the old pointer array into the new. */ + memcpy(p_new_ptr_array, p_vector->p_ptr_array, + p_vector->capacity * sizeof(void *)); + + /* Free the old pointer array. */ + free((void *)p_vector->p_ptr_array); + } + + /* Set the new array. */ + p_vector->p_ptr_array = p_new_ptr_array; + + /* Update the vector with the new capactity. */ + p_vector->capacity = new_capacity; + + return (CL_SUCCESS); +} + +cl_status_t cl_ptr_vector_set_size(IN cl_ptr_vector_t * const p_vector, + IN const size_t size) +{ + cl_status_t status; + size_t new_capacity; + + CL_ASSERT(p_vector); + CL_ASSERT(p_vector->state == CL_INITIALIZED); + + /* Check to see if the requested size is the same as the existing size. */ + if (size == p_vector->size) + return (CL_SUCCESS); + + /* Determine if the vector has room for this element. */ + if (size >= p_vector->capacity) { + if (!p_vector->grow_size) + return (CL_INSUFFICIENT_MEMORY); + + /* Calculate the new capacity, taking into account the grow size. */ + new_capacity = size; + if (size % p_vector->grow_size) { + /* Round up to nearest grow_size boundary. 
*/ + new_capacity += p_vector->grow_size - + (size % p_vector->grow_size); + } + + status = cl_ptr_vector_set_capacity(p_vector, new_capacity); + if (status != CL_SUCCESS) + return (status); + } + + p_vector->size = size; + return (CL_SUCCESS); +} + +cl_status_t cl_ptr_vector_set_min_size(IN cl_ptr_vector_t * const p_vector, + IN const size_t min_size) +{ + CL_ASSERT(p_vector); + CL_ASSERT(p_vector->state == CL_INITIALIZED); + + if (min_size > p_vector->size) { + /* We have to resize the array */ + return (cl_ptr_vector_set_size(p_vector, min_size)); + } + + /* We didn't have to do anything */ + return (CL_SUCCESS); +} + +void cl_ptr_vector_apply_func(IN const cl_ptr_vector_t * const p_vector, + IN cl_pfn_ptr_vec_apply_t pfn_callback, + IN const void *const context) +{ + size_t i; + + CL_ASSERT(p_vector); + CL_ASSERT(p_vector->state == CL_INITIALIZED); + CL_ASSERT(pfn_callback); + + for (i = 0; i < p_vector->size; i++) + pfn_callback(i, (void *)p_vector->p_ptr_array[i], + (void *)context); +} + +size_t cl_ptr_vector_find_from_start(IN const cl_ptr_vector_t * const p_vector, + IN cl_pfn_ptr_vec_find_t pfn_callback, + IN const void *const context) +{ + size_t i; + + CL_ASSERT(p_vector); + CL_ASSERT(p_vector->state == CL_INITIALIZED); + CL_ASSERT(pfn_callback); + + for (i = 0; i < p_vector->size; i++) { + /* Invoke the callback */ + if (pfn_callback(i, (void *)p_vector->p_ptr_array[i], + (void *)context) == CL_SUCCESS) { + break; + } + } + return (i); +} + +size_t cl_ptr_vector_find_from_end(IN const cl_ptr_vector_t * const p_vector, + IN cl_pfn_ptr_vec_find_t pfn_callback, + IN const void *const context) +{ + size_t i; + + CL_ASSERT(p_vector); + CL_ASSERT(p_vector->state == CL_INITIALIZED); + CL_ASSERT(pfn_callback); + + i = p_vector->size; + + while (i) { + /* Invoke the callback for the current element. 
*/ + i--; + if (pfn_callback(i, (void *)p_vector->p_ptr_array[i], + (void *)context) == CL_SUCCESS) { + return (i); + } + } + + return (p_vector->size); +} diff --git a/complib/cl_spinlock.c b/complib/cl_spinlock.c new file mode 100644 index 0000000..2d81696 --- /dev/null +++ b/complib/cl_spinlock.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include + +void cl_spinlock_construct(IN cl_spinlock_t * const p_spinlock) +{ + CL_ASSERT(p_spinlock); + + p_spinlock->state = CL_UNINITIALIZED; +} + +cl_status_t cl_spinlock_init(IN cl_spinlock_t * const p_spinlock) +{ + CL_ASSERT(p_spinlock); + + cl_spinlock_construct(p_spinlock); + + /* Initialize with pthread_mutexattr_t = NULL */ + if (pthread_mutex_init(&p_spinlock->mutex, NULL)) + return (CL_ERROR); + + p_spinlock->state = CL_INITIALIZED; + return (CL_SUCCESS); +} + +void cl_spinlock_destroy(IN cl_spinlock_t * const p_spinlock) +{ + CL_ASSERT(p_spinlock); + CL_ASSERT(cl_is_state_valid(p_spinlock->state)); + + if (p_spinlock->state == CL_INITIALIZED) { + p_spinlock->state = CL_UNINITIALIZED; + pthread_mutex_lock(&p_spinlock->mutex); + pthread_mutex_unlock(&p_spinlock->mutex); + pthread_mutex_destroy(&p_spinlock->mutex); + } + p_spinlock->state = CL_UNINITIALIZED; +} + +void cl_spinlock_acquire(IN cl_spinlock_t * const p_spinlock) +{ + CL_ASSERT(p_spinlock); + CL_ASSERT(p_spinlock->state == CL_INITIALIZED); + + pthread_mutex_lock(&p_spinlock->mutex); +} + +void cl_spinlock_release(IN cl_spinlock_t * const p_spinlock) +{ + CL_ASSERT(p_spinlock); + CL_ASSERT(p_spinlock->state == CL_INITIALIZED); + + pthread_mutex_unlock(&p_spinlock->mutex); +} diff --git a/complib/cl_statustext.c b/complib/cl_statustext.c new file mode 100644 index 0000000..b02b8b8 --- /dev/null +++ b/complib/cl_statustext.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Defines string to decode cl_status_t return values. + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include + +/* Status values above converted to text for easier printing. 
*/ +const char *cl_status_text[] = { + "CL_SUCCESS", + "CL_ERROR", + "CL_INVALID_STATE", + "CL_INVALID_OPERATION", + "CL_INVALID_SETTING", + "CL_INVALID_PARAMETER", + "CL_INSUFFICIENT_RESOURCES", + "CL_INSUFFICIENT_MEMORY", + "CL_INVALID_PERMISSION", + "CL_COMPLETED", + "CL_NOT_DONE", + "CL_PENDING", + "CL_TIMEOUT", + "CL_CANCELED", + "CL_REJECT", + "CL_OVERRUN", + "CL_NOT_FOUND", + "CL_UNAVAILABLE", + "CL_BUSY", + "CL_DISCONNECT", + "CL_DUPLICATE" +}; diff --git a/complib/cl_thread.c b/complib/cl_thread.c new file mode 100644 index 0000000..e416466 --- /dev/null +++ b/complib/cl_thread.c @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include + +/* + * Internal function to run a new user mode thread. + * This function is always run as a result of creating a new user mode thread + * (it is the start routine handed to pthread_create by cl_thread_init). + * It invokes the user's callback with the user's context and returns NULL + * once the callback returns. + */ +static void *__cl_thread_wrapper(void *arg) +{ + cl_thread_t *p_thread = (cl_thread_t *) arg; + + CL_ASSERT(p_thread); + CL_ASSERT(p_thread->pfn_callback); + + p_thread->pfn_callback((void *)p_thread->context); + + return (NULL); +} + +void cl_thread_construct(IN cl_thread_t * const p_thread) +{ + CL_ASSERT(p_thread); + + p_thread->osd.state = CL_UNINITIALIZED; +} + +cl_status_t cl_thread_init(IN cl_thread_t * const p_thread, + IN cl_pfn_thread_callback_t pfn_callback, + IN const void *const context, + IN const char *const name) +{ + int ret; + + CL_ASSERT(p_thread); + + cl_thread_construct(p_thread); + + /* Initialize the thread structure ('name' is not used by this implementation) */ + p_thread->pfn_callback = pfn_callback; + p_thread->context = context; + + ret = pthread_create(&p_thread->osd.id, NULL, + __cl_thread_wrapper, (void *)p_thread); + + if (ret != 0) /* pthread_create returns a "0" for success */ + return (CL_ERROR); + + p_thread->osd.state = CL_INITIALIZED; + + return (CL_SUCCESS); +} + +void cl_thread_destroy(IN cl_thread_t * const p_thread) +{ + CL_ASSERT(p_thread); + CL_ASSERT(cl_is_state_valid(p_thread->osd.state)); + + if (p_thread->osd.state == CL_INITIALIZED) + pthread_join(p_thread->osd.id, NULL); + + p_thread->osd.state = CL_UNINITIALIZED; +} + +void cl_thread_suspend(IN const uint32_t pause_ms) +{ + /* Convert to microseconds. NOTE(review): pause_ms * 1000 looks like it is evaluated in 32-bit unsigned arithmetic and would wrap for pauses above about 4294967 ms - confirm callers stay below that. */ + usleep(pause_ms * 1000); +} + +void cl_thread_stall(IN 
const uint32_t pause_us) +{ + /* + * Not quite a busy wait, but Linux is lacking in terms of high + * resolution time stamp information in user mode. + */ + usleep(pause_us); +} + +int cl_proc_count(void) +{ + uint32_t ret; + + ret = get_nprocs(); + if (!ret) + return 1; /* Workaround for PPC where get_nprocs() returns 0 */ + + return ret; +} + +boolean_t cl_is_current_thread(IN const cl_thread_t * const p_thread) +{ + pthread_t current; + + CL_ASSERT(p_thread); + CL_ASSERT(p_thread->osd.state == CL_INITIALIZED); + + current = pthread_self(); + return (pthread_equal(current, p_thread->osd.id)); +} diff --git a/complib/cl_threadpool.c b/complib/cl_threadpool.c new file mode 100644 index 0000000..11abfbb --- /dev/null +++ b/complib/cl_threadpool.c @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of thread pool. + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include + +static void cleanup_mutex(void *arg) +{ + pthread_mutex_unlock(&((cl_thread_pool_t *) arg)->mutex); +} + +static void *thread_pool_routine(void *context) +{ + cl_thread_pool_t *p_thread_pool = (cl_thread_pool_t *) context; + + do { + pthread_mutex_lock(&p_thread_pool->mutex); + pthread_cleanup_push(cleanup_mutex, p_thread_pool); + while (!p_thread_pool->events) + pthread_cond_wait(&p_thread_pool->cond, + &p_thread_pool->mutex); + p_thread_pool->events--; + pthread_cleanup_pop(1); + /* The event has been signalled. Invoke the callback. 
*/ + (*p_thread_pool->pfn_callback) (p_thread_pool->context); + } while (1); + + return NULL; +} + +cl_status_t cl_thread_pool_init(IN cl_thread_pool_t * const p_thread_pool, + IN unsigned count, + IN void (*pfn_callback) (void *), + IN void *context, IN const char *const name) +{ + int i; + + CL_ASSERT(p_thread_pool); + CL_ASSERT(pfn_callback); + + memset(p_thread_pool, 0, sizeof(*p_thread_pool)); + + if (!count) + count = cl_proc_count(); + + pthread_mutex_init(&p_thread_pool->mutex, NULL); + pthread_cond_init(&p_thread_pool->cond, NULL); + + p_thread_pool->events = 0; + + p_thread_pool->pfn_callback = pfn_callback; + p_thread_pool->context = context; + + p_thread_pool->tid = calloc(count, sizeof(*p_thread_pool->tid)); + if (!p_thread_pool->tid) { + cl_thread_pool_destroy(p_thread_pool); + return CL_INSUFFICIENT_MEMORY; + } + + p_thread_pool->running_count = count; + + for (i = 0; i < count; i++) { + if (pthread_create(&p_thread_pool->tid[i], NULL, + thread_pool_routine, p_thread_pool) != 0) { + cl_thread_pool_destroy(p_thread_pool); + return CL_INSUFFICIENT_RESOURCES; + } + } + + return (CL_SUCCESS); +} + +void cl_thread_pool_destroy(IN cl_thread_pool_t * const p_thread_pool) +{ + int i; + + CL_ASSERT(p_thread_pool); + + for (i = 0; i < p_thread_pool->running_count; i++) + if (p_thread_pool->tid[i]) + pthread_cancel(p_thread_pool->tid[i]); + + for (i = 0; i < p_thread_pool->running_count; i++) + if (p_thread_pool->tid[i]) + pthread_join(p_thread_pool->tid[i], NULL); + + p_thread_pool->running_count = 0; + + free(p_thread_pool->tid); + + pthread_cond_destroy(&p_thread_pool->cond); + pthread_mutex_destroy(&p_thread_pool->mutex); + + p_thread_pool->events = 0; +} + +cl_status_t cl_thread_pool_signal(IN cl_thread_pool_t * const p_thread_pool) +{ + int ret; + CL_ASSERT(p_thread_pool); + pthread_mutex_lock(&p_thread_pool->mutex); + p_thread_pool->events++; + ret = pthread_cond_signal(&p_thread_pool->cond); + pthread_mutex_unlock(&p_thread_pool->mutex); + return ret; 
+} diff --git a/complib/cl_timer.c b/complib/cl_timer.c new file mode 100644 index 0000000..92b8102 --- /dev/null +++ b/complib/cl_timer.c @@ -0,0 +1,437 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Abstraction of Timer create, destroy functions. + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include + +/* Timer provider (emulates timers in user mode). 
*/ +typedef struct _cl_timer_prov { + pthread_t thread; + pthread_mutex_t mutex; + pthread_cond_t cond; + cl_qlist_t queue; + + boolean_t exit; + +} cl_timer_prov_t; + +/* Global timer provider. */ +static cl_timer_prov_t *gp_timer_prov = NULL; + +static void *__cl_timer_prov_cb(IN void *const context); + +/* + * Creates the process global timer provider. Must be called by the shared + * object framework to solve all serialization issues. + */ +cl_status_t __cl_timer_prov_create(void) +{ + CL_ASSERT(gp_timer_prov == NULL); + + gp_timer_prov = malloc(sizeof(cl_timer_prov_t)); + if (!gp_timer_prov) + return (CL_INSUFFICIENT_MEMORY); + else + memset(gp_timer_prov, 0, sizeof(cl_timer_prov_t)); + + cl_qlist_init(&gp_timer_prov->queue); + + pthread_mutex_init(&gp_timer_prov->mutex, NULL); + pthread_cond_init(&gp_timer_prov->cond, NULL); + + if (pthread_create(&gp_timer_prov->thread, NULL, + __cl_timer_prov_cb, NULL)) { + __cl_timer_prov_destroy(); + return (CL_ERROR); + } + + return (CL_SUCCESS); +} + +void __cl_timer_prov_destroy(void) +{ + pthread_t tid; + + if (!gp_timer_prov) + return; + + tid = gp_timer_prov->thread; + pthread_mutex_lock(&gp_timer_prov->mutex); + gp_timer_prov->exit = TRUE; + pthread_cond_broadcast(&gp_timer_prov->cond); + pthread_mutex_unlock(&gp_timer_prov->mutex); + pthread_join(tid, NULL); + + /* Destroy the mutex and condition variable. */ + pthread_mutex_destroy(&gp_timer_prov->mutex); + pthread_cond_destroy(&gp_timer_prov->cond); + + /* Free the memory and reset the global pointer. */ + free(gp_timer_prov); + gp_timer_prov = NULL; +} + +/* + * This is the internal work function executed by the timer's thread. + */ +static void *__cl_timer_prov_cb(IN void *const context) +{ + int ret; + cl_timer_t *p_timer; + + pthread_mutex_lock(&gp_timer_prov->mutex); + while (!gp_timer_prov->exit) { + if (cl_is_qlist_empty(&gp_timer_prov->queue)) { + /* Wait until we exit or a timer is queued. 
*/ + /* cond wait does: + * pthread_cond_wait atomically unlocks the mutex (as per + * pthread_unlock_mutex) and waits for the condition variable + * cond to be signaled. The thread execution is suspended and + * does not consume any CPU time until the condition variable is + * signaled. The mutex must be locked by the calling thread on + * entrance to pthread_cond_wait. Before RETURNING TO THE + * CALLING THREAD, PTHREAD_COND_WAIT RE-ACQUIRES MUTEX (as per + * pthread_lock_mutex). + */ + ret = pthread_cond_wait(&gp_timer_prov->cond, + &gp_timer_prov->mutex); + } else { + /* + * The timer elements are on the queue in expiration order. + * Get the first in the list to determine how long to wait. + */ + + p_timer = + (cl_timer_t *) cl_qlist_head(&gp_timer_prov->queue); + ret = + pthread_cond_timedwait(&gp_timer_prov->cond, + &gp_timer_prov->mutex, + &p_timer->timeout); + + /* + Sleep again on every event other than timeout and invalid + Note: EINVAL means that we got behind. This can occur when + we are very busy... + */ + if (ret != ETIMEDOUT && ret != EINVAL) + continue; + + /* + * The timer expired. Check the state in case it was cancelled + * after it expired but before we got a chance to invoke the + * callback. + */ + if (p_timer->timer_state != CL_TIMER_QUEUED) + continue; + + /* + * Mark the timer as running to synchronize with its + * cancelation since we can't hold the mutex during the + * callback. + */ + p_timer->timer_state = CL_TIMER_RUNNING; + + /* Remove the item from the timer queue. */ + cl_qlist_remove_item(&gp_timer_prov->queue, + &p_timer->list_item); + pthread_mutex_unlock(&gp_timer_prov->mutex); + /* Invoke the callback. */ + p_timer->pfn_callback((void *)p_timer->context); + + /* Acquire the mutex again. 
*/ + pthread_mutex_lock(&gp_timer_prov->mutex); + /* + * Only set the state to idle if the timer has not been accessed + * from the callback + */ + if (p_timer->timer_state == CL_TIMER_RUNNING) + p_timer->timer_state = CL_TIMER_IDLE; + + /* + * Signal any thread trying to manipulate the timer + * that expired. + */ + pthread_cond_signal(&p_timer->cond); + } + } + gp_timer_prov->thread = 0; + pthread_mutex_unlock(&gp_timer_prov->mutex); + pthread_exit(NULL); +} + +/* Timer implementation. */ +void cl_timer_construct(IN cl_timer_t * const p_timer) +{ + memset(p_timer, 0, sizeof(cl_timer_t)); + p_timer->state = CL_UNINITIALIZED; +} + +cl_status_t cl_timer_init(IN cl_timer_t * const p_timer, + IN cl_pfn_timer_callback_t pfn_callback, + IN const void *const context) +{ + CL_ASSERT(p_timer); + CL_ASSERT(pfn_callback); + + cl_timer_construct(p_timer); + + if (!gp_timer_prov) + return (CL_ERROR); + + /* Store timer parameters. */ + p_timer->pfn_callback = pfn_callback; + p_timer->context = context; + + /* Mark the timer as idle. */ + p_timer->timer_state = CL_TIMER_IDLE; + + /* Create the condition variable that is used when cancelling a timer. */ + pthread_cond_init(&p_timer->cond, NULL); + + p_timer->state = CL_INITIALIZED; + + return (CL_SUCCESS); +} + +void cl_timer_destroy(IN cl_timer_t * const p_timer) +{ + CL_ASSERT(p_timer); + CL_ASSERT(cl_is_state_valid(p_timer->state)); + + if (p_timer->state == CL_INITIALIZED) + cl_timer_stop(p_timer); + + p_timer->state = CL_UNINITIALIZED; + + /* is it possible we have some threads waiting on the cond now? */ + pthread_cond_broadcast(&p_timer->cond); + pthread_cond_destroy(&p_timer->cond); + +} + +/* + * Return TRUE if timeout value 1 is earlier than timeout value 2. 
+ */ +static __inline boolean_t __cl_timer_is_earlier(IN struct timespec *p_timeout1, + IN struct timespec *p_timeout2) +{ + return ((p_timeout1->tv_sec < p_timeout2->tv_sec) || + ((p_timeout1->tv_sec == p_timeout2->tv_sec) && + (p_timeout1->tv_nsec < p_timeout2->tv_nsec))); +} + +/* + * Search for a timer with an earlier timeout than the one provided by + * the context. Both the list item and the context are pointers to + * a cl_timer_t structure with valid timeouts. + */ +static cl_status_t __cl_timer_find(IN const cl_list_item_t * const p_list_item, + IN void *const context) +{ + cl_timer_t *p_in_list; + cl_timer_t *p_new; + + CL_ASSERT(p_list_item); + CL_ASSERT(context); + + p_in_list = (cl_timer_t *) p_list_item; + p_new = (cl_timer_t *) context; + + CL_ASSERT(p_in_list->state == CL_INITIALIZED); + CL_ASSERT(p_new->state == CL_INITIALIZED); + + CL_ASSERT(p_in_list->timer_state == CL_TIMER_QUEUED); + + if (__cl_timer_is_earlier(&p_in_list->timeout, &p_new->timeout)) + return (CL_SUCCESS); + + return (CL_NOT_FOUND); +} + +/* + * Calculate 'struct timespec' value that is the + * current time plus the 'time_ms' milliseconds. + */ +static __inline void __cl_timer_calculate(IN const uint32_t time_ms, + OUT struct timespec * const p_timer) +{ + struct timeval curtime, deltatime, endtime; + + gettimeofday(&curtime, NULL); + + deltatime.tv_sec = time_ms / 1000; + deltatime.tv_usec = (time_ms % 1000) * 1000; + timeradd(&curtime, &deltatime, &endtime); + p_timer->tv_sec = endtime.tv_sec; + p_timer->tv_nsec = endtime.tv_usec * 1000; +} + +cl_status_t cl_timer_start(IN cl_timer_t * const p_timer, + IN const uint32_t time_ms) +{ + cl_list_item_t *p_list_item; + + CL_ASSERT(p_timer); + CL_ASSERT(p_timer->state == CL_INITIALIZED); + + pthread_mutex_lock(&gp_timer_prov->mutex); + /* Signal the timer provider thread to wake up. */ + pthread_cond_signal(&gp_timer_prov->cond); + + /* Remove the timer from the queue if currently queued. 
*/ + if (p_timer->timer_state == CL_TIMER_QUEUED) + cl_qlist_remove_item(&gp_timer_prov->queue, + &p_timer->list_item); + + __cl_timer_calculate(time_ms, &p_timer->timeout); + + /* Add the timer to the queue. */ + if (cl_is_qlist_empty(&gp_timer_prov->queue)) { + /* The timer list is empty. Add to the head. */ + cl_qlist_insert_head(&gp_timer_prov->queue, + &p_timer->list_item); + } else { + /* Find the correct insertion place in the list for the timer. */ + p_list_item = cl_qlist_find_from_tail(&gp_timer_prov->queue, + __cl_timer_find, p_timer); + + /* Insert the timer. */ + cl_qlist_insert_next(&gp_timer_prov->queue, p_list_item, + &p_timer->list_item); + } + /* Set the state. */ + p_timer->timer_state = CL_TIMER_QUEUED; + pthread_mutex_unlock(&gp_timer_prov->mutex); + + return (CL_SUCCESS); +} + +/* + * Stop a timer. A queued timer is removed from the provider's queue and + * marked idle. If the timer's callback is currently running, block on the + * timer's condition variable until the state is no longer CL_TIMER_RUNNING. + * An idle timer is left untouched. + */ +void cl_timer_stop(IN cl_timer_t * const p_timer) +{ + CL_ASSERT(p_timer); + CL_ASSERT(p_timer->state == CL_INITIALIZED); + + pthread_mutex_lock(&gp_timer_prov->mutex); + switch (p_timer->timer_state) { + case CL_TIMER_RUNNING: + /* + * Wait for the callback to complete. Re-check the state after + * every wakeup: pthread_cond_wait() may return spuriously, and + * returning early would let the caller proceed while the + * callback is still running. + */ + while (p_timer->timer_state == CL_TIMER_RUNNING) + pthread_cond_wait(&p_timer->cond, + &gp_timer_prov->mutex); + /* Timer could have been queued while we were waiting. */ + if (p_timer->timer_state != CL_TIMER_QUEUED) + break; + /* fall through */ + case CL_TIMER_QUEUED: + /* Change the state of the timer. */ + p_timer->timer_state = CL_TIMER_IDLE; + /* Remove the timer from the queue. */ + cl_qlist_remove_item(&gp_timer_prov->queue, + &p_timer->list_item); + /* + * Signal the timer provider thread to move onto the + * next timer in the queue. 
+ */ + pthread_cond_signal(&gp_timer_prov->cond); + break; + + case CL_TIMER_IDLE: + break; + } + pthread_mutex_unlock(&gp_timer_prov->mutex); +} + +cl_status_t cl_timer_trim(IN cl_timer_t * const p_timer, + IN const uint32_t time_ms) +{ + struct timespec newtime; + cl_status_t status; + + CL_ASSERT(p_timer); + CL_ASSERT(p_timer->state == CL_INITIALIZED); + + pthread_mutex_lock(&gp_timer_prov->mutex); + + __cl_timer_calculate(time_ms, &newtime); + + if (p_timer->timer_state == CL_TIMER_QUEUED) { + /* If the old time is earlier, do not trim it. Just return. */ + if (__cl_timer_is_earlier(&p_timer->timeout, &newtime)) { + pthread_mutex_unlock(&gp_timer_prov->mutex); + return (CL_SUCCESS); + } + } + + /* Reset the timer to the new timeout value. */ + + pthread_mutex_unlock(&gp_timer_prov->mutex); + status = cl_timer_start(p_timer, time_ms); + + return (status); +} + +uint64_t cl_get_time_stamp(void) +{ + uint64_t tstamp; + struct timeval tv; + + gettimeofday(&tv, NULL); + + /* Convert the time of day into a microsecond timestamp. */ + tstamp = ((uint64_t) tv.tv_sec * 1000000) + (uint64_t) tv.tv_usec; + + return (tstamp); +} + +uint32_t cl_get_time_stamp_sec(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + + return (tv.tv_sec); +} diff --git a/complib/cl_vector.c b/complib/cl_vector.c new file mode 100644 index 0000000..463cb90 --- /dev/null +++ b/complib/cl_vector.c @@ -0,0 +1,548 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * This file contains ivector and isvector implementations. + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include + +/* + * Define the maximum size for array pages in an cl_vector_t. + * This size is in objects, not bytes. + */ +#define SVEC_MAX_PAGE_SIZE 0x1000 + +/* + * cl_vector_copy_general + * + * Description: + * copy operator used when size of the user object doesn't fit one of the + * other optimized copy functions. 
+ * + * Inputs: + * p_src - source for copy + * + * Outputs: + * p_dest - destination for copy + * + * Returns: + * None + * + */ +static void cl_vector_copy_general(OUT void *const p_dest, + IN const void *const p_src, + IN const size_t size) +{ + memcpy(p_dest, p_src, size); +} + +/* + * cl_vector_copy8 + * + * Description: + * copy operator used when the user structure is only 8 bits long. + * + * Inputs: + * p_src - source for copy + * + * Outputs: + * p_dest - destination for copy + * + * Returns: + * None + * + */ +static void cl_vector_copy8(OUT void *const p_dest, + IN const void *const p_src, IN const size_t size) +{ + CL_ASSERT(size == sizeof(uint8_t)); + UNUSED_PARAM(size); + + *(uint8_t *) p_dest = *(uint8_t *) p_src; +} + +/* + * cl_vector_copy16 + * + * Description: + * copy operator used when the user structure is only 16 bits long. + * + * Inputs: + * p_src - source for copy + * + * Outputs: + * p_dest - destination for copy + * + * Returns: + * None + * + */ +static void cl_vector_copy16(OUT void *const p_dest, + IN const void *const p_src, IN const size_t size) +{ + CL_ASSERT(size == sizeof(uint16_t)); + UNUSED_PARAM(size); + + *(uint16_t *) p_dest = *(uint16_t *) p_src; +} + +/* + * cl_vector_copy32 + * + * Description: + * copy operator used when the user structure is only 32 bits long. + * + * Inputs: + * p_src - source for copy + * + * Outputs: + * p_dest - destination for copy + * + * Returns: + * None + * + */ +static void cl_vector_copy32(OUT void *const p_dest, + IN const void *const p_src, IN const size_t size) +{ + CL_ASSERT(size == sizeof(uint32_t)); + UNUSED_PARAM(size); + + *(uint32_t *) p_dest = *(uint32_t *) p_src; +} + +/* + * cl_vector_copy64 + * + * Description: + * copy operator used when the user structure is only 64 bits long. 
+ * + * Inputs: + * p_src - source for copy + * + * Outputs: + * p_dest - destination for copy + * + * Returns: + * None + * + */ +static void cl_vector_copy64(OUT void *const p_dest, + IN const void *const p_src, IN const size_t size) +{ + CL_ASSERT(size == sizeof(uint64_t)); + UNUSED_PARAM(size); + + *(uint64_t *) p_dest = *(uint64_t *) p_src; +} + +void cl_vector_construct(IN cl_vector_t * const p_vector) +{ + CL_ASSERT(p_vector); + + memset(p_vector, 0, sizeof(cl_vector_t)); + + p_vector->state = CL_UNINITIALIZED; +} + +cl_status_t cl_vector_init(IN cl_vector_t * const p_vector, + IN const size_t min_size, IN const size_t grow_size, + IN const size_t element_size, + IN cl_pfn_vec_init_t pfn_init OPTIONAL, + IN cl_pfn_vec_dtor_t pfn_dtor OPTIONAL, + IN const void *const context) +{ + cl_status_t status = CL_SUCCESS; + + CL_ASSERT(p_vector); + CL_ASSERT(element_size); + + cl_vector_construct(p_vector); + + p_vector->grow_size = grow_size; + p_vector->element_size = element_size; + p_vector->pfn_init = pfn_init; + p_vector->pfn_dtor = pfn_dtor; + p_vector->context = context; + + /* + * Try to choose a smart copy operator + * someday, we could simply let the users pass one in + */ + switch (element_size) { + case sizeof(uint8_t): + p_vector->pfn_copy = cl_vector_copy8; + break; + + case sizeof(uint16_t): + p_vector->pfn_copy = cl_vector_copy16; + break; + + case sizeof(uint32_t): + p_vector->pfn_copy = cl_vector_copy32; + break; + + case sizeof(uint64_t): + p_vector->pfn_copy = cl_vector_copy64; + break; + + default: + p_vector->pfn_copy = cl_vector_copy_general; + break; + } + + /* + * Set the state to initialized so that the call to set_size + * doesn't assert. 
+ */ + p_vector->state = CL_INITIALIZED; + + /* Initialize the allocation list */ + cl_qlist_init(&p_vector->alloc_list); + + /* get the storage needed by the user */ + if (min_size) { + status = cl_vector_set_size(p_vector, min_size); + if (status != CL_SUCCESS) + cl_vector_destroy(p_vector); + } + + return (status); +} + +void cl_vector_destroy(IN cl_vector_t * const p_vector) +{ + size_t i; + void *p_element; + + CL_ASSERT(p_vector); + CL_ASSERT(cl_is_state_valid(p_vector->state)); + + /* Call the user's destructor for each element in the array. */ + if (p_vector->state == CL_INITIALIZED) { + if (p_vector->pfn_dtor) { + for (i = 0; i < p_vector->size; i++) { + p_element = p_vector->p_ptr_array[i]; + /* Sanity check! */ + CL_ASSERT(p_element); + p_vector->pfn_dtor(p_element, + (void *)p_vector->context); + } + } + + /* Deallocate the pages */ + while (!cl_is_qlist_empty(&p_vector->alloc_list)) + free(cl_qlist_remove_head(&p_vector->alloc_list)); + + /* Destroy the page vector. */ + if (p_vector->p_ptr_array) { + free(p_vector->p_ptr_array); + p_vector->p_ptr_array = NULL; + } + } + + p_vector->state = CL_UNINITIALIZED; +} + +cl_status_t cl_vector_at(IN const cl_vector_t * const p_vector, + IN const size_t index, OUT void *const p_element) +{ + CL_ASSERT(p_vector); + CL_ASSERT(p_vector->state == CL_INITIALIZED); + + /* Range check */ + if (index >= p_vector->size) + return (CL_INVALID_PARAMETER); + + cl_vector_get(p_vector, index, p_element); + return (CL_SUCCESS); +} + +/* + * Copy *p_element into slot 'index', first growing the vector to index + 1 + * elements when 'index' is at or beyond the current size. Returns the + * resize status if the vector could not be grown to cover 'index', + * CL_SUCCESS otherwise. + */ +cl_status_t cl_vector_set(IN cl_vector_t * const p_vector, + IN const size_t index, IN void *const p_element) +{ + cl_status_t status; + void *p_dest; + + CL_ASSERT(p_vector); + CL_ASSERT(p_vector->state == CL_INITIALIZED); + CL_ASSERT(p_element); + + /* Determine if the vector has room for this element. */ + if (index >= p_vector->size) { + /* Resize to accommodate the given index. */ + status = cl_vector_set_size(p_vector, index + 1); + + /* + * Check for failure on or before the given index. If the + * resize failed and size is still <= index, slot 'index' is + * not part of the vector (and may not even be allocated), so + * it must not be written. 
*/ + if ((status != CL_SUCCESS) && (p_vector->size <= index)) + return (status); + } + + /* At this point, the array is guaranteed to be big enough */ + p_dest = cl_vector_get_ptr(p_vector, index); + /* Sanity check! */ + CL_ASSERT(p_dest); + + /* Copy the data into the array */ + p_vector->pfn_copy(p_dest, p_element, p_vector->element_size); + + return (CL_SUCCESS); +} + +cl_status_t cl_vector_set_capacity(IN cl_vector_t * const p_vector, + IN const size_t new_capacity) +{ + size_t new_elements; + size_t alloc_size; + size_t i; + cl_list_item_t *p_buf; + void *p_new_ptr_array; + + CL_ASSERT(p_vector); + CL_ASSERT(p_vector->state == CL_INITIALIZED); + + /* Do we have to do anything here? */ + if (new_capacity <= p_vector->capacity) { + /* Nope */ + return (CL_SUCCESS); + } + + /* Allocate our pointer array. */ + p_new_ptr_array = malloc(new_capacity * sizeof(void *)); + if (!p_new_ptr_array) + return (CL_INSUFFICIENT_MEMORY); + else + memset(p_new_ptr_array, 0, new_capacity * sizeof(void *)); + + if (p_vector->p_ptr_array) { + /* Copy the old pointer array into the new. */ + memcpy(p_new_ptr_array, p_vector->p_ptr_array, + p_vector->capacity * sizeof(void *)); + + /* Free the old pointer array. */ + free(p_vector->p_ptr_array); + } + + /* Set the new array. */ + p_vector->p_ptr_array = p_new_ptr_array; + + /* + * We have to add capacity to the array. Determine how many + * elements to add. + */ + new_elements = new_capacity - p_vector->capacity; + /* Determine the allocation size for the new array elements. */ + alloc_size = new_elements * p_vector->element_size; + + p_buf = (cl_list_item_t *) malloc(alloc_size + sizeof(cl_list_item_t)); + if (!p_buf) + return (CL_INSUFFICIENT_MEMORY); + else + memset(p_buf, 0, alloc_size + sizeof(cl_list_item_t)); + + cl_qlist_insert_tail(&p_vector->alloc_list, p_buf); + /* Advance the buffer pointer past the list item. 
*/ + p_buf++; + + for (i = p_vector->capacity; i < new_capacity; i++) { + p_vector->p_ptr_array[i] = p_buf; + /* Move the buffer pointer to the next element. */ + p_buf = (void *)(((uint8_t *) p_buf) + p_vector->element_size); + } + + /* Update the vector with the new capactity. */ + p_vector->capacity = new_capacity; + + return (CL_SUCCESS); +} + +cl_status_t cl_vector_set_size(IN cl_vector_t * const p_vector, + IN const size_t size) +{ + cl_status_t status; + size_t new_capacity; + size_t index; + void *p_element; + + CL_ASSERT(p_vector); + CL_ASSERT(p_vector->state == CL_INITIALIZED); + + /* Check to see if the requested size is the same as the existing size. */ + if (size == p_vector->size) + return (CL_SUCCESS); + + /* Determine if the vector has room for this element. */ + if (size >= p_vector->capacity) { + if (!p_vector->grow_size) + return (CL_INSUFFICIENT_MEMORY); + + /* Calculate the new capacity, taking into account the grow size. */ + new_capacity = size; + if (size % p_vector->grow_size) { + /* Round up to nearest grow_size boundary. */ + new_capacity += p_vector->grow_size - + (size % p_vector->grow_size); + } + + status = cl_vector_set_capacity(p_vector, new_capacity); + if (status != CL_SUCCESS) + return (status); + } + + /* Are we growing the array and need to invoke an initializer callback? */ + if (size > p_vector->size && p_vector->pfn_init) { + for (index = p_vector->size; index < size; index++) { + /* Get a pointer to this element */ + p_element = cl_vector_get_ptr(p_vector, index); + + /* Call the user's initializer and trap failures. */ + status = + p_vector->pfn_init(p_element, + (void *)p_vector->context); + if (status != CL_SUCCESS) { + /* Call the destructor for this object */ + if (p_vector->pfn_dtor) + p_vector->pfn_dtor(p_element, + (void *)p_vector-> + context); + + /* Return the failure status to the caller. 
*/ + return (status); + } + + /* The array just grew by one element */ + p_vector->size++; + } + } else if (p_vector->pfn_dtor) { + /* The array is shrinking and there is a destructor to invoke. */ + for (index = size; index < p_vector->size; index++) { + /* compute the address of the new elements */ + p_element = cl_vector_get_ptr(p_vector, index); + /* call the user's destructor */ + p_vector->pfn_dtor(p_element, + (void *)p_vector->context); + } + } + + p_vector->size = size; + return (CL_SUCCESS); +} + +cl_status_t cl_vector_set_min_size(IN cl_vector_t * const p_vector, + IN const size_t min_size) +{ + CL_ASSERT(p_vector); + CL_ASSERT(p_vector->state == CL_INITIALIZED); + + if (min_size > p_vector->size) { + /* We have to resize the array */ + return (cl_vector_set_size(p_vector, min_size)); + } + + /* We didn't have to do anything */ + return (CL_SUCCESS); +} + +void cl_vector_apply_func(IN const cl_vector_t * const p_vector, + IN cl_pfn_vec_apply_t pfn_callback, + IN const void *const context) +{ + size_t i; + void *p_element; + + CL_ASSERT(p_vector); + CL_ASSERT(p_vector->state == CL_INITIALIZED); + CL_ASSERT(pfn_callback); + + for (i = 0; i < p_vector->size; i++) { + p_element = cl_vector_get_ptr(p_vector, i); + pfn_callback(i, p_element, (void *)context); + } +} + +size_t cl_vector_find_from_start(IN const cl_vector_t * const p_vector, + IN cl_pfn_vec_find_t pfn_callback, + IN const void *const context) +{ + size_t i; + void *p_element; + + CL_ASSERT(p_vector); + CL_ASSERT(p_vector->state == CL_INITIALIZED); + CL_ASSERT(pfn_callback); + + for (i = 0; i < p_vector->size; i++) { + p_element = cl_vector_get_ptr(p_vector, i); + /* Invoke the callback */ + if (pfn_callback(i, p_element, (void *)context) == CL_SUCCESS) + break; + } + return (i); +} + +size_t cl_vector_find_from_end(IN const cl_vector_t * const p_vector, + IN cl_pfn_vec_find_t pfn_callback, + IN const void *const context) +{ + size_t i; + void *p_element; + + CL_ASSERT(p_vector); + 
CL_ASSERT(p_vector->state == CL_INITIALIZED); + CL_ASSERT(pfn_callback); + + i = p_vector->size; + + while (i) { + /* Get a pointer to the element in the array. */ + p_element = cl_vector_get_ptr(p_vector, --i); + CL_ASSERT(p_element); + + /* Invoke the callback for the current element. */ + if (pfn_callback(i, p_element, (void *)context) == CL_SUCCESS) + return (i); + } + + return (p_vector->size); +} diff --git a/complib/ib_statustext.c b/complib/ib_statustext.c new file mode 100644 index 0000000..52b2adc --- /dev/null +++ b/complib/ib_statustext.c @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Defines string to decode ib_api_status_t return values. + * + */ + +#if HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <iba/ib_types.h> /* NOTE(review): header name was lost in extraction; iba/ib_types.h presumed - confirm */ + +/* ib_api_status_t values above converted to text for easier printing. */ +const char *ib_error_str[] = { + "IB_SUCCESS", + "IB_INSUFFICIENT_RESOURCES", + "IB_INSUFFICIENT_MEMORY", + "IB_INVALID_PARAMETER", + "IB_INVALID_SETTING", + "IB_NOT_FOUND", + "IB_TIMEOUT", + "IB_CANCELED", + "IB_INTERRUPTED", + "IB_INVALID_PERMISSION", + "IB_UNSUPPORTED", + "IB_OVERFLOW", + "IB_MAX_MCAST_QPS_REACHED", + "IB_INVALID_QP_STATE", + "IB_INVALID_EEC_STATE", + "IB_INVALID_APM_STATE", + "IB_INVALID_PORT_STATE", + "IB_INVALID_STATE", + "IB_RESOURCE_BUSY", + "IB_INVALID_PKEY", + "IB_INVALID_LKEY", + "IB_INVALID_RKEY", + "IB_INVALID_MAX_WRS", + "IB_INVALID_MAX_SGE", + "IB_INVALID_CQ_SIZE", + "IB_INVALID_SERVICE_TYPE", + "IB_INVALID_GID", + "IB_INVALID_LID", + "IB_INVALID_GUID", + "IB_INVALID_CA_HANDLE", + "IB_INVALID_AV_HANDLE", + "IB_INVALID_CQ_HANDLE", + "IB_INVALID_EEC_HANDLE", + "IB_INVALID_QP_HANDLE", + "IB_INVALID_PD_HANDLE", + "IB_INVALID_MR_HANDLE", + "IB_INVALID_MW_HANDLE", + "IB_INVALID_RDD_HANDLE", + "IB_INVALID_MCAST_HANDLE", + "IB_INVALID_CALLBACK", + "IB_INVALID_AL_HANDLE", + "IB_INVALID_HANDLE", + "IB_ERROR", + "IB_REMOTE_ERROR", /* Infiniband Access Layer */ + "IB_VERBS_PROCESSING_DONE", + "IB_INVALID_WR_TYPE", + "IB_QP_IN_TIMEWAIT", + "IB_EE_IN_TIMEWAIT", + "IB_INVALID_PORT", + "IB_NOT_DONE", + "IB_UNKNOWN_ERROR" +}; + +/* ib_async_event_t values above converted to text for easier printing. 
*/ +const char *ib_async_event_str[] = { + "IB_AE_SQ_ERROR", + "IB_AE_SQ_DRAINED", + "IB_AE_RQ_ERROR", + "IB_AE_CQ_ERROR", + "IB_AE_QP_FATAL", + "IB_AE_QP_COMM", + "IB_AE_QP_APM", + "IB_AE_EEC_FATAL", + "IB_AE_EEC_COMM", + "IB_AE_EEC_APM", + "IB_AE_LOCAL_FATAL", + "IB_AE_PKEY_TRAP", + "IB_AE_QKEY_TRAP", + "IB_AE_MKEY_TRAP", + "IB_AE_PORT_TRAP", + "IB_AE_SYSIMG_GUID_TRAP", + "IB_AE_BUF_OVERRUN", + "IB_AE_LINK_INTEGRITY", + "IB_AE_FLOW_CTRL_ERROR", + "IB_AE_BKEY_TRAP", + "IB_AE_QP_APM_ERROR", + "IB_AE_EEC_APM_ERROR", + "IB_AE_WQ_REQ_ERROR", + "IB_AE_WQ_ACCESS_ERROR", + "IB_AE_PORT_ACTIVE", /* ACTIVE STATE */ + "IB_AE_PORT_DOWN", /* INIT", ARMED", DOWN */ + "IB_AE_UNKNOWN" +}; + +const char *ib_wc_status_str[] = { + "IB_WCS_SUCCESS", + "IB_WCS_LOCAL_LEN_ERR", + "IB_WCS_LOCAL_OP_ERR", + "IB_WCS_LOCAL_EEC_OP_ERR", + "IB_WCS_LOCAL_PROTECTION_ERR", + "IB_WCS_WR_FLUSHED_ERR", + "IB_WCS_MEM_WINDOW_BIND_ERR", + "IB_WCS_REM_ACCESS_ERR", + "IB_WCS_REM_OP_ERR", + "IB_WCS_RNR_RETRY_ERR", + "IB_WCS_TIMEOUT_RETRY_ERR", + "IB_WCS_REM_INVALID_REQ_ERR", + "IB_WCS_REM_INVALID_RD_REQ_ERR", + "IB_WCS_INVALID_EECN", + "IB_WCS_INVALID_EEC_STATE", + "IB_WCS_UNMATCHED_RESPONSE", /* InfiniBand Access Layer */ + "IB_WCS_CANCELED", /* InfiniBand Access Layer */ + "IB_WCS_UNKNOWN" +}; diff --git a/complib/libosmcomp.map b/complib/libosmcomp.map new file mode 100644 index 0000000..e874c55 --- /dev/null +++ b/complib/libosmcomp.map @@ -0,0 +1,162 @@ +OSMCOMP_2.3 { + global: + complib_init; + complib_exit; + cl_is_debug; + cl_disp_construct; + cl_disp_init; + cl_disp_destroy; + cl_disp_register; + cl_disp_unregister; + cl_disp_post; + cl_disp_shutdown; + cl_disp_get_queue_status; + cl_event_construct; + cl_event_init; + cl_event_destroy; + cl_event_signal; + cl_event_reset; + cl_event_wait_on; + cl_event_wheel_construct; + cl_event_wheel_init; + cl_event_wheel_init_ex; + cl_event_wheel_destroy; + cl_event_wheel_dump; + cl_event_wheel_reg; + cl_event_wheel_unreg; + cl_event_wheel_num_regs; + 
cl_qlist_insert_array_head; + cl_qlist_insert_array_tail; + cl_qlist_insert_list_head; + cl_qlist_insert_list_tail; + cl_is_item_in_qlist; + cl_qlist_find_next; + cl_qlist_find_prev; + cl_qlist_apply_func; + cl_qlist_move_items; + cl_list_construct; + cl_list_init; + cl_list_destroy; + cl_list_remove_object; + cl_is_object_in_list; + cl_list_insert_array_head; + cl_list_insert_array_tail; + cl_list_find_from_head; + cl_list_find_from_tail; + cl_list_apply_func; + cl_log_event; + cl_qmap_init; + cl_qmap_get; + cl_qmap_get_next; + cl_qmap_apply_func; + cl_qmap_insert; + cl_qmap_remove_item; + cl_qmap_remove; + cl_qmap_merge; + cl_qmap_delta; + cl_map_construct; + cl_map_init; + cl_map_destroy; + cl_map_insert; + cl_map_get; + cl_map_get_next; + cl_map_remove_item; + cl_map_remove; + cl_map_remove_all; + cl_map_merge; + cl_map_delta; + cl_fmap_init; + cl_fmap_match; + cl_fmap_get; + cl_fmap_get_next; + cl_fmap_apply_func; + cl_fmap_insert; + cl_fmap_remove_item; + cl_fmap_remove; + cl_fmap_merge; + cl_fmap_delta; + cl_qcpool_construct; + cl_qcpool_init; + cl_qcpool_destroy; + cl_qcpool_grow; + cl_qcpool_get; + cl_qcpool_get_tail; + cl_qpool_construct; + cl_qpool_init; + cl_cpool_construct; + cl_cpool_init; + cl_pool_construct; + cl_pool_init; + cl_ptr_vector_construct; + cl_ptr_vector_init; + cl_ptr_vector_destroy; + cl_ptr_vector_at; + cl_ptr_vector_set; + cl_ptr_vector_remove; + cl_ptr_vector_set_capacity; + cl_ptr_vector_set_size; + cl_ptr_vector_set_min_size; + cl_ptr_vector_apply_func; + cl_ptr_vector_find_from_start; + cl_ptr_vector_find_from_end; + cl_spinlock_construct; + cl_spinlock_init; + cl_spinlock_destroy; + cl_spinlock_acquire; + cl_spinlock_release; + cl_status_text; + cl_thread_construct; + cl_thread_init; + cl_thread_destroy; + cl_thread_suspend; + cl_thread_stall; + cl_proc_count; + cl_is_current_thread; + cl_thread_pool_construct; + cl_thread_pool_init; + cl_thread_pool_destroy; + cl_thread_pool_signal; + __cl_timer_prov_create; + 
__cl_timer_prov_destroy; + cl_timer_construct; + cl_timer_init; + cl_timer_destroy; + cl_timer_start; + cl_timer_stop; + cl_timer_trim; + cl_get_time_stamp; + cl_get_time_stamp_sec; + cl_vector_construct; + cl_vector_init; + cl_vector_destroy; + cl_vector_at; + cl_vector_set; + cl_vector_set_capacity; + cl_vector_set_size; + cl_vector_set_min_size; + cl_vector_apply_func; + cl_vector_find_from_start; + cl_vector_find_from_end; + cl_heap_construct; + cl_heap_init; + cl_heap_destroy; + cl_heap_modify_key; + cl_heap_insert; + cl_heap_delete; + cl_heap_extract_root; + cl_heap_resize; + cl_verify_heap_property; + cl_is_stored_in_heap; + cl_atomic_spinlock; + cl_atomic_dec; + ib_error_str; + ib_async_event_str; + ib_wc_status_str; + open_node_name_map; + close_node_name_map; + parse_node_map; + remap_node_name; + clean_nodedesc; + complib_init_v2; + local: *; +}; diff --git a/complib/libosmcomp.ver b/complib/libosmcomp.ver new file mode 100644 index 0000000..6ecd5d8 --- /dev/null +++ b/complib/libosmcomp.ver @@ -0,0 +1,9 @@ +# In this file we track the current API version +# of the complib library interface +# The version is built of the following +# three numbers: +# API_REV:RUNNING_REV:AGE +# API_REV - advance on any added API +# RUNNING_REV - advance on any change to the vendor files +# AGE - number of backward versions the API still supports +LIBVERSION=5:0:0 diff --git a/config/metis.m4 b/config/metis.m4 new file mode 100644 index 0000000..e510f2b --- /dev/null +++ b/config/metis.m4 @@ -0,0 +1,88 @@ + +dnl metis.m4: autoconf macros for the OpenSM metis support option +dnl +dnl To use this macro, just do METIS_SEL. +dnl the new configure option --enable-metis will be defined. 
+dnl The following variables are defined: +dnl METIS_LDADD - LDADD additional libs for linking the vendor lib +dnl METIS_INCLUDES - -I flag for the metis headers, set from --with-metis-includes +AC_DEFUN([METIS_SEL], [ +# --- BEGIN METIS_SEL --- + +dnl Check if they want the metis support +AC_MSG_CHECKING([to enable metis support for nue routing]) +AC_ARG_ENABLE(metis, +[ --enable-metis Enable the metis support for nue routing (default no)], + [case $enableval in + yes) metis_support=yes ;; + no) metis_support=no ;; +dnl Reject anything else: METIS_CHECK_LIB treats every value but "no" as enabled + *) AC_MSG_ERROR([bad value ${enableval} for --enable-metis]) ;; + esac], + metis_support=no) +AC_MSG_RESULT([$metis_support]) + +if test "x$metis_support" = "xyes"; then + METIS_LDADD="-lmetis" +fi + +dnl Define a way for the user to provide the path to the metis includes +AC_ARG_WITH(metis-includes, + AC_HELP_STRING([--with-metis-includes=], + [define the dir where metis includes are installed]), +AC_MSG_NOTICE(Using metis includes from:$with_metis_includes), +with_metis_includes="") + +if test "x$with_metis_includes" != "x"; then + METIS_INCLUDES="-I$with_metis_includes" +fi + +dnl Define a way for the user to provide the path to the metis libs +AC_ARG_WITH(metis-libs, + AC_HELP_STRING([--with-metis-libs=], + [define the dir where metis libs are installed]), +AC_MSG_NOTICE(Using metis libs from:$with_metis_libs), +with_metis_libs="") + +if test "x$with_metis_libs" != "x"; then + METIS_LDADD="-L$with_metis_libs $METIS_LDADD" +fi + +AC_SUBST(METIS_LDADD) +AC_SUBST(METIS_INCLUDES) + +# --- END METIS_SEL --- +]) dnl METIS_SEL + +dnl Check for the metis lib dependency +AC_DEFUN([METIS_CHECK_LIB], [ +# --- BEGIN METIS_CHECK_LIB --- +if test "$metis_support" != "no"; then + if test "$disable_libcheck" != "yes"; then + sav_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS $METIS_LDADD" + AC_CHECK_LIB(metis, METIS_PartGraphKway, + AC_DEFINE(ENABLE_METIS_FOR_NUE, + 1, [Define as 1 if you want to enable metis support for nue routing]), + AC_MSG_ERROR([METIS_PartGraphKway() not found.])) + LDFLAGS=$sav_LDFLAGS + else + AC_DEFINE(ENABLE_METIS_FOR_NUE, + 1, [Define as 1 if you want to enable metis support for nue routing]) 
+ fi +fi +# --- END METIS_CHECK_LIB --- +]) dnl METIS_CHECK_LIB + +dnl Check for the vendor lib dependency +AC_DEFUN([METIS_CHECK_HEADER], [ +# --- BEGIN METIS_CHECK_HEADER --- + +dnl we might be required to ignore this check +if test "$metis_support" != "no"; then + if test "$disable_libcheck" != "yes"; then + sav_CPPFLAGS=$CPPFLAGS + CPPFLAGS="$CPPFLAGS $METIS_INCLUDES" + AC_CHECK_HEADERS(metis.h) + CPPFLAGS=$sav_CPPFLAGS + fi +fi +# --- END METIS_CHECK_HEADER --- +]) dnl METIS_CHECK_HEADER diff --git a/config/osmvsel.m4 b/config/osmvsel.m4 new file mode 100644 index 0000000..87e1d74 --- /dev/null +++ b/config/osmvsel.m4 @@ -0,0 +1,296 @@ + +dnl osmvsel.m4: an autoconf for OpenSM Vendor Selection option +dnl +dnl To use this macro, just do OPENIB_APP_OSMV_SEL. +dnl the new configure option --with-osmv will be defined. +dnl current supported values are: openib(default),sim,gen1 +dnl The following variables are defined: +dnl OSMV_LDADD - LDADD additional libs for linking the vendor lib +AC_DEFUN([OPENIB_APP_OSMV_SEL], [ +# --- BEGIN OPENIB_APP_OSMV_SEL --- + +dnl Define a way for the user to provide the osm vendor type +AC_ARG_WITH(osmv, + AC_HELP_STRING([--with-osmv=], + [define the osm vendor type to build]), +AC_MSG_NOTICE(Using OSM Vendor Type:$with_osmv), +with_osmv="openib") + +dnl Define a way for the user to provide the path to the ibumad installation +AC_ARG_WITH(umad-prefix, + AC_HELP_STRING([--with-umad-prefix=], + [define the dir used as prefix for ibumad installation]), +AC_MSG_NOTICE(Using ibumad installation prefix:$with_umad_prefix), +with_umad_prefix="") + +dnl Define a way for the user to provide the path to the ibumad includes +AC_ARG_WITH(umad-includes, + AC_HELP_STRING([--with-umad-includes=], + [define the dir where ibumad includes are installed]), +AC_MSG_NOTICE(Using ibumad includes from:$with_umad_includes), +with_umad_includes="") + +if test x$with_umad_includes = x; then + if test x$with_umad_prefix != x; then + 
with_umad_includes=$with_umad_prefix/include + fi +fi + +dnl Define a way for the user to provide the path to the ibumad libs +AC_ARG_WITH(umad-libs, + AC_HELP_STRING([--with-umad-libs=], + [define the dir where ibumad libs are installed]), +AC_MSG_NOTICE(Using ibumad libs from:$with_umad_libs), +with_umad_libs="") + +if test x$with_umad_libs = x; then + if test x$with_umad_prefix != x; then +dnl Should we use lib64 or lib + if test "$(uname -m)" = "x86_64" -o "$(uname -m)" = "ppc64"; then + with_umad_libs=$with_umad_prefix/lib64 + else + with_umad_libs=$with_umad_prefix/lib + fi + fi +fi + +dnl Define a way for the user to provide the path to the simulator installation +AC_ARG_WITH(sim, + AC_HELP_STRING([--with-sim=], + [define the simulator prefix for building sim vendor (default /usr)]), +AC_MSG_NOTICE(Using Simulator from:$with_sim), +with_sim="/usr") + +dnl based on the with_osmv we can try the vendor flag +if test $with_osmv = "openib"; then + AC_DEFINE(OSM_VENDOR_INTF_OPENIB, 1, [Define as 1 for OpenIB vendor]) + OSMV_INCLUDES="-I\$(srcdir)/../include -I\$(srcdir)/../../libibumad/include -I\$(includedir)" + OSMV_LDADD="-L\$(abs_srcdir)/../../libibumad/.libs -L\$(libdir) -libumad" + + if test "x$with_umad_libs" != "x"; then + OSMV_LDADD="-L$with_umad_libs $OSMV_LDADD" + fi + + if test "x$with_umad_includes" != "x"; then + OSMV_INCLUDES="-I$with_umad_includes $OSMV_INCLUDES" + fi + AC_DEFINE(DUAL_SIDED_RMPP, 1, [Define as 1 if you want Dual Sided RMPP Support]) +elif test $with_osmv = "sim" ; then + AC_DEFINE(OSM_VENDOR_INTF_SIM, 1, [Define as 1 for sim vendor]) + OSMV_INCLUDES="-I$with_sim/include -I\$(srcdir)/../include" + OSMV_LDADD="-L$with_sim/lib -libmscli" +elif test $with_osmv = "gen1"; then + AC_DEFINE(OSM_VENDOR_INTF_TS, 1, [Define as 1 for ts vendor]) + + if test -z $MTHOME; then + MTHOME=/usr/local/ibgd/driver/infinihost + fi + + OSMV_INCLUDES="-I$MTHOME/include -I\$(srcdir)/../include" + + dnl we need to find the TS includes somewhere... 
+ osmv_found=0 + if test -z $TSHOME; then + osmv_dir=`uname -r|sed 's/-smp//'` + osmv_dir_smp=`uname -r` + for d in /usr/src/linux-$osmv_dir /usr/src/linux-$osmv_dir_smp /lib/modules/$osmv_dir/build /lib/modules/$osmv_dir_smp/build/; do + if test -f $d/drivers/infiniband/include/ts_ib_useraccess.h; then + OSMV_INCLUDES="$OSMV_INCLUDES -I$d/drivers/infiniband/include" + osmv_found=1 + fi + done + else + if test -f $TSHOME/ts_ib_useraccess.h; then + OSMV_INCLUDES="$OSMV_INCLUDES -I$TSHOME" + osmv_found=1 + fi + fi + if test $osmv_found = 0; then + AC_MSG_ERROR([Fail to find gen1 include files dir]) + fi + OSMV_LDADD="-L/usr/local/ibgd/driver/infinihost/lib -lvapi -lmosal -lmtl_common -lmpga" +elif test $with_osmv = "vapi"; then + AC_DEFINE(OSM_VENDOR_INTF_MTL, 1, [Define as 1 for vapi vendor]) + OSMV_INCLUDES="-I/usr/mellanox/include -I/usr/include -I\$(srcdir)/../include" + OSMV_LDADD="-L/usr/lib -L/usr/mellanox/lib -lib_mgt -lvapi -lmosal -lmtl_common -lmpga" +else + AC_MSG_ERROR([Invalid Vendor Type provided:$with_osmv should be either openib,sim,gen1]) +fi + +AM_CONDITIONAL(OSMV_VAPI, test $with_osmv = "vapi") +AM_CONDITIONAL(OSMV_GEN1, test $with_osmv = "gen1") +AM_CONDITIONAL(OSMV_SIM, test $with_osmv = "sim") +AM_CONDITIONAL(OSMV_OPENIB, test $with_osmv = "openib") +AC_DEFINE(VENDOR_RMPP_SUPPORT, 1, [Define as 1 if you want Vendor RMPP Support]) + +AC_SUBST(OSMV_LDADD) +AC_SUBST(OSMV_INCLUDES) + +# --- END OPENIB_APP_OSMV_SEL --- +]) dnl OPENIB_APP_OSMV_SEL + +dnl Check for the vendor lib dependency +AC_DEFUN([OPENIB_APP_OSMV_CHECK_LIB], [ +# --- BEGIN OPENIB_APP_OSMV_CHECK_LIB --- +if test "$disable_libcheck" != "yes"; then + + dnl based on the with_osmv we can try the vendor flag + if test $with_osmv = "openib"; then + LDADD="$LDADD $OSMV_LDADD" + AC_CHECK_LIB(ibumad, umad_init, [], + AC_MSG_ERROR([umad_init() not found. 
libosmvendor of type openib requires libibumad.])) + elif test $with_osmv = "sim" ; then + LDFLAGS="$LDFLAGS -L$with_sim/lib" + AC_CHECK_FILE([$with_sim/lib/libibmscli.a], [], + AC_MSG_ERROR([ibms_bind() not found. libosmvendor of type sim requires libibmscli.])) + elif test $with_osmv = "gen1"; then + LDFLAGS="$LDFLAGS -L$MTHOME/lib -L$MTHOME/lib64 -lmosal -lmtl_common -lmpga" + AC_CHECK_LIB(vapi, vipul_init, [], + AC_MSG_ERROR([vipul_init() not found. libosmvendor of type gen1 requires libvapi.])) + elif test $with_osmv != "vapi"; then + AC_MSG_ERROR([OSM Vendor Type not defined: please make sure OPENIB_APP_OSMV SEL is run before CHECK_LIB]) + fi +fi +# --- END OPENIB_APP_OSMV_CHECK_LIB --- +]) dnl OPENIB_APP_OSMV_CHECK_LIB + +dnl Check for the vendor lib dependency +AC_DEFUN([OPENIB_APP_OSMV_CHECK_HEADER], [ +# --- BEGIN OPENIB_APP_OSMV_CHECK_HEADER --- + +dnl we might be required to ignore this check +if test "$disable_libcheck" != "yes"; then + if test $with_osmv = "openib"; then + osmv_headers=infiniband/umad.h + elif test $with_osmv = "sim" ; then + osmv_headers=ibmgtsim/ibms_client_api.h + elif test $with_osmv = "gen1"; then + osmv_headers= + elif test $with_osmv = "vapi"; then + osmv_headers=vapi.h + else + AC_MSG_ERROR([OSM Vendor Type not defined: please make sure OPENIB_APP_OSMV SEL is run before CHECK_HEADER]) + fi + if test "x$osmv_headers" != "x"; then + AC_CHECK_HEADERS($osmv_headers) + fi +fi +# --- END OPENIB_APP_OSMV_CHECK_HEADER --- +]) dnl OPENIB_APP_OSMV_CHECK_HEADER + +dnl Check for socket console support +AC_DEFUN([OPENIB_OSM_CONSOLE_SOCKET_SEL], [ +# --- BEGIN OPENIB_OSM_CONSOLE_SOCKET_SEL --- + +dnl Console over a loopback socket is default if libwrap is available +AC_MSG_CHECKING([to enable console loopback]) +AC_ARG_ENABLE(console-loopback, +[ --enable-console-loopback Enable a console socket on the loopback interface, requires tcp_wrappers (default yes)], +[case $enableval in + yes) console_loopback=yes ;; + no) console_loopback=no ;; + 
esac], + console_loopback=yes) +AC_MSG_RESULT([$console_loopback]) + +if test $console_loopback = yes; then +AC_CHECK_LIB(wrap, request_init, [], [console_loopback=no + AC_MSG_WARN(libwrap is missing. setting console_loopback=no)]) +fi +if test $console_loopback = yes; then + AC_DEFINE(ENABLE_OSM_CONSOLE_LOOPBACK, + 1, + [Define as 1 if you want to enable a loopback console]) +fi + +dnl Console over a socket connection +AC_MSG_CHECKING([to enable console socket]) +AC_ARG_ENABLE(console-socket, +[ --enable-console-socket Enable a console socket, requires --enable-console-loopback (default no)], +[case $enableval in + yes) console_socket=yes ;; + no) console_socket=no ;; + esac], + console_socket=no) +AC_MSG_RESULT([$console_socket]) + +if test $console_socket = yes; then + if test $console_loopback = no; then + AC_MSG_ERROR([--enable-console-socket requires --enable-console-loopback]) + fi + AC_DEFINE(ENABLE_OSM_CONSOLE_SOCKET, + 1, + [Define as 1 if you want to enable a console on a socket connection]) +fi + +# --- END OPENIB_OSM_CONSOLE_SOCKET_SEL --- +]) dnl OPENIB_OSM_CONSOLE_SOCKET_SEL + + + +dnl Check if they want the PerfMgr +AC_DEFUN([OPENIB_OSM_PERF_MGR_SEL], [ +# --- BEGIN OPENIB_OSM_PERF_MGR_SEL --- + +dnl enable the perf-mgr +AC_MSG_CHECKING([to enable perf mgr]) +AC_ARG_ENABLE(perf-mgr, +[ --enable-perf-mgr Enable the performance manager (default yes)], + [case $enableval in + yes) perf_mgr=yes ;; + no) perf_mgr=no ;; + esac], + perf_mgr=yes) +AC_MSG_RESULT([$perf_mgr]) + +AC_MSG_CHECKING([to enable perf mgr profiling]) +AC_ARG_ENABLE(perf-mgr-profile, +[ --enable-perf-mgr-profile Enable the performance manager profiling (default no)], + [case $enableval in + yes) perf_mgr_profile=yes ;; + no) perf_mgr_profile=no ;; + esac], + perf_mgr_profile=no) +AC_MSG_RESULT([$perf_mgr_profile]) + +if test $perf_mgr = yes; then + AC_DEFINE(ENABLE_OSM_PERF_MGR, + 1, + [Define as 1 if you want to enable the performance manager]) + if test $perf_mgr_profile = yes; then 
+ AC_DEFINE(ENABLE_OSM_PERF_MGR_PROFILE, + 1, + [Define as 1 if you want to enable the performance manager profiling code]) + fi +fi +# --- END OPENIB_OSM_PERF_MGR_SEL --- +]) dnl OPENIB_OSM_PERF_MGR_SEL + + +dnl Check if they want the event plugin +AC_DEFUN([OPENIB_OSM_DEFAULT_EVENT_PLUGIN_SEL], [ +# --- BEGIN OPENIB_OSM_DEFAULT_EVENT_PLUGIN_SEL --- + +dnl enable the default-event-plugin +AC_MSG_CHECKING([to enable default event plugin]) +AC_ARG_ENABLE(default-event-plugin, +[ --enable-default-event-plugin Enable a default event plugin "osmeventplugin" (default no)], + [case $enableval in + yes) default_event_plugin=yes ;; + no) default_event_plugin=no ;; + esac], + default_event_plugin=no) +AC_MSG_RESULT([$default_event_plugin]) + +if test $default_event_plugin = yes; then + AC_DEFINE(ENABLE_OSM_DEFAULT_EVENT_PLUGIN, + 1, + [Define as 1 if you want to enable the event plugin]) + DEFAULT_EVENT_PLUGIN=osmeventplugin +else + DEFAULT_EVENT_PLUGIN= +fi +AC_SUBST([DEFAULT_EVENT_PLUGIN]) + +# --- END OPENIB_OSM_DEFAULT_EVENT_PLUGIN_SEL --- +]) dnl OPENIB_OSM_DEFAULT_EVENT_PLUGIN_SEL diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..4633fbb --- /dev/null +++ b/configure.ac @@ -0,0 +1,293 @@ +dnl Process this file with autoconf to produce a configure script. 
+ +AC_PREREQ(2.57) +AC_INIT(opensm, 3.3.23, linux-rdma@vger.kernel.org) +AC_CONFIG_SRCDIR([opensm/osm_opensm.c]) +AC_CONFIG_AUX_DIR(config) +AC_CONFIG_MACRO_DIR(config) +AC_CONFIG_HEADERS(include/config.h include/opensm/osm_config.h) +AM_INIT_AUTOMAKE([subdir-objects]) + +AC_SUBST(RELEASE, ${RELEASE:-${VERSION}}) +AC_SUBST(TARBALL, ${TARBALL:-${PACKAGE}-${VERSION}.tar.gz}) + +default_rdma_service=openibd +AC_ARG_WITH([rdma_service], + AC_HELP_STRING([--with-rdma-service=name], + [name of the RDMA service: "rdma" when using /etc/init.d/rdma to start RDMA services; "openibd" when using /etc/init.d/openibd to start RDMA services [default=${default_rdma_service}]])) +AC_SUBST(RDMA_SERVICE, ${with_rdma_service:-${default_rdma_service}}) + +if { rpm -q sles-release || rpm -q openSUSE-release; } >/dev/null 2>&1; then + default_stop="0 1 4 6" +else + default_stop="0 1 6" +fi + +default_start="null" + +AC_SUBST(DEFAULT_START, $default_start) +AC_SUBST(DEFAULT_STOP, $default_stop) + +dnl NOTE: AC_DEFINE's and AC_DEFINE_UNQUOTED's which are used in header files +dnl MUST have a corresponding entry in include/opensm/osm_config.h.in to +dnl ensure plugin compatibility. +AC_DEFINE(_OSM_CONFIG_H_, 1, mark config.h inclusion) + +dnl Defines the Language +AC_LANG_C + +dnl Required for cases make defines a MAKE=make ??? Why +AC_PROG_MAKE_SET +AC_PROG_CC +AC_PROG_LIBTOOL +AC_PROG_INSTALL +AC_PROG_LN_S +AC_PROG_MAKE_SET +AC_PROG_YACC +AC_PROG_LEX + +AC_CHECK_PROGS(_YACC_,$YACC,none) +if test "$_YACC_" = "none" +then + AC_MSG_ERROR([No bison/byacc/yacc found.]) +fi + +AC_CHECK_PROGS(_LEX_,$LEX,none) +if test "$_LEX_" = "none" +then + AC_MSG_ERROR([No flex/lex found.]) +fi + +dnl Checks for libraries +AC_CHECK_LIB(pthread, pthread_mutex_init, [], + AC_MSG_ERROR([pthread_mutex_init() not found. libosmcomp requires libpthread.])) +AC_CHECK_LIB(dl, dlopen, [], + AC_MSG_ERROR([dlopen() not found. OpenSM requires libdl.])) + +dnl Checks for typedefs, structures, and compiler characteristics. 
+AC_C_CONST +AC_C_INLINE +AC_TYPE_PID_T +AC_TYPE_SIZE_T +AC_HEADER_TIME +AC_STRUCT_TM +AC_C_VOLATILE + +dnl See if we have __builtin_expect +AC_MSG_CHECKING([if the compiler supports __builtin_expect]) +AC_TRY_COMPILE(, [ return __builtin_expect(1, 1) ? 1 : 0], + [ have_builtin_expect=yes + AC_MSG_RESULT([yes]) ], + [ have_builtin_expect=no + AC_MSG_RESULT([no]) ]) +if test "x_$have_builtin_expect" = "x_yes" ; then + AC_DEFINE([HAVE_BUILTIN_EXPECT], [1], [Define to 1 if the compiler supports __builtin_expect.]) +fi + +dnl We use --version-script with ld if possible +AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script, +if test -n "`$LD --help < /dev/null 2>/dev/null | grep version-script`"; then + ac_cv_version_script=yes +else + ac_cv_version_script=no +fi) +AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$ac_cv_version_script" = "yes") + +dnl Define an input config option to control debug compile +AC_ARG_ENABLE(debug, [ --enable-debug Turn on debugging], +[case "${enableval}" in + yes) debug=true ;; + no) debug=false ;; + *) AC_MSG_ERROR(bad value ${enableval} for --enable-debug) ;; +esac],debug=false) +if test x$debug = xtrue ; then + AC_DEFINE(OSM_DEBUG, 1, [ define 1 if OpenSM build is in a debug mode ]) + AC_DEFINE(_DEBUG_, 1, [ define 1 if OpenSM build is in a debug mode ]) +fi +AM_CONDITIONAL(DEBUG, test x$debug = xtrue) + +AC_ARG_ENABLE(libcheck, [ --disable-libcheck do not test for presence of ib libraries], +[if test x$enableval = xno ; then + disable_libcheck=yes +fi]) + +dnl check if they want the socket console +OPENIB_OSM_CONSOLE_SOCKET_SEL + +dnl select performance manager or not +OPENIB_OSM_PERF_MGR_SEL + +dnl resolve config dir. +conf_dir_tmp1="`eval echo ${sysconfdir} | sed 's/^NONE/$ac_default_prefix/'`" +SYS_CONFIG_DIR="`eval echo $conf_dir_tmp1`" + +dnl Check for a different subdir for the config files. 
+OPENSM_CONFIG_SUB_DIR=opensm +AC_MSG_CHECKING(for --with-opensm-conf-sub-dir) +AC_ARG_WITH(opensm-conf-sub-dir, + AC_HELP_STRING([--with-opensm-conf-sub-dir=dir], + [define a directory name for opensm's conf files / (default "opensm")]), + [ case "$withval" in + no) + ;; + *) + OPENSM_CONFIG_SUB_DIR=$withval + ;; + esac ] +) +dnl this needs to be configured for rpmbuilds separate from the full path +dnl "OPENSM_CONFIG_DIR" +AC_SUBST(OPENSM_CONFIG_SUB_DIR) + +OPENSM_CONFIG_DIR=$SYS_CONFIG_DIR/$OPENSM_CONFIG_SUB_DIR +AC_MSG_RESULT($OPENSM_CONFIG_DIR) +AC_DEFINE_UNQUOTED(OPENSM_CONFIG_DIR, + ["$OPENSM_CONFIG_DIR"], + [Define OpenSM config directory]) +AC_SUBST(OPENSM_CONFIG_DIR) + +dnl Check for a different default OpenSm config file +OPENSM_CONFIG_FILE=opensm.conf +AC_MSG_CHECKING(for --with-opensm-conf-file ) +AC_ARG_WITH(opensm-conf-file, + AC_HELP_STRING([--with-opensm-conf-file=file], + [define a default OpenSM config file (default opensm.conf)]), + [ case "$withval" in + no) + ;; + *) + OPENSM_CONFIG_FILE=$withval + ;; + esac ] +) +AC_MSG_RESULT(${OPENSM_CONFIG_FILE}) +AC_DEFINE_UNQUOTED(HAVE_DEFAULT_OPENSM_CONFIG_FILE, + ["$OPENSM_CONFIG_DIR/$OPENSM_CONFIG_FILE"], + [Define a default OpenSM config file]) +AC_SUBST(OPENSM_CONFIG_FILE) + +dnl Check for a different default node name map file +NODENAMEMAPFILE=ib-node-name-map +AC_MSG_CHECKING(for --with-node-name-map ) +AC_ARG_WITH(node-name-map, + AC_HELP_STRING([--with-node-name-map=file], + [define a default node name map file (default ib-node-name-map)]), + [ case "$withval" in + no) + ;; + *) + NODENAMEMAPFILE=$withval + ;; + esac ] +) +AC_MSG_RESULT($NODENAMEMAPFILE) +AC_DEFINE_UNQUOTED(HAVE_DEFAULT_NODENAME_MAP, + ["$OPENSM_CONFIG_DIR/$NODENAMEMAPFILE"], + [Define a default node name map file]) +AC_SUBST(NODENAMEMAPFILE) + +dnl Check for a different partition conf file +PARTITION_CONFIG_FILE=partitions.conf +AC_MSG_CHECKING(for --with-partitions-conf) +AC_ARG_WITH(partitions-conf, + 
AC_HELP_STRING([--with-partitions-conf=file], + [define a partitions config file (default partitions.conf)]), + [ case "$withval" in + no) + ;; + *) + PARTITION_CONFIG_FILE=$withval + ;; + esac ] +) +AC_MSG_RESULT($PARTITION_CONFIG_FILE) +AC_DEFINE_UNQUOTED(HAVE_DEFAULT_PARTITION_CONFIG_FILE, + ["$OPENSM_CONFIG_DIR/$PARTITION_CONFIG_FILE"], + [Define a Partition config file]) +AC_SUBST(PARTITION_CONFIG_FILE) + +dnl Check for a different QOS policy file +QOS_POLICY_FILE=qos-policy.conf +AC_MSG_CHECKING(for --with-qos-policy-conf) +AC_ARG_WITH(qos-policy-conf, + AC_HELP_STRING([--with-qos-policy-conf=file], + [define a QOS policy config file (default qos-policy.conf)]), + [ case "$withval" in + no) + ;; + *) + QOS_POLICY_FILE=$withval + ;; + esac ] +) +AC_MSG_RESULT($QOS_POLICY_FILE) +AC_DEFINE_UNQUOTED(HAVE_DEFAULT_QOS_POLICY_FILE, + ["$OPENSM_CONFIG_DIR/$QOS_POLICY_FILE"], + [Define a QOS policy config file]) +AC_SUBST(QOS_POLICY_FILE) + +dnl For now, this does not need to be configurable +TORUS2QOS_CONF_FILE=torus-2QoS.conf +AC_SUBST(TORUS2QOS_CONF_FILE) + +dnl Check for a different prefix-routes file +PREFIX_ROUTES_FILE=prefix-routes.conf +AC_MSG_CHECKING(for --with-prefix-routes-conf) +AC_ARG_WITH(prefix-routes-conf, + AC_HELP_STRING([--with-prefix-routes-conf=file], + [define a Prefix Routes config file (default is prefix-routes.conf)]), + [ case "$withval" in + no) + ;; + *) + PREFIX_ROUTES_FILE=$withval + ;; + esac ] +) +AC_MSG_RESULT($PREFIX_ROUTES_FILE) +AC_DEFINE_UNQUOTED(HAVE_DEFAULT_PREFIX_ROUTES_FILE, + ["$OPENSM_CONFIG_DIR/$PREFIX_ROUTES_FILE"], + [Define a Prefix Routes config file]) +AC_SUBST(PREFIX_ROUTES_FILE) + +dnl Check for a different per-module-logging file +PER_MOD_LOGGING_FILE=per-module-logging.conf +AC_MSG_CHECKING(for --with-per-module-logging-conf) +AC_ARG_WITH(per-module-logging.conf, + AC_HELP_STRING([--with-per-module-logging-conf=file], + [define a Per Module Logging config file (default is per-module-logging.conf)]), + [ case 
"$withval" in + no) + ;; + *) + PER_MOD_LOGGING_FILE=$withval + ;; + esac ] +) +AC_MSG_RESULT($PER_MOD_LOGGING_FILE) +AC_DEFINE_UNQUOTED(HAVE_DEFAULT_PER_MOD_LOGGING_FILE, + ["$OPENSM_CONFIG_DIR/$PER_MOD_LOGGING_FILE"], + [Define a Per Module Logging config file]) +AC_SUBST(PER_MOD_LOGGING_FILE) + +dnl select example event plugin or not +OPENIB_OSM_DEFAULT_EVENT_PLUGIN_SEL + +dnl Provide user option to select vendor +OPENIB_APP_OSMV_SEL + +dnl Checks for headers and libraries +OPENIB_APP_OSMV_CHECK_HEADER +OPENIB_APP_OSMV_CHECK_LIB + +dnl Provide user option to enable metis support for nue +METIS_SEL + +dnl Checks for headers and libraries +METIS_CHECK_HEADER +METIS_CHECK_LIB + +AC_CONFIG_FILES([man/opensm.8 man/torus-2QoS.8 man/torus-2QoS.conf.5 scripts/opensm.init scripts/redhat-opensm.init scripts/sldd.sh]) + +dnl Create the following Makefiles +AC_OUTPUT([include/opensm/osm_version.h Makefile include/Makefile complib/Makefile libopensm/Makefile libvendor/Makefile opensm/Makefile osmeventplugin/Makefile osmroutingplugin/Makefile osmtest/Makefile opensm.spec]) diff --git a/doc/OpenSM_PKey_Mgr.txt b/doc/OpenSM_PKey_Mgr.txt new file mode 100644 index 0000000..31d4c83 --- /dev/null +++ b/doc/OpenSM_PKey_Mgr.txt @@ -0,0 +1,78 @@ +OpenSM Partition Management +--------------------------- + +Roadmap: +Phase 1 - provide partition management at the EndPort (HCA, Router and Switch + Port 0) level with no routing affects. +Phase 2 - routing engine should take partitions into account. + +Phase 1 functionality: + +Supported Policy: + +1. EndPort partition groups are to be defined by listing the + PortGUIDs as full and limited members. + +2. Each partition group might be assigned an explicit P_Key (only the 15 + LSB bits are valid) or the SM should assign it randomly. + +3. A flag should control the generation of IPoIB broadcast group for + that partition. Extra optional MGIDs can be provided to be setup (on + top of the IPoIB broadcast group). + +4. 
A global flag "Disconnect Unconfigured EndPorts": If TRUE, prevents + EndPorts that are not explicitly defined as part of any partition + (thus "unconfigured") from communicating with any other EndPort. Otherwise, it + will let these EndPorts send packets to all other EndPorts. + +Functionality: + +1. The policy should be updated: + - during SM bringup + - after kill -HUP + - through SNMP (once it is supported) + +2. Partition tables will be updated on full sweep (new port/trap etc). + As a first step, the policy feasibility should be + verified. Feasibility could be limited by the EndPorts' support for + the number of partitions, etc. Unrealizable policy should be reported + and extra rules ignored after providing error messages. + +3. Each EndPort will be assigned P_Keys as follows: + + a. Default partition group limited membership as defined by rule #4 below. + (only the SM port will get 0xffff). + + b. P_Keys for all partition groups it is part of as defined in + the policy. + + c. P_Key update will preserve index for the existing P_Keys on the + port. If the port has limited resources that would require reuse of + an index, a message will be provided and some of the settings will be + omitted. P_Key indexes will not change under any circumstances. + +4. Each Switch Leaf Port (a switch port that is connected to an + EndPort) should be configured according to the same rules that + apply to the EndPort connected to that switch port. + This actually enables unauthorized port isolation (with future + usage of M_Key and ProtectBits). + +5. Policy entries matching a non EndPort will be flagged as + erroneous in the log file and ignored. + +6. At the end of the P_Key setting phase, a check for successful + setting should be made. + Errors should be clearly logged and cause a new sweep. + +7. Each partition that is marked to support IPoIB should define a + broadcast MGRP.
If the partition does not support IPoIB, it should + define a dummy MGRP with parameters blocking IPoIB drivers from + registering to it. + +Phase 2 functionality: + +The partition policy should be considered during the routing such that +links are associated with particular partition or a set of +partitions. Policy should be enhanced to provide hints for how to do +that (correlating to QoS too). The exact algorithm is TBD. + diff --git a/doc/OpenSM_RN.pdf b/doc/OpenSM_RN.pdf new file mode 100644 index 0000000..700924f Binary files /dev/null and b/doc/OpenSM_RN.pdf differ diff --git a/doc/OpenSM_UM.pdf b/doc/OpenSM_UM.pdf new file mode 100644 index 0000000..ae32826 Binary files /dev/null and b/doc/OpenSM_UM.pdf differ diff --git a/doc/QoS_management_in_OpenSM.txt b/doc/QoS_management_in_OpenSM.txt new file mode 100644 index 0000000..b347c4c --- /dev/null +++ b/doc/QoS_management_in_OpenSM.txt @@ -0,0 +1,501 @@ + + QoS Management in OpenSM + +============================================================================== + Table of contents +============================================================================== + +1. Overview +2. Full QoS Policy File +3. Simplified QoS Policy Definition +4. Policy File Syntax Guidelines +5. Examples of Full Policy File +6. Simplified QoS Policy - Details and Examples +7. SL2VL Mapping and VL Arbitration + + +============================================================================== + 1. Overview +============================================================================== + +When QoS in OpenSM is enabled (-Q or --qos), OpenSM looks for QoS Policy file. +The default name of OpenSM QoS policy file is +/usr/local/etc/opensm/qos-policy.conf. The default may be changed by using -Y +or --qos_policy_file option with OpenSM. + +During fabric initialization and at every heavy sweep OpenSM parses the QoS +policy file, applies its settings to the discovered fabric elements, and +enforces the provided policy on client requests. 
The overall flow for such +requests is: + - The request is matched against the defined matching rules such that the + QoS Level definition is found. + - Given the QoS Level, path(s) search is performed with the given + restrictions imposed by that level. + +There are two ways to define QoS policy: + - Full policy, where the policy file syntax provides an administrator + various ways to match PathRecord/MultiPathRecord (PR/MPR) request and + enforce various QoS constraints on the requested PR/MPR + - Simplified QoS policy definition, where an administrator would be able to + match PR/MPR requests by various ULPs and applications running on top of + these ULPs. + +While the full policy syntax is very flexible, in many cases the simplified +policy definition would be sufficient. + + +============================================================================== + 2. Full QoS Policy File +============================================================================== + +QoS policy file has the following sections: + +I) Port Groups (denoted by port-groups). +This section defines zero or more port groups that can be referred later by +matching rules (see below). Port group lists ports by: + - Port GUID + - Port name, which is a combination of NodeDescription and IB port number + - PKey, which means that all the ports in the subnet that belong to + partition with a given PKey belong to this port group + - Partition name, which means that all the ports in the subnet that belong + to partition with a given name belong to this port group + - Node type, where possible node types are: CA, SWITCH, ROUTER, ALL, and + SELF (SM's port). + +II) QoS Setup (denoted by qos-setup). +This section describes how to set up SL2VL and VL Arbitration tables on +various nodes in the fabric. +However, this is not supported in OpenSM currently. +SL2VL and VLArb tables should be configured in the OpenSM options file +(default location - /usr/local/etc/opensm/opensm.conf). 
+ +III) QoS Levels (denoted by qos-levels). +Each QoS Level defines Service Level (SL) and a few optional fields: + - MTU limit + - Rate limit + - PKey + - Packet lifetime +When path(s) search is performed, it is done with regard to the restrictions that +these QoS Level parameters impose. +One QoS level that is mandatory to define is a DEFAULT QoS level. It is +applied to a PR/MPR query that does not match any existing match rule. +Similar to any other QoS Level, it can also be explicitly referred to by any +match rule. + +IV) QoS Matching Rules (denoted by qos-match-rules). +Each PathRecord/MultiPathRecord query that OpenSM receives is matched against +the set of matching rules. Rules are scanned in order of appearance in the QoS +policy file such that the first match takes precedence. +Each rule has a name of QoS level that will be applied to the matching query. +A default QoS level is applied to a query that did not match any rule. +Queries can be matched by: + - Source port group (whether a source port is a member of a specified group) + - Destination port group (same as above, only for destination port) + - PKey + - QoS class + - Service ID +To match a certain matching rule, PR/MPR query has to match ALL the rule's +criteria. However, not all the fields of the PR/MPR query have to appear in +the matching rule. +For instance, if the rule has a single criterion - Service ID, it will match +any query that has this Service ID, disregarding rest of the query fields. +However, if a certain query has only Service ID (which means that this is the +only bit in the PR/MPR component mask that is on), it will not match any rule +that has other matching criteria besides Service ID. + + +============================================================================== + 3. Simplified QoS Policy Definition +============================================================================== + +Simplified QoS policy definition consists of a single section denoted by +qos-ulps.
Similar to the full QoS policy, it has a list of match rules and +their QoS Level, but in this case a match rule has only one criterion - its +goal is to match a certain ULP (or a certain application on top of this ULP) +PR/MPR request, and QoS Level has only one constraint - Service Level (SL). +The simplified policy section may appear in the policy file in combination with +the full policy, or as a stand-alone policy definition. +See more details and list of match rule criteria below. + + +============================================================================== + 4. Policy File Syntax Guidelines +============================================================================== + +- Empty lines are ignored. +- Leading and trailing blanks, as well as empty lines, are ignored, so + the indentation in the example is just for better readability. +- Comments are started with the pound sign (#) and terminated by EOL. +- Any keyword should be the first non-blank in the line, unless it's a + comment. +- Keywords that denote section/subsection start have matching closing + keywords. +- Having a QoS Level named "DEFAULT" is a must - it is applied to PR/MPR + requests that didn't match any of the matching rules. +- Any section/subsection of the policy file is optional. + + +============================================================================== + 5. Examples of Full Policy File +============================================================================== + +As mentioned earlier, any section of the policy file is optional, and +the only mandatory part of the policy file is a default QoS Level. +Here's an example of the shortest policy file: + + qos-levels + qos-level + name: DEFAULT + sl: 0 + end-qos-level + end-qos-levels + +Port groups section is missing because there are no match rules, which means +that port groups are not referred to anywhere, and there is no need to define +them.
And since this policy file doesn't have any matching rules, PR/MPR query +won't match any rule, and OpenSM will enforce default QoS level. +Essentially, the above example is equivalent to not having QoS policy file +at all. + +The following example shows all the possible options and keywords in the +policy file and their syntax: + + # + # See the comments in the following example. + # They explain different keywords and their meaning. + # + port-groups + + port-group # using port GUIDs + name: Storage + # "use" is just a description that is used for logging + # Other than that, it is just a comment + use: SRP Targets + port-guid: 0x10000000000001, 0x10000000000005-0x1000000000FFFA + port-guid: 0x1000000000FFFF + end-port-group + + port-group + name: Virtual Servers + # The syntax of the port name is as follows: + # "node_description/Pnum". + # node_description is compared to the NodeDescription of the node, + # and "Pnum" is a port number on that node. + port-name: vs1 HCA-1/P1, vs2 HCA-1/P1 + end-port-group + + # using partitions defined in the partition policy + port-group + name: Partitions + partition: Part1 + pkey: 0x1234 + end-port-group + + # using node types: CA, ROUTER, SWITCH, SELF (for node that runs SM) + # or ALL (for all the nodes in the subnet) + port-group + name: CAs and SM + node-type: CA, SELF + end-port-group + + end-port-groups + + qos-setup + # This section of the policy file describes how to set up SL2VL and VL + # Arbitration tables on various nodes in the fabric. + # However, this is not supported in OpenSM currently - the section is + # parsed and ignored. SL2VL and VLArb tables should be configured in the + # OpenSM options file (by default - /usr/local/etc/opensm/opensm.conf). + end-qos-setup + + qos-levels + + # Having a QoS Level named "DEFAULT" is a must - it is applied to + # PR/MPR requests that didn't match any of the matching rules. 
+ qos-level + name: DEFAULT + use: default QoS Level + sl: 0 + end-qos-level + + # the whole set: SL, MTU-Limit, Rate-Limit, PKey, Packet Lifetime + qos-level + name: WholeSet + sl: 1 + mtu-limit: 4 + rate-limit: 5 + pkey: 0x1234 + packet-life: 8 + end-qos-level + + end-qos-levels + + # Match rules are scanned in order of their appearance in the policy file. + # First matched rule takes precedence. + qos-match-rules + + # matching by single criteria: QoS class + qos-match-rule + use: by QoS class + qos-class: 7-9,11 + # Name of qos-level to apply to the matching PR/MPR + qos-level-name: WholeSet + end-qos-match-rule + + # show matching by destination group and service id + qos-match-rule + use: Storage targets + destination: Storage + service-id: 0x10000000000001, 0x10000000000008-0x10000000000FFF + qos-level-name: WholeSet + end-qos-match-rule + + qos-match-rule + source: Storage + use: match by source group only + qos-level-name: DEFAULT + end-qos-match-rule + + qos-match-rule + use: match by all parameters + qos-class: 7-9,11 + source: Virtual Servers + destination: Storage + service-id: 0x0000000000010000-0x000000000001FFFF + pkey: 0x0F00-0x0FFF + qos-level-name: WholeSet + end-qos-match-rule + + end-qos-match-rules + + +============================================================================== + 6. Simplified QoS Policy - Details and Examples +============================================================================== + +Simplified QoS policy match rules are tailored for matching ULPs (or some +application on top of a ULP) PR/MPR requests. This section has a list of +per-ULP (or per-application) match rules and the SL that should be enforced +on the matched PR/MPR query. 
+ +Match rules include: + - Default match rule that is applied to PR/MPR query that didn't match any + of the other match rules + - SDP + - SDP application with a specific target TCP/IP port range + - SRP with a specific target IB port GUID + - RDS + - iSER + - iSER application with a specific target TCP/IP port range + - IPoIB with a default PKey + - IPoIB with a specific PKey + - any ULP/application with a specific Service ID in the PR/MPR query + - any ULP/application with a specific PKey in the PR/MPR query + - any ULP/application with a specific target IB port GUID in the PR/MPR query + - any ULP/application with a specific source IB port GUID in the PR/MPR query + - any ULP/application with a specific source or target IB port GUID in the + PR/MPR query + +Since any section of the policy file is optional, as long as basic rules of +the file are kept (such as no referring to nonexisting port group, having +default QoS Level, etc), the simplified policy section (qos-ulps) can serve +as a complete QoS policy file. +The shortest policy file in this case would be as follows: + + qos-ulps + default : 0 #default SL + end-qos-ulps + +It is equivalent to the previous example of the shortest policy file, and it +is also equivalent to not having policy file at all. 
+ +Below is an example of simplified QoS policy with all the possible keywords: + + qos-ulps + default : 0 # default SL + sdp, port-num 30000 : 0 # SL for application running on top + # of SDP when a destination + # TCP/IP port is 30000 + sdp, port-num 10000-20000 : 0 + sdp : 1 # default SL for any other + # application running on top of SDP + rds : 2 # SL for RDS traffic + iser, port-num 900 : 0 # SL for iSER with a specific target + # port + iser : 3 # default SL for iSER + ipoib, pkey 0x0001 : 0 # SL for IPoIB on partition with + # pkey 0x0001 + ipoib : 4 # default IPoIB partition, + # pkey=0x7FFF + any, service-id 0x6234 : 6 # match any PR/MPR query with a + # specific Service ID + any, pkey 0x0ABC : 6 # match any PR/MPR query with a + # specific PKey + srp, target-port-guid 0x1234 : 5 # SRP when SRP Target is located on + # a specified IB port GUID + any, target-port-guid 0x0ABC-0xFFFFF : 6 # match any PR/MPR query with + # a specific target port GUID + any, source-port-guid 0x5678 : 7 # match any PR/MPR query with + # a specific source port + # GUID + any, source-target-port-guid 0x9abcd : 8 # match any PR/MPR query with + # a specific source or target port + # GUID + end-qos-ulps + + +Similar to the full policy definition, matching of PR/MPR queries is done in +order of appearance in the QoS policy file such that the first match takes +precedence, except for the "default" rule, which is applied only if the query +didn't match any other rule. + +All other sections of the QoS policy file take precedence over the qos-ulps +section. That is, if a policy file has both qos-match-rules and qos-ulps +sections, then any query is matched first against the rules in the +qos-match-rules section, and only if there was no match, the query is matched +against the rules in qos-ulps section.
+ +Note that some of these match rules may overlap, so in order to use the +simplified QoS definition effectively, it is important to understand how each +of the ULPs is matched: + +6.1 IPoIB +IPoIB query is matched by PKey. Default PKey for IPoIB partition is 0x7fff, so +the following three match rules are equivalent: + + ipoib : <SL> + ipoib, pkey 0x7fff : <SL> + any, pkey 0x7fff : <SL> + +6.2 SDP +SDP PR query is matched by Service ID. The Service-ID for SDP is +0x000000000001PPPP, where PPPP are 4 hex digits holding the remote TCP/IP Port +Number to connect to. The following two match rules are equivalent: + + sdp : <SL> + any, service-id 0x0000000000010000-0x000000000001ffff : <SL> + +6.3 RDS +Similar to SDP, RDS PR query is matched by Service ID. The Service ID for RDS +is 0x000000000106PPPP, where PPPP are 4 hex digits holding the remote TCP/IP +Port Number to connect to. Default port number for RDS is 0x48CA, which makes +a default Service-ID 0x00000000010648CA. The following two match rules are +equivalent: + + rds : <SL> + any, service-id 0x00000000010648CA : <SL> + +6.4 iSER +Similar to RDS, iSER query is matched by Service ID, where the Service ID +is also 0x000000000106PPPP. Default port number for iSER is 0x0CBC, which makes +a default Service-ID 0x0000000001060CBC. The following two match rules are +equivalent: + + iser : <SL> + any, service-id 0x0000000001060CBC : <SL> + +6.5 SRP +Service ID for SRP varies from storage vendor to vendor, thus SRP query is +matched by the target IB port GUID. The following two match rules are +equivalent: + + srp, target-port-guid 0x1234 : <SL> + any, target-port-guid 0x1234 : <SL> + +Note that any of the above ULPs might contain target port GUID in the PR +query, so in order for these queries not to be recognized by the QoS manager +as SRP, the SRP match rule (or any match rule that refers to the target port +guid only) should be placed at the end of the qos-ulps match rules. + +6.6 MPI +SL for MPI is manually configured by MPI admin.
OpenSM is not forcing any SL +on the MPI traffic, and that's why it is the only ULP that did not appear in +the qos-ulps section. + + +============================================================================== + 7. SL2VL Mapping and VL Arbitration +============================================================================== + +OpenSM cached options file has a set of QoS related configuration parameters, +that are used to configure SL2VL mapping and VL arbitration on IB ports. +These parameters are: + - Max VLs: the maximum number of VLs that will be on the subnet. + - High limit: the limit of High Priority component of VL Arbitration + table (IBA 7.6.9). + - VLArb low table: Low priority VL Arbitration table (IBA 7.6.9) template. + - VLArb high table: High priority VL Arbitration table (IBA 7.6.9) template. + - SL2VL: SL2VL Mapping table (IBA 7.6.6) template. It is a list of VLs + corresponding to SLs 0-15 (Note that VL15 used here means drop this SL). + +There are separate QoS configuration parameters sets for various target types: +CAs, routers, switch external ports, and switch's enhanced port 0. The names +of such parameters are prefixed by "qos_<type>_" string. Here is a full list +of the currently supported sets: + + qos_ca_ - QoS configuration parameters set for CAs. + qos_rtr_ - parameters set for routers. + qos_sw0_ - parameters set for switches' port 0. + qos_swe_ - parameters set for switches' external ports.
+ +Here's the example of typical default values for CAs and switches' external +ports (hard-coded in OpenSM initialization): + + qos_ca_max_vls 15 + qos_ca_high_limit 0 + qos_ca_vlarb_high 0:4,1:0,2:0,3:0,4:0,5:0,6:0,7:0,8:0,9:0,10:0,11:0,12:0,13:0,14:0 + qos_ca_vlarb_low 0:0,1:4,2:4,3:4,4:4,5:4,6:4,7:4,8:4,9:4,10:4,11:4,12:4,13:4,14:4 + qos_ca_sl2vl 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 + + qos_swe_max_vls 15 + qos_swe_high_limit 0 + qos_swe_vlarb_high 0:4,1:0,2:0,3:0,4:0,5:0,6:0,7:0,8:0,9:0,10:0,11:0,12:0,13:0,14:0 + qos_swe_vlarb_low 0:0,1:4,2:4,3:4,4:4,5:4,6:4,7:4,8:4,9:4,10:4,11:4,12:4,13:4,14:4 + qos_swe_sl2vl 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 + +VL arbitration tables (both high and low) are lists of VL/Weight pairs. +Each list entry contains a VL number (values from 0-14), and a weighting value +(values 0-255), indicating the number of 64 byte units (credits) which may be +transmitted from that VL when its turn in the arbitration occurs. A weight +of 0 indicates that this entry should be skipped. If a list entry is +programmed for VL15 or for a VL that is not supported or is not currently +configured by the port, the port may either skip that entry or send from any +supported VL for that entry. + +Note, that the same VLs may be listed multiple times in the High or Low +priority arbitration tables, and, further, it can be listed in both tables. + +The limit of high-priority VLArb table (qos_<type>_high_limit) indicates the +number of high-priority packets that can be transmitted without an opportunity +to send a low-priority packet. Specifically, the number of bytes that can be +sent is high_limit times 4K bytes. + +A high_limit value of 255 indicates that the byte limit is unbounded. +Note: if the 255 value is used, the low priority VLs may be starved. +A value of 0 indicates that only a single packet from the high-priority table +may be sent before an opportunity is given to the low-priority table.
+ +Keep in mind that ports usually transmit packets of size equal to MTU. +For instance, for 4KB MTU a single packet will require 64 credits, so in order +to achieve effective VL arbitration for packets of 4KB MTU, the weighting +values for each VL should be multiples of 64. + +Below is an example of SL2VL and VL Arbitration configuration on subnet: + + qos_ca_max_vls 15 + qos_ca_high_limit 6 + qos_ca_vlarb_high 0:4 + qos_ca_vlarb_low 0:0,1:64,2:128,3:192,4:0,5:64,6:64,7:64 + qos_ca_sl2vl 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 + + qos_swe_max_vls 15 + qos_swe_high_limit 6 + qos_swe_vlarb_high 0:4 + qos_swe_vlarb_low 0:0,1:64,2:128,3:192,4:0,5:64,6:64,7:64 + qos_swe_sl2vl 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 + +In this example, there are 8 VLs configured on subnet: VL0 to VL7. VL0 is +defined as a high priority VL, and it is limited to 6 x 4KB = 24KB in a single +transmission burst. Such configuration would suit a VL that needs low latency +and uses a small MTU when transmitting packets. Rest of the VLs are defined as +low priority VLs with different weights, while VL4 is effectively turned off. diff --git a/doc/current-routing.txt b/doc/current-routing.txt new file mode 100644 index 0000000..c33bef1 --- /dev/null +++ b/doc/current-routing.txt @@ -0,0 +1,1080 @@ +Current OpenSM Routing +12/13/17 + +OpenSM offers ten routing engines: + +1. Min Hop Algorithm - based on the minimum hops to each node where the +path length is optimized. + +2. UPDN Unicast routing algorithm - also based on the minimum hops to each +node, but it is constrained to ranking rules. This algorithm should be chosen +if the subnet is not a pure Fat Tree, and deadlock may occur due to a +loop in the subnet. + +3. DNUP Unicast routing algorithm - similar to UPDN but allows routing in +fabrics which have some CA nodes attached closer to the roots than some switch +nodes. + +4. 
Fat-tree Unicast routing algorithm - this algorithm optimizes routing +of fat-trees for congestion-free "shift" communication pattern. +It should be chosen if a subnet is a symmetrical fat-tree. +Similar to UPDN routing, Fat-tree routing is credit-loop-free. + +5. LASH unicast routing algorithm - uses InfiniBand virtual layers +(SL) to provide deadlock-free shortest-path routing while also +distributing the paths between layers. LASH is an alternative +deadlock-free topology-agnostic routing algorithm to the non-minimal +UPDN algorithm avoiding the use of a potentially congested root node. + +6. DOR Unicast routing algorithm - based on the Min Hop algorithm, but +avoids port equalization except for redundant links between the same +two switches. This provides deadlock free routes for hypercubes when +the fabric is cabled as a hypercube and for meshes when cabled as a +mesh (see details below). + +7. Torus-2QoS unicast routing algorithm - a DOR-based routing algorithm +specialized for 2D/3D torus topologies. Torus-2QoS provides deadlock-free +routing while supporting two quality of service (QoS) levels. In addition +it is able to route around multiple failed fabric links or a single failed +fabric switch without introducing deadlocks, and without changing path SL +values granted before the failure. + +8. DFSSSP unicast routing algorithm - a deadlock-free single-source- +shortest-path routing, which uses the SSSP algorithm (see algorithm 9.) +as the base to optimize link utilization and uses InfiniBand virtual lanes +(SL) to provide deadlock-freedom. + +9. SSSP unicast routing algorithm - a single-source-shortest-path routing +algorithm, which globally balances the number of routes per link to +optimize link utilization. This routing algorithm has no restrictions +in terms of the underlying topology. + +10. 
Nue unicast routing algorithm - a 100%-applicable and deadlock-free +routing which can be used for any arbitrary or faulty network topology +and any number of virtual lanes (this includes the absence of VLs as well). +Paths are globally balanced w.r.t the number of routes per link, and are +kept as short as possible while enforcing deadlock-freedom within the VL +constraint. + +OpenSM provides an optional unicast routing cache (enabled by -A or +--ucast_cache options). When enabled, unicast routing cache prevents +routing recalculation (which is a heavy task in a large cluster) when +there was no topology change detected during the heavy sweep, or when +the topology change does not require new routing calculation, e.g. when +one or more CAs/RTRs/leaf switches going down, or one or more of these +nodes coming back after being down. +A very common case that is handled by the unicast routing cache is host +reboot, which otherwise would cause two full routing recalculations: one +when the host goes down, and the other when the host comes back online. + +OpenSM also supports a file method which can load routes from a table. See +modular-routing.txt for more information on this. + +The basic routing algorithm is comprised of two stages: +1. MinHop matrix calculation + How many hops are required to get from each port to each LID ? + The algorithm to fill these tables is different if you run standard +(min hop) or Up/Down. + For standard routing, a "relaxation" algorithm is used to propagate +min hop from every destination LID through neighbor switches + For Up/Down routing, a BFS from every target is used. The BFS tracks link +direction (up or down) and avoid steps that will perform up after a down +step was used. + +2. Once MinHop matrices exist, each switch is visited and for each target LID, +a decision is made as to what port should be used to get to that LID. + This step is common to standard and Up/Down routing. 
Each port has a +counter counting the number of target LIDs going through it. + When there are multiple alternative ports with same MinHop to a LID, +the one with fewer previously assigned LIDs is selected. + If LMC > 0, more checks are added: Within each group of LIDs assigned to +same target port, + a. use only ports which have same MinHop + b. first prefer the ones that go to different systemImageGuid (than +the previous LID of the same LMC group) + c. if none - prefer those which go through another NodeGuid + d. fall back to the number of paths method (if all go to same node). + + +Effect of Topology Changes + +OpenSM will preserve existing routing in any case where there is no change in +the fabric switches unless the -r (--reassign_lids) option is specified. + +-r +--reassign_lids + This option causes OpenSM to reassign LIDs to all + end nodes. Specifying -r on a running subnet + may disrupt subnet traffic. + Without -r, OpenSM attempts to preserve existing + LID assignments resolving multiple use of same LID. + +If a link is added or removed, OpenSM does not recalculate +the routes that do not have to change. A route has to change +if the port is no longer UP or no longer the MinHop. When routing changes +are performed, the same algorithm for balancing the routes is invoked. + +In the case of using the file based routing, any topology changes are +currently ignored. The 'file' routing engine just loads the LFTs from the file +specified, with no reaction to real topology. Obviously, this will not be able +to recheck LIDs (by GUID) for disconnected nodes, and LFTs for non-existent +switches will be skipped. Multicast is not affected by 'file' routing engine +(this uses min hop tables). + + +Min Hop Algorithm +----------------- + +The Min Hop algorithm is invoked by default if no routing algorithm is +specified. It can also be invoked by specifying '-R minhop'. 
+ +The Min Hop algorithm is divided into two stages: computation of +min-hop tables on every switch and LFT output port assignment. Link +subscription is also equalized with the ability to override based on +port GUID. The latter is supplied by: + +-i +--ignore_guids + This option provides the means to define a set of ports + (by guids) that will be ignored by the link load + equalization algorithm. + +LMC awareness routes based on (remote) system or switch basis. + + +UPDN Routing Algorithm +---------------------- + +Purpose of UPDN Algorithm + +The UPDN algorithm is designed to prevent deadlocks from occurring in loops +of the subnet. A loop-deadlock is a situation in which it is no longer +possible to send data between any two hosts connected through the loop. As +such, the UPDN routing algorithm should be used if the subnet is not a pure +Fat Tree, and one of its loops may experience a deadlock (due, for example, +to high pressure). + +The UPDN algorithm is based on the following main stages: + +1. Auto-detect root nodes - based on the CA hop length from any switch in +the subnet, a statistical histogram is built for each switch (hop num vs +number of occurrences). If the histogram reflects a specific column (higher +than others) for a certain node, then it is marked as a root node. Since +the algorithm is statistical, it may not find any root nodes. The list of +the root nodes found by this auto-detect stage is used by the ranking +process stage. + + Note 1: The user can override the node list manually. + Note 2: If this stage cannot find any root nodes, and the user did not + specify a guid list file, OpenSM defaults back to the Min Hop + routing algorithm. + +2. Ranking process - All root switch nodes (found in stage 1) are assigned +a rank of 0. Using the BFS algorithm, the rest of the switch nodes in the +subnet are ranked incrementally. This ranking aids in the process of enforcing +rules that ensure loop-free paths. + +3. 
Min Hop Table setting - after ranking is done, a BFS algorithm is run from +each (CA or switch) node in the subnet. During the BFS process, the FDB table +of each switch node traversed by BFS is updated, in reference to the starting +node, based on the ranking rules and guid values. + +At the end of the process, the updated FDB tables ensure loop-free paths +through the subnet. + +Note: Up/Down routing does not allow LID routing communication between +switches that are located inside spine "switch systems". +The reason is that there is no way to allow a LID route between them +that does not break the Up/Down rule. +One ramification of this is that you cannot run SM on switches other +than the leaf switches of the fabric. + + +UPDN Algorithm Usage + +Activation through OpenSM + +Use '-R updn' option (instead of old '-u') to activate the UPDN algorithm. +Use `-a <root_guid_file>' for adding an UPDN guid file that contains the +root nodes for ranking. +If the `-a' option is not used, OpenSM uses its auto-detect root nodes +algorithm. + +Notes on the guid list file: +1. A valid guid file specifies one guid in each line. Lines with an invalid +format will be discarded. +2. The user should specify the root switch guids. However, it is also +possible to specify CA guids; OpenSM will use the guid of the switch (if +it exists) that connects the CA to the subnet as a root node. + + +To learn more about deadlock-free routing, see the article +"Deadlock Free Message Routing in Multiprocessor Interconnection Networks" +by William J Dally and Charles L Seitz (1985). + + +DNUP Routing Algorithm +---------------------- + +Purpose: + +The DNUP algorithm is designed to serve a similar purpose to UPDN. However, +it is intended to work in network topologies which are unsuited to +UPDN due to nodes being connected closer to the roots than some of +the switches. An example would be a fabric which contains nodes and +uplinks connected to the same switch. 
The operation of DNUP is the +same as UPDN with the exception of the ranking process. In DNUP all +switch nodes are ranked based solely on their distance from CA Nodes: +all switch nodes directly connected to at least one CA are assigned a +value of 1; all other switch nodes are assigned a value of one more than +the minimum rank of all neighbor switch nodes. + + +Fat-tree Routing Algorithm +-------------------------- + +Purpose: + +The fat-tree algorithm optimizes routing for "shift" communication pattern. +It should be chosen if a subnet is a symmetrical or almost symmetrical +fat-tree of various types. +It supports not just K-ary-N-Trees, but also handles non-constant K, +cases where not all leaves (CAs) are present, and any Constant +Bisectional Bandwidth (CBB) ratio. As in UPDN, fat-tree also prevents +credit-loop-deadlocks. + +If the root guid file is not provided ('-a' or '--root_guid_file' options), +the topology has to be pure fat-tree that complies with the following rules: + - Tree rank should be between two and eight (inclusively) + - Switches of the same rank should have the same number + of UP-going port groups*, unless they are root switches, + in which case they shouldn't have UP-going ports at all. + - Switches of the same rank should have the same number + of DOWN-going port groups, unless they are leaf switches. + - Switches of the same rank should have the same number + of ports in each UP-going port group. + - Switches of the same rank should have the same number + of ports in each DOWN-going port group. + - All the CAs have to be at the same tree level (rank). + +If the root guid file is provided, the topology doesn't have to be pure +fat-tree, and it should only comply with the following rules: + - Tree rank should be between two and eight (inclusively) + - All the Compute Nodes** have to be at the same tree level (rank). + Note that non-compute node CAs are allowed here to be at different + tree ranks. 
+ +* ports that are connected to the same remote switch are referenced as +'port group'. +** list of compute nodes (CNs) can be specified by '-u' or '--cn_guid_file' +OpenSM options. + +Note that although fat-tree algorithm supports trees with non-integer CBB +ratio, the routing will not be as balanced as in case of integer CBB ratio. +In addition to this, although the algorithm allows leaf switches to have any +number of CAs, the closer the tree is to being fully populated, the more effective +the "shift" communication pattern will be. +In general, even if the root list is provided, the closer the topology to a +pure and symmetrical fat-tree, the more optimal the routing will be. + +The algorithm also dumps compute node ordering file (opensm-ftree-ca-order.dump) +in the same directory where the OpenSM log resides. This ordering file provides +the CN order that may be used to create efficient communication pattern, that +will match the routing tables. + +Routing between non-CN nodes + + +The use of the cn_guid_file option allows non-CN nodes to be located on different levels in the fat tree. +In such case, it is not guaranteed that the Fat Tree algorithm will route between two non-CN nodes. +In the scheme below, N1, N2 and N3 are non-CN nodes. Although all the CN have routes to and from them, +there will not necessarily be a route between N1,N2 and N3. +Such routes would require using at least one of the switches the wrong way around +(in fact, going out of one of the top switches through a downgoing port while we are supposed to go up). + + Spine1 Spine2 Spine 3 + / \ / | \ / \ + / \ / | \ / \ + N1 Switch N2 Switch N3 + /|\ /|\ + / | \ / | \ + Going down to compute nodes + +To solve this problem, a list of non-CN nodes can be specified by \'-G\' or \'--io_guid_file\' option. +These nodes will be allowed to use switches the wrong way around a specific number of times (specified by \'-H\' or \'--max_reverse_hops\'). 
+With the proper max_reverse_hops and io_guid_file values, you can ensure full connectivity in the Fat Tree. + +In the scheme above, with a max_reverse_hops of 1, routes will be instantiated between N1<->N2 and N2<->N3. +With a max_reverse_hops value of 2, N1,N2 and N3 will all have routes between them. + +Please note that using max_reverse_hops creates routes that use the switch in a counter-stream way. +This option should never be used to connect nodes with high bandwidth traffic between them ! It should only be used +to allow connectivity for HA purposes or similar. +Also having routes the other way around can in theory cause credit loops. + +Use these options with extreme care ! + + +Usage: + +Activation through OpenSM + +Use '-R ftree' option to activate the fat-tree algorithm. + +Note: LMC > 0 is not supported by fat-tree routing. If this is +specified, the default routing algorithm is invoked instead. + + +LASH Routing Algorithm +---------------------- + +LASH is an acronym for LAyered SHortest Path Routing. It is a +deterministic shortest path routing algorithm that enables topology +agnostic deadlock-free routing within communication networks. + +When computing the routing function, LASH analyzes the network +topology for the shortest-path routes between all pairs of sources / +destinations and groups these paths into virtual layers in such a way +as to avoid deadlock. + +Note LASH analyzes routes and ensures deadlock freedom between switch +pairs. The link between an HCA and a switch does not need virtual +layers as deadlock will not arise between switch and HCA. + +In more detail, the algorithm works as follows: + +1) LASH determines the shortest-path between all pairs of source / +destination switches. Note, LASH ensures the same SL is used for all +SRC/DST - DST/SRC pairs and there is no guarantee that the return +path for a given DST/SRC will be the reverse of the route SRC/DST. 
+ +2) LASH then begins an SL assignment process where a route is assigned +to a layer (SL) if the addition of that route does not cause deadlock +within that layer. This is achieved by maintaining and analysing a +channel dependency graph for each layer. Once the potential addition +of a path could lead to deadlock, LASH opens a new layer and continues +the process. + +3) Once this stage has been completed, it is highly likely that the +first layers processed will contain more paths than the latter ones. +To better balance the use of layers, LASH moves paths from one layer +to another so that the number of paths in each layer averages out. + +Note, the implementation of LASH in opensm attempts to use as few layers +as possible. This number can be less than the number of actual layers +available. + +In general LASH is a very flexible algorithm. It can, for example, +reduce to Dimension Order Routing in certain topologies, it is topology +agnostic and fares well in the face of faults. + +It has been shown that for both regular and irregular topologies, LASH +outperforms Up/Down. The reason for this is that LASH distributes the +traffic more evenly through a network, avoiding the bottleneck issues +related to a root node and always routes shortest-path. + +The algorithm was developed by Simula Research Laboratory. + +To learn more about LASH and the flexibility behind it, the requirement +for layers, performance comparisons to other algorithms, see the +following articles: + +"Layered Routing in Irregular Networks", Lysne et al, IEEE +Transactions on Parallel and Distributed Systems, VOL.16, No12, +December 2005. + +"Routing for the ASI Fabric Manager", Solheim et al. IEEE +Communications Magazine, Vol.44, No.7, July 2006. + +"Layered Shortest Path (LASH) Routing in Irregular System Area +Networks", Skeie et al. IEEE Computer Society Communication +Architecture for Clusters 2002. + + +Use '-R lash -Q ' option to activate the LASH algorithm. 
+ +Note: QoS support has to be turned on in order that SL/VL mappings are +used. + +Note: LMC > 0 is not supported by the LASH routing. If this is +specified, the default routing algorithm is invoked instead. + +For open regular cartesian meshes the DOR algorithm is the ideal +routing algorithm. For toroidal meshes on the other hand there +are routing loops that can cause deadlocks. LASH can be used to +route these cases. The performance of LASH can be improved by +preconditioning the mesh in cases where there are multiple links +connecting switches and also in cases where the switches are not +cabled consistently. An option exists for LASH to do this. To +invoke this use '-R lash -Q --do_mesh_analysis'. This will +add an additional phase that analyses the mesh to try to determine +the dimension and size of a mesh. If it determines that the mesh +looks like an open or closed cartesian mesh it reorders the ports +in dimension order before the rest of the LASH algorithm runs. + +DOR Routing Algorithm +--------------------- + +The Dimension Order Routing algorithm is based on the Min Hop +algorithm and so uses shortest paths. Instead of spreading traffic +out across different paths with the same shortest distance, it chooses +among the available shortest paths based on an ordering of dimensions. +Each port must be consistently cabled to represent a hypercube +dimension or a mesh dimension. Paths are grown from a destination +back to a source using the lowest dimension (port) of available paths +at each step. This provides the ordering necessary to avoid deadlock. +When there are multiple links between any two switches, they still +represent only one dimension and traffic is balanced across them +unless port equalization is turned off. In the case of hypercubes, +the same port must be used throughout the fabric to represent the +hypercube dimension and match on both ends of the cable. 
In the case +of meshes, the dimension should consistently use the same pair of +ports, one port on one end of the cable, and the other port on the +other end, continuing along the mesh dimension. + +Use '-R dor' option to activate the DOR algorithm. + +Torus-2QoS Routing Algorithm +---------------------------- + +Torus-2QoS is a routing algorithm designed for large-scale 2D/3D torus fabrics. +The torus-2QoS routing engine can provide the following functionality on +a 2D/3D torus: +- routing that is free of credit loops +- two levels of QoS, assuming switches support 8 data VLs +- ability to route around a single failed switch, and/or multiple failed + links, without + - introducing credit loops + - changing path SL values +- very short run times, with good scaling properties as fabric size + increases + +Unicast Routing: + +Torus-2QoS is a DOR-based algorithm that avoids deadlocks that would otherwise +occur in a torus using the concept of a dateline for each torus dimension. +It encodes into a path SL which datelines the path crosses as follows: + + sl = 0; + for (d = 0; d < torus_dimensions; d++) + /* path_crosses_dateline(d) returns 0 or 1 */ + sl |= path_crosses_dateline(d) << d; + +For a 3D torus, that leaves one SL bit free, which torus-2QoS uses to +implement two QoS levels. + +Torus-2QoS also makes use of the output port dependence of switch SL2VL +maps to encode into one VL bit the information encoded in three SL bits. 
+It computes in which torus coordinate direction each inter-switch link +"points", and writes SL2VL maps for such ports as follows: + + for (sl = 0; sl < 16; sl ++) + /* cdir(port) reports which torus coordinate direction a switch port + * "points" in, and returns 0, 1, or 2 */ + sl2vl(iport,oport,sl) = 0x1 & (sl >> cdir(oport)); + +Thus, on a pristine 3D torus, i.e., in the absence of failed fabric switches, +torus-2QoS consumes 8 SL values (SL bits 0-2) and 2 VL values (VL bit 0) +per QoS level to provide deadlock-free routing on a 3D torus. + +Torus-2QoS routes around link failure by "taking the long way around" any +1D ring interrupted by a link failure. For example, consider the 2D 6x5 +torus below, where switches are denoted by [+a-zA-Z]: + + | | | | | | + 4 --+----+----+----+----+----+-- + | | | | | | + 3 --+----+----+----D----+----+-- + | | | | | | + 2 --+----+----I----r----+----+-- + | | | | | | + 1 --m----S----n----T----o----p-- + | | | | | | + y=0 --+----+----+----+----+----+-- + | | | | | | + + x=0 1 2 3 4 5 + +For a pristine fabric the path from S to D would be S-n-T-r-D. In the +event that either link S-n or n-T has failed, torus-2QoS would use the path +S-m-p-o-T-r-D. Note that it can do this without changing the path SL +value; once the 1D ring m-S-n-T-o-p-m has been broken by failure, path +segments using it cannot contribute to deadlock, and the x-direction +dateline (between, say, x=5 and x=0) can be ignored for path segments on +that ring. + +One result of this is that torus-2QoS can route around many simultaneous +link failures, as long as no 1D ring is broken into disjoint segments. For +example, if links n-T and T-o have both failed, that ring has been broken +into two disjoint segments, T and o-p-m-S-n. Torus-2QoS checks for such +issues, reports if they are found, and refuses to route such fabrics. 
+ +Note that in the case where there are multiple parallel links between a pair +of switches, torus-2QoS will allocate routes across such links in a round- +robin fashion, based on ports at the path destination switch that are active +and not used for inter-switch links. Should a link that is one of several +such parallel links fail, routes are redistributed across the remaining +links. When the last of such a set of parallel links fails, traffic is +rerouted as described above. + +Handling a failed switch under DOR requires introducing into a path at +least one turn that would be otherwise "illegal", i.e. not allowed by DOR +rules. Torus-2QoS will introduce such a turn as close as possible to the +failed switch in order to route around it. + +In the above example, suppose switch T has failed, and consider the path +from S to D. Torus-2QoS will produce the path S-n-I-r-D, rather than the +S-n-T-r-D path for a pristine torus, by introducing an early turn at n. +Normal DOR rules will cause traffic arriving at switch I to be forwarded +to switch r; for traffic arriving from I due to the "early" turn at n, +this will generate an "illegal" turn at I. + +Torus-2QoS will also use the input port dependence of SL2VL maps to set VL +bit 1 (which would be otherwise unused) for y-x, z-x, and z-y turns, i.e., +those turns that are illegal under DOR. This causes the first hop after +any such turn to use a separate set of VL values, and prevents deadlock in +the presence of a single failed switch. + +For any given path, only the hops after a turn that is illegal under DOR +can contribute to a credit loop that leads to deadlock. So in the example +above with failed switch T, the location of the illegal turn at I in the +path from S to D requires that any credit loop caused by that turn must +encircle the failed switch at T. 
Thus the second and later hops after the +illegal turn at I (i.e., hop r-D) cannot contribute to a credit loop +because they cannot be used to construct a loop encircling T. The hop I-r +uses a separate VL, so it cannot contribute to a credit loop encircling T. + +Extending this argument shows that in addition to being capable of routing +around a single switch failure without introducing deadlock, torus-2QoS can +also route around multiple failed switches on the condition they are +adjacent in the last dimension routed by DOR. For example, consider the +following case on a 6x6 2D torus: + + + | | | | | | + 5 --+----+----+----+----+----+-- + | | | | | | + 4 --+----+----+----D----+----+-- + | | | | | | + 3 --+----+----I----u----+----+-- + | | | | | | + 2 --+----+----q----R----+----+-- + | | | | | | + 1 --m----S----n----T----o----p-- + | | | | | | + y=0 --+----+----+----+----+----+-- + | | | | | | + + x=0 1 2 3 4 5 + + +Suppose switches T and R have failed, and consider the path from S to D. +Torus-2QoS will generate the path S-n-q-I-u-D, with an illegal turn at +switch I, and with hop I-u using a VL with bit 1 set. + +As a further example, consider a case that torus-2QoS cannot route without +deadlock: two failed switches adjacent in a dimension that is not the last +dimension routed by DOR; here the failed switches are O and T: + + | | | | | | + 5 --+----+----+----+----+----+-- + | | | | | | + 4 --+----+----+----+----+----+-- + | | | | | | + 3 --+----+----+----+----D----+-- + | | | | | | + 2 --+----+----I----q----r----+-- + | | | | | | + 1 --m----S----n----O----T----p-- + | | | | | | + y=0 --+----+----+----+----+----+-- + | | | | | | + + x=0 1 2 3 4 5 + +In a pristine fabric, torus-2QoS would generate the path from S to D as +S-n-O-T-r-D. With failed switches O and T, torus-2QoS will generate the +path S-n-I-q-r-D, with illegal turn at switch I, and with hop I-q using a +VL with bit 1 set. 
In contrast to the earlier examples, the second hop +after the illegal turn, q-r, can be used to construct a credit loop +encircling the failed switches. + +Multicast Routing: + +Since torus-2QoS uses all four available SL bits, and the three data VL +bits that are typically available in current switches, there is no way +to use SL/VL values to separate multicast traffic from unicast traffic. +Thus, torus-2QoS must generate multicast routing such that credit loops +cannot arise from a combination of multicast and unicast path segments. + +It turns out that it is possible to construct spanning trees for multicast +routing that have that property. For the 2D 6x5 torus example above, here +is the full-fabric spanning tree that torus-2QoS will construct, where "x" +is the root switch and each "+" is a non-root switch: + + 4 + + + + + + + | | | | | | + 3 + + + + + + + | | | | | | + 2 +----+----+----x----+----+ + | | | | | | + 1 + + + + + + + | | | | | | + y=0 + + + + + + + + x=0 1 2 3 4 5 + +For multicast traffic routed from root to tip, every turn in the above +spanning tree is a legal DOR turn. + +For traffic routed from tip to root, and some traffic routed through the +root, turns are not legal DOR turns. However, to construct a credit loop, +the union of multicast routing on this spanning tree with DOR unicast +routing can only provide 3 of the 4 turns needed for the loop. + +In addition, if none of the above spanning tree branches crosses a dateline +used for unicast credit loop avoidance on a torus, and if multicast traffic +is confined to SL 0 or SL 8 (recall that torus-2QoS uses SL bit 3 to +differentiate QoS level), then multicast traffic also cannot contribute to +the "ring" credit loops that are otherwise possible in a torus. + +Torus-2QoS uses these ideas to create a master spanning tree. Every +multicast group spanning tree will be constructed as a subset of the master +tree, with the same root as the master tree. 
+ +Such multicast group spanning trees will in general not be optimal for +groups which are a subset of the full fabric. However, this compromise must +be made to enable support for two QoS levels on a torus while preventing +credit loops. + +In the presence of link or switch failures that result in a fabric for +which torus-2QoS can generate credit-loop-free unicast routes, it is also +possible to generate a master spanning tree for multicast that retains the +required properties. For example, consider that same 2D 6x5 torus, with +the link from (2,2) to (3,2) failed. Torus-2QoS will generate the following +master spanning tree: + + 4 + + + + + + + | | | | | | + 3 + + + + + + + | | | | | | + 2 --+----+----+ x----+----+-- + | | | | | | + 1 + + + + + + + | | | | | | + y=0 + + + + + + + + x=0 1 2 3 4 5 + +Two things are notable about this master spanning tree. First, assuming +the x dateline was between x=5 and x=0, this spanning tree has a branch +that crosses the dateline. However, just as for unicast, crossing a +dateline on a 1D ring (here, the ring for y=2) that is broken by a failure +cannot contribute to a torus credit loop. + +Second, this spanning tree is no longer optimal even for multicast groups +that encompass the entire fabric. That, unfortunately, is a compromise that +must be made to retain the other desirable properties of torus-2QoS routing. + +In the event that a single switch fails, torus-2QoS will generate a master +spanning tree that has no "extra" turns by appropriately selecting a root +switch. In the 2D 6x5 torus example, assume now that the switch at (3,2), +i.e. the root for a pristine fabric, fails. 
Torus-2QoS will generate the +following master spanning tree for that case: + + | + 4 + + + + + + + | | | | | | + 3 + + + + + + + | | | | | + 2 + + + + + + | | | | | + 1 +----+----x----+----+----+ + | | | | | | + y=0 + + + + + + + | + + x=0 1 2 3 4 5 + +Assuming the y dateline was between y=4 and y=0, this spanning tree has +a branch that crosses a dateline. However, again this cannot contribute +to credit loops as it occurs on a 1D ring (the ring for x=3) that is +broken by a failure, as in the above example. + +Torus Topology Discovery: + +The algorithm used by torus-2QoS to construct the torus topology from the +undirected graph representing the fabric requires that the radix of each +dimension be configured via torus-2QoS.conf. It also requires that the +torus topology be "seeded"; for a 3D torus this requires configuring four +switches that define the three coordinate directions of the torus. + +Given this starting information, the algorithm is to examine the cube +formed by the eight switch locations bounded by the corners (x,y,z) and +(x+1,y+1,z+1). Based on switches already placed into the torus topology at +some of these locations, the algorithm examines 4-loops of interswitch +links to find the one that is consistent with a face of the cube of switch +locations, and adds its switches to the discovered topology in the correct +locations. + +Because the algorithm is based on examining the topology of 4-loops of links, +a torus with one or more radix-4 dimensions requires extra initial seed +configuration. See torus-2QoS.conf(5) for details. Torus-2QoS will detect +and report when it has insufficient configuration for a torus with radix-4 +dimensions. + +In the event the torus is significantly degraded, i.e., there are many +missing switches or links, it may happen that torus-2QoS is unable to place +into the torus some switches and/or links that were discovered in the +fabric, and will generate a warning in that case. 
A similar condition +occurs if torus-2QoS is misconfigured, i.e., the radix of a torus dimension +as configured does not match the radix of that torus dimension as wired, +and many switches/links in the fabric will not be placed into the torus. + +Quality Of Service Configuration: + +OpenSM will not program switches and channel adapters with SL2VL maps or VL +arbitration configuration unless it is invoked with -Q. Since torus-2QoS +depends on such functionality for correct operation, always invoke OpenSM +with -Q when torus-2QoS is in the list of routing engines. + +Any quality of service configuration method supported by OpenSM will work +with torus-2QoS, subject to the following limitations and considerations. + +For all routing engines supported by OpenSM except torus-2QoS, there is a +one-to-one correspondence between QoS level and SL. Torus-2QoS can only +support two quality of service levels, so only the high-order bit of any SL +value used for unicast QoS configuration will be honored by torus-2QoS. + +For multicast QoS configuration, only SL values 0 and 8 should be used with +torus-2QoS. + +Since SL to VL map configuration must be under the complete control of +torus-2QoS, any configuration via qos_sl2vl, qos_swe_sl2vl, etc., must and +will be ignored, and a warning will be generated. + +For inter-switch links, Torus-2QoS uses VL values 0-3 to implement one of +its supported QoS levels, and VL values 4-7 to implement the other. For +endport links (CA, router, switch management port), Torus-2QoS uses VL +value 0 for one of its supported QoS levels and VL value 1 to implement +the other. Hard-to-diagnose application issues may arise if traffic is +not delivered fairly across each of these two VL ranges. For +inter-switch links, Torus-2QoS will detect and warn if VL arbitration is +configured unfairly across VLs in the range 0-3, and also in the range +4-7. 
Note that the default OpenSM VL arbitration configuration does +not meet this constraint, so all torus-2QoS users should configure VL +arbitration via qos_ca_vlarb_high, qos_swe_vlarb_high, qos_ca_vlarb_low, +qos_swe_vlarb_low, etc. + +Note that torus-2QoS maps SL values to VL values differently +for inter-switch and endport links. This is why qos_vlarb_high and +qos_vlarb_low should not be used, as using them may result in +VL arbitration for a QoS level being different across inter-switch +links vs. across endport links. + +Operational Considerations: + +Any routing algorithm for a torus IB fabric must employ path SL values to +avoid credit loops. As a result, all applications run over such fabrics +must perform a path record query to obtain the correct path SL for +connection setup. Applications that use rdma_cm for connection setup will +automatically meet this requirement. + +If a change in fabric topology causes changes in path SL values required to +route without credit loops, in general all applications would need to +repath to avoid message deadlock. Since torus-2QoS has the ability to +reroute after a single switch failure without changing path SL values, +repathing by running applications is not required when the fabric is routed +with torus-2QoS. + +Torus-2QoS can provide unchanging path SL values in the presence of subnet +manager failover provided that all OpenSM instances have the same idea of +dateline location. See torus-2QoS.conf(5) for details. + +Torus-2QoS will detect configurations of failed switches and links that +prevent routing that is free of credit loops, and will log warnings and +refuse to route. If "no_fallback" was configured in the list of OpenSM +routing engines, then no other routing engine will attempt to route the +fabric. In that case all paths that do not transit the failed components +will continue to work, and the subset of paths that are still operational +will continue to remain free of credit loops. 
OpenSM will continue to +attempt to route the fabric after every sweep interval, and after any +change (such as a link up) in the fabric topology. When the fabric +components are repaired, full functionality will be restored. + +In the event OpenSM was configured to allow some other engine to route the +fabric if torus-2QoS fails, then credit loops and message deadlock are +likely if torus-2QoS had previously routed the fabric successfully. Even if +the other engine is capable of routing a torus without credit loops, +applications that built connections with path SL values granted under +torus-2QoS will likely experience message deadlock under routing generated +by a different engine, unless they repath. + +To verify that a torus fabric is routed free of credit loops, use ibdmchk +to analyze data collected via ibdiagnet -vlr. + +DFSSSP and SSSP Routing Algorithm +--------------------------------- + +The (Deadlock-Free) Single-Source-Shortest-Path routing algorithm is +designed to optimize link utilization thru global balancing of routes, +while supporting arbitrary topologies. The DFSSSP routing algorithm +uses InfiniBand virtual lanes (SL) to provide deadlock-freedom. + +The DFSSSP algorithm consists of five major steps: +1) It discovers the subnet and models the subnet as a directed + multigraph in which each node represents a node of the physical + network and each edge represents one direction of the full-duplex + links used to connect the nodes. +2) A loop, which iterates over all CA and switches of the subnet, will + perform three steps to generate the linear forwarding tables for + each switch: +2.1) use Dijkstra's algorithm to find the shortest path from all nodes + to the current selected destination; +2.2) update the edge weights in the graph, i.e. 
add the number of + routes, which use a link to reach the destination, to the link/edge; +2.3) update the LFT of each switch with the outgoing port which was used + in the current step to route the traffic to the destination node. +3) After the number of available virtual lanes or layers in the subnet + is detected and a channel dependency graph is initialized for each layer, + the algorithm will put each possible route of the subnet into the first + layer. +4) A loop iterates over all channel dependency graphs (CDG) and performs + the following substeps: +4.1) search for a cycle in the current CDG; +4.2) when a cycle is found, i.e. a possible deadlock is present, + one edge is selected and all routes, which induced this edge, are moved + to the "next higher" virtual layer (CDG[i+1]); +4.3) the cycle search is continued until all cycles are broken and + routes are moved "up". +5) When the number of needed layers does not exceed the number of + available SL/VL to remove all cycles in all CDGs, the routing is + deadlock-free and a relation table is generated, which contains + the assignment of routes from source to destination to an SL. + +Note on SSSP: +This algorithm does not perform the steps 3)-5) and can not be +considered to be deadlock-free for all topologies. But on the one +hand, you can choose this algorithm for really large networks +(5,000+ CAs and deadlock-free by design) to reduce +the runtime of the algorithm. On the other hand, you might use +the SSSP routing algorithm as an alternative, when all deadlock-free +routing algorithms fail to route the network for whatever reason. +In the last case, SSSP was designed to deliver an equal or higher +bandwidth due to better congestion avoidance than the Min Hop +routing algorithm.
+ +Notes for usage: + a) running DFSSSP: '-R dfsssp -Q' + a.1) QoS has to be configured to equally spread the load on the + available SL or virtual lanes + a.2) applications must perform a path record query to get path SL for + each route, which the application will use to transmit packages + b) running SSSP: '-R sssp' + c) both algorithms support LMC > 0 + +Hints for separate optimization of compute and I/O traffic: +Having more nodes (I/O and compute) connected to a switch than incoming links +can result in a 'bad' routing of the I/O traffic as long as (DF)SSSP routing +is not aware of the dedicated I/O nodes, i.e., in the following network +configuration CN1-CN3 might send all I/O traffic via Link2 to IO1,IO2: + + CN1 Link1 IO1 + \ /----\ / + CN2 -- Switch1 Switch2 -- CN4 + / \----/ \ + CN3 Link2 IO2 + +To prevent this from happening (DF)SSSP can use both the compute node guid +file and the I/O guid file specified by the '-u' or '--cn_guid_file' and +'-G' or '--io_guid_file' options (similar to the Fat-Tree routing). +This ensures that traffic towards compute nodes and I/O nodes is balanced +separately and therefore distributed as much as possible across the available +links. Port GUIDs, as listed by ibstat, must be specified (not Node GUIDs). +The priority for the optimization is as follows: + compute nodes -> I/O nodes -> other nodes +Possible use case scenarios: + a) neither '-u' nor '-G' are specified: all nodes a treated as 'other nodes' + and therefore balanced equally; + b) '-G' is specified: traffic towards I/O nodes will be balanced optimally; + c) the system has three node types, such as login/admin, compute and I/O, + but the balancing focus should be I/O, then one has to use '-u' and '-G' + with I/O guids listed in cn_guid_file and compute node guids listed in + io_guid_file; + d) ... + +For more information about the algorithms, i.e. 
balancing the routes and +moving the routes to different virtual layers, and about comparison with +other routing algorithms, please refer to the following articles: +1. J. Domke, T. Hoefler and W. Nagel: Deadlock-Free Oblivious Routing + for Arbitrary Topologies, In Proceedings of the 25th IEEE International + Parallel & Distributed Processing Symposium (IPDPS 2011) +2. T. Hoefler, T. Schneider and A. Lumsdaine: Optimized Routing for + Large-Scale InfiniBand Networks, In 17th Annual IEEE Symposium on High + Performance Interconnects (HOTI 2009) + +Nue Routing Algorithm +--------------------- + +The implementation of Nue routing for OpenSM is a 100%-applicable, balanced, and +deadlock-free unicast routing engine (which also configures multicast tables, +see 'Note on multicast' below). The key points of this algorithm are the +following: + - 100% fault-tolerant, oblivious routing strategy + - topology-agnostic, i.e., applicable to every topology (no matter if topology + is regular, irregular after faults, or random) + - 100% deadlock-free routing within the resource limits (i.e., it never exceeds + the given number of available virtual lanes, and it does not necessarily + require virtual lanes) for every topology + - very good path balancing and therefore high throughput + - QoS (via SLs/VLs) + deadlock-freedom can be combined (since both rely on VLs) + - forwarding tables are fast to calculate: O(n^2 * log n), however slightly + slower compared to topology-aware routings (for obvious reasons), and + - the path-to-VL mapping only depends on the destination, which may be useful + for scalable, efficient path resolution and caching mechanisms. +From a very high level perspective, Nue routing is similar to DFSSSP (see above) +in the sense that both use Dijkstra and edge weight updates for path balancing. 
+However, the fundamental difference is that Nue routing doesn't perform the path +calculation on the graph representing the real fabric, and instead routes +directly within the channel dependency graph. This approach allows Nue routing +to place routing restrictions (to avoid any credit loops) in an on-demand +manner, which overcomes the problem of all other good VL-based algorithms. +Meaning, the competitors cannot control or limit the use of VLs, and might run +out of them and have to give up. On the flip side, Nue may have to use detours +for a few routes, and hence cannot really be considered "shortest-path" routing, +because of the impossibility lemma 6.1 (see ref. [2] Chapter 6) to accomplish +deadlock-free, shortest-path routing with an limited number of available virtual +lanes for arbitrary network topologies. + +Conceptually, Nue routing works as follows: + * Assume N is the set of destinations and k the number of available VLs + * Assume that virtual lanes (VLs) are combined into virtual layers + * Partition N into k disjoint subsets N_1 ,..., N_k of destinations + * foreach virtual layer L_i with i in {1 ,..., k} do: + * Create a convex subgraph H_i for N_i + * Identify central node n_r in N_i of convex H_i via Brandes' algorithm + * Create a new complete channel dependency graph D_i for layer L_i + * Define escape paths D* in D_i for spanning tree rooted at n_r + * foreach destination node n in N_i do: + * Identify all deadlock-free paths towards n + * Store these paths in unicast forwarding tables + * Update channel weights in D_i for these paths +and the service level for applications returned in path request is determined +by Nue (for k>1) as follows: + * Assuming an input of a given source/destination node (LID) pair + * If Nue mapped the destination to layer L_x, then SL == x-1 is returned, while + presuming that SL2VL tables are being mapped 1:1 for the number of layers. 
+ +While other VL-based routings usually gradually construct the channel dependency +graph after all paths have been calculated, Nue creates a "complete" version of +it which holds all possible dependencies allowing it to directly search for +cycles after "using" a dependency on the path to a destination. Since it would +be infeasible to search for cycles each and every time a channel dependency is +added, Nue employs the notion of individually colored subgraphs (one for each +destination) and only performs real cycle searches in the complete CDG when +adding new edges between nodes of the same subgraph, see Section 6.2.6.1 of +reference [2] for details on this optimization. + +Use of METIS library: +In step 2 of the previously shown pseudo code, Nue routing separates the LIDs +into multiple subsets, one for every virtual layer. Nue has two options to +perform this partitioning (not to be confused with IB partitions): the first is +a fairly simple semi-random assignment of LIDs to layers/subsets, and the second +partitioning uses the METIS library to partition the network graph into k +approximately equal sized parts. The latter approach has shown better results +in terms of path balancing and avoidance of using the escape paths, and hence +it is HIGHLY advised to install/use the METIS library with OpenSM (enforced +via `--enable-metis' configure flag when building OpenSM). For the rare case, +that METIS isn't packaged with the Linux distro, here is a link to the official +website to download and install METIS 5.1.0 manually: + http://glaros.dtc.umn.edu/gkhome/metis/metis/overview +OpenSM's configure script also provides options in case METIS header and library +aren't found in the default path. 
+ +Runtime options for Nue: +The behavior of Nue routing can be directly influenced by two osm.conf +parameters (one is also available as command line option): + - nue_max_num_vls: which controls/limits the number of virtual lanes which Nue + is allowed to use (detailed explanation in osm.conf file); this option is + also available via command line + - nue_include_switches: the option (if TRUE) enforces Nue to treat switches + as "normal" endpoints sending/receiving data traffic, which is usually + not the case (also with other routings); hence, paths to switches will be + included when calculating deadlock-free ucast tables (suggestion for IB + subnets: FALSE) +Furthermore, Nue supports TRUE and FALSE settings of avoid_throttled_links, +use_ucast_cache, and qos (more on this hereafter); and lmc > 0. + +Notes on Quality of Service (QoS): +The advantage of Nue is that it works with AND without QoS being enabled, i.e., +the usage of SLs/VLs for deadlock-freedom can be avoided. Here are the three +possible usage scenarios: + a) nue_max_num_vls = 1 and qos = disabled => Nue assumes that only 1 virtual + layer (identical to the physical network; or OperVLs equal to VL0) is + usable and all paths are to be calculated within this one layer. Hence, + there is no need for special SL2VL mappings in the network and the use of + specific SLs by applications. So, enabling QoS is not required for + credit-free unicast routing tables. + b) nue_max_num_vls = 1 and qos = enabled => This combination works essentially + like (a), meaning the SL returned for path record requests is not defined + by Nue, since all paths are deadlock-free without using VLs. However, any + separate QoS settings may influence the SL returned to applications. + c) nue_max_num_vls > 1 and qos = enabled => In this configuration, applications + have to query and obey the SL for path records as returned by Nue because + otherwise the deadlock-freedom cannot be guaranteed anymore. 
Furthermore, + errors in the fabric may require applications to repath to avoid message + deadlocks (this is only a current limitation of the implementation since + Nue uses METIS to assign destination LIDs to VLs, and a network fault + may change the outcome of METIS' partitioning; so, if anyone needs to + avoid repaths, then please contact the developer). Since Nue operates on + virtual layers, admins should configure the SL2VL mapping tables in a + homogeneous 1-to-1 manner across the entire subnet to separate the layers + and avoid paths from changing into the wrong layer (example mapping is + VL_i = SL_i % qos_max_vls for all i 0..15). Depending on the actual + setting of nue_max_num_vls, one can further differentiate: + c.1) All operational VLs are used for deadlock-free routes (either by setting + nue_max_num_vls to qos_max_vls or to 0 for "auto detection"), and hence + real QoS to prioritize traffic in the subnet isn't supported. The VL + arbitration settings for this usage scenario should be configured + equally for all VLs and for all switches to avoid bandwidth limitations + for a subset of destinations, e.g. + qos_max_vls 8 + qos_high_limit 4 + qos_vlarb_high 0:64,1:64,2:64,3:64,4:64,5:64,6:64,7:64 + qos_vlarb_low 0:4,1:4,2:4,3:4,4:4,5:4,6:4,7:4 + qos_sl2vl 0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7 + when assuming 8 operational VLs in the fabric. + c.2) Nue is limited to a subset of operational VLs allowing the mix of + deadlock-freedom based on some SLs/VLs and real QoS with the remaining + set of SLs/VLs; e.g., using VL0-3 to avoid credit-loops (= 1. QoS level) + and VL4-7 used for the second QoS level. In this example, applications + should either comply with the returned SL (in the path query) or select + SL + 4 to use the second QoS level.
(Reason: Nue itself is unaware of + these QoS levels and only returns SLs in the range of 0 to #layers-1) + d) nue_max_num_vls > 1 and qos = disabled => SHOULD NOT BE USED TOGETHER, since + the SL2VL mapping for switches must be configured correctly. +As an additional note, using more VLs for Nue usually improves the overall +network throughput (as shown in [1] and [2]), so there are trade offs admins +may have to consider when configuring the subnet manager with Nue routing. + +Note on multicast: +The Nue routing engine configures multicast forwarding tables by utilizing a +spanning tree calculation rooted at a subnet switch suggested by OpenSM's +internal osm_mcast_mgr_find_root_switch(...) fn. This spanning tree for a mcast +group will try to use the least overloaded links (w.r.t. the ucast paths-per-link +metric/weight) in the fabric. However, Nue routing currently does not guarantee +deadlock-freedom for the set of multicast routes on all topologies, nor for the +combination of deadlock-free unicast routes with additional multicast routes. +Assuming, for a given topology the calculated mcast routes are dl-free, then +an admin may fix the latter problem by separating the VLs, e.g., using VL0-6 for +ucast by specifying nue_max_num_vls=7 and utilizing VL7 for mcast. + +For more information about Nue routing and comparisons with other OpenSM routing +algorithms, please refer to the following publications: +1. J. Domke, T. Hoefler and S. Matsuoka: "Routing on the Dependency Graph: A New + Approach to Deadlock-Free High-Performance Routing", in HPDC'16 + (online: http://doi.acm.org/10.1145/2907294.2907313) +2. J.
Domke "Routing on the Channel Dependency Graph: A New Approach to + Deadlock-Free, Destination-Based, High-Performance Routing for Lossless + Interconnection Networks", 2017, Dissertation, TU Dresden + (online: http://nbn-resolving.de/urn:nbn:de:bsz:14-qucosa-225902) diff --git a/doc/modular-routing.txt b/doc/modular-routing.txt new file mode 100644 index 0000000..3f2174b --- /dev/null +++ b/doc/modular-routing.txt @@ -0,0 +1,77 @@ +Modular Routing Engine + +Modular routing engine structure has been added to allow +for ease of "plugging" new routing modules. + +Currently, only unicast callbacks are supported. Multicast +can be added later. + +One of the existing routing modules is up-down "updn", which may +be activated with '-R updn' option (instead of old '-u'). + +General usage is: +$ opensm -R 'module-name' + +There is also a trivial routing module which is able +to load LFT tables from a file. + +Main features: + +- this will load switch LFTs and/or LID matrices (min hops tables) +- this will load switch LFTs according to the path entries introduced in + the file +- no additional checks will be performed (such as "is port connected", etc.)
+- in case when fabric LIDs were changed this will try to reconstruct LFTs + correctly if endport GUIDs are represented in the file (in order + to disable this GUIDs may be removed from the file or zeroed) + +The file format is compatible with output of 'ibroute' util and for +whole fabric may be generated with script like this: + + for sw_lid in `ibswitches | awk '{print $NF}'` ; do + ibroute $sw_lid + done > /path/to/lfts_file + +, or using DR paths: + + for sw_dr in `ibnetdiscover -v \ + | sed -ne '/^DR path .* switch /s/^DR path \[\(.*\)\].*$/\1/p' \ + | sed -e 's/\]\[/,/g' \ + | sort -u` ; do + ibroute -D ${sw_dr} + done > /path/to/lfts_file + +This script is dump_lfts.sh + +In order to activate new module use: + + opensm -R file -U /path/to/lfts_file + +If the lfts_file is not found or is in error, the default routing +algorithm is utilized. + +The ability to dump switch lid matrices (aka min hops tables) to file and +later to load these is also supported. + +The usage is similar to unicast forwarding tables loading from a lfts +file (introduced by 'file' routing engine), but new lid matrix file +name should be specified by -M or --lid_matrix_file option. For example: + + opensm -R file -M ./opensm-lid-matrix.dump + +The dump file is named 'opensm-lid-matrix.dump' and will be generated in +standard opensm dump directory (/var/log by default) when +OSM_LOG_ROUTING logging flag is set. + +When routing engine 'file' is activated, but the lfts file is not specified +or cannot be opened, the default lid matrix algorithm will be used. + +There is also a switch forwarding tables dumper which generates +a file compatible with dump_lfts.sh output. This file can be used +as input for forwarding tables loading by 'file' routing engine. +Both or one of options -U and -M can be specified together with '-R file'. + +NOTE: ibroute has been updated (for switch management ports) to support this. +Also, lmc was added to switch management ports. 
ibroute needs to be r7855 or +later from the trunk. + diff --git a/doc/opensm-coding-style.txt b/doc/opensm-coding-style.txt new file mode 100644 index 0000000..379042c --- /dev/null +++ b/doc/opensm-coding-style.txt @@ -0,0 +1,34 @@ +This short (hopefully) memo is about to define the coding style +recommended for OpenSM development. + +The goal of this is to make OpenSM code base to be standard in terms of +the rest of OpenIB management software, OpenIB projects and Linux in +general. And in this way to make OpenSM more developer friendly and to +involve more open source programmers to be part of OpenSM development +process. + +The goal of this is not to provide long and boring list of coding style +paradigms, but rather to define general coding style concept and to +suggest a way for such a concept to be implemented in the existing +OpenSM code base. + +The OpenSM project is an OpenIB and Linux centric project, so we think +it is reasonable to use the coding style most popular with OpenIB +projects (linux/Documentation/CodingStyle) as the starting point rather +than reinventing one more coding style rule-set. + +Some things from there in short: tab character for indentation and space +character for alignment, K&R style braces, short local and meaningful +global names, please no confusing Hungarian style, short functions. And of +course to be reasonable about all of the above. + + +Some ideas about existing OpenSM code improvements in terms of the +Coding style: + +* When writing new code, please try to follow the new Coding style. +* Coding style improvement patches are desired and accepted, but please + try to not mix coding style improvement with functional and other + changes in one patch. +* When you are going to improve coding style for existing code, please + try to do it for entire file(s).
diff --git a/doc/opensm-sriov.txt b/doc/opensm-sriov.txt new file mode 100644 index 0000000..bbdff48 --- /dev/null +++ b/doc/opensm-sriov.txt @@ -0,0 +1,154 @@ +OpenSM SRIOV (Alias GUID) Support +12/9/11 + +Overview + +In order to support virtualized environments, alias GUID support is added to OpenSM. +This support allows an SA client to add and remove additional port GUIDs based on +SubAdmSet/SubAdmDelete of GUIDInfoRecord. A set with a GUID of 0 in a valid GUIDInfoRecord +index as indicated by the component mask indicates that the additional GUID is to +be SM assigned. The OpenIB OUI (0x001405), along with a configured byte and a +pseudorandom number is currently used for this algorithm (which may be changed in the future). + +Most SA queries are updated to handle alias GUIDs as part of any GID specified inside +the query. These include SA path record, multipath record, multicast record, and service record. +There are only a few SA queries (InformInfoRecord, InformInfo) that are not currently updated +for alias GUID support. + +In terms of the IBA spec, alias GUIDs are termed additional port GUIDs. + + +IBA 1.2.1 Volume 1 Changes for Alias GUID Support + +The following are MgtWG WG APPROVED spec changes to IBA 1.2.1 volume 1 relative to this support: + +RefID 4704 +15.2.5.18 GUIDInfoRecord p. 932 line 14 +Table 213 GUIDInfoRecord + +An entity that wishes to add or remove additional port GUIDs can do so using the SubnAdmSet() and SubnAdmDelete() methods +with the GUIDInfoRecord attribute. This causes the SM to set the updated GUIDs in the specified port via the SM GUIDInfo attribute. + +SubnAdmSet() method is used to add additional port GUIDs. SubnAdmDelete() method is used to remove previously added additional +port GUIDs. + +o15-0.x.y: If SA supports additional port GUIDs, then both SubAdmSet(GUIDInfoRecord) and SubnAdmDelete(GUIDInfoRecord) are +supported. 
+ +o15-0.x.y: If SA supports additional port GUIDs, the component mask for SubAdmSet(GUIDInfoRecord) and +SubnAdmDelete(GUIDInfoRecord) is required to include both LID and block number. If the component mask does not include both of +these, SA shall return an error status of ERR_REQ_INSUFFICIENT_COMPONENTS in its response to the corresponding method. + +o15-0.x.y: If SA supports additional port GUIDs, for a SubAdmSet(GUIDInfoRecord), the component mask indicates which GUID indices +are to be set. A GUID can either be added or replaced. A GUID of 0 indicates that the GUID for this index is to be assigned by +the SM. + +o15-0.x.y: If SA supports additional port GUIDs, for a SubAdmDelete(GUIDInfoRecord), the component mask indicates which GUID +indices are to be removed. + +o15-0.x.y: If SA supports additional port GUIDs, the SA shall return an error status of ERR_REQ_INVALID to any attempt to set +or delete block number 0 index 0. + + +RefID 4705 +p.899 line 26 15.2.4 +Table 188 SA-Specific Optional Capabilities + + +IsAdditionalGUIDsSupported | CM2 | 5 | If this value is 1, SA shall support the ability to add and remove additional port GUIDs + via SubAdmSet/SubnAdmDelete(GUIDInfoRecord) as described in + + + +RefID 4706 +p.904 line 21 15.2.5.1 +Table 192 Subnet Administration Attribute / Method Map + + + +b: SubAdmSet and SubAdmDelete of GUIDInfoRecord are supported if SA:ClassPortInfo.CapabilityMask2 indicates +IsAdditionalGUIDsSupported. + +RefID 4714 +Clarify GUID 0 in SA Set GUIDInfoRecord response +p.932 line 14 15.2.18 + + +o15-0.x.y: If SA supports additional port GUIDs, for a SubAdmSet(GUIDInfoRecord), the component mask indicates which GUID indices +are to be set. A GUID can either be added or replaced. A GUID of 0 indicates that the GUID for this index is to be assigned by +the SM. + + +o15-0.x.y: If SA supports additional port GUIDs, for a SubAdmSet(GUIDInfoRecord), the component mask indicates which GUID indices +are to be set. 
A GUID can either be added or replaced. In the request, a GUID of 0 indicates that the GUID for this index is to +be assigned by the SM. In the response, a GUID of 0 indicates that the GUID requested for this index was not accepted by the SA. + +RefID 4776 +SM GUIDInfo initialization + + +GUIDInfo Description + +The requirements for setting additional + +GUIDs are beyond the scope of the specification. + + + + + + +OpenSM SRIOV Configuration + +Some new options were added for SRIOV configuration of OpenSM. + +1. Allow both pkeys +-W or --allow_both_pkeys on the command line or +allow_both_pkeys TRUE +in the options file. Default is false. + +allow_both_pkeys indicates whether both full and +limited membership on the same partition is allowed or not. + +In order to support allow_both_pkeys, the partition file syntax is +extended with "both" flag (in addition to "full" and "limited"). + +defmember=full|limited|both +or +[PortGUID[=full|=limited|=both]] + +2. SM assigned GUID byte +sm_assigned_guid +in the options file. Default is 0. + +An SM assigned GUID byte is added as a configuration option +where an alias GUID is formed from OpenFabrics OUI +followed by 40 bits xy 00 ab cd ef where xy is the SM assigned guid byte +and ab cd ef is an SM autogenerated 24 bits. + +The SM assigned GUID byte should be configured as subnet unique. + +Also, the algorithm to obtain a "unique" SM assigned GUID is changing from +being based on a static monotonically incrementing counter for the SM +autogenerated part (like SA MCMemberRecord SM assigned MGIDs). +The number of retries to find an unused GUID is currently hardcoded at 1000. + +Note that it is not a current requirement to maintain SM assigned GUIDs across OpenSM +failover. Note also that on reregistration, a host may reregister the previously SM +assigned GUID. + + +Operational Notes + +Duplicated alias GUIDs are detected against alias and physical GUIDs and result in +rejection of such registrations.
+ +When a port is dropped, any alias GUID registrations are removed. These are +reregistered by client reregistration mechanism. The exception +to this is service registrations as these are not currently reregistered by +the ULPs/applications that use them. + +Futures + +1. An alias GUID enforcement feature to which physical ports are allowed to request which +alias GUIDs. diff --git a/doc/opensm_release_notes-3.1.10.txt b/doc/opensm_release_notes-3.1.10.txt new file mode 100644 index 0000000..2b6253d --- /dev/null +++ b/doc/opensm_release_notes-3.1.10.txt @@ -0,0 +1,492 @@ + OpenSM Release Notes 3.1.10 + ============================= + +Version: OpenFabrics Enterprise Distribution (OFED) 1.3 +Repo: git://git.openfabrics.org/~ofed_1_3/management.git (release) + git://git.openfabrics.org/~sashak/management.git (development) +Date: February 2008 + +1 Overview +---------- +This document describes the contents of the OpenSM OFED 1.3 release. +OpenSM is an InfiniBand compliant Subnet Manager and Administration, +and runs on top of OpenIB. The OpenSM version for this release +is openib-3.1.10 + +This document includes the following sections: +1 This Overview section (describing new features and software + dependencies) +2 Known Issues And Limitations +3 Unsupported IB compliance statements +4 Major Bug Fixes +5 Main Verification Flows +6 Qualified software stacks and devices + +1.1 Major New Features + +* QoS manager (experimental) + This QoS manager implementation is in accordance with IBA QoS Annex. + Highly configurable QoS Policy is parsed from OpenSM QoS policy file. + Valid QoS parameters will be reported in SA PathRecord and + MultiPathRecord. In addition simple QoS levels per ULPs configuration + is supported too. + +* Performance Manager + When enabled it collects a fabric port counters and able to log it or + to pass to external program via event plugin interface. 
It handles + counters overflow, supports LID/QP redirection and is able to work + when OpenSM is in master, standby, and inactive states. + +* Dimension Order routing (DOR) algorithm + DOR Unicast routing algorithm - based on the Min Hop algorithm, but + avoids port equalization except for redundant links between the + same two switches. This provides deadlock free routes for hypercubes + when the fabric is cabled as a hypercube and for meshes when cabled + as a mesh (see details in OpenSM man page). + +* Routing improvements + Speedup the current routing algorithms default MinHops, Up/Down and + LASH and lid matrix generation. Fat Tree routing engine is able to work + with not pure fat tree topology. + +* Multiple IB routers support + OpenSM is now able to keep configurable subnet prefix to router table. + SA will report paths to these routers when SA PathRecord was issued with + non-local DGID. + +* Node map + This is possible to name nodes in this config file. Those names will be + used for logging and by QoS configuration. + +* PKey index support + Proper support for PKey index in GSI queries. + +* Incremental LFTs, PKey, SL2VL, and VLarbitration table updates + OpenSM will only fetch those tables in first heavy sweep and then + will maintain this internally. + +* Fast port and switch detector + When port and/or switch was externally reset and it was fast so sweep + doesn't find this device as disconnected OpenSM will detect this by + changed port states and handle accordingly. + +* Duplicated GUIDs/port moving detector + OpenSM will be able to detect port moving during a fabric discovery + and will not report duplicated GUIDs in this case. + +* Multicast rerouting speedup + Now OpenSM will calculate and setup multicast forwarding tables for + all altered multicast groups and not for each one. + +* Event plugin API + OpenSM allows to load dynamically various plugin modules.
+ +* Many generic improvements + +1.2 Minor New Features: + +* Daemon mode can be activated with -B option. + +* Support multiple scopes for IPoIB multicast groups in partition config. + +* Loopback connection handling + Loopback connection is not interpreted as duplicated GUID anymore. + +* Connect root nodes option for Up/Down routing engine. + When this option is specified Up/Down will create routing paths between + its root nodes. + +* Dump and log filenames changed from osm* to opensm*. + +* Support loopback console + Socket console with only local access. + +* Configurable config directory (the default value is /etc/opensm) and + configurable default values of OpenSM config filenames. + +* Add option for force SDR link speed + Add option to opensm.opts to force link speed. Currently, only forcing + to SDR link speed is supported. This option is not supported as a + command line option. + +* Better packaging + Building and RPM packaging were improved and simplified. + +* Handle "babbling" ports + When a babbling port (port which causes a frequent trap generation) is + detected, OpenSM will disable the port which should terminate the trap + storm. + +1.3 Library API Changes + + None + +1.4 Software Dependencies + +OpenSM depends on the installation of either OFED 1.3, OFED 1.2, OFED 1.1, +OFED 1.0, OpenIB gen2 (e.g. IBG2 distribution), OpenIB gen1 (e.g. IBGD +distribution), or Mellanox VAPI stacks. The qualified driver versions +are provided in Table 2, "Qualified IB Stacks". + +Also building of QoS manager policy file parser requires flex, and either +bison or byacc installed. + +1.5 Supported Devices Firmware + +The main task of OpenSM is to initialize InfiniBand devices. The +qualified devices and their corresponding firmware versions +are listed in Table 3. + +2 Known Issues And Limitations +------------------------------ + +* No Service / Key associations: + There is no way to manage Service access by Keys. 
+ +* No SM to SM SMDB synchronization: + Puts the burden of re-registering services, multicast groups, and + inform-info on the client application (or IB access layer core). + +3 Unsupported IB Compliance Statements +-------------------------------------- +The following section lists all the IB compliance statements which +OpenSM does not support. Please refer to the IB specification for detailed +information regarding each compliance statement. + +* C14-22 (Authentication): + M_Key M_KeyProtectBits and M_KeyLeasePeriod shall be set in one + SubnSet method. As a work-around, an OpenSM option is provided for + defining the protect bits. + +* C14-67 (Authentication): + On SubnGet(SMInfo) and SubnSet(SMInfo) - if M_Key is not zero then + the SM shall generate a SubnGetResp if the M_Key matches, or + silently drop the packet if M_Key does not match. + +* C15-0.1.23.4 (Authentication): + InformInfoRecords shall always be provided with the QPN set to 0, + except for the case of a trusted request, in which case the actual + subscriber QPN shall be returned. + +* o13-17.1.2 (Event-FWD): + If no permission to forward, the subscription should be removed and + no further forwarding should occur. + +* C14-24.1.1.5 and C14-62.1.1.22 (Initialization): + GUIDInfo - SM should enable assigning Port GUIDInfo. + +* C14-44 (Initialization): + If the SM discovers that it is missing an M_Key to update CA/RT/SW, + it should notify the higher level. + +* C14-62.1.1.12 (Initialization): + PortInfo:M_Key - Set the M_Key to a node based random value. + +* C14-62.1.1.13 (Initialization): + PortInfo:P_KeyProtectBits - set according to an optional policy. + +* C14-62.1.1.24 (Initialization): + SwitchInfo:DefaultPort - should be configured for random FDB. + +* C14-62.1.1.32 (Initialization): + RandomForwardingTable should be configured. + +* o15-0.1.12 (Multicast): + If the JoinState is SendOnlyNonMember = 1 (only), then the endport + should join as sender only. 
+ +* o15-0.1.8 (Multicast): + If a request for creating an MCG with fields that cannot be met, + return ERR_REQ_INVALID (currently ignores SL and FlowLabelTClass). + +* C15-0.1.8.6 (SA-Query): + Respond to SubnAdmGetTraceTable - this is an optional attribute. + +* C15-0.1.13 Services: + Reject ServiceRecord create, modify or delete if the given + ServiceP_Key does not match the one included in the ServiceGID port + and the port that sent the request. + +* C15-0.1.14 (Services): + Provide means to associate service name and ServiceKeys. + +4 Major Bug Fixes +----------------- + +The following is a list of bugs that were fixed. Note that other less critical +or visible bugs were also fixed. + +* osm_ucast_ftree.c: do load-leveling of non-CN routes + +* osm_ucast_ftree.c: ignore port 0 and loopbacks on switches + +* lash: fix possible segfault in osm_get_lash_sl() + +* osm_ucast_ftree.c: fixing coredump in fat-tree routing + +* osm_sa_slvl_record: fix overflow crash + +* Break multicast rerouting requests processing when heavy sweep is + scheduled. 
+ +* updn: report fallback properly + +* Fix incorrect identification of routing engine used + +* Don't zero base LID when invalid value is received + +* lash: fix wrong allocation size + +* Fixing broken logic in 'process world' part of LinkRecord processing + +* Fix lmc_mask bit order in osm_sa_link_record.c + +* Adding missing comparison by to_lid/from_lid in LinkRecord processing + +* Broken logic when scanning subnet for PIR request + +* No interactive games in daemon mode + +* Fixing memory leak in node description + +* Fix PortInfo update issues for switch port 0 + +* Changed method_mask type in user_mad interface in accordance with + kernel ABI + +* Use umad_get_issm_path() in osm_vendor_set_sm() + +* Report message fix + +* Uninitialized variables usage fix + +* osm_ucast_ftree.c: Possible NULL ptr seg fault + +* osm_mcast_mgr.c: Possible NULL ptr seg fault + +* TrapRepress was failing for mkey != 0 + +* IB_PR_COMPMASK was used in MPR + +* Set hop limit when creating ipoib multicast groups + +* Fix outstanding mad counters tracking on the error paths. + +* Report new ports before handover mastership + +* Fix opvls and neighbormtu when remote port invalid. + +* Bug in coding trying to set vl_arb_high_limit when PortInfo.base_lid + was still zero. + +* Protect SMInfo response against port moving issue. + +5 Main Verification Flows +------------------------- + +OpenSM verification is run using the following activities: +* osmtest - a stand-alone program +* ibmgtsim (IB management simulator) based - a set of flows that + simulate clusters, inject errors and verify OpenSM capability to + respond and bring up the network correctly. +* small cluster regression testing - where the SM is used on back to + back or single switch configurations. The regression includes + multiple OpenSM dedicated tests. 
+* cluster testing - when we run OpenSM to setup a large cluster, perform + hand-off, reboots and reconnects, verify routing correctness and SA + responsiveness at the ULP level (IPoIB and SDP). + +5.1 osmtest + +osmtest is an automated verification tool used for OpenSM +testing. Its verification flows are described by list below. + +* Inventory File: Obtain and verify all port info, node info, link and path + records parameters. + +* Service Record: + - Register new service + - Register another service (with a lease period) + - Register another service (with service p_key set to zero) + - Get all services by name + - Delete the first service + - Delete the third service + - Added bad flows of get/delete non valid service + - Add / Get same service with different data + - Add / Get / Delete by different component mask values (services + by Name & Key / Name & Data / Name & Id / Id only ) + +* Multicast Member Record: + - Query of existing Groups (IPoIB) + - BAD Join with insufficient comp mask (o15.0.1.3) + - Create given MGID=0 (o15.0.1.4) + - Create given MGID=0xFF12A01C,FE800000,00000000,12345678 (o15.0.1.4) + - Create BAD MGID=0xFA. 
(o15.0.1.6) + - Create BAD MGID=0xFF12A01B w/ link-local not set (o15.0.1.6) + - New MGID with invalid join state (o15.0.1.9) + - Retry of existing MGID - See JoinState update (o15.0.1.11) + - BAD RATE when connecting to existing MGID (o15.0.1.13) + - Partial JoinState delete request - removing FullMember (o15.0.1.14) + - Full Delete of a group (o15.0.1.14) + - Verify Delete by trying to Join deleted group (o15.0.1.14) + - BAD Delete of IPoIB membership (no prev join) (o15.0.1.15) + +* GUIDInfo Record: + - All GUIDInfoRecords in subnet are obtained + +* MultiPathRecord: + - Perform some compliant and noncompliant MultiPathRecord requests + - Validation is via status in responses and IB analyzer + +* PKeyTableRecord: + - Perform some compliant and noncompliant PKeyTableRecord queries + - Validation is via status in responses and IB analyzer + +* LinearForwardingTableRecord: + - Perform some compliant and noncompliant LinearForwardingTableRecord queries + - Validation is via status in responses and IB analyzer + +* Event Forwarding: Register for trap forwarding using reports + - Send a trap and wait for report + - Unregister non-existing + +* Trap 64/65 Flow: Register to Trap 64-65, create traps (by + disconnecting/connecting ports) and wait for report, then unregister. + +* Stress Test: send PortInfoRecord queries, both single and RMPP and + check for the rate of responses as well as their validity. + + +5.2 IB Management Simulator OpenSM Test Flows: + +The simulator provides the ability to simulate the SM handling of virtual +topologies that are not limited to actual lab equipment availability. +OpenSM was simulated to bring up clusters of up to 10,000 nodes. Daily +regressions use smaller clusters (16 and 128 nodes). + +The following test flows are run on the IB management simulator: + +* Stability: + Up to 12 links from the fabric are randomly selected to drop packets + at drop rates up to 90%. The SM is required to succeed in bringing the + fabric up.
The resulting routing is verified to be correct as well. + +* LID Manager: + Using LMC = 2 the fabric is initialized with LIDs. Faults such as + zero LID, Duplicated LID, non-aligned (to LMC) LIDs are + randomly assigned to various nodes and other errors are randomly + output to the guid2lid cache file. The SM sweep is run 5 times and + after each iteration a complete verification is made to ensure that all + LIDs that could possibly be maintained are kept, as well as that all nodes + were assigned a legal LID range. + +* Multicast Routing: + Nodes randomly join the 0xc000 group and eventually the + resulting routing is verified for completeness and adherence to + Up/Down routing rules. + +* osmtest: + The complete osmtest flow as described in the previous table is run on + the simulated fabrics. + +* Stress Test: + This flow merges fabric, LID and stability issues with continuous + PathRecord, ServiceRecord and Multicast Join/Leave activity to + stress the SM/SA during continuous sweeps. InformInfo Set/Delete/Get + were added to the test such both existing and non existing nodes + perform them in random order. + +5.3 OpenSM Regression + +Using a back-to-back or single switch connection, the following set of +tests is run nightly on the stacks described in table 2. The included +tests are: + +* Stress Testing: Flood the SA with queries from multiple channel + adapters to check the robustness of the entire stack up to the SA. + +* Dynamic Changes: Dynamic Topology changes, through randomly + dropping SMP packets, used to test OpenSM adaptation to an unstable + network & verify DB correctness. + +* Trap Injection: This flow injects traps to the SM and verifies that it + handles them gracefully. + +* SA Query Test: This test exhaustively checks the SA responses to all + possible single component mask. 
To do that the test examines the + entire set of records the SA can provide, classifies them by their + field values and then selects every field (using component mask and a + value) and verifies that the response matches the expected set of records. + A random selection using multiple component mask bits is also performed. + +5.4 Cluster testing: + +Cluster testing is usually run before a distribution release. It +involves real hardware setups of 16 to 32 nodes (or more if a beta site +is available). Each test is validated by running all-to-all ping through the IB +interface. The test procedure includes: + +* Cluster bringup + +* Hand-off between 2 or 3 SM's while performing: + - Node reboots + - Switch power cycles (disconnecting the SM's) + +* Unresponsive port detection and recovery + +* osmtest from multiple nodes + +* Trap injection and recovery + + +6 Qualification +---------------- + +Table 2 - Qualified IB Stacks +============================= + +Stack | Version +-----------------------------------------|-------------------------- +OFED | 1.3 +OFED | 1.2 +OFED | 1.1 +OFED | 1.0 +OpenIB Gen2 (IBG2 distribution) | 1.0 +OpenIB Gen1 (IBGD distribution) | 1.8.0 +VAPI (Mellanox InfiniBand HCA Driver) | 3.2 and later + +Table 3 - Qualified Devices and Corresponding Firmware +====================================================== + +Mellanox +Device | FW versions +------------------------------------|------------------------------- +InfiniScale | fw-43132 5.2.000 (and later) +InfiniScale III | fw-47396 0.5.000 (and later) +InfiniHost | fw-23108 3.5.000 (and later) +InfiniHost III Lx | fw-25204 1.2.000 (and later) +InfiniHost III Ex (InfiniHost Mode) | fw-25208 4.8.200 (and later) +InfiniHost III Ex (MemFree Mode) | fw-25218 5.3.000 (and later) +ConnectX IB | fw-25408 2.3.000 (and later) + +QLogic/PathScale +Device | Note +--------|----------------------------------------------------------- +iPath | QHT6040 (PathScale InfiniPath HT-460) +iPath | QHT6140 (PathScale 
InfiniPath HT-465) +iPath | QLE6140 (PathScale InfiniPath PE-880) +iPath | QLE7240 +iPath | QLE7280 + +Note 1: OpenSM does not run on an IBM Galaxy (eHCA) as it does not expose +QP0 and QP1. However, it does support it as a device on the subnet. + +Note 2: QoS firmware and Mellanox devices + +HCAs: QoS supported by ConnectX. The current FW release +doesn't support QoS. QoS-enabled FW release (2_5_000) is +planned for May. If someone wishes to get QoS-enabled FW +before the official release, they should contact Mellanox FAE. + +Switches: QoS supported by InfiniScale III +Any InfiniScale III FW that is supported by OpenSM supports QoS. diff --git a/doc/opensm_release_notes-3.1.11.txt b/doc/opensm_release_notes-3.1.11.txt new file mode 100644 index 0000000..5d8366c --- /dev/null +++ b/doc/opensm_release_notes-3.1.11.txt @@ -0,0 +1,492 @@ + OpenSM Release Notes 3.1.11 + ============================= + +Version: OpenFabrics Enterprise Distribution (OFED) 1.3 +Repo: git://git.openfabrics.org/~ofed_1_3/management.git (release) + git://git.openfabrics.org/~sashak/management.git (development) +Date: May 2008 + +1 Overview +---------- +This document describes the contents of the OpenSM OFED 1.3 release. +OpenSM is an InfiniBand compliant Subnet Manager and Administration, +and runs on top of OpenIB. The OpenSM version for this release +is openib-3.1.11 + +This document includes the following sections: +1 This Overview section (describing new features and software + dependencies) +2 Known Issues And Limitations +3 Unsupported IB compliance statements +4 Major Bug Fixes +5 Main Verification Flows +6 Qualified software stacks and devices + +1.1 Major New Features + +* QoS manager (experimental) + This QoS manager implementation is in accordance with IBA QoS Annex. + Highly configurable QoS Policy is parsed from OpenSM QoS policy file. + Valid QoS parameters will be reported in SA PathRecord and + MultiPathRecord. 
In addition simple QoS levels per ULPs configuration + is supported too. + +* Performance Manager + When enabled it collects fabric port counters and is able to log them or + to pass them to an external program via event plugin interface. It handles + counters overflow, supports LID/QP redirection and is able to work + when OpenSM is in master, standby, and inactive states. + +* Dimension Order routing (DOR) algorithm + DOR Unicast routing algorithm - based on the Min Hop algorithm, but + avoids port equalization except for redundant links between the + same two switches. This provides deadlock free routes for hypercubes + when the fabric is cabled as a hypercube and for meshes when cabled + as a mesh (see details in OpenSM man page). + +* Routing improvements + Speedup the current routing algorithms default MinHops, Up/Down and + LASH and lid matrix generation. Fat Tree routing engine is able to work + with not pure fat tree topology. + +* Multiple IB routers support + OpenSM is now able to keep configurable subnet prefix to router table. + SA will report paths to these routers when SA PathRecord was issued with + non-local DGID. + +* Node map + It is possible to name nodes in this config file. Those names will be + used for logging and by QoS configuration. + +* PKey index support + Proper support for PKey index in GSI queries. + +* Incremental LFTs, PKey, SL2VL, and VLarbitration table updates + OpenSM will only fetch those tables in first heavy sweep and then + will maintain them internally. + +* Fast port and switch detector + When a port and/or switch was externally reset so quickly that the sweep + doesn't find the device disconnected, OpenSM will detect this by + changed port states and handle it accordingly. + +* Duplicated GUIDs/port moving detector + OpenSM will be able to detect port moving during a fabric discovery + and will not report duplicated GUIDs in this case.
+ +* Multicast rerouting speedup + Now OpenSM will calculate and setup multicast forwarding tables for + all altered multicast groups and not for each one. + +* Event plugin API + OpenSM allows to load dynamically various plugin modules. + +* Many generic improvements + +1.2 Minor New Features: + +* Daemon mode can be activated with -B option. + +* Support multiple scopes for IPoIB multicast groups in partition config. + +* Loopback connection handling + Loopback connection is not interpreted as duplicated GUID anymore. + +* Connect root nodes option for Up/Down routing engine. + When this option is specified Up/Down will create routing paths between + its root nodes. + +* Dump and log filenames changed from osm* to opensm*. + +* Support loopback console + Socket console with only local access. + +* Configurable config directory (the default value is /etc/opensm) and + configurable default values of OpenSM config filenames. + +* Add option for force SDR link speed + Add option to opensm.opts to force link speed. Currently, only forcing + to SDR link speed is supported. This option is not supported as a + command line option. + +* Better packaging + Building and RPM packaging were improved and simplified. + +* Handle "babbling" ports + When a babbling port (port which causes a frequent trap generation) is + detected, OpenSM will disable the port which should terminate the trap + storm. + +1.3 Library API Changes + + None + +1.4 Software Dependencies + +OpenSM depends on the installation of either OFED 1.3, OFED 1.2, OFED 1.1, +OFED 1.0, OpenIB gen2 (e.g. IBG2 distribution), OpenIB gen1 (e.g. IBGD +distribution), or Mellanox VAPI stacks. The qualified driver versions +are provided in Table 2, "Qualified IB Stacks". + +Also building of QoS manager policy file parser requires flex, and either +bison or byacc installed. + +1.5 Supported Devices Firmware + +The main task of OpenSM is to initialize InfiniBand devices. 
The +qualified devices and their corresponding firmware versions +are listed in Table 3. + +2 Known Issues And Limitations +------------------------------ + +* No Service / Key associations: + There is no way to manage Service access by Keys. + +* No SM to SM SMDB synchronization: + Puts the burden of re-registering services, multicast groups, and + inform-info on the client application (or IB access layer core). + +3 Unsupported IB Compliance Statements +-------------------------------------- +The following section lists all the IB compliance statements which +OpenSM does not support. Please refer to the IB specification for detailed +information regarding each compliance statement. + +* C14-22 (Authentication): + M_Key M_KeyProtectBits and M_KeyLeasePeriod shall be set in one + SubnSet method. As a work-around, an OpenSM option is provided for + defining the protect bits. + +* C14-67 (Authentication): + On SubnGet(SMInfo) and SubnSet(SMInfo) - if M_Key is not zero then + the SM shall generate a SubnGetResp if the M_Key matches, or + silently drop the packet if M_Key does not match. + +* C15-0.1.23.4 (Authentication): + InformInfoRecords shall always be provided with the QPN set to 0, + except for the case of a trusted request, in which case the actual + subscriber QPN shall be returned. + +* o13-17.1.2 (Event-FWD): + If no permission to forward, the subscription should be removed and + no further forwarding should occur. + +* C14-24.1.1.5 and C14-62.1.1.22 (Initialization): + GUIDInfo - SM should enable assigning Port GUIDInfo. + +* C14-44 (Initialization): + If the SM discovers that it is missing an M_Key to update CA/RT/SW, + it should notify the higher level. + +* C14-62.1.1.12 (Initialization): + PortInfo:M_Key - Set the M_Key to a node based random value. + +* C14-62.1.1.13 (Initialization): + PortInfo:P_KeyProtectBits - set according to an optional policy. + +* C14-62.1.1.24 (Initialization): + SwitchInfo:DefaultPort - should be configured for random FDB. 
+ +* C14-62.1.1.32 (Initialization): + RandomForwardingTable should be configured. + +* o15-0.1.12 (Multicast): + If the JoinState is SendOnlyNonMember = 1 (only), then the endport + should join as sender only. + +* o15-0.1.8 (Multicast): + If a request for creating an MCG with fields that cannot be met, + return ERR_REQ_INVALID (currently ignores SL and FlowLabelTClass). + +* C15-0.1.8.6 (SA-Query): + Respond to SubnAdmGetTraceTable - this is an optional attribute. + +* C15-0.1.13 Services: + Reject ServiceRecord create, modify or delete if the given + ServiceP_Key does not match the one included in the ServiceGID port + and the port that sent the request. + +* C15-0.1.14 (Services): + Provide means to associate service name and ServiceKeys. + +4 Major Bug Fixes +----------------- + +The following is a list of bugs that were fixed. Note that other less critical +or visible bugs were also fixed. + +* osm_ucast_ftree.c: do load-leveling of non-CN routes + +* osm_ucast_ftree.c: ignore port 0 and loopbacks on switches + +* lash: fix possible segfault in osm_get_lash_sl() + +* osm_ucast_ftree.c: fixing coredump in fat-tree routing + +* osm_sa_slvl_record: fix overflow crash + +* Break multicast rerouting requests processing when heavy sweep is + scheduled. 
+ +* updn: report fallback properly + +* Fix incorrect identification of routing engine used + +* Don't zero base LID when invalid value is received + +* lash: fix wrong allocation size + +* Fixing broken logic in 'process world' part of LinkRecord processing + +* Fix lmc_mask bit order in osm_sa_link_record.c + +* Adding missing comparison by to_lid/from_lid in LinkRecord processing + +* Broken logic when scanning subnet for PIR request + +* No interactive games in daemon mode + +* Fixing memory leak in node description + +* Fix PortInfo update issues for switch port 0 + +* Changed method_mask type in user_mad interface in accordance with + kernel ABI + +* Use umad_get_issm_path() in osm_vendor_set_sm() + +* Report message fix + +* Uninitialized variables usage fix + +* osm_ucast_ftree.c: Possible NULL ptr seg fault + +* osm_mcast_mgr.c: Possible NULL ptr seg fault + +* TrapRepress was failing for mkey != 0 + +* IB_PR_COMPMASK was used in MPR + +* Set hop limit when creating ipoib multicast groups + +* Fix outstanding mad counters tracking on the error paths. + +* Report new ports before handover mastership + +* Fix opvls and neighbormtu when remote port invalid. + +* Bug in coding trying to set vl_arb_high_limit when PortInfo.base_lid + was still zero. + +* Protect SMInfo response against port moving issue. + +5 Main Verification Flows +------------------------- + +OpenSM verification is run using the following activities: +* osmtest - a stand-alone program +* ibmgtsim (IB management simulator) based - a set of flows that + simulate clusters, inject errors and verify OpenSM capability to + respond and bring up the network correctly. +* small cluster regression testing - where the SM is used on back to + back or single switch configurations. The regression includes + multiple OpenSM dedicated tests. 
+* cluster testing - when we run OpenSM to setup a large cluster, perform + hand-off, reboots and reconnects, verify routing correctness and SA + responsiveness at the ULP level (IPoIB and SDP). + +5.1 osmtest + +osmtest is an automated verification tool used for OpenSM +testing. Its verification flows are described by list below. + +* Inventory File: Obtain and verify all port info, node info, link and path + records parameters. + +* Service Record: + - Register new service + - Register another service (with a lease period) + - Register another service (with service p_key set to zero) + - Get all services by name + - Delete the first service + - Delete the third service + - Added bad flows of get/delete non valid service + - Add / Get same service with different data + - Add / Get / Delete by different component mask values (services + by Name & Key / Name & Data / Name & Id / Id only ) + +* Multicast Member Record: + - Query of existing Groups (IPoIB) + - BAD Join with insufficient comp mask (o15.0.1.3) + - Create given MGID=0 (o15.0.1.4) + - Create given MGID=0xFF12A01C,FE800000,00000000,12345678 (o15.0.1.4) + - Create BAD MGID=0xFA. 
(o15.0.1.6) + - Create BAD MGID=0xFF12A01B w/ link-local not set (o15.0.1.6) + - New MGID with invalid join state (o15.0.1.9) + - Retry of existing MGID - See JoinState update (o15.0.1.11) + - BAD RATE when connecting to existing MGID (o15.0.1.13) + - Partial JoinState delete request - removing FullMember (o15.0.1.14) + - Full Delete of a group (o15.0.1.14) + - Verify Delete by trying to Join deleted group (o15.0.1.14) + - BAD Delete of IPoIB membership (no prev join) (o15.0.1.15) + +* GUIDInfo Record: + - All GUIDInfoRecords in subnet are obtained + +* MultiPathRecord: + - Perform some compliant and noncompliant MultiPathRecord requests + - Validation is via status in responses and IB analyzer + +* PKeyTableRecord: + - Perform some compliant and noncompliant PKeyTableRecord queries + - Validation is via status in responses and IB analyzer + +* LinearForwardingTableRecord: + - Perform some compliant and noncompliant LinearForwardingTableRecord queries + - Validation is via status in responses and IB analyzer + +* Event Forwarding: Register for trap forwarding using reports + - Send a trap and wait for report + - Unregister non-existing + +* Trap 64/65 Flow: Register to Trap 64-65, create traps (by + disconnecting/connecting ports) and wait for report, then unregister. + +* Stress Test: send PortInfoRecord queries, both single and RMPP and + check for the rate of responses as well as their validity. + + +5.2 IB Management Simulator OpenSM Test Flows: + +The simulator provides the ability to simulate the SM handling of virtual +topologies that are not limited to actual lab equipment availability. +OpenSM was simulated to bring up clusters of up to 10,000 nodes. Daily +regressions use smaller clusters (16 and 128 nodes). + +The following test flows are run on the IB management simulator: + +* Stability: + Up to 12 links from the fabric are randomly selected to drop packets + at drop rates up to 90%. The SM is required to succeed in bringing the + fabric up.
The resulting routing is verified to be correct as well. + +* LID Manager: + Using LMC = 2 the fabric is initialized with LIDs. Faults such as + zero LID, Duplicated LID, non-aligned (to LMC) LIDs are + randomly assigned to various nodes and other errors are randomly + output to the guid2lid cache file. The SM sweep is run 5 times and + after each iteration a complete verification is made to ensure that all + LIDs that could possibly be maintained are kept, as well as that all nodes + were assigned a legal LID range. + +* Multicast Routing: + Nodes randomly join the 0xc000 group and eventually the + resulting routing is verified for completeness and adherence to + Up/Down routing rules. + +* osmtest: + The complete osmtest flow as described in the previous table is run on + the simulated fabrics. + +* Stress Test: + This flow merges fabric, LID and stability issues with continuous + PathRecord, ServiceRecord and Multicast Join/Leave activity to + stress the SM/SA during continuous sweeps. InformInfo Set/Delete/Get + were added to the test such both existing and non existing nodes + perform them in random order. + +5.3 OpenSM Regression + +Using a back-to-back or single switch connection, the following set of +tests is run nightly on the stacks described in table 2. The included +tests are: + +* Stress Testing: Flood the SA with queries from multiple channel + adapters to check the robustness of the entire stack up to the SA. + +* Dynamic Changes: Dynamic Topology changes, through randomly + dropping SMP packets, used to test OpenSM adaptation to an unstable + network & verify DB correctness. + +* Trap Injection: This flow injects traps to the SM and verifies that it + handles them gracefully. + +* SA Query Test: This test exhaustively checks the SA responses to all + possible single component mask. 
To do that the test examines the + entire set of records the SA can provide, classifies them by their + field values and then selects every field (using component mask and a + value) and verifies that the response matches the expected set of records. + A random selection using multiple component mask bits is also performed. + +5.4 Cluster testing: + +Cluster testing is usually run before a distribution release. It +involves real hardware setups of 16 to 32 nodes (or more if a beta site +is available). Each test is validated by running all-to-all ping through the IB +interface. The test procedure includes: + +* Cluster bringup + +* Hand-off between 2 or 3 SM's while performing: + - Node reboots + - Switch power cycles (disconnecting the SM's) + +* Unresponsive port detection and recovery + +* osmtest from multiple nodes + +* Trap injection and recovery + + +6 Qualification +---------------- + +Table 2 - Qualified IB Stacks +============================= + +Stack | Version +-----------------------------------------|-------------------------- +OFED | 1.3 +OFED | 1.2 +OFED | 1.1 +OFED | 1.0 +OpenIB Gen2 (IBG2 distribution) | 1.0 +OpenIB Gen1 (IBGD distribution) | 1.8.0 +VAPI (Mellanox InfiniBand HCA Driver) | 3.2 and later + +Table 3 - Qualified Devices and Corresponding Firmware +====================================================== + +Mellanox +Device | FW versions +------------------------------------|------------------------------- +InfiniScale | fw-43132 5.2.000 (and later) +InfiniScale III | fw-47396 0.5.000 (and later) +InfiniHost | fw-23108 3.5.000 (and later) +InfiniHost III Lx | fw-25204 1.2.000 (and later) +InfiniHost III Ex (InfiniHost Mode) | fw-25208 4.8.200 (and later) +InfiniHost III Ex (MemFree Mode) | fw-25218 5.3.000 (and later) +ConnectX IB | fw-25408 2.3.000 (and later) + +QLogic/PathScale +Device | Note +--------|----------------------------------------------------------- +iPath | QHT6040 (PathScale InfiniPath HT-460) +iPath | QHT6140 (PathScale 
InfiniPath HT-465) +iPath | QLE6140 (PathScale InfiniPath PE-880) +iPath | QLE7240 +iPath | QLE7280 + +Note 1: OpenSM does not run on an IBM Galaxy (eHCA) as it does not expose +QP0 and QP1. However, it does support it as a device on the subnet. + +Note 2: QoS firmware and Mellanox devices + +HCAs: QoS supported by ConnectX. The current FW release +doesn't support QoS. QoS-enabled FW release (2_5_000) is +planned for May. If someone wishes to get QoS-enabled FW +before the official release, they should contact Mellanox FAE. + +Switches: QoS supported by InfiniScale III +Any InfiniScale III FW that is supported by OpenSM supports QoS. diff --git a/doc/opensm_release_notes-3.2.txt b/doc/opensm_release_notes-3.2.txt new file mode 100644 index 0000000..5a83092 --- /dev/null +++ b/doc/opensm_release_notes-3.2.txt @@ -0,0 +1,618 @@ + OpenSM Release Notes 3.2 + ============================= + +Version: OpenSM 3.2.x +Repo: git://git.openfabrics.org/~sashak/management.git +Date: Dec 2008 + +1 Overview +---------- +This document describes the contents of the OpenSM 3.2 release. +OpenSM is an InfiniBand compliant Subnet Manager and Administration, +and runs on top of OpenIB. The OpenSM version for this release +is opensm-3.2.5 + +This document includes the following sections: +1 This Overview section (describing new features and software + dependencies) +2 Known Issues And Limitations +3 Unsupported IB compliance statements +4 Bug Fixes +5 Main Verification Flows +6 Qualified Software Stacks and Devices + +1.1 Major New Features + +* Cached Routing + OpenSM provides an optional unicast routing cache (enabled by '-A' or + '--ucast_cache' options). When enabled, unicast routing cache prevents + routing recalculation (which is a heavy task in a large cluster) when + there was no topology change detected during the heavy sweep, or when + the topology change does not require new routing calculation, e.g. 
when + one or more CAs/RTRs/leaf switches go down, or one or more of these + nodes come back after being down. + +* Routing Chaining + Routing chaining is the ability to configure the order in which routing + algorithms are applied in opensm, e.g. '-R ftree,updn,minhop' - try + using ftree routing. If ftree fails, try updn. If updn fails, try + minhop. + +* IPv6 Solicited Node Multicast addresses consolidation + When this mode is used (enabled with --consolidate_ipv6_snm_req option) + OpenSM will map all IPv6 Solicited Node Multicast address join requests + into a single Multicast group with address ff10:601b::1:ff00:0. In this + way limited MLID space is saved. This IBA noncompliant feature is very + useful with large (~> 1024 nodes) clusters. + +* OpenSM sweep state machine rework + Huge and buggy OpenSM sweep state machine was fully rewritten in a safer + and more effective synchronous manner. + +* Multi lid routing balancing for updn/minhop routing algorithms + When LMC > 0 is used OpenSM will make sure to generate routing paths via + different switches and when possible chassis. + +* Preserve base lid routes when LMC > 0 + When LMC > 0 is used OpenSM will preserve routing paths for base lids + as it would be with LMC = 0. In this way traffic on each LID level is + not affected by LMC changes. + +* Ordered routing paths balancing + This adds the ability to predefine the port order in which routing paths + balancing is performed by OpenSM. Helps to improve performance + dramatically (40-50%) for applications with known communication + pattern. Activated with --guid_routing_order_file command line option. + +* Unified OpenSM configuration + Now there is a "conventional" config file instead of the hidden option cache + file (opensm.opts). OpenSM will find this in a default place (consult + man page for exact value) or the file name can be specified with '-F' + command line option. Also there is an option ('-c') to generate a config + file template.
+ +* Query remote SMs during light sweep + Master OpenSM will query remote standby SMs periodically to catch their + possible state changes and react accordingly (as required by IBA spec). + +* Predefined port ids for Up/Down algorithm + This is useful as Up/Down fine tuning tool - the algorithm will use + predefined port IDs instead of GUIDs for its decision about direction. + Activated with --ids_guid_file command line option. + +* Improved plugin API version 2. + Now OpenSM will provide to plugins the access to all data structures. + This makes it possible to implement powerful multi purpose plugins. All + OpenSM header files are installed now and specific configuration/build + options are exported via generated osm_config.h header file. + +* Many code improvements, optimizations and cleanups + +* Automatic daily snapshots generation. + This is not a "feature", but simplifies the access to recent OpenSM + bits. + +1.2 Minor New Features: + +* Cleanup cl_qlock_pool memory allocator - speedup memory allocations + +* Support for configurable (via OSM_UMAD_MAX_PENDING environment variable) + size of pending MADs pool. + +* Set packet life time to subnet timeout option rather than default + +* Enforce routing paths rebalancing on switch reconnection + +* In Up/Down routing algorithm compare GUID values in host byte order + +* Add 'switchbalance' and 'lidbalance' commands for OpenSM console + +* Respond to new trap 144 node description update flag + +* Add '--connect_roots' command line options.
This preserves connectivity + between root nodes in Up/Down routing algorithm + +* Setting SL in the IPoIB MCast groups in accordance with QoS policy + +* Dump auto detected root node guids in Up/Down routing algorithm + +* Unify OpenSM dumpers code + +* Unify various guid files parsers - add generic nodenamemap style parser + +* When root node guids were provided in file update the list on each + Up/Down run + +* During ./configure show values of configuration dirs and files + +* Make prefix routes config file name configurable + +* Add a Performance Manager HOWTO to the docs and the dist + +* Support separate SA and SM keys as clarified in IBA 1.2.1 + +* Remove AM_MAINTAINER_MODE in ./configure + +* Make vendor type OSM_VENDOR_INTF_OPENIB (libibumad) to be default + +* Build osm_perfmgr_db.* content only when PerfMgr is enabled. + +* Move PerfMgr event_db_dump_file to common OpenSM dump dir + +* Allow space separated strings as values in OpenSM config + +* Support for multiple event plugins + +* Add '--version' command line option + +* Add '--create-config ' command line option + +* Speedup and simplify logging code + +* Speedup multicast processing in SA DB + +* In log messages convert unicast LIDs from hex to decimal format and + GIDs from hex to IPv6 address format + +* Handle all possible ports in "ignore-guids" file + +* Add 'reroute' console command + +* Remove many install-exec-hook from Makefiles + +* Some cleanups in LASH routing algorithm code + +* In Makefiles remove -rpath and explicit -lpthread, -ldl from LDFLAGS + (move to configurator) + +* Install all OpenSM header files + +* Improve locking in SM Info receiver + +* Add new OSM_EVENT_ID_SUBNET_UP event for plugins + +* Redo lex and yacc files generation in conventional way + +* Add a missing Node Description check on light sweep. 
+ +* Move vendor specific compilation defines from command to generated + config.h file + +* Provide useful error message when log file opening fails + +* Add generated osm_config.h file with OpenSM specific defines + +* Display port number in decimal in log messages + +* Replace osm_vendor_select.h by generated osm_config.h + +* Unify options listing in OpenSM usage message + +* LFT buffers handling simplification + +* Add 'dump_conf' console command + +* OpenSM performs sweep on SIGCONT (coming out of suspend). + +* When our SM is in Standby state and its priority is increased + (via console command), notify master SM by sending Trap 144. + +* When entering standby state (after discovery) notify master SM + with Trap 144. + +* support more PortInfo:CapabilityMask bits + +* When babbling port policy is on disable the port with the least hop + count. + +1.3 Library API Changes + + None + +1.4 Software Dependencies + +OpenSM depends on the installation of either OFED 1.x, OpenIB gen2 (e.g. +IBG2 distribution), OpenIB gen1 (e.g. IBGD distribution), or Mellanox +VAPI stacks. The qualified driver versions are provided in Table 2, +"Qualified IB Stacks". + +Also, building of QoS manager policy file parser requires flex, and either +bison or byacc installed. + +1.5 Supported Devices Firmware + +The main task of OpenSM is to initialize InfiniBand devices. The +qualified devices and their corresponding firmware versions +are listed in Table 3. + +2 Known Issues And Limitations +------------------------------ + +* No Service / Key associations: + There is no way to manage Service access by Keys. + +* No SM to SM SMDB synchronization: + Puts the burden of re-registering services, multicast groups, and + inform-info on the client application (or IB access layer core). + +3 Unsupported IB Compliance Statements +-------------------------------------- +The following section lists all the IB compliance statements which +OpenSM does not support. 
Please refer to the IB specification for detailed +information regarding each compliance statement. + +* C14-22 (Authentication): + M_Key M_KeyProtectBits and M_KeyLeasePeriod shall be set in one + SubnSet method. As a work-around, an OpenSM option is provided for + defining the protect bits. + +* C14-67 (Authentication): + On SubnGet(SMInfo) and SubnSet(SMInfo) - if M_Key is not zero then + the SM shall generate a SubnGetResp if the M_Key matches, or + silently drop the packet if M_Key does not match. + +* C15-0.1.23.4 (Authentication): + InformInfoRecords shall always be provided with the QPN set to 0, + except for the case of a trusted request, in which case the actual + subscriber QPN shall be returned. + +* o13-17.1.2 (Event-FWD): + If no permission to forward, the subscription should be removed and + no further forwarding should occur. + +* C14-24.1.1.5 and C14-62.1.1.22 (Initialization): + GUIDInfo - SM should enable assigning Port GUIDInfo. + +* C14-44 (Initialization): + If the SM discovers that it is missing an M_Key to update CA/RT/SW, + it should notify the higher level. + +* C14-62.1.1.12 (Initialization): + PortInfo:M_Key - Set the M_Key to a node based random value. + +* C14-62.1.1.13 (Initialization): + PortInfo:M_KeyProtectBits - set according to an optional policy. + +* C14-62.1.1.24 (Initialization): + SwitchInfo:DefaultPort - should be configured for random FDB. + +* C14-62.1.1.32 (Initialization): + RandomForwardingTable should be configured. + +* o15-0.1.12 (Multicast): + If the JoinState is SendOnlyNonMember = 1 (only), then the endport + should join as sender only. + +* o15-0.1.8 (Multicast): + If a request for creating an MCG with fields that cannot be met, + return ERR_REQ_INVALID (currently ignores SL and FlowLabelTClass). + +* C15-0.1.8.6 (SA-Query): + Respond to SubnAdmGetTraceTable - this is an optional attribute. 
+ +* C15-0.1.13 Services: + Reject ServiceRecord create, modify or delete if the given + ServiceP_Key does not match the one included in the ServiceGID port + and the port that sent the request. + +* C15-0.1.14 (Services): + Provide means to associate service name and ServiceKeys. + +4 Bug Fixes +----------- + +4.1 Major Bug Fixes + +* Set SA attribute offset to 0 when no records are returned + +* Send trap 64 only after new ports are in ACTIVE state. + +* Fix in sending client reregistration bit + +* Fix default OpenSM SM (and SA) Key byte order + +* Fix in sending Multicast groups creation/deletion notification (Traps + 66,67) + +* Don't startup automatically on SuSE based systems + +4.2 Other Bug Fixes + +* opensm/osm_console.c: fix seg fault when running "portstatus ca" in + the console + +* opensm: fix potential core dumps where osm_node_get_physp_ptr can + return NULL + +* opensm/osm_mcast_mgr: limit spanning tree creation recursion to value + of max hops (64) + +* opensm: switch LFTs incremental update fix + +* opensm/osm_state_mgr.c: fix segmentation fault + +* opensm: eliminate some potential NULL pointer dereferences + +* opensm/osm_console.c: fix guid parsing + +* opensm: fix off by 1 issue with max_lid and max_multicat_lid_ho + +* opensm: fix potentially wrong port_guid initialization + +* opensm/configure.in: fix wrong HAVE_DEFAULT_OPENSM_CONFIG_FILE define + generation + +* opensm: fix snprintf() usage + +* opensm/osm_sa_lft_record: validate LFT block number + +* opensm/osm_sa_lft_record: pass block parameter in host byte order + +* opensm/include/Makefile.am: don't duplicate header files in EXTRA_DIST + +* opensm/osm_sa_class_port_info.c: fix over bound array access + +* osmtest/osmt_service.c: fix over bound array access + +* osmtest: fix qpn encoding in osmtest_informinfo_request() + +* opensm/osm_vendor_mlx_sa.c: handling attribute offset of 0 + +* opensm: fix segfault corner case when osm_console_init fails + +* opensm/console: close console 
socket on cleanup path + +* opensm/osm_ucast_lash: fix buffer overflow + +* opensm: fix broken IPv6 SNM consolidation code + +* opensm/osm_sa_lft_record.c: fix block number encoding byte order + +* opensm/osm_sa: fix memory leak in SA responder + +* opensm/osm_mcast_mgr: fix memory leak + +* opensm: fix qos config parsing bugs + +* opensm/osm_mcast_tbl.c: fix sending invalid MF block due to max mlid + overflow + +* opensm: log_max_size config parameter in MB + +* opensm/osm_ucast_lash: fix extra memory allocations + +* opensm: fix race in main OpenSM flow + +* opensm/ftree: fix GUID check against cn_guid_file + +* opensm/ftree: save FLT buffers memory allocations + +* opensm/osm_sa_link_record.c: prevent potential endless recursion + +* opensm: remove SM from sm_guid_tbl when IsSM port capability flag is + not set + +* opensm: fix QoS config bug + +* opensm: don't reassign zeroed params from config file + +* Other less critical or visible bugs were also fixed. + +5 Main Verification Flows +------------------------- + +OpenSM verification is run using the following activities: +* osmtest - a stand-alone program +* ibmgtsim (IB management simulator) based - a set of flows that + simulate clusters, inject errors and verify OpenSM capability to + respond and bring up the network correctly. +* small cluster regression testing - where the SM is used on back to + back or single switch configurations. The regression includes + multiple OpenSM dedicated tests. +* cluster testing - when we run OpenSM to setup a large cluster, perform + hand-off, reboots and reconnects, verify routing correctness and SA + responsiveness at the ULP level (IPoIB and SDP). + +5.1 osmtest + +osmtest is an automated verification tool used for OpenSM +testing. Its verification flows are described by list below. + +* Inventory File: Obtain and verify all port info, node info, link and path + records parameters. 
+ +* Service Record: + - Register new service + - Register another service (with a lease period) + - Register another service (with service p_key set to zero) + - Get all services by name + - Delete the first service + - Delete the third service + - Added bad flows of get/delete non valid service + - Add / Get same service with different data + - Add / Get / Delete by different component mask values (services + by Name & Key / Name & Data / Name & Id / Id only ) + +* Multicast Member Record: + - Query of existing Groups (IPoIB) + - BAD Join with insufficient comp mask (o15.0.1.3) + - Create given MGID=0 (o15.0.1.4) + - Create given MGID=0xFF12A01C,FE800000,00000000,12345678 (o15.0.1.4) + - Create BAD MGID=0xFA. (o15.0.1.6) + - Create BAD MGID=0xFF12A01B w/ link-local not set (o15.0.1.6) + - New MGID with invalid join state (o15.0.1.9) + - Retry of existing MGID - See JoinState update (o15.0.1.11) + - BAD RATE when connecting to existing MGID (o15.0.1.13) + - Partial JoinState delete request - removing FullMember (o15.0.1.14) + - Full Delete of a group (o15.0.1.14) + - Verify Delete by trying to Join deleted group (o15.0.1.14) + - BAD Delete of IPoIB membership (no prev join) (o15.0.1.15) + +* GUIDInfo Record: + - All GUIDInfoRecords in subnet are obtained + +* MultiPathRecord: + - Perform some compliant and noncompliant MultiPathRecord requests + - Validation is via status in responses and IB analyzer + +* PKeyTableRecord: + - Perform some compliant and noncompliant PKeyTableRecord queries + - Validation is via status in responses and IB analyzer + +* LinearForwardingTableRecord: + - Perform some compliant and noncompliant LinearForwardingTableRecord queries + - Validation is via status in responses and IB analyzer + +* Event Forwarding: Register for trap forwarding using reports + - Send a trap and wait for report + - Unregister non-existing + +* Trap 64/65 Flow: Register to Trap 64-65, create traps (by + disconnecting/connecting ports) and wait for report, then 
unregister. + +* Stress Test: send PortInfoRecord queries, both single and RMPP and + check for the rate of responses as well as their validity. + + +5.2 IB Management Simulator OpenSM Test Flows: + +The simulator provides ability to simulate the SM handling of virtual +topologies that are not limited to actual lab equipment availability. +OpenSM was simulated to bring up clusters of up to 10,000 nodes. Daily +regressions use smaller (16 and 128 node) clusters. + +The following test flows are run on the IB management simulator: + +* Stability: + Up to 12 links from the fabric are randomly selected to drop packets + at drop rates up to 90%. The SM is required to succeed in bringing the + fabric up. The resulting routing is verified to be correct as well. + +* LID Manager: + Using LMC = 2 the fabric is initialized with LIDs. Faults such as + zero LID, Duplicated LID, non-aligned (to LMC) LIDs are + randomly assigned to various nodes and other errors are randomly + output to the guid2lid cache file. The SM sweep is run 5 times and + after each iteration a complete verification is made to ensure that all + LIDs that could possibly be maintained are kept, as well as that all nodes + were assigned a legal LID range. + +* Multicast Routing: + Nodes randomly join the 0xc000 group and eventually the + resulting routing is verified for completeness and adherence to + Up/Down routing rules. + +* osmtest: + The complete osmtest flow as described in the previous section is run on + the simulated fabrics. + +* Stress Test: + This flow merges fabric, LID and stability issues with continuous + PathRecord, ServiceRecord and Multicast Join/Leave activity to + stress the SM/SA during continuous sweeps. InformInfo Set/Delete/Get + were added to the test such that both existing and non existing nodes + perform them in random order. + +5.3 OpenSM Regression + +Using a back-to-back or single switch connection, the following set of +tests is run nightly on the stacks described in table 2.
The included +tests are: + +* Stress Testing: Flood the SA with queries from multiple channel + adapters to check the robustness of the entire stack up to the SA. + +* Dynamic Changes: Dynamic Topology changes, through randomly + dropping SMP packets, used to test OpenSM adaptation to an unstable + network & verify DB correctness. + +* Trap Injection: This flow injects traps to the SM and verifies that it + handles them gracefully. + +* SA Query Test: This test exhaustively checks the SA responses to all + possible single component mask. To do that the test examines the + entire set of records the SA can provide, classifies them by their + field values and then selects every field (using component mask and a + value) and verifies that the response matches the expected set of records. + A random selection using multiple component mask bits is also performed. + +5.4 Cluster testing: + +Cluster testing is usually run before a distribution release. It +involves real hardware setups of 16 to 32 nodes (or more if a beta site +is available). Each test is validated by running all-to-all ping through the IB +interface. The test procedure includes: + +* Cluster bringup + +* Hand-off between 2 or 3 SM's while performing: + - Node reboots + - Switch power cycles (disconnecting the SM's) + +* Unresponsive port detection and recovery + +* osmtest from multiple nodes + +* Trap injection and recovery + + +6 Qualified Software Stacks and Devices +--------------------------------------- + +OpenSM Compatibility +-------------------- +Note that OpenSM version 3.2.1 and earlier used a value of 1 in host +byte order for the default SM_Key, so there is a compatibility issue +with these earlier versions of OpenSM when the 3.2.2 or later version +is running on a little endian machine. This affects SM handover as well +as SA queries (saquery tool in infiniband-diags). 
+ + +Table 2 - Qualified IB Stacks +============================= + +Stack | Version +-----------------------------------------|-------------------------- +OFED | 1.4 +OFED | 1.3 +OFED | 1.2 +OFED | 1.1 +OFED | 1.0 +OpenIB Gen2 (IBG2 distribution) | 1.0 +OpenIB Gen1 (IBGD distribution) | 1.8.0 +VAPI (Mellanox InfiniBand HCA Driver) | 3.2 and later + +Table 3 - Qualified Devices and Corresponding Firmware +====================================================== + +Mellanox +Device | FW versions +------------------------------------|------------------------------- +InfiniScale | fw-43132 5.2.000 (and later) +InfiniScale III | fw-47396 0.5.000 (and later) +InfiniScale IV | fw-48436 7.1.000 (and later) +InfiniHost | fw-23108 3.5.000 (and later) +InfiniHost III Lx | fw-25204 1.2.000 (and later) +InfiniHost III Ex (InfiniHost Mode) | fw-25208 4.8.200 (and later) +InfiniHost III Ex (MemFree Mode) | fw-25218 5.3.000 (and later) +ConnectX IB | fw-25408 2.3.000 (and later) + +QLogic/PathScale +Device | Note +--------|----------------------------------------------------------- +iPath | QHT6040 (PathScale InfiniPath HT-460) +iPath | QHT6140 (PathScale InfiniPath HT-465) +iPath | QLE6140 (PathScale InfiniPath PE-880) +iPath | QLE7240 +iPath | QLE7280 + +Note 1: OpenSM does not run on an IBM Galaxy (eHCA) as it does not expose +QP0 and QP1. However, it does support it as a device on the subnet. + +Note 2: QoS firmware and Mellanox devices + +HCAs: QoS supported by ConnectX. QoS-enabled FW release is 2_5_000 and +later. + +Switches: QoS supported by InfiniScale III +Any InfiniScale III FW that is supported by OpenSM supports QoS. 
diff --git a/doc/opensm_release_notes-3.3.txt b/doc/opensm_release_notes-3.3.txt new file mode 100644 index 0000000..e494f61 --- /dev/null +++ b/doc/opensm_release_notes-3.3.txt @@ -0,0 +1,2077 @@ + OpenSM Release Notes 3.3 + ============================= + +Version: OpenSM 3.3.x +Repo: git://github.com/linux-rdma/opensm +Date: April 2019 + +1 Overview +---------- +This document describes the contents of the OpenSM 3.3 release. +OpenSM is an InfiniBand compliant Subnet Manager and Administration, +and runs on top of OpenIB. The OpenSM version for this release +is opensm-3.3.23. + +This document includes the following sections: +1 This Overview section (describing new features and software + dependencies) +2 Known Issues And Limitations +3 Unsupported IB compliance statements +4 Bug Fixes +5 Main Verification Flows +6 Qualified Software Stacks and Devices + +1.1 Major New Features + +* Support for HDR link speed and 2x link width + IBTA has recently (beyond IBA 1.3) added support for HDR link speed + and 2x link width. + +* Nue routing algorithm. + The Nue routing is a novel topology-agnostic routing approach which + implicitly avoids deadlocks during the path calculation instead of + solving both problems separately. Nue routing heuristically optimizes + the load balancing, i.e., the number of routes per link, while + enforcing deadlock-freedom without exceeding a given number of + virtual lanes (VLs). Our Nue implementation for the InfiniBand subnet + manager supports any number of virtual lanes. + +* Support for ignoring throttled links with DFSSSP. + Throttled links, either because they are faulty or throttled intentionally, + aren't good for the achievable performance of an HPC system. + During the discovery process, (DF)SSSP analyzes the theoretical maximum + enabled link speed, which both ends of a peer link support and compares it + to the actual link speed. If these numbers don't match, then the link is + ignored as potential path for routes between nodes. 
A similar comparison + is done for link width. + +* Support for long transaction timeout for SM class transactions. + Currently, it is only used for optimized SL2VLMappingTable operations. + Option is long_transaction_timeout with default of 500 msec. + +* Mesh Analysis for LASH routing algorithm. + The performance of LASH can be improved by preconditioning the mesh in + cases where there are multiple links connecting switches and also in + cases where the switches are not cabled consistently. + Activated with --do_mesh_analysis command line and config file option. + +* Reloadable OpenSM configuration (preliminary implementation) + It is possible now to reload OpenSM configuration parameters on the + fly without restarting. + +* Routing paths sorted balancing (for UpDown and MinHops) + This sorts the port order in which routing paths balancing is performed + by OpenSM. Helps to improve performance dramatically (40-50%) for most + popular application communication patterns. + To overwrite this behavior use --guid_routing_order_file command line + option. + +* Weighted Lid Matrices calculation (for UpDown, MinHop and DOR). + This low level routing fine-tuning feature provides the means to + define a weighting factor per port for customizing the least weight + hops for the routing. Custom weights are provided using file specified + with '--hop_weights_file' command line option. + +* I/O nodes connectivity (for FatTree). + This provides possibility to define the set of I/O nodes for the + Fat-Tree routing algorithm. I/O nodes are non-CN nodes allowed to use + up to N (specified using --max_reverse_hops) switches the wrong way + around to improve connectivity. I/O nodes list is provided using file + and --io_guid_file command line option. + +* MGID to MLID compression - infrastructure for many MGIDs to single MLID + compression. This becomes helpful when number of multicast groups + exceeds subnet's MLID routing capability (normally 1024 groups). 
In such + cases many multicast groups (MGID) can be routed using the same MLID value. + +* Torus-2QoS unicast routing algorithm - a DOR-based routing algorithm + specialized for 2D/3D torus topologies. Torus-2QoS provides deadlock-free + routing while supporting two quality of service (QoS) levels. In addition + it is able to route around multiple failed fabric links or a single failed + fabric switch without introducing deadlocks, and without changing path SL + values granted before the failure. + +* DNUP Unicast routing algorithm - similar to UPDN but allows routing in + fabrics which have some CA nodes attached closer to the roots than some + switch nodes. + +* SSSP Unicast routing algorithm - a + single-source-shortest-path routing algorithm, which globally balances the + number of routes per link to optimize link utilization. This routing + algorithm has no restrictions in terms of the underlying topology. + +* DFSSSP unicast routing algorithm - a deadlock-free single-source- + shortest-path routing, which uses the SSSP algorithm as the base to optimize + link utilization and uses InfiniBand virtual lanes (SL) to provide deadlock- + freedom. + +* SRIOV (Alias GUID) Support - In order to support virtualized environments, + alias GUID support is added to OpenSM. This support allows an SA client to + add and remove additional port GUIDs based on SubnAdmSet/SubnAdmDelete of + GUIDInfoRecord. + +* Extended speed support + This provides support for FDR and EDR speeds. + +* Congestion control support (experimental) + +* Many code improvements, optimizations and cleanups. + +* Windows support (early stage).
+ +1.2 Minor New Features: + +924f030 Add support for registering an opensm plugin as a new routing engine +82c3ea6 Add '--subnet_prefix' and '--dump_files_dir' options +dfc383e osm_port_info_rcv.c: Optimize PKEY sending during heavy sweep +6abbbcd osm_subnet.c: Add latest Bull device IDs to device white lists +6a24bcd osm_subnet.c: Add additional device ID to is_mlnx_ext_port_info_supported +0ab8d12 Add force_link_width option +3f28045 Handle other 2x and/or HDR not supported SA rate cases +5b56f06 osm_subnet.c: Add additional device IDs to is_mlnx_ext_port_info_supported +f92b33c Add support for additional Mellanox OUIs +b8e2a5b Add option and support for only using the original extended SA rates +deb194f osm_subnet.c: Add additional ConnectX-5 device ID to is_mlnx_ext_port_info_supported +e4e95b3 dfsssp: Replace the internal heap implementation +0dc7b6d Add support for send only full member multicast joins +f8344d0 osm_[base.h helper.c]: Add new Mellanox OUI 0xec0d9a +48457af Add initial policy for long transaction timeout +0b4e9a6 Add timeout parameter for SM class get transactions +2acf61f ib_types.h: Add new rates to return values for ib_[multi]path_rec_rate +f48d4c7 ib_types.h: Add CapabiltyMask2 bit definition for CPI CapabilityMask +012ff1d osm_helper.c: Add support for dumping PortInfo:CapabilityMask2 +b937054 ib_types.h: Add additional PortInfo:CapabilityMask2 definitions +ab8b977 [ib_types.h, osm_helper.c]: Change IB_PORT_CAP_RESV13 to IB_PORT_CAP_HAS_CABLE_INFO +771c101 ib_types.h: Add IsPMKeySupported ClassPortInfo CapabilityMask2 bit +ab0671c ib_types.h: Add additional optional counters to PortCountersExtended +abe9803 ib_types.h: Add optional QP1Dropped counter to PortCounters attribute +566222b osm_subnet.c: Add Connect-X5 support to is_mlnx_ext_port_info_supported +f85c48f Add Bull device IDs to device white lists +6531f0b osm_subnet.c: Add support for Bull device ID to is_mlnx_ext_port_info_supported +f30ebc4 osm_[helper.c base.h]: Add support 
for Bull OUI +fbe5107 osm_subnet.c: Add support for Switch-IB2 in is_mlnx_ext_port_info_supported +6eb8b96 ftree: Additional ftree indexing algorithm +c1f7ffd Support configuration for index 0 in pkey table +1b73efd osm_sa_mcmember_record.c: Conditionalize multicast join parameter +validation on new mcgroup_join_validation option +4cd7ec3 osm_sa_mcmember_record.c: Validate IPoIB non broadcast group +parameters on group creation +fd754d2 Support another new MLNX OUI +66301d8 osm_subnet.c: Add more supported device IDs +948e2b1 Add separate dispatcher for SA set and delete requests +672de65 perfmgr: Add xmit_wait to event plugin error counters data structure +d86e7e3 perfmgr: Add xmit_wait support +7e6bdef opensm/man/opensm.8.in: Add section for MKey support +aebe678 opensm: Add support for multicast service records +5509234 opensm/scripts/sldd.sh: Update to support guid2mkey/neighbors +2ae1477 opensm: Ensure sweep interval/mkey lease are sensibly set +cefe79b opensm: Check for valid mkey protection level in config file +8fa0d2c opensm: Add neighboring link cache file +5088d08 opensm: Log errors on SubnGet timeouts +9eed9c6 opensm: Add support for setting mkey protection levels +5c4157d opensm: Add locking where necessary around osm_req_* +2f74f34 opensm: Allow recovery of subnets with misset mkeys +e5dc557 opensm: Add guid2mkey cache file support +3659b37 opensm/osm_sa_class_port_info.c: Indicate support for PortInfo CapMask2 + matching in SA ClassPortInfo:CapabilityMask2 +8016d3b opensm/osm_base.h: Add some SA ClassPortInfo CapabilityMask2 bits +1a31c44 opensm/osm_perfmgr.c: Use non conflicting error codes in log messages +03a75d0 opensm/osm_sa_path_record.c: Restore osm_get_path_params functionality +a9340cf opensm: Support (null) being specified for per_module_logging_file + option +00375aa opensm/osm_perfmgr.c: Eliminate compile warning +7868c98 opensm: Remove unused per_module_logging option +49c460b Call drop manager before checking for other Master SM in the 
fabric +c0604f3 Increase p_port->discovery_count only when received PortInfo for port 0 + of the switch +24b30d2 opensm/osm_node_info_rcv.c: Handle non-compliant SMA gracefully +a4f2689 opensm/osm_vendor_ibumad: Add management class into match criteria +4be6375 opensm/osm_sa*.c: Log requester port GUID at DEBUG level +4cca51d opensm/osm_sa_mcmember_record.c: Log requester port GUID at DEBUG level +15b3eae opensm/osm_sa_path_record.c: Log requester port GUID at DEBUG level +0b580ca opensm/osm_sa_path_record.c: Add debug logging to + pr_match_mgrp_attributes +ed4b7fb opensm/osm_sa_mcmember_record.c: In mcmr_rcv_join_mgrp, add MGID to log + message +872dae4 opensm/osm_sa_mcmember_record.c: Dump MCMemberRecord in mcmr_query_mgrp +71f2ce7 Sending SL2VL and VLARB SET MADs in distributed manner +f07bcc1 opensm/osm_subnet.c: Cosmetic formatting change +c823a5b opensm/osm_link_mgr.c: Set PortInfo:PortState to LinkDown when remote + port isn't accessible +ab88df6 Add support to reread configuration file when stacked in rediscovery + loop. +67c9bae opensm: Move per_mod_log_tbl array from subn to log structure +2cbd9f5 opensm/cl_atomic_osd.h: Cosmetic formatting change +cd63dec opensm/osm_helper.c: Add CapabilityMask2 to notice dump for trap 144 +9205812 opensm/ib_types.h: Add CapabilityMask2 to notice for trap 144 +25de706 opensm/osm_sa_path_record.c: Add missing end-of-line in the log message +3551530 opensm/osm_trap_rcv.c: Remove vestigial comment +067d217 opensm/osm_inform.c: Make log message format consistent for error + messages +759b82a opensm/osm_trap_rcv.c: Add better logging for traps 257 and 258 +b806657 opensm/osm_sa_mad_ctrl.c: Eliminate commented out code line +a851693 opensm/perfmgr: add logging of error counters +52fa659 opensm/console: add perfmgr "print_errors" (pe) console command. 
+a832ce2 opensm/console: Add human readable output for perfmgr data counters +77a1756 opensm/console: add abreviations for perfmgr commands +aca9a07 opensm/console; add port option to perfmgr print_counters +00628cc opensm/console: add "print all" to print_counters console command +f5de9b5 opensm: perfmgr mark inactive nodes in perfmgr db +ff06340 opensm: perfmgr delete "inactive" nodes from the DB +059d8f4 opensm/console: protect against 0 entered for the perfmgr sweep_time +cc86607 opensm/perfmgr: Add config option to ignore Channel Adapters. +91f0c00 opensm/osm_node_info_rcv.c: In ni_rcv_process_existing_ca_or_router, + handle error +c930a23 opensm/osm_subnet.c: Indicate lmc and lmc_esp0 are not changeable + "on the fly" +7825e67 opensm/libopensm.map: Removed unimplemented routine +7de7b04 opensm/main.c: Handle daemon mode with guid specified as 0 more + gracefully +9478fbf opensm/osm_subnet.c: Support MLNX ExtendedPortInfo for ConnectIB device +f8fc334 opensm/osm_node_info_rec.c: Also handle non compliant SMA in + ni_rcv_process_existing +b4a481d opensm: Eliminate unneeded field in DR path structure +01bc8c9 opensm/osm_state_mgr.c: Force subn->need_update when coming out of + STANDBY +276be8b opensm: Dump info functions update for per module logging +64b512a opensm/osm_vendor_ibumad.c: Make binding log message clearer +e70c8c1 opensm: Add enum for FILE_ID for per module logging +2a2db8c opensm: Add per module logging support +e21b106 opensm: Cosmetic changes +3ddb2e3 opensm: Add partition manager configuration doc to docs +9586649 opensm/opensm_release_notes-3.3.txt: Update Unsupported IB Compliance + Statements +826b5c4 opensm/osm_ucast_dfsssp.c: Use osm_log_is_active +b2cad9d opensm/complib/cl_fleximap.h: Cosmetic changes +322a310 opensm/osm_ucast_ftree.c: Add a couple of asserts +e3a946d opensm: Add FDR10 support +6cea3df opensm/osm_sa_portinfo_record.c: Add SA PortInfoRecord support + for CapabilityMask2 matching +9ac7eeb opensm: Add infrastructure support 
for CapabilityMask2 field in + PortInfo +8bc7c30 opensm: make loopback console compile on by default. +566b462 opensm/ib_types.h: Update SA PortInfoRecord component masks +480de4a opensm: Use forward extensible and safer way to compare mkey_lmc + field in PortInfo attribute +22ca966 opensm: enable perfmgr build by default +4250c59 opensm: perfmgr only run sweep timer when enabled +1810672 Support scatter ports +dd21107 Support port shifting +1c2a298 OpenSM torus routing order list +b92d21f opensm: Create all directories in database path on Windows +83b6752 opensm/osm_subnet.c: In osm_subn_destroy, delete service records +0fc8124 opensm: Add OSM_VENDOR_ID_OPENIB support +c0d8b56 opensm/osm_sa_node_record.c: In nr_rcv_create_nr, only set some + variables when needed +9acaba0 opensm/osm_helper.c: Add some missing fields to + osm_dump_portinfo_record +8a43aea Allow comma in plugin names parsing +cdf227c opensm: Proper mfttop initialization when starting/restarting +cde0c0d opensm: Convert remaining helper routines for GID printing format +bc5743c opensm: Add support for MaxCreditHint and LinkRoundTripLatency to + osm_dump_port_info +6cd34ab opensm: Add Dell to known vendor list +003d6bd opensm: Add more info for traps 144 and 256-259 in osm_dump_notice +5b0c5de opensm/osm_ucat_ftree.c Enhance min hops counters usage +0715b92 ib_types.h: Add ib_switch_info_get_state_opt_sl2vlmapping routine +2ddba79 opensm: Remove some __ and __osm_ prefixes +ea0691f opensm/iba/ib_types.h: Add PortXmit/RcvDataSL PerfMgt attributes +9c79be5 ib_types.h: Adding BKEY violation trap (259) +c608ea6 opensm: Add and utilize ib_gid_is_notzero routine +b639e64 opensm: Handle trap repress on trap 144 generation +b034205 Add pkey table support to osm_get_all_port_attr +876605b opensm/ib_types.h: Add attribute ID for PortCountersExtended +aae3bbc opensm: PortInfo requests for discovered switches +0147b09 opensm/osm_lid_mgr: use single array for used_lids +a9225b0 opensm/Makefile.am: remove 
osm_build_id.h junk file generation +8e3a57d opensm/osm_console.c: Add list of SMs to status command +3d664b9 opensm/osm_console.c : Added dump_portguid function to console to + generate a list of port guids matching one or more regexps +85b35bc opensm/osm_helper.c: print port number as decimal +8674cb7 opensm: sort port order for routing by switch loads +80c0d48 opensm: rescan config file even in standby +8b7aa5e opensm/osm_subnet.c enable log_max_size opt update +8558ee5 opensm/include/iba/ib_types.h: Add xmit_wait for PortCounters +ecde2f7 opensm/osm_subnet.c support subnet configuration rescan and update +58c45e4 opensm/osm_log.c save log_max_size in subnet opt in MB +cf88e93 opensm: Add new partition keyword for all hca, switches and routers +4bfd4e0 opensm: remove libibcommon build dependencies +3718fc4 opensm/event_plugin: link opensm with -rdynamic flag +587ce14 opensm/osm_inform.c report IB traps to plugin +ced5a6e opensm/opensm/osm_console.c: move reporting of plugins to "status" + command. 
+696aca2 opensm: Add configurable retries for transactions +0d932ff opensm/osm_sa_mcmember_record.c: optimization in zero mgid comparison +254c2ef opensm/osm_sm_mad_ctrl.c: In sm_mad_ctrl_send_err_cb, set init + failure on PKeyTable and QoS initialization failure +83bd10a opensm: Reduce heap consumption by multicast routing tables (MFTs) +cd33bc5 opensm: Add some additional HP vendor IDs/OUIs +f78ec3a opensm/osm_mcast_tbl.(h c): Make max_mlid_ho be maximum MLID configured +2d13530 opensm: Add infrastructure support for PortInfo + IsMulticastPkeyTrapSuppressionSupported +3ace760 opensm: Reduce heap consumption by unicast routing tables (LFTs) +eec568e osmtest: Add SA get PathRecord stress test +aabc476 opensm: Add infrastructure support for more newly allocated PortInfo + CapabilityMask bits +c83c331 opensm: improve multicast re-routing requests processing +46db92f opensm: Parallelize (Stripe) MFT sets across switches +00c6a6e opensm: Parallelize (Stripe) LFT sets across switches +e21c651 opensm/osm_base.h: Add new SA ClassPortInfo:CapabilityMask2 bit + allocations +09056b1 opensm/ib_types.h: Add CounterSelect2 field to PortCounters attribute +6a63003 opensm: Add ability to configure SMSL +25f071f opensm/lash: Set minimum VL for LASH to use +622d853 opensm/osm_ucast_ftree.cd: Added support for same level links +8146ba7 opensm: Add new Sun vendor ID +1d7dd18 opensm/osm_ucast_ftree.c: Enhanced Fat-Tree algorithm +e07a2f1 Add LMC support to DOR routing +1acfe8a opensm: Add SuperMicro to list of recognized vendors +f02f40e opensm: implement 'connect_roots' option in fat-tree routing +748d41e opensm SA DB dump/restore: added option to dump SA DB on every sweep +b03a95e complib/cl_fleximap: add cl_fmap_match() function +b7a8a87 opensm/include/iba/ib_types.h: adding Congestion Control definitions +fa356f8 opensm: Add support for optimized SLtoVLMappingTable programming +8aaae91 Dimension port order file support +7662eec opensm: Add option to specify prefix to syslog 
messages +2382cf3 opensm: Add update_desc command to opensm console +7cbe193 opensm: toggle sweeping V3 +6f61d8f opensm/osmeventplugin: added new events to monitor SM +84cf603 opensm/main.c: force stdout to be line-buffered +b3bb0ab opensm/osm_dump.c: Dump SL2VL tables if routing engine might have + modified them +8a08719 opensm/osm_dump.c: dump SL2VL tables in debug verbosity level when + QoS is on +fc908c9 opensm/osm_sa_multipath_record.c: Add mtu validation if supplied +687e1f8 opensm/osm_sa_mcmember_record.c: Add mtu validation if supplied +76f5b09 opensm/osm_sa_path_record.c: Add mtu validation if supplied +9f38fae opensm/osm_helper: Add ib_mtu_is_valid +655230b opensm/osm_sa_multipath_record.c: Add rate validation if supplied +cb1484d opensm/osm_sa_mcmember_record.c: Add rate validation if supplied +485d068 opensm/osm_sa_path_record.c: Add rate validation if supplied +300f4d9 opensm/osm_helper: Add ib_rate_is_valid +9b50961 opensm: Change osm_routing_engine struct to not use C++ reserved word +c9c0aa6 opensm/man/torus-2QoS.conf.5.in: Update portgroup_max_ports section +3c97f06 opensm: Add the precreation of multicast groups +a9b9f09 opensm/osm_sminfo_rcv.c: Handle SMP status +1190c15 opensm/osm_switch.c: In osm_switch_set_hops, return, error when port_num is invalid +3d149db Changed sl_path API to include slid and dlid only +6cfb0eb Optimized and deadlock-free routing algorithm for InfiniBand +45f93ec opensm: Add additional IBM vendor ID/OUI +c386eb2 opensm/osm_state_mgr.c: Cosmetic change to log message +703e596 opensm: Add support for partition enforcement types to accomodate IBA extended link speeds +a2a03a8 Check block_num validity in set_guidinfo() and del_guidinfo() requests +e6ec61f end error resoponse to invalid LID in GUIDInfo request +7fce500 opensm/Makefile.am: Add doc/opensm-sriov.txt to docs +264aeb1 opensm: Add documentation for SRIOV support +c639832 opensm: Enhance osm_physp_share_this_pkey for allow_both_pkeys policy +b17b63c opensm: When 
allowing both pkeys, on a switch external, (peer) port eliminate limited pkey when full pkey with same base is present +a758da2 opensm: Add command line option for allow_both_pkeys +f412de3 opensm: Update partition documentation and man page for (allowing) both (limited and full) memberships in the same partition +726ce6a Support allowing both full and limited members of same partition +4ccf32f opensm/PKeyMgr: Support pkey index reuse when there are no longer any previously unused indices available +eb375a6 opensm/osm_pkey_mgr.c: Detect pkey table overflow in pkey_mgr_update_port +411e742 opensm/PkeyMgr: Don't change end port pkey index when simultaneously adding and removing partitions +15e7223 opensm/osm_sa_guidinfo_record.c: In set_guidinfo, better SM reassigned guid handing +e79b725 opensm: Handle SubnSet GUIDInfo asynchronously from GUIDInfoRecord handling +96c741d opensm: Some cosmetic formatting changes +1d5e370 opensm/osm_sa_guidinfo_record.c: Better status for SA response +efd3ba2 opensm/osm_sa.c: Change log level of message +db8b7da opensm/osm_sa_service_record.c: Alias GUID support +5330986 opensm/osm_sa_multipath_record.c: Add support for alias GUIDs +44168c9 opensm/osm_sa_guidinfo_record.c: In del_guidinfo, validate guid not in use +63eb65b opensm: Add multicast support for alias GUIDs +700d15f opensm/osm_sa_path_record.c: Add support for alias GUIDs +f818387 opensm/osm_sa_guidinfo_record.c: Use OSM_VENDOR_ID_OPENIB define rather than IB_OPENIB_OUI +97e360e opensm: Dump/load SA GUIDInfoRecords +fe74f1d opensm: Make SA assigned guids persistent across port down/up events +eb8f1d9 opensm: Add support for alias GUIDs +b3b1861 opensm: osm_subnet.c: Updated patch to add error-reporting to the parsing of opensm.conf +cd8a708 opensm/man/opensm.8.in: Add description for OSM_LOG_SYS logging flag +1308e5c opensm/osm_console.c: Add display of FDR10 ports to portstatus_parse +f4722b0 opensm: Reset client reregistration when receiving handover +5fdb0b9 
opensm/configure.in: Remove Default-Start from opensmd init script +b966100 opensm/osm_sm_state_mgr.c: Start sweep immedeately when recieving HANDOVER in DISCOVERING state +4349c07 opensm: Add physp_p discovery count support +d706cbf opensm/osm_sm_mad_ctrl.c: Upon receiving trap repress we should decrease qp0_mads_outstanding_on_wire +e1e4706 opensm: Revert "opensm/osm_ucast_ftree: When roots are not connected, update hop count but not lft" +7a49366 opensm: Changed #if to #ifdef when using ENABLE_OSM_PERF_MGR_PROFILE +34af1a5 opensm/osm_torus.c: torus routing should fail with VLCap 1 on switch external ports +2b8940b opensm/osm_torus.c: In dump_torus, make sure switch is present before dumping +2364e8e opensm: Add better error output when parsing node name maps +7204287 opensm/osm_sa_mcmember_record.c: Only use scope from MC group in copy_from_create_mc_rec +4eb0f69 opensm/osm_dump.c: Remove incorrect assert +1ddb44b OpenSM: dfsssp - moved paths from one to another VL might be counted multiple times +a0a9eea OpenSM: DFSSSP does not find LIDs due to wrong byte order (v2) +13fd952 OpenSM: dfsssp - avoid unnecessary nested loop in vltable_print for OSM_LOG_INFO +1cbed64 OpenSM: dfsssp - change the port traversal for sssp +c2f0bb9 OpenSM: dfsssp - add support for base/enhanced switch port +64873ff OpenSM: dfsssp ignores differences in the lmc value +162493e opensm/osm_req.c: In req_determine_mkey, use osm_physp APIs +fc53bdc opensm/osm_torus.c: Dump torus when OSM_LOG_ROUTING specified +434afa6 opensm/torus: Add configuration for max_changes to report +35bcddf opensm/osm_torus.c: Consolidate some parsing with parse_unsigned +3f3d26f opensm/osm_prtn_config.c: Insert autogenerated pkey into MGID for IPoIB +d80ca1e opensm/osm_port_info_rcv.c: Don't modify subnet minimal values when PortState DOWN +f97a2e9 opensm/osm_ucast_ftree.c: Eliminate unneeded NULL pointer checks prior to calls to free +8a77f23 opensm/osm_ucast_ftree.c: Remove duplicate free in 
fabric_create_leaf_switch_array +338a67b opensm/osm_subnet.c: Improve error messages in subn_validate_neighbor +0a1c9bf opensm/osm_req.c: In req_determine_mkey, add more info when ERR 1107 occurs +b974ca3 opensm/osm_torus.c: Improve some misconfiguration error messages +e986d57 opensm/osm_torus.c: Minor simplification to check_qos_config +95b6f83 opensm: Update doc for changes to torus routing for, endport support +998fb43 opensm/osm_torus.c: Add copyright +268d327 opensm/osm_torus.c: Improve QoS configuration +d1d2de5 opensm: Update doc for changes to torus routing for CA, support +a362b5b opensm/osm_torus.c: Require only 2 data VLs supported (PortInfo.VLCap) and use VLs 0-1 on CA links +3f536ae opensm: Add routing specific update_vlarb hook routine +a2415f3 Add Per Module Logging support for Congestion Manager +a6d3694 opensm: clean up error message, function name is printed by logging code +2cf8078 opensm/osm_sa_mcmember_record.c: Return proper scope for query with valid SA key +c1a206e opensm: update performance manager documentation +f204e9c opensm: Protect against spurious wakeups when calling cl_event_wait_on +7db5c75 opensm/osm_subnet.c: Only parameters that marked with can_update flag should be updated during conf file rescan +282714a /etc/init.d/opensmd: Improve systemd integration +8b116d7 opensm/osm_trap_rcv.c: Eliminate unneeded trap_rcv_process_response routine +4008c2f opensm.spec.in: Improve portability +01ab744 Make it possible to enable opensm with chkconfig +c68ae63 Add command-line option --pidfile +ffb9a82 opensm/osm_torus.c: Check fabric minimum data VLs on switch external ports +d0258f3 opensm: Track minimum value in the fabric for data VLs supported on switch external ports +36159c2 opensm/osm_torus.c: Cosmetic formatting change +dba3f92 opensm/osm_helper.c: Add some missing new lines to log message output +94468b5 opensm/osm_ucast_updn.c: Add error codes to a couple of log messages +86a203b Correct option names in opensm man page +3482438 
opensm/osm_sa.h: Cosmetic commentary change +824e1f0 opensm/ib_types.h: Commentary and cosmetic formatting change +3711e8a opensm/complib/cl_spinlock.h: Remove some unimplemented routines +0c3f57f opensm/osm_torus.c: Add error code to error log message +8e6dfbd opensm: Add .gitignore +6248495 opensm/osm_port_info_rcv.c: use PF() hint on fatal conditions +dff5927 opensm/osm_port_info_rcv.c: check received local_port_num +b7b1505 opensm/libvendor/osm_vendor_ibumad.c: validate response MAD properties +c215eea opensm/libvendor/osm_vendor_ibumad.c:rename "mad" to "p_mad" to indicate pointer +e00f67b opensm/libvendor/osm_vendor_ibumad_sa.c: use wrapper function instead of direct access +ebc772c opensm/osm_node_info_rcv.c: using "PF" hint for all the fatal conditions +e2f06cd opensm/complib: define macros for for "if" statements with branch prediction hints +0e33c11 opensm/configure.in: check that compiler supports __builtin_expect() +2cb1854 OpenSM: Add new Mellanox OUI +8e5fc57 opensm: osm_pkey: Remove unused variables +e96a933 opensm: Manage ports that do not support congestion control +fbb74ca opensm: improve search common pkeys. 
+db34e30 opensm/osm_vendor_ibumad.c: Add management class to error log message +ba36f81 opensm/perfmgr/console: add 'pm sweep' +f118668 opensm/osm_sa: Improve SA record response handling (osm_sa_respond) +b09ac65 opensm/osm_sa_informinfo.c: Add trusted support for InformInfo/InformInfoRecord +5b2390e dfsssp: optimization for dedicated compute and IO nodes +33f4f17 Add new option for guid_routing_order_no_scatter +a5f54f1 osm_ucast_mgr.c: After applying guid_order options, add nodes sorted +by neighbor switch load +5a97c78 Try default partition config if parsing partitions.conf fails +cc5fa17 Add support for synchronizing in memory files with storage +9853841 Implement atomic update operation for sa_db_file +3e146fa Add option to disable M_Key lookup +51b187b Add flags to OSM_EVENT_ID_UCAST_ROUTING_DONE +c97bd7b Permit toggling log flush from console +8395084 OpenSM: dfsssp - add support for multicast +96eafa0 ftree: Allow defining only io_guids file and consider rest of nodes as +CN nodes +6cd6114 opensm: Add support for LFT changed event +b73c378 opensm/perfmgr; add support for PortCountersExtended NOIETF +70f5820 opensm/perfmgr: Issue PortCountersExtended query when supported + +1.3 Library API Changes + +2fd785f Deprecate complib_init() due to use of exit() function +399fe25 Eliminate circular dependencies in shared libraries +d7135b1 complib/cl_types.h: Remove unimplemented function cl_panic +04b0c34 complib: cl_vector_copy16/32/64 should be static functions +925e1ce Add timeout parameter for SM class set transactions +f60e241 osm_madw.h: Remove unused bind_info in osm_madw structure +aa63c3c complib: Add a d-ary heap +44a6358 libvendor/osm_vendor_ibumad.c: Support GRH (for GS classes) +de02954 osm_trap_rcv.c: Log DR path to node when trap 128 + +1.4 Software Dependencies + +OpenSM depends on the installation of libibumad package (distributed as +part of OFA IB management together with OpenSM) and IB stack presence, +in particular libibumad uses user_mad kernel 
interface ('ib_umad' kernel +module). The qualified driver versions are provided in Table 2, +"Qualified IB Stacks". + +Also, building the QoS manager policy file parser requires flex, and either +bison or byacc to be installed. + +1.5 Supported Devices Firmware + +The main task of OpenSM is to initialize InfiniBand devices. The +qualified devices and their corresponding firmware versions +are listed in Table 3. + +2 Known Issues And Limitations +------------------------------ + +* No Service / Key associations: + There is no way to manage Service access by Keys. + +* No SM to SM SMDB synchronization: + Puts the burden of re-registering services, multicast groups, and + inform-info on the client application (or IB access layer core). + +3 Unsupported IB Compliance Statements +-------------------------------------- +The following section lists all the IB compliance statements which +OpenSM does not support. Please refer to the IB specification for detailed +information regarding each compliance statement. + +* C14-22 (Authentication): + M_Key, M_KeyProtectBits and M_KeyLeasePeriod shall be set in one + SubnSet method. As a work-around, an OpenSM option is provided for + defining the protect bits. + +* C14-67 (Authentication): + On SubnGet(SMInfo) and SubnSet(SMInfo) - if M_Key is not zero then + the SM shall generate a SubnGetResp if the M_Key matches, or + silently drop the packet if M_Key does not match. + +* C15-0.1.23.4 (Authentication): + InformInfoRecords shall always be provided with the QPN set to 0, + except for the case of a trusted request, in which case the actual + subscriber QPN shall be returned. + +* o13-17.1.2 (Event-FWD): + If no permission to forward, the subscription should be removed and + no further forwarding should occur. + +* C14-44 (Initialization): + If the SM discovers that it is missing an M_Key to update CA/RT/SW, + it should notify the higher level.
+ +* C14-62.1.1.12 (Initialization): + PortInfo:M_Key - Set the M_Key to a node based random value. + +* C14-62.1.1.13 (Initialization): + PortInfo:M_KeyProtectBits - set according to an optional policy. + +* C14-62.1.1.24 (Initialization): + SwitchInfo:DefaultPort - should be configured for random FDB. + +* C14-62.1.1.32 (Initialization): + RandomForwardingTable should be configured. + +* o15-0.1.12 (Multicast): + If the JoinState is SendOnlyNonMember = 1 (only), then the endport + should join as sender only. + +* o15-0.1.8 (Multicast): + If a request for creating an MCG has fields that cannot be met, + return ERR_REQ_INVALID (currently ignores SL and FlowLabelTClass). + +* C15-0.1.8.6 (SA-Query): + Respond to SubnAdmGetTraceTable - this is an optional attribute. + +* C15-0.1.13 (Services): + Reject ServiceRecord create, modify or delete if the given + ServiceP_Key does not match the one included in the ServiceGID port + and the port that sent the request. + +* C15-0.1.14 (Services): + Provide means to associate service name and ServiceKeys.
+ +4 Bug Fixes +----------- + +4.1 Major Bug Fixes + +73f6461 osm_opensm.c: Fix seg fault in destroy_routing_engine +9a7fa48 osm_multicast.c: Fix potential crash in osm_mgrp_delete_port +af9fc7a osm_sa_mcmember_record.c: Use neighbor MTU rather than MTUCap in mgrp_request_is_realizable +9a4e79b dfsssp - detect and try to repair an odd network state +85f841c osm_ucast_[dfsssp ftree].c: Fix memory leak when io/cn guid file have duplicated guids +8980f12 osm_port_info_rcv.c: Fix min_ca_rate determination in pi_rcv_process_endport +a7db80a osm_lid_mgr.c: Clean up LID ranges that are beyond current LMC setting +677bc59 osm_ucast_updn.c: Add memory allocation failure handling in updn_build_lid_matrices +2f4713d osm_qos_policy.c: Partition SL independence from QoS matching rules +0d88e64 osm_state_mgr.c: opensm does not generate SA_DB_DUMPED event after heavy sweep +a686cec osm_ucast_mgr.c: Fix minhop tables miscalculation due to variable wraparound +aa7d21a Fix SM-Key management +f6af5af osm_port_info_rcv.c: Fix bug in portinfo set failure handling +62ea61a osm_state_mgr.c: Fix bug in handling of PortInfo Set timeout +34e81df Change precedence in pkey manager with indx0 feature +aa905e2 osm_pkey.[h c], osm_prtn.c: Create method to set pkey at indx0 +8a696eb osm_base.h: Fix OSM_CAP2_IS_PORT_INFO_CAPMASK2_MATCH_SUPPORTED +definition +70a364f opensm/osm_pkey_mgr.c: Fix pkey index wraparound/reuse +6e27bba osm_multicast.c: Dump sa after deleting well known mc group +df212a6 osm_multicast.c: Fix not dumping empty predefined mc groups +1007810 osm_sa_guidinfo_record.c: Fix memory leak in set_guidinfo +6272a56 osm_ucast_dfsssp.c: In dfsssp_do_mcast_routing, update adj_list first +44b4edc osm_ucast_mgr.c: LFT update breaks if IB_SMP_DATA_SIZE changes +19dbb67 osm_ucast_dfsssp.c: Fix dangling pointer when dfsssp used with ucast +cache +76bd8cd osm_sa_mcmember_record.c: Add error logging for mismatches to +validate_other_comp_fields +122cad3 osm_sa_mcmember_record.c: When joining 
MC group and MC group already +exists, validate other components if supplied +909976d Add more checks for physp validity +b37560b osm_mcast_mgr.c: Add check for physp validity to create_mgrp_switch_map +e4e449e osm_ucast_ftree.c: Fix wrong is_io assignment to the second HCA port +e9bbf44 osm_ucast_ftree.c: Fix wrong handling of dual port HCA +ccab3b8 osm_ucast_ftree.c: ftree calculates wrong number of compute nodes per +leaf +06ae3ad osm_sa.c: Fix osm_db_file_dump +f1ab545 osm_drop_mgr.c: Do not set SM port discovery count to 0 when switch +remote port is nonresponsive +cd12dcd osm_port_info_rcv.c: Fix calculation of minimum data VLs +cb20b11 osm_ucast_ftree.c: Fail routing if max rank is 0 and more than one +root +c633e05 osm_ucast_ftree.c: Mark HCA ports connected to unranked switches +f004819 osm_sa.c: Fix race condition when writing SA DB file +a64de2e osm_prtn_config.c: Fix wrong pkey table calculation in +allow_both_pkeys mode +0b6d8ea osm_sa_mcmember_record.c: Validate port's neighbor_mtu when joining MC +group +80ca401 osm_sa_path_record.c: When both [D S] GID and LID are supplied, +validate that underlying ports are the same +1fa8089 osm_sa_path_record.c: Handle path query by DGID and SLID +141293d osmtest/osmt_multicast.c: Fix MC join with unrealistic rate test +f471571 osm_subnet.c: Remove support for changing allow_both_pkeys on the fly +cd31ff6 osm_port_info_rcv.c: Sending MEPI(Get) to local switch ports on hop 0 +4c3d782 osm_state_mgr.c: Mark port as undiscovered when removed by drop_mgr +a1c4bc2 osm_console.c: Fix display of negative counters in console +a6c2bf8 Add cleanup of SA cache after handover +075ff44 osm_mcast_tbl.c: Wrong assert placement in osm_mcast_tbl_get_block +953b70d osm_opensm.c: Fix race condition between traps handling and SA +shutdown +5e50a3c osm_sa_mcmember_record.c: Extend P_Pkey validation on MCMemberRecord(Join) +3fcb121 osm_subnet.c: Improved m_key_lookup description in generated conf file +b4d20d7 osm_port_info_rcv.c: In 
pi_rcv_process_switch_port0, store port 0's +PortInfo before querying switch external ports +062d35d osm_port_info_rcv.c: In pi_rcv_process_switch_port0, determine mkey +899664d osm_ucast_mgr.c: Support diverse paths for LMC > 0 when scatter_ports +is enabled +6dc0b80 osm_opensm.c: Call cl_disp_shutdown for SA SET dispatcher +3199d34 osmeventplugin.c: Modify osm_event_plugin_t initialization +94c2d74 Revert "osm_ucast_mgr.c: Force unicast routing to fail when lft +allocations fail" patch +1d205bc osm_ucast_mgr.c: Select exit ports for lid offset > 0 using new_lft +2a32095 osm_ucast_cache.c: Rewrite p_sw->lft allocation in osm_ucast_cache_process +e2ee83e Use routing engine calculated LFT for SA queries +1413d64 osm_state_mgr.c: Remove new_lft buffers cleanup +f2b96de osm_ucast_mgr.c: Force unicast routing to fail when lft allocations fail +8ea4e06 osm_congestion_control.c: Reset cc_timeout_count when count threshold passed +0b7c80c osm_congestion_control.c: Consistently set cc_unavailable_flag +e590ebc osm_congestion_control.c: Add additional header status check in cc_rcv_mad +f130d5a osm_subnet.c: Correct output error with congestion control table entries +0a695ae osm_switch.c: Fix wrong assertion failed in osm_switch_get_lft_block() +5077198 osm_perfmgr.c: Reset physp_discovered before discovery +8d51ae5 osm_state_mgr.c: Avoid continuing discovery when SM port is +unresponsive +f017063 Better way to handle polling other MASTER SM +25e5ee5 complib/cl_dispatcher.c: Check registrations vector size when +searching for handlers +6b0fb4d osm_lid_mgr.c: Send client reregistration in case of SM LID change +5d231f8 osm_pkey_mgr.c: Use calloc instead of malloc in +pkey_mgr_process_physical_port +985859e Fix turning on first_time_master_sweep flag +4629e80 osm_mcast_mgr.c: Invalidate cache due to multicast routing errors +b6a1dd4 osm_state_mgr.c: Avoid ucast cache invalidation due to errors during +initialization +db69cd8 osm_perfmgr.c: Fix perfmgr sweep_state race +b42f11f 
osm_vendor_ibumad.c: Better match table eviction strategy +cc0d61b libvendor/osm_vendor_ibumad.c: Check the next CA if an error is +returned for the current one +a16349d osm_trap_rcv.c: Lock released without locking in some cases of trap +processing +6c6e4cc osm_sa_guidinfo_record.c: Fix crash when receiving AGUID SET/DELETE +with block_num equal to max_block +a8b23b7 osm_vl15intf.c: Fix potential NULL dereference in vl15_send_mad +e94d471 osm_torus.c: Fix torus crash when actual topology is not torus +a20cd5f osm_sa_service_record.c: Fix locking issue in osm_sr_rcv_process +127acf1 osm_sa_mcmember_record.c: Fix double locking in mcmr_rcv_join_mgrp +c86c30a All SA queries should validate the requester port under lock +80e1e59 osm_sminfo_rcv.c: Send trap 144 to a newly found MASTER SM when in +MASTER state +0189dea osm_sm_state_mgr.c: Fix opensm crash after handover +52c4a30 osm_sm_state_mgr.c: Reconfigure the fabric when receiving +HANDOVER/POLLING_TIMEOUT in MASTER state +26af3a9 Fix crash during handover +f775a1a osm_sa_path_record.c: Search for requester port should be under lock +24d6219 osm_sw_info_rcv.c: Fix sending PortInfo during lightsweep +12d2c8e osm_node_info_rcv.c: Fix multiple switch discovery during a sweep +ef22eb9 osm_qos.c: Fix possible seg fault +9dd8f45 dfsssp: avoid crash due to port initialisation errors +f6418cf osm_ucast_dfsssp.c: prevent double free error +1633550 Resend LFTs/VLArb/SL2VL MADs in case of error +90d19c2 osm_port_info_rcv.c: Reread pkeys from SP0 if switch rebooted during a +sweep +6e90421 Better handling of topology changes in the fabric +b33c3ba Change discovery order of switch data +395b5f0 Handle bad SMP status +a322f51 Skip TID 0 on 32 bit wraparound for SMP, SA and PerfMgt queries +cded9af Fix transaction id casting +3585f8b opensm: Fix crash found with ucast cache +74e12d9 opensm: fix part_enforce parameter parsing crash +647a98e Fixed crash in sm_state_mgr_send_master_sm_info_req() during fabric + merge +63ebd0d Fix 
crash in ucast cache when chain of switches connected back at once + to the fabric +3b21d6f Fix crash in ucast cache when ucast cache invalidates after updating + one of the switches +5654e22 Fix invalid error check, which lead to segfault +7bf7482 fix segfault corner case w/ updn routing and LMC > 0 +18990fa opensm: set IS_SM bit during opensm init +3551389 fix local port smlid in osm_send_trap144() +a6de48d opensm/osm_link_mgr.c initialize SMSL +82df467 opensm/osm_req.c: Shouldn't reveal port's MKey on Trap method +45ebff9 opensm/osm_console_io.h: Modify osm_console_exit so only the + connection is killed, not the socket +d10660a opensm/osm_req.c: In osm_send_trap144, set producer type according + to node type +8a2d2dd opensm/osm_node_info_rcv.c: create physp for the newly discovered + port of the known node +39b241f opensm/lid_mgr: fix duplicated lid assignment +b44c398 opensm: invalidate routing cache when entering master state +595f2e3 opensm: update LFTs when entering master +8406c65 opensm: fix port chooser +fa90512 opensm/osm_vendor_*_sa: fix incompatibility with QLogic SM +7ec9f7c opensm: discard multicast SA PR with wildcard DGID +5cdb53f opensm/osm_sa_node_record.c use comp mask to match by LID or GUID +55f9772 opensm: Return single PathRecord for SubnAdmGet with DGID/SGID wild + carded +5ec0b5f opensm: compress IPV6 SNM groups to use a single MLID +26e7e83 opensm/osm_lid_mgr: fix couple of duplicate LIDs bugs +fedc419 opensm: Multicast root switch calculation +6772fdb opensm: Fix sl2vl configuration +dd3470f complib/cl_timer.c: fixing cl_timer calculation +ae1bcdd opensm/osm_lid_mgr.c: Allow switch lids to be non LMC aligned +593547e Wrong handling of MC create and delete traps +4c822b0 opensm/osm_prtn.c: removing TopSpin hack +8214e2a opensm: Add support for SwitchInfo:MulticastFDBTop +1e544ba opensm: fixed memory leak in multicast spanning tree calculation +0456b3f Fixed multicast groups reconfiguration during heawy sweep +9ad844f Fix ucast cache 
crash, when switch doesn't have valid phys ports +07aa9fa opensm: fix crash in osm_ucast_mgr +076bd38 opensm/osm_ucast_cache.c: fix crash in ucast cache when switch with lid 0 dropped +9cbb8af opensm/osm_ucast_ftree.c: fix opensm segfault in osm_ucast_ftree.c +edd0552 opensm/osm_qos_policy.c: fix segmentation fault on + osm_qos_policy_match_rule_destroy (osm_qos_policy.c) +dec1109 DFSSSP: fix a memory leak in dfsssp_build_graph +d5fe528 opensm/osm_req.c: In req_determine_mkey, fix DR algorithm +a46b33e opensm: fix crash in DFSSSP routing engine on reroute +d2312af opensm/osm_link_mgr.c: Fix sending PortInfo(Set) with AM SMSupportExtendedSpeeds + bit set for switch base port 0 +06ae82c opensm/osm_link_mgr.c: Set AM SMSupportExtendedSpeeds bit if port + supports ExtPortInfo +b2cd2d7 opensm/osm_ucast_ftree.c: Fix unranked nodes bug in FTree +8384156 opensm/osm_torus.c: Fix crash in torus_update_osm_vlarb +0ff054a osm_trap_rcv.c: Removed unneeded lock when disabling port +e7c4ec3 osm_trap_rcv.c: Minimize time holding RW lock for SystemImageGUID +changed trap 145 +1418f8a osm_trap_rcv.c: In trap_rcv_process_request, change locking strategy +280a2ac osm_trap_rcv.c: fix locking in trap_rcv_process_request() +a45e311 osm_db_files.c: Minor improvement to fix in previous commit +86cf679666f49f6073c1ddf2b9ff644a41537a57 +86cf679 osm_db_files.c: Fix issue introduced in commit +aaa7b1e67ec6e5fc2a10accf46d538f9d47c6323 +ed7be1a Add client_rereg flag to Port Info context +413e4fa osm_trap_rcv.c: fix race condition during sweep +90a7960 osm_ucast_file.c: Fix crash when port is invalid in LFT file +a1ccf88 osm_sa_path_record.c: path_sl may return SL different from requested +SL +8f3f7d0 osm_sm_state_mgr.c: Fix race condition during +sm_state_mgr_send_master_sm_info_req +b1b1b37 Fix minhop population in fabric with duplicate lids +ed9de4c osm_mcast_mgr.c: Fix wrong comparison in mcast_mgr_subdivide() +c5d5faa Only rewrite db files during heavy sweep when there is a real change 
+9aa6dcd osm_drop_mgr.c: fix timeouts on Get Pkey from ext switch ports +361c014 Fix dropping node after setPkey mad +2585f58 Improve memory consumption of pkey manager by using cl_map for +accum_pkeys +754bd75 osm_opensm.c: When exiting, update SADB only in MASTER state +3cc0a3e Fix timeout handling for pkeyGet for sw port 0 +189a39d Fix possible use of lid 0 when sending set PortInfo after failure of +the first PortInfo set +6d1d1a4 Fix handling of get P_KeyTable on timeout +df385e8 osm_ucast_cache.c: Fix memory leak in ucast_cache +79ba877 osm_link_mgr.c: active_transition parameter in PortInfo(Set) context +may not be initialized +b442062 Resend trap 144 when detecting remote MASTER SM with lower priority +49ea151 osm_pkey.c: Fix find common pkey bug fix +e3aa9e8 dfsssp: send multicast forwarding tables to switches +4a39fb1 dfsssp: send multicast forwarding tables to switches +d5f6e25 osm_sa_mcmember_record.c: Fix incorrect comparison of IPv6 MGID when +searching for SNM MLID +908c524 osm_guid_mgr.c: Fix GUIDInfo SET function +e8bf592 Clean up event subscriptions if a port goes away +7c9afa0 Improve m_key lookup +807d9ed osm_trap_rcv.c: Fix locking in aging callback +eb294d5 osm_lid_mgr.c: Don't configure MTU and LMC for base SP0 +a47e5d0 complib/cl_event_wheel.c: Roundup timeout to nearest msec +2a8c474 complib: Fix memory leak in cl_thread_pool_destroy function +1960fbc osm_trap_rcv.c: Fix crash in babbling port feature +1872d11 osm_sa_sminfo_record.c: fix sminfo sa query returns all sminfo records +when filtering by LID in osm_smir_rcv_process +b92f25a osm_ucast_cache.c : Fix dereference null return value +21765ad When SM fails to load/parse root_guids file use MinHop heuristics +7c21c14 Fix shift pattern support in FTREE routing for native ftree topologies +35f4419 osm_ucast_ftree.c: fix dereferencing null variable +4d36d6d osm_ucast_ftree.c : fix dereferencing null variable +49f1a71 opensm: fix possible double free in osm_ucast_ftree.c +2695128 Fix 
fat-tree routing for CAs with more than 1 connected port +42558d6 osm_sm_state_mgr.c Don't clear IS_SM bit when changing state to +NOT_ACTIVE +21a5b5f DFSSSP - workaround for better VL balancing +b42ae68 osm_console_io.c Memory leak when closing console +bcf00cb osm_ucast_dfsssp.c: Fix memory leak in dfsssp_do_dijkstra_routing +f42a232 Fix segfault in osm_mgrp_delete_port() +1a94326 osm_sa_multipath_record.c Use aliasGUIDs when building responses +943a40a opensm: Fix Q_Key, TClass and limited keys parsing warnings in +partitions.conf +0fc6e8b osm_lid_mgr.c: Fix duplicate LID assignment after SM port down +becc27e osm_sa_mad_ctrl.c: Drop incoming SA queries when shutting down +dbeb7a7 Change LFT event to be per block/per switch rather than just per +switch +25c088a Setup SM port GUID in subnet object as soon as it is known +92c502a osm_sa_mcmember_record.c: On join and leave, validate subnet prefix in +port GID +c0602ed opensm/osm_port_info_rcv.c: Fix min_sw_data_vls calculation +2d8df36 opensm/osm_sw_info_rcv.c: Fix double release of lock in +osm_si_rcv_process +e7435ef opensm/osm_torus.c: avoid the possibility of following stale ->priv +pointers +904a555 opensm/perfmgr: fix access to shared sweep_state variable +1458263 opensm/perfmgr: don't clear data counters in PortCounters when +ExtendedPortCounters is supported +b6d0001 opensm/perfmgr: issue ClassPortInfo as first query to each port. 
+e1af1ce opensm/osm_console.c: Use ib_port_info_get_link_speed_ext_enabled +2ba9919 opensm/osm_console.c: Do not perform portstatus checks on down ports +5c45f60 opensm/osm_console.c: Support portstatus output for unenabled +width/speed +1947644 opensm/osm_qos_policy.c: fix memory leak when parsing policy file +9358164 opensm/osm_sa_multipath_record.c: Validate required components are +indicated +fbe0d02 opensm/osm_sa_multipath_record.c: Make sure either none or both +ServiceID parameters are supplied +f929ac1 opensm/osm_sa_path_record.c: Make sure either none or both ServiceID +parameters are supplied +43efbc2 opensm/osm_sa_multipath_record.c: Fix seg fault in +mpr_rcv_get_apm_paths +250802d opensm/osm_qos_policy.c: Fix source & destination GUID policy check +9f231a8 libvendor/osm_vendor_ibumad.c: fix mad validation in case of multipath +record response +2be4e20 opensm/perfmgr: update node name when Node Description is received +from node +f37bf3c opensm/perfmgr: skip data counters when only printing errors +8412591 opensm/perfmgr: mark/report time of last counter update + +4.2 Other Bug Fixes + +9b9ea72 Fix spelling mistake of "switches" +70722b4 ib_types: Drop packed attribute where unnecessary +9a7cc0d opensm.spec.in: Move COPYING back into doc +b2f10ec opensm.spec.in: Updated for move to github +8bf41a5 travis: Add patch check +2e1bd5e Add travis validation +393f665 osm_[port ucast_ftree].c: Remove unused static functions +3bcefae osm_ucast_ftree.c: Fix clang warning about empty loop +992687f osm_opensm.c: Fix use of enum as NULL pointer in osm_opensm_init_finish +1c57b7d libvendor/osm_vendor_ibumad.c: Fix type of array passed to umad_get_ca_portguids in libibumad +f5c21ce osmtest.c: Remove unused osmtest_get_node_rec routine +e3e49a9 osmtest/main.c: Fix show_usage declaration +ef67b6a osmtest: Add missing static keywords +2989b31 libvendor,osmtest: Use NULL instead of 0 in all places where it is used as a pointer +9b8bcc1 osmtest/main.c: Fix return type for 
getopt_long_only +8e6d76c osm_[congestion_control perfmgr].c: Fix signed vs unsigned comparison +476b822 osm_opensm.c: Fix static declaration +6d49a7e libvendor/osm_vendor_mlx_sim.c: In osmv_transport_init, fix memory leaks on error +e76b7ab libvendor/osm_vendor_mlx[_hca]_sim.c: Eliminate use of exit +d34622f gen_chlog.sh: Update script to use git describe rather than git cat-file tag +92a9c3e osm_[link_mgr trap_rcv].c: Check the return value of osm_get_port_by_guid +06b168a main.c: Remove NO_EFFECT code +20c135d osm_helper.c: Make "50" string proper fixed width in lsea_str_fixed_width +df22e54 osm_helper.c: Fix lsea_str_fixed_width OVERRUN issue +d289316 osm_trap_rcv.c: Fix missing log message when 10 traps are received from the same source within time window +ceef038 man/opensm.8.in: Quiet some man warnings +aae42c6 Fix various typos +8e6abe9 Fix typo in OSM_DEFAULT_TRAP_SUPPRESSION_TIMEOUT +6b71ec0 Fix a bunch of typos +7c2dd40 osm_opensm.c: No need to check context for default routing engine in destroy_routing_engines +7f98069 osm_opensm.c: destroy_routing_engines should destroy the default routing engine +a66a09c osm_subnet.c: Free per_module_logging_file in subn_opt_destroy +7c9521b osm_sa.c: Remove unneeded label in osm_sa_bind +e000a2e osm_resp.c: No need to swap DR [D/S]LIDs in resp_make_resp_smp +67799fa opensm/osm_service.h: Fix and add some comments +5da75d8 opensm/osm_mlnx_ext_port_info_rcv.c: Check the pointer of osm_sm_t before accessing it +406dc70 opensm/osm_mtree.h: Improve comment +7b06728 opensm/osm_remote_sm.c: Improve comment +d817750 opensm/osm_ucast_cache.h: Improve coding style and comments +717312e opensm/osm_ucast_mgr.h: Add comment for 'max_lid' field in osm_ucast_mgr structure +718c14b opensm/osm_multicast.h: Improve comments +f538e69 opensm/osm_sa_mad_ctrl.h: Improve comments +6173c97 opensm/osm_sm_mad_ctrl.h: Improve comments +2414219 opensm/osm_remote_sm.h: Improve comments +6f2cbdb opensm/osm_mtree.h: Improve comments +242f0a8 
opensm/osm_vl15intf.h: Minor update of comments +a989601 opensm/osm_node.h: Improve comments +519c2d1 opensm/osm_router.h: Improve comments +537cbb9 Delete unused header opensm/osm_attrib_req.h +688543e opensm/osm_path.h: Delete comments for non-existent struct field and function parameters +4974027 Revert complib/cl_event_wheel.c: Fix memory leak in event_wheel mechanism +f911f13 opensm/osm_base.h: Delete comments about non-existent "Base" class +fb9c811 opensm/osm_base.h: Delete unused header complib/cl_types.h +e316e0a complib/cl_event_wheel.c: Minor update to the sample test program +ae1c005 complib/cl_event_wheel.h: Improve comment documentation +1b1048b complib/cl_[dispatcher event_wheel].h: Fix commentary typo +c11b31c complib/cl_event_wheel.c: Handle malloc failure in cl_event_wheel_reg +7345086 complib/cl_event_wheel.h: Eliminate unneeded field in cl_event_wheel_reg_info_t +be122bc complib/cl_event_wheel.c: Some cosmetic changes +44afd73 complib/cl_event_wheel.c: Fix memory leak in event_wheel mechanism +12c24d1 complib/cl_dispatcher.h: Improve comments +b69dc33 complib/cl_dispatcher.c: Fix typo and delete one incorrect comment +c558360 include/opensm: Remove some redundant includes +169b144 include/complib: Delete documentation about 'p_nil' +2c6c6b4 complib/cl_debug.h: Cosmetic formatting changes to some macros +8a4172c complib/cl_heap.c: Remove redundant initialization statement +b42bea6 complib/cl_heap.h: Replace 'shift_' with 'heap_' in the DESCRIPTION section +5446e32 complib/cl_qcomppool.h: Improve max_objects comment +d429d5f include/complib: Fix comments and documentation +0877d07 complib/cl_ptr_vector.h: Fixed cut 'n paste error in cl_ptr_vector_remove NOTES +329cb4a complib/cl_types.h: Cosmetic formatting changes +250204d osm_db_files.c: Remove an obsolete GUID-length check on osm_db_restore +15c1acc osm_sa_mcmember_record.c: Change level of log messages for port GID subnet prefix mismatches in join/leave +2b9cf6f Use precision specifier for 
scanf +43270a6 Replace hard coded constants with defines for force_link_xxx options +901af61 osm_helper.h: Remove trailing whitespace in comment +a0aee8b Revert osm_db_files.c: Fix bad free in osm_db_delete +9dc1106 osm_prtn_config.c: Fix pointer dereference in verify_val +c9fcdfd osm_prtn_config.c: Cosmetic formatting change +024fe73 opensm.8.in: Emphasize that the fields of mgroup_flag must be split with "comma" +1f82c22 partition-config.txt: Emphasize that the fields of mgroup_flag must be split with "comma" +04d2a8b osm_prtn_config.c: parse_group_flag log suspicious group flag value +bfa7f34 osm_sm_mad_ctrl.c: Change [i r]path declarations in log_rcv_cb_error +a07b48a osm_console.c: Fix resource leak in dump_portguid_parse +9da76ca osm_qos_parser_y.y: Fix opensm crash when qos policy file is null +a16cdef osmtest.c: Close file before exit function osmtest_create_inventory_file +87848a2 osmtest.c: osmtest_parse_path fix resource leak in error path +a3e2286 osmtest.c: osmtest_parse_port fix resource leak in error path +f03f013 osmtest.c: osmtest_parse_node delete p_node in error path +b8048cd osmt_service.c: Fix resource leak in osmt_get_all_services_and_check_names +b10f46c osm_ucast_nue.c: Fix resource leak in nue_create_context +a7084c9 osm_qos_parser_y.y: Fix resource leak +92059c2 osm_port.c: Fix memory leak in osm_port_new +dcfeb90 main.c: Close fd before return to avoid resource leak +91f0e83 opensm.spec.in: Adjust BuildRequires for covscan +2ef378f osm_ucast_lash.c: Fix comment in init_lash_structures +94bfbdb osm_sa_mcmember_record.c: Fix use after free in mcmr_rcv_join_mgrp +257d6c0 osm_mesh.c: Improve one line of comment in make_geometry +4123382 osm_db_files.c: Fix bad free in osm_db_delete +16c35ba osm_db_files.c: Cosmetic change to comment in osm_db_update +2028be0 osm_sm_mad_ctrl.c: Reduce path buffer sizes to avoid format truncation in log_rcv_cb_error +9070d50 osm_prtn.c: Avoid potential no NUL-terminated strncpy in osm_prtn_new +1c586e9 
osm_console_io.c: Avoid potential no NUL-terminated strncpy in osm_console_init +f211bbc osm_sa_mcmember_record.c: Fix maybe uninitialized issue in mcmr_by_comp_mask +2f21624 Replace deprecated 'BSD_SOURCE' macro in Makefiles with GNU_SOURCE +a3a6876 Refactor common SA path record rate code into ib_path_rate_2x_hdr_fixups +7cbfac9 osm_sa_[multi]path_record.c: Add some comments for better code clarity +f096ee5 osm_sa_path_record.c: Cosmetic formatting change +16f441c osm_subnet.c: Remove redundant Bull device ID in is_mlnx_ext_port_info_supported +10cf3e0 Revert osm_vendor_ibumad.c: OpenSM no longer works with ibsim with latest libibumad +62e0ee0 osm_pkey.c: Fix comment in match_pkey +c4770cf osm_vendor_ibumad.c: OpenSM no longer works with ibsim with latest libibumad +ef0fcce osm_sa_[multi]path_record.c: Remove some redundant code in [m]pr_pr_rcv_get_path_parms +7122495 osm_[multi]path_record.c: Fix a couple of edge cases with new 2x/HDR SA rates +aa638d6 PKEY: Add functionality to ignore existing pkey indexes +9094a58 osm_subnet.c: Make formatting consistent in generated opensm.conf +ad7d56d doc/QoS_management_in_OpenSM.txt: Fix typo +a166aca osm_ucast_dfsssp.c: Uniquify some error codes +9012a8a complib/cl_heap.[h c]: Fix a corner case in d-ary heap +3d22440 osm_helper.c: Add decode of HDR supported to dbg_get_capabilities2_str +b31ec5e [current-routing.txt man/opensm.8.in]: Some minor fixups +d12e40e osm_subnet.c: Fix typo in generated configuration/options file +0d58e55 ib_types.h: mcast_pkey_trap_suppr in PortInfo attribute is 2 bits in IBA 1.3 +ece32f3 osm_sa.c: Cosmetic change to 4C05 error log message +78f262a osm_qos.c: Better handling of VL arbitration tables when there is 1 data VL +a3f0c5f osm_prtn_config.c: Fix a couple of compile warnings with more recent gcc +e9ad4af osmtest/osmt_multicast.c: Fix MC join with unrealistic rate +55823ac osm_multicast.h: Fix some osm_mgrp_box structure field descriptions +7e00315 osm_sa_path_record.c: Check input 
parameters in osm_get_path_params +dfaf5db osm_switch.c: Fix commentary typo +b879f98 osm_sa_inform_info.c: Use defines rather than hard coded constants in infr_rcv_process_set_method +135ad4b ib_types.h: Fix bit for IB_PM_IS_ADDL_PORT_CTRS_EXT_SUP +a161ec4 ib_types.h: Replace hard coded constant with define +fe1cf8d ib_types.h: Fix some typos associated with IB_CLASS_RESP_TIME_MASK +e16fedf osm_subnet.c: Indicate that subnet prefix can't be changed at runtime +c9a3c6f osm_sa_mad_ctrl.c: It's report response rather than repress +075fbd6 osm_subnet.c: EOL missing in error message in opts_strtoull +cb99d8d gen_ver.sh: Change configure.in to configure.ac in comment +519b24d configure.ac: Update configure.in to configure.ac +1ed0eea configure.in: Update AM_INIT_AUTOMAKE to use subdir-objects +e4f6b79 Makefile.am: Fix INCLUDES warnings +b4f5374 osm_[link lid]_mgr.c: Simplify error threshold comparisons +ed4e65a osm_link_mgr.c: Simplify some link speed related comparisons +eaa1469 osm_[link lid]_mgr.c: Simplify link width comparisons +5baa2a4 osm_sminfo_rcv.c: Use initial rather than return path in smi_rcv_process_get_response +f449e50 osm_switch.h: Fix commentary typo +063154d osm_sa_mcmember_record.c: Add MGID to 1B13 error message +68db71c osm_ucast_ftree.c: Remove redundant condition in fabric_route_downgoing_by_going_up +b25e518 osm_console.c: Remove redundant condition in __get_stats +3d665b3 osm_ucast_ftree.c: Implement atomic update operation for dump file +b439c42 Fix various typos +6a1624f osm_state_mgr.c: Move subnet up event to occur after mkey related files are written +58884b5 osm_torus.c: Cosmetic formatting change +bb15d2f osm_service.c: Fix missing endian conversion in log message +c0e8141 Fix various coverity issues +a0d9157 osm_subnet.c: Add guid_routing_order_no_scatter option to opensm.conf +5d521c1 osm_state_mgr.c: Update comment in state_mgr_check_tbl_consistency +ccefa3d osm_sm_mad_ctrl.c: Add ':' to "ERR 3120" error message +dc45fca ib_types.h: 
Comment change to indicate 1.3.1 rather than 1.2 IBA spec +37e1246 ib_types.h: Cosmetic formatting changes +73fd8ee man/opensm.8.in: Fix typo (missing close parenthesis) +c32b813 doc/current-routing.txt: Fix typo +c4691e7 ib_types.h: Cosmetic commentary fix +85e2214 osm_sa_mcmember_record.c: Prevent log errors swamp in MC query scenario +88626f4 osm_sa_mcmember_record.c: Updated Mellanox copyright years +432727b osm_sa_mcmember_record.c: Cosmetic formatting change to mcmr_rcv_create_new_mgrp +b85c31e osm_prtn_config.c: Cosmetic formatting changes in manage_membership_change +1a2a6d7 osm_prtn_config.c: Updated Mellanox copyright years +8b288b8 osm_mcast_mgr.c: Cosmetic formatting change in create_mgrp_switch_map +4772806 osm_state_mgr.c: Fix uninitialized area of SMInfo SET mad +175de41 osm_prtn.c: Cosmetic formatting change +870b21a osm_ucast_dfsssp.c: Fix some commentary typos +d5c51e9 osm_ucast_mgr.c: Cosmetic formatting change +f234d83 osm_inform.c: Cosmetic code refactoring in match_inf_rec +fdd1a68 Updated some Mellanox copyrights +ecf07eb Set Type field in Notice attribute using IB_NOTICE_TYPE_SUBN_MGMT define +022e6af Use SM trap defines from ib_types.h rather than hard coded values +d111a2c ib_types.h: Cosmetic formatting change +1cbea4b osmtest/osmt_multicast.c: Cosmetic changes +c52a1b2 osm_drop_mgr.c: Eliminated redundant check for switch node type +2c4771f osm_ucast_dfsssp.c: Minor change to setting dropped when switch exists +e94fc29 osm_subnet.[h c]: Fix wrong function documentation and parameter list +098259e Updated Mellanox copyrights in some recently changed files +1f3dcb4 osm_ucast_ftree.c: Cosmetic formatting changes in fabric_construct_hca_ports +e92cfe5 iba/ib_types.h: Fix commentary typo +5109f44 osm_ucast_ftree.c: Cosmetic formatting change +0bc858e osm_ucast_ftree.c: Cosmetic variable name changes in ftree_port_group_t struct +08e8a9a osm_sa_path_record.c: Refactored PR [D/S] GID and LID validation +7777987 osm_sa_path_record.c: Cosmetic 
formatting change +a7ac5fc osm_prtn_config.c: Handle valgrind warning in osm_prtn_config_parse_file +609b777 osm_ucast_ftree.c: Cosmetic formatting change +d9a601b osm_ucast_ftree.c: Remove no longer needed code in remove_depended_hca +1f6cd87 osm_state_mgr.c: Cosmetic formatting changes +7f2f902 libvendor/osm_vendor_ibumad_sa.c: Cosmetic formatting change to ERR +5501 log message +e527ec6 libvendor/osm_vendor_ibumad_sa.c: Cosmetic formatting changes to +osmv_query_sa +ec2e2bf osm_pkey.h: Fix osm_physp_has_pkey method description +bf3818b partition_config.txt: Small correction in doc file +9fa761c osm_ucast_ftree.c: Removed *p_ftree parameter from sw_destroy function +652b063 osm_ucast_ftree.c: Removed *p_ftree parameter from sw_create function +d504e76 doc/QoS_management_in_OpenSM.txt: Cosmetic changes +a28d63e Fix documentation on ignore-guids command line option +df32644 osm_subnet.c: Minor clarification to SwitchCongestionSetting Control +Map description in generated option file +483ff00 man/osmtest.8: Minor tweaks to inventory option description +8d3c6f8 osm_sa_mcmember_record.c: Change to log messages for PKey consistency +504e43f osm_req.c: Cosmetic formatting change +a5e484b osm_node.h: Fix commentary typo +6ceb0ce osm_sa_mcmember_record.c: Removed redundancy in comment +9e0cba3 osm_prtn_config.c: Cosmetic formatting change +0eb5117 osm_sa_mcmember_record.c: Cosmetic formatting change +2541ed9 osm_ucast_mgr.c: Cosmetic formatting change +28fd4ee osm_[subnet congestion_control].c: Cosmetic commentary change +391c244 osm_congestion_control.c: Added Mellanox copyright +7e08621 osm_congestion_control.c: In cc_rcv_mad, add attribute ID and modifier +to log message +62075d6 osm_perfmgr.c: Fix endian of MAD status in pc_recv_process +cd94c9c osm_congestion_control.c: Fix endian of MAD status in cc_rcv_mad +cb5df8b osm_congestion_control.c: Fix endian of node and port GUIDs in some +log messages +68887fb osm_congestion_control.c: Cosmetic changes +1331a46 
osm_subnet.[h c]: Fix possibility for open file descriptor issue +ce15bb7 osm_perfmgr.c: Output remote port on perfmgr error counter log +messages +030ac82 osm_sa.c: Improve ERR 4C05 log message +fbb63d6 osm_subnet.c: Update MEPI supported devices white list +0774229 osm_subnet.c: Cosmetic formatting change +5871ef7 osm_sa.c: Check return value from chmod in opensm_dump_to_file +61cd0cf osm_switch.c: Fix potential memory leak due to misuse of realloc +c78adde osm_req.c: Initialize dest_port_guid in req_determine_mkey +dd5e5df osm_mcast_mgr.c: Add MLID to error 0A06 log message +63900bc osm_vendor_ibumad.h: Use UMAD_MAX_DEVICES for OSM_UMAD_MAX_CAS +a5c0200 iba/ib_types.h: Add support for new MAD SM:PortInfoExtended and for +modifying PM:PortExtendedSpeedsCounters +3f99535 osm_port_info_rcv.c: In osm_pi_rcv_process, move assert before first +log message +a0b9444 Change osm_subn_t.log_max_size type to uint32_t +b5abaa2 osmeventplugin.c: Add include of osm_config.h as first OpenSM include +ecce4e3 osm_console.c: Handle LinkSpeed[Ext]Active 0 for portstatus command +f622810 osm_console.c: Fix unknown speed/width port reporting for portstatus +command +ffdd042 osm_console_io.c: In is_authorized, STRING_UNKNOWN is define +a6ba888 Add missing keyword in partition definition in man page and +partition.txt doc +7444e5e Fix the difference in osm_opensm struct size between OpenSM and +plugin(s) +caf764f osm_subnet.h: Fixed commentary typo +117bc82 osm_prtn.c: Change message verbosity for log message in +osm_prtn_add_port +3b172ab osm_config.h.in: Fixed the difference in osm_opensm_t struct size +between opensm and plugins +d13d4bd osm_console.c: Track and report unknown/speed width ports +d63e7f6 osm_sa_path_record.c: Cosmetic formatting changes +48d159b libvendor/osm_vendor_ibumad.c: Remove GID index 0 check in +umad_receiver +d7f1d02 libvendor/osm_vendor_ibumad.c: memset osm_mad_addr_t before setting +fields +96f89d6 osm_console.c: Included unknown speed/width ports in 
"possible issues" +44df235 osm_dump.c: In dump_topology_node, handle link_width_active of 4 for +8X +a1e58e0 osm_link_mgr.c: Fix bug in mlnx extended port info setting +fcb6967 osm_subnet.c: Improve sweep_on_trap documentation in generated conf +file +47a6b00 osm_sa_multipath_record.c: Better logging for 4514, 4515, and 4505 +error messages +115529b osm_sa_path_record.c: Better logging for 1F02 error message +fe079df osm_sa_path_record.c: Better logging for 1F05 and 1F03 error messages +43c378f osm_ucast_mgr.c: A couple of cosmetic log message changes in +ucast_mgr_route +9a5e514 osmtest/osmtest.c: Cosmetic formatting change +90db6c3 osm_state_mgr.c: Cosmetic formatting changes +e7139af osmtest/osmtest.c: Fix osmtest_get_sm_gid when running osmtest on node +other than SM node +83ea812 doc/performance-manager-HOWTO.txt: Update perfmgr config options +documentation +d4d5af3 libvendor/osm_vendor_ibumad_sa.c: Fixed endian in debug log message +c1080b9 osmtest/main.c: Cosmetic change to output for consistency +179fbac Add support for additional Mellanox OUI +be3e4f1 osm_subnet.c: Remove duplicate strcmp check in +osm_subn_rescan_conf_files method +e6fc2f8 Add some missing documentation files to installation +ed1571a osm_subnet.c: Change default for perfmgr_query_cpi option +3a1b458 osm_mcast_mgr.c: Cosmetic change to error log mesage +56bd964 opensm.init.in: Fix return value checking in opensm.init script +787c16e Eliminate redundant calls to ib_port_info_compute_rate +6a2d081 Shorten long lines while calculating SA rate +6ec10d6 man/opensm.8.in: Update date on man page +1dcf322 Improve scatter ports documentation +b1a8fb0 osm_subnet.c: Cosmetic formatting change in subn_validate_neighbor +d1b70fc osm_perfmgr.c: Add log message when perfmgr sweep is skipped +8098a4d osm_perfmgr.c: Remove unnecessary log message +38bc2e4 Skip state_mgr_check_tbl_consistency when no LID is changed by lid or +link managers +ce73c60 osm_drop_mgr.c: Add missing CR at end of log message in 
+drop_mgr_check_node +8255f8f osm_state_mgr.c: Improve error flow with wrong LIDs +86f1720 osm_sa_path_record.c: Fix some commentary typos +2bc6074 osmtest/main.c: Output formatting change for case 'x' +60727f7 osmtest: Add GRH tests for SA queries +d29dcd7 osm_sa_path_record.c: Fix misleading error messages during sweep +e91908b Add osm_congestion_control.c to per module logging support +3de1091 osm_congestion_control.c: Fix error code +3ee0a22 osm_sm.c: In sm_sweeper, no need to check for timeout after +cl_event_wait_on EVENT_NO_TIMEOUT +f3e1924 osm_trap_rcv.c: In shutup_noisy_port, improve ERR 3811 log message +f73ff43 osm_mcast_mgr.c: In mcast_mgr_process_mlid, cosmetic change to log +message +ab9fec9 osm_congestion_control.c: In cc_poller_send, handle cl_event_wait_on +return status +af04af7 osm_[sm sa]_mad_ctrl.c: Improve unsupported attribute error messages +16fee25 osm_helper.c: In osm_get_lsa_str, fix printing of wrong FDR10 data if +link is down +6c70bf1 PerfMgr: Eliminate no longer used sig_sweep variable +41d0a42 osm_perfmgr.c: In perfmgr_send_mad, handle cl_event_wait_on return +status +d764777 osm_perfmgr.c: Minor code factoring in perfmgr_send_mad +fa31298 doc/opensm_release_notes-3.3.txt: Updated repo location +8394ef8 libvendor/osm_vendor_ibumad.c: Commentary change +0ab7492 osm_subnet.c: More cosmetic changes to opensm conf file PerfMgr +documentation +cc977be osm_subnet.c: Enhance opensm conf file documentation for PerfMgr +options +38273db osm_port_info_rcv.c: Fixed calculation of min_data_vls +dc3259e Add support for additional Mellanox OUI +99a8e74 osmtest.c: Fixed missing assignment of return value from function +osmtest_get_port_rec_by_num +9ffa520 Fix the creation of empty multicast groups from SADB +1d3aacf osm_perfmgr_db.c: Add missing clear of new xmit_wait counter in +clear_counters +0f9b15c SM should resweep the fabric if vl15_send_mad fails +ee5f6d5 osm_perfmgr.c: Added Mellanox copyright +05be6c4 osm_perfmgr.c: Eliminate unneeded 
initialization in pc_recv_process +9ac71fd osm_perfmgr.c: Cosmetic formatting changes +54c6c86 osm_log.c: Fix wrong hour and date display in log when CL_ASSERT fail +5c81051 osm_console_io.c: Handle return value of function setsockopt +2e1294a osm_sa_[mcmember path]_record.c: Optimize clearing of SA record items +db9c450 osm_guid_info_rcv.c: Fix assert placement in osm_gi_rcv_process +bb723ae libvendor/osm_vendor_ibumad.c: Cosmetic change to umad_set_grh calls +5ca6bdc osm_perfmgr.c: Add current PerfMgr sweep state to 54FF error log +message +753af81 osm_mcast_mgr.c: Add missing new line at end of ERR 0A21 log message +d437d58 osm_mcast_mgr.c: Fix endian of port GUID in ERR 0A06 log message +51fb51d osm_ucast_mgr.c: Fix duplicated error codes +21c2ab7 osm_sa_mad_ctrl.c: In sa_mad_ctrl_rcv_callback, improve 1A04 error log +message +c83bde4 doc/performance-manager-HOWTO.txt: Fix typo +abaf91b osm_sa_service_record.c: Improved locking +1f4de58 osm_subnet.c: Fix bug in parsing configuration file +0fa5fc1 osm_sa_mcmember_record.c: Fix removing members from existing mc group +due to invalid requests +e156626 osm_qos_parser_y.y: Added range check for mtu limit parsing +cb439b3 osm_qos_parser_y.y: Added range check for rate limit parsing +a556f82 man/opensm.8.in: Minor fixes to per module logging configuration +7991745 osm_node_info_rcv.c: Update local copy of node info for known nodes +1c637df osm_node_info_rcv.c: Update NodeInfo.SysImageGUID on heavy sweep +275a56a osm_state_mgr.c: Clear first time sweep even after subnet error +09b5ffe osm_sa_path_record.c: In osm_pr_rcv_process, release lock before log +message +ef7a651 osm_state_mgr.c: Revert commit to "remove redundant unset to +first_time_master_sweep" +23dfbf8 osm_state_mgr.c: Remove redundant unset to first_time_master_sweep +e5a87dd Revert "Reset client reregistration when receiving handover" +28e5fa7 osm_sw_info_rcv.c: Add check of switch mcast_cap +78b6e8f osm_subnet.c: Fix resource leak neighbor parser 
+("subn_validate_neighbor" function) +46749c0 osm_subnet.c: Fix resource leak guid2mkey parser (guid validation +function) +6212e4b osm_ucast_ftree.c: Fix memory leak in ftree fabric_rank +afb6cb8 osm_ucast_mgr.c: Use LFT block of all port 0s to indicate resend +319e065 ib_types.h: Rename ib_switch_info_set_state_change function +b1c17a8 ib_types.h: Fix shadow declaration warnings +e3f0440 osm_ucast_updn.c: Fix the AA0B error number +30d9020 osm_ucast_updn.c: Add missing ERR number to log message +e8a9275 osm_port.c: Improve ERR 4108 log message +740c22b opensm/include/opensm/osm_log.h: Fix commentary cut 'n paste error +13ebee4 opensm/osm_subnet.c: Fixed ftree/updn configuration failure when + root_guid_file points to non-existing file +8a9d267 opensm: fix locking in osm_guid_mgr_process +4d682bb opensm: Fix pthread_create() return value checks +63c6609 opensm/osm_port.h: Fix commentary typo +3e4e00b opensm/osmtest: fix osmtest ignores timeout parameter +68b1d92 opensm: perfmgr fix dump_counters +9d16039 opensm/perfmgr: fix endian conversion of PortCounters +324f269 opensm/osm_sa_inform_info.c: Fix some error log messages +1d5213a opensm/osm_madw.h: Fix a couple of cut 'n paste commentary errors +3fc662d opensm/torus-2QoS: Fix some typos in documentation +048c66e Fixed Multicast precreation parsing +086d611 Fixes in SL2VL table distribution algorithm +7d9f0c9 Fix deadlock between sminfo_set_req() and osm_sm_state_mgr() +69741e6 Fix base port0 sl2vl mapping optimization +bcda38e Fix SL2VL configuration +7e39542 Fix pre-creation of MC group with MGID containing P_Key +20e1a46 opensm/osm_congestion_control.c: Fix initialization hex string +a59072d opensm/osm_congestion_control.c: Skip TID 0 on 32 bit wraparound +7d18662 osmtest/osmtest.c: Fix permission +e7d4574 opensm/perfmgr: update new error codes to '54' prefix +e17dae6 opensm/osm_sa_portinfo_record.c: In pir_rcv_new_pir, fix switch port 0 physp +93b2f56 libvendor/osm_vendor_ibumad.c: fix unused-but-set warning 
+cda58af fixed unused-but-set warning for DEBUG variables +e881d0e opensm/osmtest/osmt_multicast.c: Fix typo in log message +203f3c6 opensm/osm_switch.c: Fix compile warnings +67c2538 opensm/man/opensm.8.in: Fix some typos +7338efc opensm/osmtest/osmtest.c: Fix endian in some log messages +67063ca opensm/libvendor: Fix compile warnings on 64 bit machines when building --with-osmv=sim +a8c209c opensm: fixed port order configuration in torus routing engine +7359cfc opensm/osm_ucast_mgr.c: Fix some issues found by Coverity +0150ab9 opensm/osm_ucast_ftree.c: Fix some issues found by Coverity +267a08f opensm/osm_pkey_mgr.c: Fix cast +12f772d opensm: Fix some OSM_SIGNAL and OSM_SM_SIGNAL numbering +c29c4f1 opensm/osm_sa_service_record.c: Fix minor memory leak +f936f8b opensm: fix strtoull error handling +0292ae2 opensm: fixed segfault when enable qos on fabric with no switches +619fa64 osmtest/osmt_multicast.c: Fixed another insufficient component case +1618803 opensm/osm_sa.c: Fix commentary typo +c98fec9 opensm/osm_subnet.c: Fix description of max_msg_fifo_timeout +0a3839f osmtest/osmtest.c: Fix trap flow not implemented log message +f08479a opensm/osm_base.h: Fix a commentary typo +ae966f6 osmtest/osmt_multicast.c: Fix some typos +8a3b5b9 osmtest/osmt_multicast.c: Fixed some error codes in OSM_LOG messages +c90953d osmtest/osmt_multicast.c: Fixed a couple of typos in OSM_LOG messages +f7f1ead opensm/osm_sa_mcmember_record.c: Fix handling of invalid PKey +65d3e4f opensm/osm_pkey_mgr.c: Fix commentary typo +34d61cc opensm/perfmgr: fix overflow processing +77d79b4 opensm: fixed potential null variable dereferencing in libvendor +350c6e4 opensm: fixed potential memory leak in osm_ucast_ftree() +e206872 opensm: Fixed debug message in osm_vendor_send() +1b3e93e opensm: fixed sizeof of pointer allocation in osm_ucast_lash() +f0b915a Fix SANodeRecord.nodeInfo.localPortNum +3332658 opensm: fixed memory leak in multicast spanning tree calculation +e4525b1 opensm: fixed 
indentation and decreased verbosity of RMPP length message +10ac4c1 Fix compile warning introduced by patch "fixed getline pointer allocation free in osm_console_io" +bf23d7c opensm: fixed getline pointer allocation free in osm_console_io +54b1583 Makefile: ChangeLog and version generation script path fix +4911e0b performance-manager-HOWTO.txt: Indicate master state +86ccaa4 opensm/osm_pkey_mgr.c: Fix pkey endian in log message +b79b079 opensm.8.in: Add mention of backing documentation for QoS policy + file and performance manager +b4d92af opensm/osm_perfmgr.c: Eliminate duplicated error number +a10b57a opensm/osm_ucast_ftree.c: lids are always handled in host order +44273a2 opensm/osm_ucast_ftree.c: fixing bug in indexing +5cd98f7 Fix further bugs around console closure and clean up code. +6b34339 opensm/osm_opensm.c: add newline to log message +68c241c send trap144 when local priority is higher than master priority +6462999 opensm/osm_inform.c: In __osm_send_report, make sure p_report_madw + valid before using +9b8561a opensm/console: Fixed osm_console poll to handle POLLHUP +91d0700 osm_vendor_ibumad.c: In clear_madw, fix tid endian in message +5a5136b osm_switch.h : Fixed wrong comment about return value of + osm_switch_set_hops +c1ec8c0 osm_ucast_ftree.c: Removed useless initialization on switch indexes +418d01f opensm/osm_helper.c: use single buffer in osm_dump_dr_smp() +2c9153c opensm/osm_helper.c: consolidate dr path printing code +048c447 opensm/osm_helper.c: return then log is inactive +dd3ef0c opensm: Return error status when cl_disp_register fails +0143bf7 opensm/osm_perfmgr.c: Improve assert in osm_pc_rcv_process +6622504 osm_perfmgr.c: In osm_perfmgr_shutdown, add missing cl_disp_unregister +7b66dee opensm: remove unneeded anymore physp initializations +f11274a opensm/partition-config.txt: Update for defmember feature +d240e7d opensm/osm_sm_state_mgr.c: Remove unneeded return statement +898fb8c opensm: Improve some snprintf uses +6820e63 
opensm/osm_sa_link_record.c: improve get_base_lid() +64c8d31 opensm: initialize all switch ports +555fae8 opensm/sweep: add log message before lid assignment +8e22307 opensm/console: Enhance perfmgr print_counters for better nodenames +b9721a1 opensm/osm_console.c: Improve perfmgr print_counters error message +4d8dc72 opensm/osm_inform.c: Fix sense of zero GID compare in __match_inf_rec +a98dd82 opensm/main.c: remove enable_stack_dump() call +db6d51e opensm/osm_subnet: fix crash in qos string config parameters reloading +e5111c8 opensm: proper config file rescan +e5295b2 opensm: pre-scan command line for config file option +e2f549e opensm/osm_console.c: Eliminate some extraneous parentheses +0a265dc opensm/console: dump_portguid - don't duplicate matched guids +540fefb opensm/console: dump_portguid command fixes +d96202c opensm/osm_console.c: Add missing command in help_perfmgr +ae1bd3c opensm/osm_helper.c: Add port counters to __osm_disp_msg_str +1d38b31 opensm/osm_ucast_mgr.c: Add error numbers for some OSM_LOG prin +156c749 opensm: fix structure definition for trap 257-258 +5c09f4a opensm/osm_state_mgr.c: small bug in scanning lid table +72a2fa2 opensm/osm_sa.c: fixing SA MAD dump +539a4d3 opensm/osm_ucast_ftree.c Fixed bad init value for down port index +6690833 opensm/ftree: simplify root guids setup. 
+90e3291 opensm/ftree: cleanup ftree_sw_tbl_element_t use +c07d245 opensm/qos_config: no invalid option message on default values +b382ad8 opensm: avoid memory leaks on config parameters reloading +45f57ce opensm/osm_ucast_ftree.c: Fixed bug on index port incrementation +3d618aa opensm/osm_subnet.c: break matching when config parameter already found +44d98e3 opensm/osm_subnet.c: clean_val() remove trailing quotation +173010a opensm/doc/perf-manager-arch.txt: Fix some commentary typos +83bf6c5 opensm/osm_subnet.c fix parse functions for big endian machines +6b9a1e9 opensm/PerfMgr: Primarily fix enhanced switch port 0 perf manager + operation +4f79a17 opensm/osm_perfmgr.c: In osm_perfmgr_init, eliminate memory leak + on error +22da81f opensm/osm_ucast_ftree.c: fix full topology dump +aa25fcb opensm/osm_port_info_rcv.c: don't clear sw->need_update if port 0 + is active +003bd4b opensm/osm_subnet.c Fix memory leak for QOS string parameters. +9cbbab2 opensm/opensm.spec: fix event plugin config options +996e8f6 OpenSM: update osmeventplugin example for the new TRAP event. 
+67f4c07 opensm/lash: simplify some memory allocations +3e6bcdb opensm/lash: fix memory leaks +3ff97b9 opensm/vendor: save some stack memory +ccc7621 opensm/osm_ucast_ftree.c: fixing errors in comments +1a802b3 Corrected incoherency in __osm_ftree_fabric_route_to_non_cns comments +85a7e54 opensm/osm_sm.c: fix MC group creation in race condition +aad1af2 opensm/osm_trap_rcv.c: Improvements in log_trap_info() +f619d67 opensm/osm_trap_rcv.c: Minor reorganization of trap_rcv_process_request +084335b opensm/link_mgr: verify port's lid +d525931 opensm/osm_vendor_ibumad: Use OSM_UMAD_MAX_AGENTS rather than + UMAD_CA_MAX_AGENTS +f342c62 opensm/osm_sa.c: don't ignore failure in osm_mgrp_add_port() +587fda4 osmtest/osmt_multicast.c: fix strict aliasing breakage warning +6931f3e opensm: make subnet's max mlid update implementation independent +30f1acd osm_ucast_ftree.c missing reset of ca_ports +ac04779 opensm: fix LFT allocation size +a7838d0 opensm/osm_ucast_cache: reduce OSM_LOG_INFO debug printouts +c027335 opensm/osm_ucast_updn.c: Further reduction in cas_per_sw allocation +e8ee292 opensm/opensm/osm_subnet.c: adjust buffer to ensure a '\n' is printed +84d9830 opensm/osm_ucast_updn.c: Reduce temporary allocation of cas_per_sw +347ad64 opensm/ib_types.h: Mask off client rereg bit in set_client_rereg +c2ab189 opensm/osm_state_mgr.c: in cleanup_switch() check only relevant + LFT part +40c93d3 use transportable constant attributes +c8fa71a osmtest -code cleanup - use strncasecmp() +770704a opensm/osm_mcast_mgr.c: In mcast_mgr_set_mft_block, fix node GUID + in log message +3d20f82 opensm/osm_sa_path_record.c: separate router guid resolution code +27ea3c8 opensm: fix gcc-4.4.1 warnings +c88bfd3 opensm/osm_lid_mgr.c: Fix typo in OSM_LOG message +a9ea08c opensm/osm_mesh.c: Add dump_mesh routine at OSM_LOG_DEBUG level +bc2a61e C++ style coding does not compile +6647600 opensm: remove meanless 'const' keywords in APIs +323a74f opensm/osm_qos_parser_y.y: fix endless loop +0121a81 
opensm: fix endless looping in mcast_mgr +696c022 opensm: fix some obvious -Wsign-compare warnings +b91e3c3 opensm/osm_get_port_by_lid(): don't bother with lmc +ca582df opensm/osm_get_port_by_lid(): speedup a port lookup +fd846ee opensm/osm_mesh.c: simplify compare_switches() function +fe20080 osm_sa.c - void * arithmetic causes problems +220130f osm_helper.c use explicit value for struct init +0168ece use standard varargs syntax in macro OSM_LOG() +180b335 update functions to match .h prototypes +9240ef4 opensm/osm_ucast_lash: fix use after free bug +6f1a21a opensm: osm_get_port_by_lid() helper +c9e2818 opensm/osm_sa_path_record.c: validate multicast membership +225dcf5 opensm/osm_mesh.c: Remove edges in lash matrix +4dd928b opensm/osm_sa_mcmember_record.c: clean uninitialized variable use +c48f0bc opensm/osm_perfmgr_db.c: Fix memory leak of db nodes +82d3585 opensm/osm_notice.c: move logging code to separate function +9557f60 opensm/osm_inform.c: For traps 64-67, use GID from DataDetails in + log message +e2e78d9 opensm/opensm.8.in: Indicate default rule for Default partition +08c5beb opensm/osm_sa_node_record.c: dump NodeInfo with debug verbosity +1fe88f0 opensm/multicast: merge mcm_port and mcm_info +ba75747 opensm/multicast: consolidate port addition/removing code +5e61ab8 opensm: port object reference in mcm ports list +5c5dacf opensm: fix uninitialized return value in osm_sm_mcgrp_leave() +7cfe18d osm_ucast_ftree.c: Removed reverse_hop parameters from + fabric_route_upgoing_by_going_down +aa7fb47 opensm/multicast: kill mc group to_be_deleted flag +a4910fe opensm/osm_mcast_mgr.c: multicast routing by mlid - renaming +1d14060 opensm/multicast: remove change id tracking +5a84951 opensm: use mgrp pointer as osm_sm_mcgrp_join/leave() parameter +d8e3ff5 opensm: use mgrp pointer in port mcm_info +0631cd3 opensm doc: Indicated limited (rather than partial) partition + membership +1010535 opensm/osm_ucast_lash.c: In lash_core, return status -1 for all errors +942e20f 
opensm/osm_helper.c: Add SM priority changed into trap 144 description +2372999 opensm/osm_ucast_mgr: better lft setup +e268b32 opensm/osm_helper.c: Only change method when > rather than >= +9309e8c complib/cl_event.c: change nanosec var type long +d93b126 opensm/complib: account for nsec overflow in timeout values +ef4c8ac opensm/osm_qos_policy.c: matching PR query to QoS level with pkey +c93b58b opensm: fixing some data types in osm_req_get/set +2b89177 opensm/libvendor/osm_vendor_ibumad.c: Handle umad_alloc failure in + osm_vendor_get +2cba163 opensm/osm_helper.c: In osm_dump_dr_smp, fix endian of status +47397e3 opensm/osm_sm_mad_ctrl.c: Fix endian of status in error message +e83b7ca opensm/osm_mesh.c: Reorder switches for lash +9256239 opensm/osm_trap_rcv.c: Validate trap is 144 before checking for + NodeDescription changed +011d9ca opensm/osm_ucast_lash.c: Handle calloc failure in generate_cdg_for_sp +59964d7 opensm: fixing handling of opt.max_wire_smps +f4e3cd0 opensm/osm_ucast_lash.c: Directly call calloc/free rather than + create/delete_cdg +5a208bd opensm/osm_ucast_lash.c: Added error numbers to some error log messages +3b80d10 opensm/osm_helper.c: fix printing trap 258 details +f682fe0 opensm: do not configure MFTs when mcast support is disabled +cc42095 opensm/osm_sm_mad_ctrl.c: In sm_mad_ctrl_send_err_cb, indicate + failed attribute +aebf215 opensm/osm_ucast_lash.c: Remove osm_mesh_node_delete call from + switch_delete +1ef4694 opensm/osm_path.h: In osm_dr_path_init, only copy needed part of path +c594a2d opensm: osm_dr_path_extend can fail due to invalid hop count +46e5668 opensm/osm_lash: Fix use after free problem in osm_mesh_node_delete +81841dc opensm/osm_ucast_lash.c: Handle malloc failures better +2801203 opensm: remove extra "0x" from debug message. 
+88821d2 opensm/main.c: Display SMSL when specified +f814dcd opensm/osm_subnet.c: Format lash_start_vl consistent with other + uint8 items +66669c9 opensm/main.c: Display LASH start VL when specified +31bb0a7 opensm/osm_mcst_mgr.c: check number of switches only once +75e672c opensm: find MC group by MGID using fleximap +2b7260d Clarify the syntax of the hop_weights_file +e6f0070 opensm/osm_mesh.c: Improve VL utilization +27497a0 opensm/osm_ucast_ftree.c Fix assert comparing number of CAs to CN ports +3b98131 opensm/osm_qos_policy.c: Use proper size in malloc in + osm_qos_policy_vlarb_scope_create +e6f367d opensm/osm_ucast_ftree.c: Made error numbers unique in some log + messages +83261a8 osm_ucast_ftree.c Count number of hops instead of calculating it +7bdf4ff opensm/osm_sa_(path multipath)_record.c: Fix typo in a couple of + log messages +0f8ed87 opensm/osm_ucast_mgr.c: Add error numbers to some error log messages +0b5ccb4 complib/Makefile.am: prevent file duplications +e0b8ec9 opensm/osm_sminfo_rcv.c: clean type of smi_rcv_process_get_sm() +4d01005 opensm: sweep component processors return status value +6ad8d78 opensm/libvendor/osm_vendor_(ibumad mlx)_sa.c: Handle malloc + failure in __osmv_send_sa_req +cf97ebf opensm/osm_ucast_lash.(h c): Replace memory allocation by array +957461c opensm/osm_sa.c add attribute and component mask to error message +5d339a1 osm_dump.c dump port if lft is set up +518083d osm_port.c: check if op_vls = 0 before max_op_vls comparison +b6964cb opensm/osm_port.c: Change log level of Invalid OP_VLS 0 message + to VERBOSE +b27568c opensm/PerfMgr: Reduce host name length +bc495c0 opensm/osm_lid_mgr.c bug in opensm LID assignment +5a466fd opensm/osm_perfmgr_db.c: Remove unneeded initialization in + perfmgr_db_print_by_name +57cf328 opensm/osm_ucast_ftree.c Increase the size of the hop table +8323cf1 opensm/PerfMgr: Remove some underbars from internal names +65b1c15 opensm: Changes to spec and make files for updated release notes +cd226c7 
OpenSM: include/vendor/osm_vendor.h - Replaced #elif with no + condition by #else +9f8bd4a management: Fixed custom_release in SPEC files +c0b8207 opensm/PerfMgr: Change redir_tbl_size to num_ports for better clarity +596bb08 opensm/osm_sa.c: check for SA DB file only if requested +2f2bd4e opensm SA DB dump/restore: load SA DB only once +4abcbf2 opensm: Added print_desc to various log messages +5e3d235 opensm/osm_vendor_ibumad.c: Move error info into single message +8e5ca10 opensm/libvendor//osm_vendor_ibumad_sa.c: uninitialized fields +d13c2b6 opensm/osm_sm_mad_ctrl.c Changes to some error messages +f79d315 opensm/osm_sm_mad_ctrl.c: Add missing call to return mad to mad pool +150a9b1 opensm/osm_sa_mcmember_record.c: print mcast join/create failures in + VERBOSE instead of DEBUG level +9b7882a opensm/osm_vendor_ibumad.c: Change LID format to decimal in log message +5256c43 opensm/osm_vendor_mlx: fix compilation error +93db10d opensm/osm_vendor_mlx_txn.c: eliminate bunch of compilation warnings +156fdc1 opensm/osm_helper.c Log format changes +7a55434 opensm/osm_ucast_ftree.c Changed log level +a1694de opensm/osm_state_mgr.c Added more info to some error messages +fdec20a opensm/osm_trap_rcv.c: Eliminate heavy sweep on receipt of trap 145 +13a32a7 opensm - standardize on a single Windows #define - take #2 +b236a10 opensm/osm_db_files.c: kill useless malloc() castings +4ba0c26 opensm/osm_db_files.c: add '/' path delimited +e3b98a5 opensm/osm_sm_mad_ctrl.c: Fix qp0_mads_accounting +dbbe5b3 opensm/osm_subnet.c: fixing bug in dumping options file +f22856a opensm/osm_ucast_mgr.c: fix memory leak +0d5f0b6 opensm: osm_get_mgrp_by_mgid() helper +e3c044a osm_sa_mcmember_record.c: pass MCM Record data to mlid allocator +3dda2dc opensm/osm_sa_member_record.c: mlid independent MGID generator +1f95a3c opensm/osm_sa_mcmember_record.c: move mgid allocation code +b78add1 complib: replace intn_t types by C99 intptr_t +a864fd3 osmtest/osmt_mtl_regular_qp.c: cleaning uintn_t use 
+9e01318 opensm/osm_console.c: make const functions +f8c4c3e opensm/osm_mgrp_new(): add subnet db insertion +80da047 complib/fleximap: make compar callback to return int +bf7fe2d opensm: cleanup intn_t uses +0862bba opensm/main.c: opensm cannot be killed while asking for port guid +2b70193 opensm/complib: bug in cl_list_insert_array_head/tail functions +4764199 opensm - use C99 transportable data type for pointer storage +a9c326c opensm/osm_state_mgr.c: do not probe remote side of port 0 +4945706 opensm/osm_mcast_mgr.c: fix return value on alloc_mfts() failures +8312a24 OpenSM: Fix unused variable compiler warning. +ab8f0a3 opensm/partition: keep multicast group pointer +a817430 opensm: Only clear SMP beyond end of PortInfo attribute +52fb6f2 opensm/osm_switch.h: Remove dead osm_switch_get_physp_ptr routine +aa6d932 opensm/osm_mcast_tbl.c: In osm_mcast_tbl_clear_mlid, use memset to + clear port mask entry +2ad846b opensm/osm_trap_rcv.c: use source_lid and port_num for logging +b9d7756 opensm/osm_mcast_tbl: Fix size of port mask table array +11c0a9b opensm/main.c: Use strtoul rather than strtol for parsing transaction + timeout +0608af9 opensm/osm_sm_mad_ctrl.c: In sm_mad_ctrl_send_err_cb, revert setting + of init failure on QoS initialization failures +c6b4d4a opensm/osm_vendor_ibumad.c: Add transaction ID to osm_vendor_send + log message +520af84 opensm/osm_sa_path_record.c: don't set dgid pointer for local subnet +4a878fb opensm/osm_mcast_mgr.c: fix osm_mcast_mgr_compute_max_hops for + managed switch +7c48590 opensm/osm_log.c: add OSM_LOG_SYS to default flags +89f7cb6 opensm/osm_lid_mgr: use 'first_time_master_sweep' flag +0cb7fab opensm: conversion to osm_get_port_by_lid() +9d14fc0 opensm/osm_lid_mgr.c: fix memory leak +c364aa1 opensm/opensm.init.in: fix install warning on SLES11 +1010c9c opensm/osm_sa_path_record.c: livelock in pr_rcv_get_path_parms +4b2cd5e opensm/vendor: fix portguids array size +52bf5b2 opensm/osm_subnet.c: fixing some options to not 
"hot-swappable" +8900da0 opensm/osm_subnet.{c,h}: passing options to the event plugins +051c57f Delete port only after GID OUT trap was sent +d4ebf7e opensm/complib/cl_passivelock.h: remove unneeded casting +8fdb17c opensm/complib/cl_types.h: convert cl_status_t to int +fd7fb1e opensm/osm_mcast_mgr.c: preserve root switch calculation functionality +fcb0f3a opensm/osm_mcast_mgr.c: code simplifications +444f559 opensm/osm_mcast_mgr.c: fix bug in MC root switch calculation +041ebcb opensm/osm_mcast_mgr.c: remove redundant casting +3717f53 opensm/osm_sa_pkey_record.c: optimize port selection logic +48352be opensm/osm_mcast_mgr.c: fix memory leak +f3cf83f opensm/complib/cl_ptr_vector.c: fix bug/compiler warning +27c8ebd opensm/osm_subnet.h: remove redundant function definition +f296938 opensm/osm_vl_arb_rcv.c: fix double mutex release bug +00bc48e opensm/osm_port_info_rcv.c: fix compilation warning +8823800 opensm/osm_sa.{c,h}: osm_sa_db_file_dump() return values +f4581f3 opensm/osm_qos.c: Fix typo in OSM_LOG message +e3c790a opensm/osm_update_node_desc(): minor prototype improvement +3cc68cb opensm/osm_vl_arb_rcv.c: Dump table after validating block number +7dbb96e opensm SA DB: dump only if modified +fa2106d opensm/osm_sa_infrominfo.c: fixes and simplifications in lid range check +051a1dd opensm/osm_qos.c: split switch external and end ports setup +a6c0189 opensm/osm_qos.c: merge SL2VL mapping capability check +3fe8efe opensm/osm_slvl_map_rcv.c: verify port number values received from + network +88c372c opensm/osm_slvl_map_rcv.c: fix mutex double release bug +d282093 opensm/osm_slvl_map_rcv.c: fix port parsing on BE machine +8e9dbd3 osm_sa_path_record.c: use PR DGID by reference +7c9d375 osm_sa_path_record.c: separate mutlicast processing code +cb2d18e opensm/osm_sa_path_record.c: MGID must be specified explicitly +bd3932b opensm/osm_mcast_mgr.c: strip log-only variable +9d93de3 opensm/osm_pkey_mgr.c: Eliminate unneeded parameter from pkey_mgr_get_physp_max_blocks API 
+5f49472 opensm/include/osm_helper.h: Eliminate some duplicate declarations +e8ddcd4 opensm/osm_opensm.c: no report when SM is exiting +77ce7c8 complib/cl_timer: remove not needed timeval initializations +490aae2 opensm/osm_helper.c: Add some missing message names to disp_msg_str +d678a21 opensm: Modify OSM_LOG_SYS messages +4cfb481 opensm: Fix wrong messages in MC delete flow +5b82f92 opensm/osm_req.c: In osm_send_trap144, eliminate redundant clear of m_key in smp +9bf64dc opensm/osm_qos.c: Eliminate unneeded endport SL to VL setup +34b536c opensm/osm_sa_path_record.c: adding wrapper for pr_rcv_get_path_parms() +237b5d1 opensm/osm_mcast_mgr.c: Only route MLIDs with more than 1 member +a72db14 opensm/osm_trap_rcv.c: No need for heavy sweep when just NodeDescription changes +ea9a768 opensm/osmtest.c: fix bug in getting attr offset +a3dec3a iba/ib_types.h: remove assertion in ib_get_attr_offset() +6bc032a return no path when path does not exist +1592ae9 opensm: Better handling of non responsive SMAs +a69f01b opensm/osm_perfmgr.c: Remove unnecessary lock reference from Performance Manager object +167ade2 opensm: fixing compilation issues in some header files +e1c253e opensm/qos.c: Revert port ranges for calls to sl2vl_update_table(). 
+0689f49 opensm/libvendor Reduce stack consumption +59056c7 opensm - address windows env issues +ff14200 opensm/osm_sa_multipath_record.c: livelock in mpr_rcv_get_path_parms +3f23d83 opensm/osm_sa_path_record.c: Add error code to newly added log message +7fc6cd3 ib_types.h add debug assert +4fd4ca3 osmtest - use helper function +6fdc20a opensm/complib use portable macro syntax +bf23d7c opensm: fixed getline pointer allocation free in osm_console_io +74867c7 Add node/port/qos information to some error messages +31a617d replace (long*)(long) casting with transportable data type (uintptr_t) +8da7521 opensm/st.c: fix potential core dumps +6a30911 opensm/osm_console.c: fix memory and file descriptor leaks +696f12c opensm/osm_qos_parser_y.y: fixing bunch of memory leaks on invalid values +3a7b97c opensm/osm_ucast_file.c: closing file descriptor in error path +b4575c5 opensm/osm_pkey_mgr.c: fixing small memory leak +dc0695f opensm/osm_ucast_lash.c: small bug in calculating allocated size +3c9604b opensm/osm_ucast_ftree.c: fixing another memory leak at error path +4460990 opensm/osm_ucast_ftree.c: fix small memory leak in error path +857cd6c opensm/osm_trap_rcv.c: fix possible core dump +b74bef5 opensm/osm_trap_rcv.c: No need to check for sweep for trap 145 +81dade3 opensm/osm_ucast_ftree: When roots are not connected, update hop count but not lft +acf2337 osmtest/osmt_service.c: In osmt_run_service_records_flow, add missing status +6db7f4a opensm/osm_state_mgr.c: Don't signal DISCOVER to SM state machine when already DISCOVERING +28693c5 Fix autotools to include the necessary M4 files +c1c8730 osm_vl15intf.c: fixing use-after-free coredump +3353f9b opensm/osm_helper.c: use ARR_SIZE macro instead of hardcoded values +2da9849 osmtest/osmt_slvl_vl_arb.c: handling fopen() failure +f48d5ea opensm/osm_db_files.c: malloc() return value run-time check +ea3ef82 opensm/osm_db_files.c: fix small memory leak +f4a5174 opensm/osm_subnet.c: fixing small bug in error path +c18ef23 
opensm/osm_mesh.c: fixing a bug in compare_switches() +83b74cd opensm/osm_helper.c: fix potential overrun of the array +85c0ac9 osmtest/osmtest.c: handle timeouts in PR stress test +ebb2c84 opensm/osm_node_info_rcv.c: move p_physp declaration under code block +5c88113 opensm/osm_node_info_rcv.c: remove useless code line +866d939 opensm/osm_sa_vlarb_record.c: removed unused variable +15a8770 opensm/osm_sa_pkey_record.c: removing unused variable +8f002b7 opensm/osm_pkey.c: removing unused function +9c0fa2f opensm/osm_sminfo_rcv.c: removing unused variable +d3f060a opensm/osm_mtree.c: removing useless 'if' statement +bbef64a libvendor/osm_vendor_mlx_sa.c: remove useless "if" statement +2da02b5 libvendor/osm_vendor_ibumad_sa.c: remove useless "if" statement +435dde0 opensm/sa: simplify osm_mcmr_rcv_find_or_create_new_mgrp() function call +e7a872d opensm/osm_qos_policy.c: change a log message +ffbe7d0 opensm: Cause status of unicast routing attempt to propogate to callers of osm_ucast_mgr_process(). +673877a opensm: Make mcast_mgr_purge_tree() available outside osm_mcast_mgr.c. +b135687 opensm: Track the minimum value in the fabric of data VLs supported. 
+7c1ee64 opensm: Fix typo in routing section in man page and doc +d206011 opensm/osmtest/osmtest.c: inventory file parsing bugfix +e01121a fixed deprecated conversion from string constant to char* warning +a7ba101 opensm/main.c: Change size parameter in setvbuf call from 0 to BUFSIZ +2a92554 opensm/osmtest/osmt_multicast.c: Fix multicast flow failures on pkey validation +364e65b opensm/osm_dump.c: Fix FDR10 speed dumping +80e11b9 osmtest/osmt_multicast.c: Fix check of partial JoinState delete request - removing NonMember (o15.0.1.14) +78e1e4e osmtest/osmt_multicast.c: Fix check of BAD RATE when connecting to existing MGID (o15.0.1.13) +e94e972 osmtest/osmt_multicast.c: Fix first MGID=0 MC group creation case +daedad7 opensm/osmtest/osmt_multicast.c: Fix an unrealistic rate case +110ae10 opensm: fixed segfault in osm_destroy +d666205 opensm/osm_prtn.c: Fix typo in log message +f3ccb45 opensm: Remove duplicate definition of IB_MAD_STATUS_CLASS_MASK +917070c Move no_fallback_routing_engine from osm_subn_opt_t to osm_opensm_t. 
+d71a924 Free memory from osm_subn_opt_t when osm_subn_t destroyed +1b75fa4 Remove duplicate initialization of scatter_ports +b5f4570 Do not load configs from the default config file and specified config file +f24a089 Fix memleak and segfault +247d0d8 Fix IPoIB broadcast group creation on non-default Pkey +78d86bd opensm/osm_subnet.c: Trivial optimization to code flow in subn_verify_sl2vl +6bb41e3 opensm/complib/cl_atomic.h: Commentary changes +28ee7b9 opensm/osm_sa_class_port_info.c: Conditionalize setting of OSM_CAP2_IS_MCAST_TOP_SUPPORTED +29e59a2 Removed unused parameter "ib_mad_addr" from umad_reciever() +63ad0bb opensm/osmeventplugin/src/osmeventplugin.c: Output LIDs in decimal +17967c2 opensm/osm_switch.h: Fix commentary typo +6d3e223 opensm/osm_perfmgr.c: Enhance send error log message +08abcd4 opensm/osm_sa_mad_ctrl.c: Enhance send error log message +13f3e0f opensm/libvendor/osm_vendor_ibumad.c: Fix DR path printing on send timeouts +0dfd760 Fix suggest parentheses around operand warning +ad2dbf8 Support source-target-port-guid QoS policy configuration with ULP 'any' +080e3ad Support source-port-guid QoS policy configuration with ULP 'any' +49777a9 Fix typo in qos-ulps parsing comment +bcfe1b9 opensm/osm_torus.c: Use "OpenSM standard" error codes +683397d Fix use of GNU old-style field designator extension +719fcd4 Fix use of logical && with constant operand; switch to bitwise & +6bc87bd opensm: fix search common pkeys +a6ac5e3 opensm/osm_drop_mgr.c: GID out trap fix +7717505 opensm/osm_pkey_mgr.c: fix segfault when trying to access not allocated block +eb90efd include/opensm/osm_subnet.h: fix comment typos +0a315e3 opensm/include/iba/ib_types.h: fix comment typos and errors +125baa0 opensm/opensm.8.in: Fix cut 'n paste error +f0d14d2 opensm/osm_ucast_ftree.c: Fix some typos +841096f opensm: Fix opensm handover/relinquish corner case +bf420ac opensm/osm_helper.c: Fix commentary typo +2be999c opensm: fixed description in osm_routing_engine +cf3d185 
gen_chlog.sh: fixed version ordering +8d49c5d opensm/osm_sa_guidinfo_record.c: Fix locking +8d764b8 Fix continous looping when clearing accum_pkeys table +32500a6 opensm/osm_link_mgr.c: Fix sending PortInfo Set for ports supporting extended speed +e24ff39 Fix Pkey enforcement configuration +041e47a Fix PathRecord reply to be the same for allow_both_pkeys ON and OFF +3c9b81d OpenSM/osm_prtn_config.c: Fix non-initialized pointer usage +28a40a2 Fix logging messages about op_vls and mtu mismatch +5e672a6 Fix memory leak on dfsssp_context_destroy() +3a87fa2 opensm/osm_req.c: fix first sweep m_key search algorithm +2108431 opensm: fix default cc_max_outstanding_mads assignment +dae214f opensm/osm_log.h: fix function documentation +1cf0bb9 opensm/osm_congestion_control.c: fix use-after-free found by coverity +ab5af49 opensm/osm_ucast_dfsssp.c : fix dereference before null check +b6a2f6a opensm/osm_ucast_dfsssp.c : fix dereference null return value +df66c1f opensm: Fix incorrect use of sizeof +c58a416 opensm/osm_dump.c: Fix output port on SL2VL table for non switch nodes +8af5bce opensm/osm_sa_slvl_record.c: Fix out port for CAs and routers +18a77f9 opensm/osm_torus.c: Fix memory leak +4ccbdc1 Fix -Wformat-security warnings with clang +c762389 Fix -Wtautological-compare warnings with clang +9e6dd8f Fix linker error with clang with -O < 2 +27da5eb opensm/include/osm_opensm.h: Fix commentary typo +b9067ba opensm/include/complib/cl_packon.h: Fix some commentary typos +e522f74 opensm/osm_perfmgr_db.c: Fix output error due to possible 32bit int overflow +6f94c4c opensm: Fix signed vs unsigned int comparison +2ea2cf3 opensm/osm_vl15intf.c: Fix commentary typo +f396936 opensm/complib/cl_atomic_osd.h: Fix long standing bug in cl_atomic_sub +606157c osmtest/osmt_multicast.c: Fix 02BF error +03d55f6 opensm/osm_sw_info_rcv.c: Fixed locking issue on osm_get_node_by_guid error +d7e4da1 opensm: Use IB_PATH_SELECTOR_EXACTLY rather than harded coded constant +735c86d 
opensm/osm_sa_informinfo.c: Fix infr_rcv_respond to only copy InformInfo +074ec5a osm_perfmgr.h: Cosmetic formatting changes +05af776 osm_drop_mgr.c: Add missing assert +de254f9 osm_opensm.c: Add missing ERR number +b1e58bb osmeventplugin: Fix compile warning +48c9f43 osm_prtn_config.c: Some changes to osm_prtn_config_parse_file +6321afa osm_subnet.c: Cosmetic change to config file output +aaa7b1e osm_db_files.c: Some minor fixes/improvements to osm_db_store +3cae07c osm_perfmgr.c: Cosmetic formatting changes +07b24a3 osm_port_info_rcv.c: Reset client reregister bit only on a response to +SET +84c9832 osm_ucast_ftree.c: replace assert with error return value +1c0f0 osm_sa_path_record.c: Improve ERR 1F1D to show the pkey specified in +PathQuery +94e99a8 osm_mcast_mgr.c: Removed mcast_mgr_purge_tree_node due to code +duplication +929934e osm_pkey.[h c]: Remove dead function osm_pkey_tbl_clear_accum_pkeys +bf42ec8 osm_mtree.c: Cosmetic change in osm_mtree_destroy function +7e96f32 osm_sm_state_mgr.c: Fix handling of polling retry number +78d87b9 osm_mcast_mgr.c: fixed missing error message number +a1df1d7 osm_state_mgr.c: Fix error print in state_mgr_check_tbl_consistency() +1441937 osm_mcast_mgr.c Add block number to error message +edd5e74 Redundant remove() function call during db file generation +dcb2df0 osm_link_mgr.c: Fix uninitialized value (physp0) +7160cce osm_link_mgr.c: Fix uninitialized value (physp0) +5aa4ea3 osm_link_mgr.c: fix uninitialized variable usage +cf014ec reduce log level for missing partition configuration file. 
+d3aeae8 osmtest: Handle other than default subnet prefix +181c863 osm_vendor_ibumad.c: Improve ERR 5430 log message +7321689 osm_state_mgr.c Add info to some error messages +0fc753d osm_sa_path_record.c: Eliminate extraneous space in 1F1A log message +18b3be9 Use trap number defines rather than actual trap numbers +270a700 osm_db_files.c: Add osm_db_domain_init failure handling into test +program +2b82c1c Handle memory allocation failure in osm_db_domain_init() +8284132 Use after free in osm_prtn_delete +e186b4a osm_sa_guidinfo_record.c: False duplicate GUID error messages +d95d461 osm_sa_informinfo.c Add attribute info to log messages +94789a8 osm_port_info_rcv.c Issue a log message if we cannot read the MKey of +a port +b67db2b osm_helper.c: Fix out-of-bounds read +ef86015 osm_db_files.c : Fix resource leak guid2lid parser +4a2d2d8 osm_subnet.c: Fix memory leak caused by commit +dc0760cb8088fbe079e19682570a884ba01e94ff +168eaeb osm_db_files.c: Fix memory leak when deleting entries from osm db +edfaddc osm_ucast_dfsssp.c: Fix some typos +e1804f4 dfsssp - add missing and change existing return values +d5ef9af update man page and usage explanation for --lfts_file +6fdd844 osm_sa_path_record.c: Fix rate setting issue in SA PR handling +4d6925c osm_vendor_ibumad.c: Fix explicit null derefenced issue found by +coverity +2d67f3e opensm/osm_db_pack.c: Removed uneeded asserts +09e1e7e libvendor/osm_pkt_randomizer.c: Fix broken compilation with vendor sim +6ff99aa complib/cl_event_wheel.c: Add print of num_regs in cl_event_wheel_dump +a3957f2 complib/cl_event_wheel.c: Fix duplicate error codes +ceb4041 Fix test scenario in cl_event_wheel +5824714 osmeventplugin/osmeventplugin.c: Add Mellanox copyright +7186965 osm_event_plugin.h: Add Mellanox copyright +cd3b715 osmtest: Make the "-guid" option's argument mandatory +0154977 complib/cl_event_wheel.h: Some cosmetic fixes +b78b1d5 osm_sa_mcmember_record.c validate_requested_mgid returning boolean +58ee065 osm_ucast_mgr.c: Fix 
extra copy in set_lft_block routine +5bf6e72 osm_ucast_mgr.c: Fix duplicated code for fallback routing engine +2e5966e osm_sm_state_mgr.c Trivial log changes +aed1675 Log changes related to event subscription and forwarding +75bba51 Minor log formatting changes +16b4dfc Some log changes +51f87ee Add attribute information to SA request error messages +abd47cc osm_sa_mcmember_record.c Reduce number of error messages the for same +event +ae9d7e7 Add trap details to notice log message +886de5d ib_types.h: Fix commentary typo +234401b cl_threadpool.h: Remove vestigial mention of cl_thread_pool_construct +e20f37a osm_subnet.c Remove empty syslog message +898e9a3 osm_ucast_lash.c: Cosmetic formatting change +6d0413d opensm/osm_ucast_dfsssp.c: Fix unused variable in update_mcft() +583d4cf osm_console_io.c: Handle another write-strings issue +cbbe385 osm_sa_mcmember_record.c: Improve debug log message in validate_modify +4f835ae osm_congestion_control.c: Simplify some code +23ebbe9 osm_log.c: Remove unneeded initialization in osm_log +550fdeb osm_lid_mgr.c: Some commentary fixes/updates +1d50845 opensm: Add configure output messages for several configure options +91384ed osm_lid_mgr.c: Fix a couple of commentary typos +a238800 ib_types.h: Trap 144 PortInfo:CapabilityMask2 changed bit definition +564b6eb opensm: fix dfsssp uninitialized value +395157e opensm/osm_node.h: Fix some commentary typos +664494a opensm: Fix issues causing const warnings for strings +8840f82 opensm/osm_switch.h: Cosmetic change +1a06167 opensm/man/osmtest.8: Add option for using full world path queries +eb43f7e opensm/osm_torus.c: clarify log messages on stale priv pointers +1f7ceb8 opensm/osmtest: fix debug build +dc0760c opensm/osm_subnet.c: Miscellaneous minor fixes +4924ea7 osmtest: Add support for full world path records back as option +6f1a67e osmtest/osmtest.c: Minor reordering of code in +osmtest_write_all_path_recs +e9556df opensm/osmtest.c: half_world_query when creating inventory file 
+81d3ea3 opensm/osm_subnet.c: Change default for perfmgr_query_cpi to FALSE +309317d opensm/osm_ucast_ftree.c: Fix invalid debug output message +d0a8532 opensm/perfmgr: clean up: break out redirect processing from +pc_recv_process +ef32e12 opensm: make osm_pr_rcv_get_end_points, osm_pr_rcv_process_pair, +osm_pr_rcv_process_half public +7723c07 opensm: make osm_get_path_params public +7ac18bb opensm/osm_perfmgr.c: Cosmetic changes +fe52571 opensm/perfmgr: add failed port guid to error message +8b67a1c opensm/osm_req.c: Better implementation of req_determine_mkey +37eecc8 opensm/libibvendor: osm_vendor_get_all_port_attr include sm_sl value +in port attribute struct +8ac930e osmtest/osmt_multicast.c: Fix IPoIB MC group recognition +5a33cc6 osmtest/osmt_multicast.c: Fix commentary typo +186a598 opensm/osm_sa_multipath_record.c: Fix commentary typo +4cb3751 osm_dump.c: Fix typo in dump_lid_matrix +a0deac6 opensm/osm_dump.c: Fix enhanced switch port 0 handling in +print_node_report +abbbe8a opensm/ib_types.h: Add missing IB_MPR_COMPMASK_SERVICEID define +505d48b opensm/osm_torus.c: Cosmetic formatting changes +dc44d48 opensm/ib_types.h: Add missing IB_PR_COMPMASK_SERVICEID define + +* Other less critical or visible bugs were also fixed. + +5 Main Verification Flows +------------------------- + +OpenSM verification is run using the following activities: +* osmtest - a stand-alone program +* ibmgtsim (IB management simulator) based - a set of flows that + simulate clusters, inject errors and verify OpenSM capability to + respond and bring up the network correctly. +* small cluster regression testing - where the SM is used on back to + back or single switch configurations. The regression includes + multiple OpenSM dedicated tests. +* cluster testing - when we run OpenSM to setup a large cluster, perform + hand-off, reboots and reconnects, verify routing correctness and SA + responsiveness at the ULP level (IPoIB and SDP). 
+ +5.1 osmtest + +osmtest is an automated verification tool used for OpenSM +testing. Its verification flows are described by the list below. + +* Inventory File: Obtain and verify all port info, node info, link and path + record parameters. + +* Service Record: + - Register new service + - Register another service (with a lease period) + - Register another service (with service p_key set to zero) + - Get all services by name + - Delete the first service + - Delete the third service + - Bad flows of get/delete of a non-valid service + - Add / Get same service with different data + - Add / Get / Delete by different component mask values (services + by Name & Key / Name & Data / Name & Id / Id only) + +* Multicast Member Record: + - Query of existing Groups (IPoIB) + - BAD Join with insufficient comp mask (o15.0.1.3) + - Create given MGID=0 (o15.0.1.4) + - Create given MGID=0xFF12A01C,FE800000,00000000,12345678 (o15.0.1.4) + - Create BAD MGID=0xFA. (o15.0.1.6) + - Create BAD MGID=0xFF12A01B w/ link-local not set (o15.0.1.6) + - New MGID with invalid join state (o15.0.1.9) + - Retry of existing MGID - See JoinState update (o15.0.1.11) + - BAD RATE when connecting to existing MGID (o15.0.1.13) + - Partial JoinState delete request - removing FullMember (o15.0.1.14) + - Full Delete of a group (o15.0.1.14) + - Verify Delete by trying to Join deleted group (o15.0.1.14) + - BAD Delete of IPoIB membership (no prev join) (o15.0.1.15) + +* GUIDInfo Record: + - All GUIDInfoRecords in subnet are obtained + +* MultiPathRecord: + - Perform some compliant and noncompliant MultiPathRecord requests + - Validation is via status in responses and IB analyzer + +* PKeyTableRecord: + - Perform some compliant and noncompliant PKeyTableRecord queries + - Validation is via status in responses and IB analyzer + +* LinearForwardingTableRecord: + - Perform some compliant and noncompliant LinearForwardingTableRecord queries + - Validation is via status in responses and IB analyzer + +* Event
Forwarding: Register for trap forwarding using reports + - Send a trap and wait for report + - Unregister non-existing + +* Trap 64/65 Flow: Register to Trap 64-65, create traps (by + disconnecting/connecting ports) and wait for report, then unregister. + +* Stress Test: send PortInfoRecord queries, both single and RMPP and + check for the rate of responses as well as their validity. + + +5.2 IB Management Simulator OpenSM Test Flows: + +The simulator provides ability to simulate the SM handling of virtual +topologies that are not limited to actual lab equipment availability. +OpenSM was simulated to bring up clusters of up to 10,000 nodes. Daily +regressions use smaller (16 and 128 nodes clusters). + +The following test flows are run on the IB management simulator: + +* Stability: + Up to 12 links from the fabric are randomly selected to drop packets + at drop rates up to 90%. The SM is required to succeed in bringing the + fabric up. The resulting routing is verified to be correct as well. + +* LID Manager: + Using LMC = 2 the fabric is initialized with LIDs. Faults such as + zero LID, Duplicated LID, non-aligned (to LMC) LIDs are + randomly assigned to various nodes and other errors are randomly + output to the guid2lid cache file. The SM sweep is run 5 times and + after each iteration a complete verification is made to ensure that all + LIDs that could possibly be maintained are kept, as well as that all nodes + were assigned a legal LID range. + +* Multicast Routing: + Nodes randomly join the 0xc000 group and eventually the + resulting routing is verified for completeness and adherence to + Up/Down routing rules. + +* osmtest: + The complete osmtest flow as described in the previous table is run on + the simulated fabrics. + +* Stress Test: + This flow merges fabric, LID and stability issues with continuous + PathRecord, ServiceRecord and Multicast Join/Leave activity to + stress the SM/SA during continuous sweeps. 
InformInfo Set/Delete/Get + were added to the test such that both existing and non-existing nodes + perform them in random order. + +5.3 OpenSM Regression + +Using a back-to-back or single switch connection, the following set of +tests is run nightly on the stacks described in table 2. The included +tests are: + +* Stress Testing: Flood the SA with queries from multiple channel + adapters to check the robustness of the entire stack up to the SA. + +* Dynamic Changes: Dynamic Topology changes, through randomly + dropping SMP packets, used to test OpenSM adaptation to an unstable + network & verify DB correctness. + +* Trap Injection: This flow injects traps to the SM and verifies that it + handles them gracefully. + +* SA Query Test: This test exhaustively checks the SA responses to all + possible single component mask. To do that the test examines the + entire set of records the SA can provide, classifies them by their + field values and then selects every field (using component mask and a + value) and verifies that the response matches the expected set of records. + A random selection using multiple component mask bits is also performed. + +5.4 Cluster testing: + +Cluster testing is usually run before a distribution release. It +involves real hardware setups of 16 to 32 nodes (or more if a beta site +is available). Each test is validated by running all-to-all ping through the IB +interface. 
The test procedure includes: + +* Cluster bringup + +* Hand-off between 2 or 3 SM's while performing: + - Node reboots + - Switch power cycles (disconnecting the SM's) + +* Unresponsive port detection and recovery + +* osmtest from multiple nodes + +* Trap injection and recovery + + +6 Qualified Software Stacks and Devices +--------------------------------------- + +OpenSM Compatibility +-------------------- +Note that OpenSM version 3.2.1 and earlier used a value of 1 in host +byte order for the default SM_Key, so there is a compatibility issue +with these earlier versions of OpenSM when the 3.2.2 or later version +is running on a little endian machine. This affects SM handover as well +as SA queries (saquery tool in infiniband-diags). + + +Table 2 - Qualified IB Stacks +============================= + +Stack | Version +-----------------------------------------|-------------------------- +The main stream Linux kernel | 2.6.x +OFED | 1.5,1.5.x +OFED | 1.4 +OFED | 1.3 +OFED | 1.2 +OFED | 1.1 +OFED | 1.0 + +Table 3 - Qualified Devices and Corresponding Firmware +====================================================== + +Mellanox +Device | FW versions +------------------------------------|------------------------------- +InfiniScale | fw-43132 5.2.000 (and later) +InfiniScale III | fw-47396 0.5.000 (and later) +InfiniScale IV | fw-48436 7.1.000 (and later) +InfiniHost | fw-23108 3.5.000 (and later) +InfiniHost III Lx | fw-25204 1.2.000 (and later) +InfiniHost III Ex (InfiniHost Mode) | fw-25208 4.8.200 (and later) +InfiniHost III Ex (MemFree Mode) | fw-25218 5.3.000 (and later) +ConnectX IB | fw-25408 2.3.000 (and later) + +QLogic/PathScale +Device | Note +--------|----------------------------------------------------------- +iPath | QHT6040 (PathScale InfiniPath HT-460) +iPath | QHT6140 (PathScale InfiniPath HT-465) +iPath | QLE6140 (PathScale InfiniPath PE-880) +iPath | QLE7240 +iPath | QLE7280 + +Note 1: OpenSM does not run on an IBM Galaxy (eHCA) as it does not 
expose +QP0 and QP1. However, it does support it as a device on the subnet. + +Note 2: QoS firmware and Mellanox devices + +HCAs: QoS supported by ConnectX. QoS-enabled FW release is 2_5_000 and +later. + +Switches: QoS supported by InfiniScale III +Any InfiniScale III FW that is supported by OpenSM supports QoS. diff --git a/doc/opensm_release_notes_ibg2-2.0.1.txt b/doc/opensm_release_notes_ibg2-2.0.1.txt new file mode 100644 index 0000000..ea1f6a9 --- /dev/null +++ b/doc/opensm_release_notes_ibg2-2.0.1.txt @@ -0,0 +1,456 @@ + OpenSM Release Notes + ====================== + +Release: IBG2 +Repo: https://openib.org/svn/trunk/contrib/mellanox/gen2/src/userspace/management/osm +Version: 4956 +Date: Jan 2006 + +1 Overview +---------- +This document describes the contents of the OpenSM IBG2 release. +OpenSM is an InfiniBand compliant Subnet Manager and Administrator, +and runs on top of OpenIB. + +This document includes the following sections: +1 This Overview section (describing new features and software + dependencies) +2 Known Issues And Limitations +3 Unsupported IB compliancy statements +4 Major Bug Fixes +5 Main Verification Flows +6 Qualified software stacks and devices + +1.1 New Features + +* New libs created during installation: libopensm - contains interface + to the logging and mads pool mechanism. libosmcomp - contains + interface to the complib utilities. libosmvendor - contains + interface to sending/receiving MADs through the SMI or GSI over the + IBG2 driver. + +* Change building mechanism to use autotools. + +* Change directory structuring of the OpenSM code according to libs: + osm/libvendor - for vendor specific files. osm/complib - for complib + specific files. osm/opensm - for opensm core files. osm/include + +* Semi-static LID assignment: OpenSM uses a cache file for storing all + LID assignments such that, even after a reboot, the LIDs do not + change. 
The static LID assignment is built on top of a new + "persistency" layer that abstracts the actual database from its + usage. The implemented database is based on files stored under + /var/cache/osm (this location can be overridden via the environment + variable OSM_CACHE_DIR). Other implementations can use LDAP for + example. Note that a standby SM ignores its previously assigned LIDs + when it becomes the master, and the previous master LID settings are + used. + +* Irresponsive Port Handling: A port that does not respond to SM + queries will be queried upon future light or heavy sweeps, and if + then it responds, it will be setup immediately. Previously such a + port was queried only upon a heavy sweep. + +* Leaf Switch Port HOQ: A different maximal head of queue life time is + assigned to switch ports connected to HCAs such that a bad chipset + or defective hardware will not cause back pressure on the fabric. + +* OSM_TMP_DIR: This is a new environment variable controlling the + directory where subnet.lst, osm.fdbs and osm.mcfdbs files are + created. The default is still /tmp. + +* Configuration Options cache file: OpenSM was enhanced to provide a + means to modify all its internal configuration options, including + the ones that previously were only available under osmsh. The new + file is located under the cache directory and is named + opensm.opts. To automatically create this file OpenSM supports a new + flag: `-c'. The file is generated with the current set of options + used by OpenSM. + +* Previously, under extreme load conditions, when OpenSM got + overloaded with SA queries during which the incoming messages queue + also grew, delays were incurred in message response-time beyond the + expected. This new version of OpenSM has been enhanced such that, + under such a case, incoming new SA queries are returned with a + RESOURCE_BUSY status (per the InfiniBand Architecture + Specification). 
+ +* Kill -HUP: If the OpenSM process (ps -efww |grep opensm.bin) gets a + SIGHUP (sent by kill -HUP), it will start a heavy sweep as if a trap + was received or a change in topology was observed by the SM. + +1.2 Software Dependencies + +OpenSM depends on the installation of either OpenIB gen2 (e.g. IBG2 +distribution), OpenIB gen1 (e.g. IBGD distribution) or Mellanox VAPI +stacks. The qualified driver versions are provided in Table 2, +"Qualified IB Stacks". + +1.4 Supported Devices Firmware + +The main task of OpenSM is to initialize InfiniBand devices. The +qualified devices and their corresponding firmware versions +are listed in Table 3. + +2 Known Issues And Limitations +------------------------------ + +* No Partition/Pkey policy support: + OpenSM does not provide means to set partitions. + +* IB "trusted" concept is unsupported: + Queries that should be classified according to the trustworthiness of + their sources will not be handled correctly. + +* No Service / Key associations: + There is no way to manage Service access by Keys. + +* No SM to SM SMDB synchronization: + Puts the burden of re-registering services, multicast groups, and + inform-info on the client application (or IB access layer core). + +* NPTL problem under Red Hat 9.0, Red Hat AS 3.0: + There are some bugs (pthread conditional wait missing events) + with thread handling when using the dynamic Native POSIX Thread + Library (/lib/tls) of Red Hat 9.0 & Red Hat AS 3.0 OSs. To overcome + that, OpenSM installation places wrapper scripts named opensm and + osmtest in the /usr/bin directory, which preload the standard libc + and libpthread before invoking the executables. 
If using the osm + package, a similar workaround is possible by putting the LD_PRELOAD + setting in .tclshrc file, for example: set env (LD_PRELOAD) + "/lib/libc.so.6:/lib/libpthread.so.0" + +* InformInfo failure over IBMGT: + OpenSM might not respect a valid InformInfo unsubscribe request when + running over Mellanox's IBMGT user level MAD interface (not on + IBGD). This will be fixed in the next release. + +* No "port down" event handling: + Changing the switch port through which OpenSM connects to the IB + fabric may cause wrong operation. Please restart OpenSM whenever + such a connectivity change is made. + +3 Unsupported IB Compliancy Statements +-------------------------------------- +The following section lists all the IB compliancy statements which +OpenSM does not support. Please refer to IB specification for detailed +information on each compliancy statement. + +* C14-22 (Authentication): + M_Key M_KeyProtectBits and M_KeyLeasePeriod shall be set in one + SubnSet method. As a work-around, an OpenSM option is provided for + defining the protect bits. + +* C14-67 (Authentication): + On SubnGet(SMInfo) and SubnSet(SMInfo) - if M_Key is not zero then + the SM shall generate a SubnGetResp if the M_Key is matching or + silently drop the packet if M_Key is not matching. + +* C15-0.1.23.1 (Authentication): + PortInfoRecords shall always be provided with the M_Key component + set to 0, except in the case of a trusted request, in which case the + actual M_Key component contents shall be provided. + +* C15-0.1.23.2 (Authentication): + P_KeyTableRecords and ServiceAssociationRecords shall only be + provided in responses to trusted requests. + +* C15-0.1.23.4 (Authentication): + InformInfoRecords shall always be provided with the QPN set to + 0, except for the case of a trusted request, in which case the actual + subscriber QPN shall be returned. 
+ +* o13-17.1.2 (Event-FWD): + If no permission to forward, the subscription should be removed and + no further forwarding should occur. + +* C14-37.1.2 (Handover): + Priority should be kept in non-volatile memory. + +* C14-38.1.1 (Handover): + Support AttributeModifier values in SubnSet(SMInfo). If the state + transition requested is invalid - return with status code 7. + +* C14-24.1.1.5 and C14-62.1.1.22 (Initialization): + GUIDInfo - SM should enable assigning Port GUIDInfo. + +* C14-44 (Initialization): + If the SM discovers that it is missing an M_Key to update CA/RT/SW, + it should notify the higher level. + +* C14-62.1.1.11 (Initialization): + PortInfo:VLHighLimit should match the configured VLArb on the port. + +* C14-62.1.1.12 (Initialization): + PortInfo:M_Key - Set the M_Key to a node based random value. + +* C14-62.1.1.13 (Initialization): + PortInfo:P_KeyProtectBits - set according to an optional policy. + +* C14-62.1.1.24 (Initialization): + SwitchInfo:DefaultPort - should be configured for random FDB. + +* C14-62.1.1.32 (Initialization): + RandomForwardingTable should be configured. + +* o15-0.1.12 (Multicast): + If the JoinState is SendOnlyNonMember = 1 (only), then the endport + should join as sender only. + +* o15-0.1.13 (Multicast): + If a Join request using unrealistic parameters is received, return + ERR_REQ_INVALID. + +* o15-0.1.8 (Multicast): + If a request for creating an MCG with fields that cannot be met, + return ERR_REQ_INVALID (currently ignoring SL and FlowLabelTclass). + +* C15-0.1.11 (SA-Query): + Query response should use only base LIDs (as the feature has not + been qualified yet). + +* C15-0.1.19 (SA-Query): + Respond to SubnGetMulti(MultiPathRec) + +* C15-0.1.8.6 (SA-Query): + Respond to SubnAdmGetTraceTable - this is an optional attribute. + +* C15-0.1.8.7 (SA-Query): + SubnAdmGetMulti SubnAdmGetMultiResp - Only in case of a MultiPath. 
+ +* C15-X.Y.Z.W (SA-Query): + SubnAdmGet/GetTable GUIDInfo - support GUIDInfo setting/retrieval. + +* C15-0.1.13 Services: + Reject ServiceRecord create, modify or delete if the given + ServiceP_Key does not match the one included in the ServiceGID port + and the port that sent the request. + +* C15-0.1.14 (Services): + Provide means to associate service name and ServiceKeys. + +4 Major Bug Fixes +----------------- + +The following list of bugs were fixed. Note that other less critical +or visible bugs were also fixed. + +* PortInfo query was not matching on several fields. These fields + were added to the comparison function. + +* OpenSM would crash during exit flow if run with "-o" flag. A fix to + the complib global timer destruction sequence solves this problem. + +* OpenSM does not complete the sweep if the driver fails to send a MAD. + Counting the number of outstanding MADs the SM waits for response + for was enhanced to take this case into account. + +* OpenSM was not compliant to the spec statement: C14.62.1.1 Table 183 + p870 l34: ".., the SM shall ensure that one of the P_KeyTable + entries in every node contains either the value 0xFFFF (the default + P_Key, full membership) or the value 0x7FFF (the default P_Key, + partial membership)." OpenSM sets the PKey table with an entry of + 0xffff in case there is no such entry or 0x7fff entries on that + port. Switch ports are ignored. + +* If the SA is queried with IB_PIR_COMPMASK_BASELID and base_lid of 0, + the SA was incorrectly returning all the ports. Fix: do not ignore base + lid of 0 as a query criteria. + +* When provided a PathRecord query with num_paths = 0 the SM should + assume num_paths = 1. Fix: in the PathRecord query code. + +* PathRecord query returned a deleted multicast groups info. Fix: + Added a check for multicast group state to avoid such cases. + +* LinkRecord query provided wrong results. Fix: in query code. + +* PathRecord did not honor PacketLifeTime component. 
Fix: Added the + check for packet lifetime matching. + +* Multicast and other registration happening all the time on the + cluster. Fix: OpenSM was sending false "client-re-registration" + messages (in PortInfo). + +* On some heavy load cases OpenSM would consume 100% CPU time. Fix: an + endless loop in timer implementation that would happen under rare + heavy CPU load cases. + +* OpenSM hangs during LID assignment phase. Fix: A condition that + caused this was fixed. + +* OpenSM core dump in the middle of sweep. Fix: A memory range + overflow write was found by valgrind and fixed. + +* OpenSM core dump as a result of PathRecord query with no results. Fix: + A memory free on non allocated memory was fixed. + +* OpenSM sweep algorithm confused by a timing race. Fix: A significant + race condition in the SM sweep algorithm was found and fixed. + +* OpenSM deadlock due to out of order SMInfo and NodeInfo MAD + received. Fix: A fix in lock ordering resolves this issue. + +* TrapRepress sent even if not a master. Fix: in trap receiver. + +5 Main Verification Flows +------------------------- + +OpenSM verification is run using the following activities: +* osmtest - a standalone program +* ibmgtsim (IB management simulator) based - a set of flows that + simulate clusters, inject errors and verify OpenSM capability to + respond and bring up the network correctly. +* small cluster regression testing - where the SM is used on back to + back or single switch configuration. The regression includes + multiple OpenSM dedicated tests +* cluster testing - when we run OpenSM to setup large cluster, perform + handoff, reboots and reconnects, verify routing correctness and SA + responsiveness at the ULP level (IPoIB and SDP) + +5.1 osmtest + +OsmTest is the main automated verification tool used for OpenSM +testing. Its verification flows are described by the list below. + +* Inventory File: Obtain and verify all port info, node info, and path + records parameters. 
+ +* Service Record: + - Register new service + - Register another service (with a lease period) + - Register another service (with service p_key set to zero) + - Get all services by name + - Delete the first service + - Delete the third service. + - Added bad flows of get/delete non valid service + - Add / Get same service with different data + - Add / Get / Delete by different component mask values (services + by Name & Key / Name & Data / Name & Id / Id only ) + +* Multicast Member Record: + - Query of existing Groups (IPoIB) + - BAD Join with insufficient comp mask (o15.0.1.3) + - Create given MGID=0 (o15.0.1.4) + - Create given MGID=0xFF12A01C,FE800000,00000000,12345678 (o15.0.1.4) + - Create BAD MGID=0xFA. (o15.0.1.6) + - Create BAD MGID=0xFF12A01B w/ link-local not set (o15.0.1.6) + - New MGID with invalid join state (o15.0.1.9) + - Retry of existing MGID - See JoinState update (o15.0.1.11) + - BAD RATE when connecting to existing MGID (o15.0.1.13) + - Partial JoinState delete request - removing FullMember (o15.0.1.14) + - Full Delete of a group (o15.0.1.14) + - Verify Delete by trying to Join deleted group (o15.0.1.14) + - BAD Delete of IPoIB membership (no prev join) (o15.0.1.15) + +* Event Forwarding: Register for trap forwarding using reports + - Send a trap and wait for report + - Unregister non-existing + +* Trap 64/65 Flow: Register to Trap 64-65, create traps (by + disconnect/connect ports) and wait for report, then unregister. + +* Stress Test: send PortInfoRecord queries both single and RMPP and + check for the rate of responses as well as their validity. + +5.2 IB Management Simulator OpenSM Test Flows: + +The simulator provides the ability to simulate the SM handling of virtual +topologies that are not limited to actual lab equipment availability. +OpenSM was simulated to bring up clusters of up to 10,000 nodes. Daily +regressions use smaller clusters (16 and 128 nodes). 
+ +The following test flows are running on the IB management simulator: + +* Stability: + Up to 12 links from the fabric are randomly selected to drop packets + at drop rates up to 90%. The SM is required to succeed in bringing the + fabric up. The resulting routing is verified to be correct too. + +* LID Manager: + Using LMC = 2 the fabric is being initialized with LIDs. Faults like + zero LID, Duplicated LID, non-aligned (to LMC) LIDs are being + randomly assigned to various nodes and other errors are randomly + output to the guid2lid cache file. The SM sweep is run 5 times and + after each iteration a complete verification is made to ensure all + LIDs that could possibly be maintained are kept, as well as all nodes + were assigned a legal LID range. + +* Multicast Routing: + Nodes are randomly joining the 0xc000 group and eventually the + resulting routing is verified for completeness and adherence to + Up/Down routing rules. + +* OsmTest: + The complete osmtest flow as described in the previous table is run on + the simulated fabrics. + +5.3 OpenSM Regression + +Using a back to back or single switch connection the following set of +tests are run nightly on the stacks described in table 2. The included +tests are: + +* Stress Testing: Flood the SA with queries from multiple channel + adapters to check the robustness of the entire stack up to the SA. + +* Dynamic Changes: Dynamic Topology changes, through randomly + dropping SMP packets used to test OpenSM adaptation to unstable + network & verify DB correctness. + +* Trap Injection: This flow injects traps to the SM and verifies it does + handle them gracefully. + +* SA Query Test: This test exhaustively checks the SA responses to all + possible single component mask. To do that the test examines the + entire set of records the SA can provide, classifies them by their + field values and then selects every field (using component mask and a + value) and verifies the response matches the expected set of records. 
+ A random selection using multiple component mask bits is also performed. + +5.4 Cluster testing: + +Cluster testing is usually run before a distribution release. It +involves real hardware setup of 16 to 32 nodes (or more if beta site +is available). Each test is validated by running all-to-all ping through IB +interface. The test procedure includes: + +* Cluster bringup + +* Handoff between 2 or 3 SM's while performing + - Node reboots + - Switch power cycles (disconnecting the SM's) + +* Irresponsive port detection and recovery + +* osmtest from multiple nodes + +* Trap injection and recovery + + +6 Qualification +---------------- + +Table 2 - Qualified IB Stacks +============================= + +Stack | Version +----------------------------------------|-------------------------- +VAPI (Mellanox InfiniBand HCA Driver)  | 3.2 and later +OpenIB Gen1 (IBGD distribution) | 1.8.0 +OpenIB Gen2 (IBG2 distribution) | 1.0 + +Table 3 - Qualified Devices and Corresponding Firmware +====================================================== + +Device | FW versions +--------|----------------------------------------------------------- +MT43132 | InfiniScale - fw-43132 5.2.0 (and later) +MT47396 | InfiniScale III - fw-47396 0.5.0 (and later) +MT23108 | InfiniHost - fw-23108 3.3.2 +MT25204 | InfiniHost III Lx - fw-25204 1.0.1 +MT25208 | InfiniHost III Ex (InfiniHost Mode) - fw-25208 4.6.2 (and later) +MT25208 | InfiniHost III Ex (MemFree Mode) - fw-25218 5.0.1 (and later) + +Other vendors' HCAs are not yet verified, but eHCA is known to be discovered and configured +correctly. 
diff --git a/doc/opensm_release_notes_openib-1.2.1.txt b/doc/opensm_release_notes_openib-1.2.1.txt new file mode 100644 index 0000000..02caaf8 --- /dev/null +++ b/doc/opensm_release_notes_openib-1.2.1.txt @@ -0,0 +1,460 @@ + OpenSM Release Notes + ====================== + +Version: OpenFabric Enterprise Distribution (OFED) 1.0 +Repo: https://openib.org/svn/gen2/branches/1.0/src/userspace/management/osm +Version: 7992 +Date: Jun 2006 + +1 Overview +---------- +This document describes the contents of the OpenSM OFED 1.0 release. +OpenSM is an InfiniBand compliant Subnet Manager and Administrator, +and runs on top of OpenIB. The OpenSM version for this release +is openib-1.2.1 + +This document includes the following sections: +1 This Overview section (describing new features and software + dependencies) +2 Known Issues And Limitations +3 Unsupported IB compliance statements +4 Major Bug Fixes +5 Main Verification Flows +6 Qualified software stacks and devices + +1.1 New Features + +* SA GuidInfoRecord support + +* Default for maxsmps changed: + Control the number of SMP sent in parallel and thus shorten the + fabric initialization time. + +* osmtest/osmt_slvl_vl_arb.c: + Output file name changed from vl_arbs.txt to qos.txt + +* Support new IBTA Errata IsPortInfoCapMaskMatchSupported: + This new capability of the SA enables matching of individual port + capability bits dramatically reducing the query size for agents like + the SRP initiator query for finding SRP targets. + +* Honor guid2lid when coming out of standby: + This change adds an option to the opensm that forces it to honor the + guid2lid file given when it comes out of Standby state. Currently, + when opensm comes out of Standby state, it ignores the guid2lid file + it read, and honors only the lids defined on the ports themselves. + +* Add guid to opensm opts + This enables the port on which to run the SM to be defined through + the configuration file as well as through the command line. 
+ +* PPC support: + No PPC QA was performed. + +1.2 Software Dependencies + +OpenSM depends on the installation of either OFED 1.0, +OpenIB gen2 (e.g. IBG2 distribution), OpenIB gen1 (e.g. IBGD +distribution) or Mellanox VAPI stacks. The qualified driver versions +are provided in Table 2, "Qualified IB Stacks". + +1.4 Supported Devices Firmware + +The main task of OpenSM is to initialize InfiniBand devices. The +qualified devices and their corresponding firmware versions +are listed in Table 3. + +2 Known Issues And Limitations +------------------------------ + +* No Partition/Pkey policy support: + OpenSM does not provide means to set partitions. + +* No Service / Key associations: + There is no way to manage Service access by Keys. + +* No SM to SM SMDB synchronization: + Puts the burden of re-registering services, multicast groups, and + inform-info on the client application (or IB access layer core). + +* No "port down" event handling: + Changing the switch port through which OpenSM connects to the IB + fabric may cause incorrect operation. Please restart OpenSM whenever + such a connectivity change is made. + +* Changing connections during SM operation: + Under some conditions the SM can get confused by a change in + cabling (moving a cable from one switch port to the other) and + momentarily see this as having the same GUID appear connected + to two different IB ports. Under some conditions, when the SM fails to + get the corresponding change event it might mistakenly report this case + as a "duplicated GUID" case and abort. It is advisable to double-check + the syslog after each such change in connectivity and restart + OpenSM if it has exited. + +* No QoS support: + No SL2VL and VLArbitration setting is performed by the SM. + +3 Unsupported IB Compliance Statements +-------------------------------------- +The following section lists all the IB compliance statements which +OpenSM does not support. 
Please refer to the IB specification for detailed +information regarding each compliance statement. + +* C14-22 (Authentication): + M_Key M_KeyProtectBits and M_KeyLeasePeriod shall be set in one + SubnSet method. As a work-around, an OpenSM option is provided for + defining the protect bits. + +* C14-67 (Authentication): + On SubnGet(SMInfo) and SubnSet(SMInfo) - if M_Key is not zero then + the SM shall generate a SubnGetResp if the M_Key matches, or + silently drop the packet if M_Key does not match. + +* C15-0.1.23.4 (Authentication): + InformInfoRecords shall always be provided with the QPN set to 0, + except for the case of a trusted request, in which case the actual + subscriber QPN shall be returned. + +* o13-17.1.2 (Event-FWD): + If no permission to forward, the subscription should be removed and + no further forwarding should occur. + +* C14-37.1.2 (Handover): + Priority should be kept in non-volatile memory. + +* C14-24.1.1.5 and C14-62.1.1.22 (Initialization): + GUIDInfo - SM should enable assigning Port GUIDInfo. + +* C14-44 (Initialization): + If the SM discovers that it is missing an M_Key to update CA/RT/SW, + it should notify the higher level. + +* C14-62.1.1.11 (Initialization): + PortInfo:VLHighLimit should match the configured VLArb on the port. + +* C14-62.1.1.12 (Initialization): + PortInfo:M_Key - Set the M_Key to a node based random value. + +* C14-62.1.1.13 (Initialization): + PortInfo:P_KeyProtectBits - set according to an optional policy. + +* C14-62.1.1.24 (Initialization): + SwitchInfo:DefaultPort - should be configured for random FDB. + +* C14-62.1.1.32 (Initialization): + RandomForwardingTable should be configured. + +* o15-0.1.12 (Multicast): + If the JoinState is SendOnlyNonMember = 1 (only), then the endport + should join as sender only. + +* o15-0.1.13 (Multicast): + If a Join request using unrealistic parameters is received, return + ERR_REQ_INVALID. 
+ +* o15-0.1.8 (Multicast): + If a request for creating an MCG with fields that cannot be met, + return ERR_REQ_INVALID (currently ignoring SL and FlowLabelTclass). + +* C15-0.1.11 (SA-Query): + Query response should use only base LIDs (as the feature has not + been qualified yet). + +* C15-0.1.19 (SA-Query): + Respond to SubnGetMulti(MultiPathRec) + +* C15-0.1.8.6 (SA-Query): + Respond to SubnAdmGetTraceTable - this is an optional attribute. + +* C15-0.1.8.7 (SA-Query): + SubnAdmGetMulti SubnAdmGetMultiResp - Only in case of a MultiPath. + +* C15-0.1.13 Services: + Reject ServiceRecord create, modify or delete if the given + ServiceP_Key does not match the one included in the ServiceGID port + and the port that sent the request. + +* C15-0.1.14 (Services): + Provide means to associate service name and ServiceKeys. + +4 Major Bug Fixes +----------------- + +The following is a list of bugs that were fixed. Note that other less critical +or visible bugs were also fixed. + +* Eliminate error on active -> active port state transition + SM may transition port from armed to active but in the meantime, due + to passing a data packet with active enable set, the port may + already have transitioned to active. Active -> active port state + transition is indicated as an error but it isn't really an error so + don't indicate error in the osm log. + +* Routing not set for the first LID in the last LFT block: + Fix: osm_switch.c: In osm_switch_get_fwd_tbl_block last block calculation + +* Corrupted guid2lid file causes OpenSM exit + Fix: exit only if exit_on_fatal option is set (the default) + +* OpenSM was causing Client-Re-Registration continuously: + The SM was storing the response PortInfo.ClientReRegstration + bit and using it during next Set(PortInfo). Fix: clear the bit on + receive. 
+ +* Multicast Query Selectors MTU, rate, and PacketLifeTime were not exact + +* Try not to recognize port change as duplicated GUID + This fix solves the issue of a port move during heavy sweep + being recognized as a duplicated guid. Fix: If the SM sees what + seems to be a duplicated guid, but it also received an indication + for immediately forcing another heavy sweep (for example, as a + result of receiving trap 128) then it shouldn't issue a duplicated + guid error (and possibly exit), but should just ignore this and + continue. This means that only if the SM recognizes such a + duplication in a stable subnet that it'll issue the error (and + possibly exit). + +* Set PKey table on switch ports not supporting it: + OpenSM attempts to set pkey table entries on external switch ports + even if the switch declares a PartitionEnforcementCap of zero. The + consequence is ERR 4108. Fix: Observe PartitionEnforcementCap of zero. + +* Incorrect MCMemberRecord Get/GetTable in trusted mode: + This change fixes the retrieval of the MCMember records according to + Errata MGTWG3280. This fix provide means to obtain all the group + members by issuing a trusted GetTable query. + +* Trusted MCMemberRecord query was not recognized as trusted: + Trust is checked by comparing the request SM_Key field to the SM + SM_Key. The bug was in looking up the SM_Key from the response not + the request. + +* Port left in down state after setting MTU or OpVLs on its neighbor: + In case of a difference between the MTU of two ports, only the port + with the higher MTU was set to down. Its remote port was written in + the DB as in the ACTIVE state although its real status was INIT. + Because of this, the SM didn't try to move the remote port to + ACTIVE. + +* Atomic counters used throughout the code were broken: + A new implementation has been provided. 
+ +* MC Group creation with "less than" MTU ignores the requester MTU: + When requesting to create an MC group with MTU(rate) selector 1 + (meaning less than rate specified), the MC group is created with + MTU(rate) requested - 1. This is without checking the MTU(rate) of + the port requesting the creation of the multicast group. This means + that if, for example, port with MTU=2 sends a request for MC group + creation with MTU selector=1 and MTU=5, Opensm will try to create a + MC group with MTU=4, and fail, since the port capabilities are not + realizable. Fix: creation of the MC group with MTU(rate) also takes + into account the MTU(rate) of the port requesting the creation. + +* MC Group join does not validate that the joining port's capabilities + match those of the MC. Fix: Add verification of endport physical + capability to join MC group. + +* ClientReRegistration not sent to ports discovered after first sweep: + PortInfo sent with ClientReRegistration bit turned on only during + the first sweep after becoming Master. This doesn't cover all cases + where ClientReRegistration should be turned on. Fix: turn on this + bit also on new ports it discovers (in cases of subnet merging, for + example). + +* segfault during a report on deleted multicast group: + osm_mcast_mgr.c, executing the line of code: + osm_mgrp_send_delete_notice( p_mgr->p_subn, p_mgr->p_log, p_mgrp ); + caused segmentation fault since the handle p_mgrp was already + deleted while the function was called. Fix: inserted the line above + into the protected section. + +* segfault in osm_get_gid_by_mad_addr: + The affected flows are ports and multicast joins. + +* segfault in LID manager: + Handle NULL p_rem_physp can validly be NULL when the remote SMA is + not responding (but physical link is up). 
+ +* segfault in Up/Down routing engine + + +5 Main Verification Flows +------------------------- + +OpenSM verification is run using the following activities: +* osmtest - a stand-alone program +* ibmgtsim (IB management simulator) based - a set of flows that + simulate clusters, inject errors and verify OpenSM capability to + respond and bring up the network correctly. +* small cluster regression testing - where the SM is used on back to + back or single switch configurations. The regression includes + multiple OpenSM dedicated tests. +* cluster testing - when we run OpenSM to setup a large cluster, perform + hand-off, reboots and reconnects, verify routing correctness and SA + responsiveness at the ULP level (IPoIB and SDP). + +5.1 osmtest + +osmtest is an automated verification tool used for OpenSM +testing. Its verification flows are described by list below. + +* Inventory File: Obtain and verify all port info, node info, and path + records parameters. + +* Service Record: + - Register new service + - Register another service (with a lease period) + - Register another service (with service p_key set to zero) + - Get all services by name + - Delete the first service + - Delete the third service. + - Added bad flows of get/delete non valid service + - Add / Get same service with different data + - Add / Get / Delete by different component mask values (services + by Name & Key / Name & Data / Name & Id / Id only ) + +* Multicast Member Record: + - Query of existing Groups (IPoIB) + - BAD Join with insufficient comp mask (o15.0.1.3) + - Create given MGID=0 (o15.0.1.4) + - Create given MGID=0xFF12A01C,FE800000,00000000,12345678 (o15.0.1.4) + - Create BAD MGID=0xFA. 
(o15.0.1.6) + - Create BAD MGID=0xFF12A01B w/ link-local not set (o15.0.1.6) + - New MGID with invalid join state (o15.0.1.9) + - Retry of existing MGID - See JoinState update (o15.0.1.11) + - BAD RATE when connecting to existing MGID (o15.0.1.13) + - Partial JoinState delete request - removing FullMember (o15.0.1.14) + - Full Delete of a group (o15.0.1.14) + - Verify Delete by trying to Join deleted group (o15.0.1.14) + - BAD Delete of IPoIB membership (no prev join) (o15.0.1.15) + +* GUIDInfo Record: + - All GUIDInfoRecords in subnet are obtained + +* Event Forwarding: Register for trap forwarding using reports + - Send a trap and wait for report + - Unregister non-existing + +* Trap 64/65 Flow: Register to Trap 64-65, create traps (by + disconnecting/connecting ports) and wait for report, then unregister. + +* Stress Test: send PortInfoRecord queries, both single and RMPP and + check for the rate of responses as well as their validity. + + +5.2 IB Management Simulator OpenSM Test Flows: + +The simulator provides ability to simulate the SM handling of virtual +topologies that are not limited to actual lab equipment availability. +OpenSM was simulated to bring up clusters of up to 10,000 nodes. Daily +regressions use smaller (16 and 128 nodes clusters). + +The following test flows are run on the IB management simulator: + +* Stability: + Up to 12 links from the fabric are randomly selected to drop packets + at drop rates up to 90%. The SM is required to succeed in bringing the + fabric up. The resulting routing is verified to be correct, too. + +* LID Manager: + Using LMC = 2 the fabric is initialized with LIDs. Faults such as + zero LID, Duplicated LID, non-aligned (to LMC) LIDs are + randomly assigned to various nodes and other errors are randomly + output to the guid2lid cache file. 
The SM sweep is run 5 times and + after each iteration a complete verification is made to ensure that all + LIDs that could possibly be maintained are kept, as well as that all nodes + were assigned a legal LID range. + +* Multicast Routing: + Nodes randomly join the 0xc000 group and eventually the + resulting routing is verified for completeness and adherence to + Up/Down routing rules. + +* osmtest: + The complete osmtest flow as described in the previous table is run on + the simulated fabrics. + +* Stress Test: + This flow merges fabric, LID and stability issues with continuous + PathRecord, ServiceRecord and Multicast Join/Leave activity to + stress the SM/SA during continuous sweeps. + +5.3 OpenSM Regression + +Using a back-to-back or single switch connection, the following set of +tests is run nightly on the stacks described in table 2. The included +tests are: + +* Stress Testing: Flood the SA with queries from multiple channel + adapters to check the robustness of the entire stack up to the SA. + +* Dynamic Changes: Dynamic Topology changes, through randomly + dropping SMP packets, used to test OpenSM adaptation to an unstable + network & verify DB correctness. + +* Trap Injection: This flow injects traps to the SM and verifies that it + handles them gracefully. + +* SA Query Test: This test exhaustively checks the SA responses to all + possible single component mask. To do that the test examines the + entire set of records the SA can provide, classifies them by their + field values and then selects every field (using component mask and a + value) and verifies that the response matches the expected set of records. + A random selection using multiple component mask bits is also performed. + +5.4 Cluster testing: + +Cluster testing is usually run before a distribution release. It +involves real hardware setups of 16 to 32 nodes (or more if a beta site +is available). Each test is validated by running all-to-all ping through the IB +interface. 
The test procedure includes: + +* Cluster bringup + +* Hand-off between 2 or 3 SM's while performing + - Node reboots + - Switch power cycles (disconnecting the SM's) + +* Unresponsive port detection and recovery + +* osmtest from multiple nodes + +* Trap injection and recovery + + +6 Qualification +---------------- + +Table 2 - Qualified IB Stacks +============================= + +Stack | Version +-----------------------------------------|-------------------------- +OFED | 1.0 +OpenIB Gen2 (IBG2 distribution) | 1.0 +OpenIB Gen1 (IBGD distribution) | 1.8.0 +VAPI (Mellanox InfiniBand HCA Driver) | 3.2 and later + +Table 3 - Qualified Devices and Corresponding Firmware +====================================================== + +Mellanox +Device | FW versions +--------|----------------------------------------------------------- +MT43132 | InfiniScale - fw-43132 5.2.0 (and later) +MT47396 | InfiniScale III - fw-47396 0.5.0 (and later) +MT23108 | InfiniHost - fw-23108 3.3.2 +MT25204 | InfiniHost III Lx - fw-25204 1.0.1 +MT25208 | InfiniHost III Ex (InfiniHost Mode) - fw-25208 4.6.2 (and later) +MT25208 | InfiniHost III Ex (MemFree Mode) - fw-25218 5.0.1 (and later) + +QLogic/PathScale +Device | Note +--------|----------------------------------------------------------- +iPath | QHT6040 (PathScale InfiniPath HT-460) +iPath | QHT6140 (PathScale InfiniPath HT-465) +iPath | QLE6140 (PathScale InfiniPath PE-880) + +Note: OpenSM does not run on an IBM Galaxy (eHCA) as it does not expose +QP0 and QP1. However, it does support it as a device on the subnet. 
diff --git a/doc/opensm_release_notes_openib-2.0.5.txt b/doc/opensm_release_notes_openib-2.0.5.txt new file mode 100644 index 0000000..51bd21c --- /dev/null +++ b/doc/opensm_release_notes_openib-2.0.5.txt @@ -0,0 +1,486 @@ + OpenSM Release Notes 2.0.5 + ============================ + +Version: OpenFabrics Enterprise Distribution (OFED) 1.1 +Repo: https://openib.org/svn/gen2/branches/1.1/src/userspace/management/osm +Version: 9535 (openib-2.0.5) +Date: October 2006 + +1 Overview +---------- +This document describes the contents of the OpenSM OFED 1.1 release. +OpenSM is an InfiniBand compliant Subnet Manager and Administration, +and runs on top of OpenIB. The OpenSM version for this release +is openib-2.0.5 + +This document includes the following sections: +1 This Overview section (describing new features and software + dependencies) +2 Known Issues And Limitations +3 Unsupported IB compliance statements +4 Major Bug Fixes +5 Main Verification Flows +6 Qualified software stacks and devices + +1.1 Major New Features + +* Partition manager: + The partition manager provides a means to setup multiple partitions + by providing a partition policy file. For details please read the + doc/partition-config.txt or the opensm man page. + +* Basic QoS Manager: + Provides a uniform configuration of the entire fabric with values defined + in the OpenSM options file. The options support different settings for + CAs, Switches, and Routers. Note that this is disabled by default and + using -Q enables QoS fabric setup. + +* Loading pre-routes from a file: + A new routing module enables loading pre-routes from a file. + To use this option you should use the command line options: + "-R file --U " or + "--routing_engine file --ucast_file " + For more information refer to the file doc/modular-routing.txt + or the opensm man page. + +* SA MultiPathRecord support: + The SA can now handle requests for multiple PathRecords in one query. 
+ This includes methods SA GetMulti/GetMultiResp and dual sided RMPP. + +* PPC64 is now QAed and supported + +* Support LMC > 0 for Switch Enhanced Port 0: + Allows enhanced switch port 0 (ESP0) to have a non zero + LMC. Use the configured subnet wide LMC for this. Modifications were + necessary to the LID assignment and routing to support this. + Also, added an option to the configuration to use LMC configured for + subnet for enhanced switch port 0 or set it to 0 even if a non zero + LMC is configured for the subnet. The default is currently the + latter option. The new configuration option is: lmc_esp0 + +1.2 Minor New Features: + +* IPoIB broadcast group configuration: + It is now possible to control the IPoIB broadcast group parameters + (MTU, rate, SL) through the partitions configuration file. + +* Limiting OpenSM log file size: + By providing the command line option: "-L " or + "--log_limit " the user can limit the generated log + file size. When specified, the log file will be truncated upon reaching + this limit. + +* Favor 1K MTU for Tavor (MT23108) HCA + In cases where a PathRecord or MultiPathRecord is queried and the + requestor does not specify the MTU or does specify it in a way + that allows for MTU to be 1K and one of the path ends in a Tavor, + limit the MTU to 1K max. + +* Man pages: + Added opensm.8 and osmtest.8 + +* Leaf VL stall count control: + A new parameter (leaf_vl_stall_count) for controlling the number of + sequential packets dropped on a switch port driving a HCA/TCA/Router + that cause the port to enter the VLStalled state was added to the + options file. + +* SM Polling/Handover defaults changed + The default SMInfo polling retries was decreased from 18 to 4 + which reduces the default handover time from 3 min to 40 seconds. + +1.3 Library API Changes + +* cl_mem* APIs deprecated in complib: + These functions are now considered as deprecated and should be + replaced by direct calls to malloc, free, memset, etc. 
+ +* osm_log_init_v2 API added in libopensm: + Supports providing the new option for log file truncation. + +1.4 Software Dependencies + +OpenSM depends on the installation of either OFED 1.1, OFED 1.0, +OpenIB gen2 (e.g. IBG2 distribution), OpenIB gen1 (e.g. IBGD +distribution), or Mellanox VAPI stacks. The qualified driver versions +are provided in Table 2, "Qualified IB Stacks". + +1.5 Supported Devices Firmware + +The main task of OpenSM is to initialize InfiniBand devices. The +qualified devices and their corresponding firmware versions +are listed in Table 3. + +2 Known Issues And Limitations +------------------------------ + +* No Service / Key associations: + There is no way to manage Service access by Keys. + +* No SM to SM SMDB synchronization: + Puts the burden of re-registering services, multicast groups, and + inform-info on the client application (or IB access layer core). + +* No "port down" event handling: + Changing the switch port through which OpenSM connects to the IB + fabric may cause incorrect operation. Please restart OpenSM whenever + such a connectivity change is made. + +* Changing connections during SM operation: + Under some conditions the SM can get confused by a change in + cabling (moving a cable from one switch port to the other) and + momentarily see this as having the same GUID appear connected + to two different IB ports. Under some conditions, when the SM fails to + get the corresponding change event it might mistakenly report this case + as a "duplicated GUID" case and abort. It is advisable to double-check + the syslog after each such change in connectivity and restart + OpenSM if it has exited. + +3 Unsupported IB Compliance Statements +-------------------------------------- +The following section lists all the IB compliance statements which +OpenSM does not support. Please refer to the IB specification for detailed +information regarding each compliance statement. 
+ +* C14-22 (Authentication): + M_Key M_KeyProtectBits and M_KeyLeasePeriod shall be set in one + SubnSet method. As a work-around, an OpenSM option is provided for + defining the protect bits. + +* C14-67 (Authentication): + On SubnGet(SMInfo) and SubnSet(SMInfo) - if M_Key is not zero then + the SM shall generate a SubnGetResp if the M_Key matches, or + silently drop the packet if M_Key does not match. + +* C15-0.1.23.4 (Authentication): + InformInfoRecords shall always be provided with the QPN set to 0, + except for the case of a trusted request, in which case the actual + subscriber QPN shall be returned. + +* o13-17.1.2 (Event-FWD): + If no permission to forward, the subscription should be removed and + no further forwarding should occur. + +* C14-24.1.1.5 and C14-62.1.1.22 (Initialization): + GUIDInfo - SM should enable assigning Port GUIDInfo. + +* C14-44 (Initialization): + If the SM discovers that it is missing an M_Key to update CA/RT/SW, + it should notify the higher level. + +* C14-62.1.1.12 (Initialization): + PortInfo:M_Key - Set the M_Key to a node based random value. + +* C14-62.1.1.13 (Initialization): + PortInfo:P_KeyProtectBits - set according to an optional policy. + +* C14-62.1.1.24 (Initialization): + SwitchInfo:DefaultPort - should be configured for random FDB. + +* C14-62.1.1.32 (Initialization): + RandomForwardingTable should be configured. + +* o15-0.1.12 (Multicast): + If the JoinState is SendOnlyNonMember = 1 (only), then the endport + should join as sender only. + +* o15-0.1.8 (Multicast): + If a request for creating an MCG with fields that cannot be met, + return ERR_REQ_INVALID (currently ignores SL and FlowLabelTClass). + +* C15-0.1.8.6 (SA-Query): + Respond to SubnAdmGetTraceTable - this is an optional attribute. + +* C15-0.1.13 Services: + Reject ServiceRecord create, modify or delete if the given + ServiceP_Key does not match the one included in the ServiceGID port + and the port that sent the request. 
+ +* C15-0.1.14 (Services): + Provide means to associate service name and ServiceKeys. + +4 Major Bug Fixes +----------------- + +The following is a list of bugs that were fixed. Note that other less critical +or visible bugs were also fixed. + +* "Broken" fabric (duplicated port GUIDs) handling improved + Replace assert with a real check to handle invalid physical port + in osm_node_info_rcv.c which could occur on a broken fabric + +* SA client synchronous request failed but status returned was IB_SUCCESS + even if there was no response. + There was a missing setting of the status in the synchronous case. + +* Memory leak fixes: + 1. In libvendor/osm_vendor_ibumad.c:osm_vendor_get_all_port_attr + 2. In libvendor/osm_vendor_ibumad_sa.c:__osmv_sa_mad_rcv_cb + 3. On receiving SMInfo SA request from a node that does not share a + partition, the response mad was allocated but never free'd + as it was never sent. + +* Set(InformInfo) OpenSM Deadlock: + When receiving a request with unknown LID + +* PathRecord to inconsistent multicast destination: + Fix the return error when multicast destination is not consistently + indicated. + +* Remove double calculation of reversible path + In osm_sa_path_record.c:__osm_pr_rcv_get_lid_pair_path a PathRecord + query used to double check if the path is reversible + +* Some PathRecord log messages use "net order": + Fix GUID net to host conversion in some osm_log messages + +* DR/LID routed SMPs direction bit handling: + osm_resp.c:osm_resp_make_resp_smp, set direction bit only if direct + routed class. This bug caused two issues: + 1. Get/Set responses always had direction bit set. + 2. Trap represses never had direction bit set. + The direction bit needs setting in direct routed responses and it + doesn't exist in LID routed responses. + osm_sm_mad_ctrl.c: did not detect the "direction bit" correctly. 
+ +* OpenSM crash due to transaction lookup (interop with Cisco stack) + When a wire TID that maps to internal TID of zero (after applying + mask) was received the lookup of the transaction was successful. + The stale transaction pointed to "free'd" memory. + +* Better handling for Path/MultiPath requests for raw traffic + +* Wrong ProducerType provided in Notice Reports: + When formating an SM generated report, the ProducerType was using + CL_NTOH32 which can not be used to format a 24bit network order number. + +* OpenSM break on PPC64 + complib: Fixed memory corruption in cl_pool.c:cl_qcpool_init. This + affected big endian 64-bit architectures only. + +* Illegal Set(InformInfo) was wrongly successful in updating the SMDB + osm_sa_informinfo.c: In osm_infr_rcv_process_set_method, if sending + error, don't call osm_infr_rcv_process_set_method + +* RMPP queries of InformInfoRecord fail + ib_types.h: Pad ib_inform_info_record_t to be modulo 8 in size so + that attribute offset is calculated properly + +* Returning "invalid request" rather than "unsupported method/attribute" + In these cases, a noncompliant response was being provided. + +* Noncompliant response for SubnAdmGet(PortInfoRecord) with no match + osm_pir_rcv_process, now returns "SA no records error" for SubnAdmGet + with 0 records found + +* Noncompliant non base LID returned by some queries: + The following attributes used to return the request LID rather than + its base LID in responses: PKeyTableRecord, GUIDInfoRecord, + SLtoVLMappingTableRecord, VLArbitrationTableRecord, LinkRecord + +* Noncompliant SubnAdmGet and SubnAdmGetTable: + Mixing of error codes in case of no records or multiple records + fixed for the attributes: + LinearForwardingTableRecord, GUIDInfoRecord, + VLArbitrationTableRecord, LinkRecord, PathRecord + +* segfault in InformInfo flows + Under stress concurrent Set/Delete/Get flows. Fixed by adding + missing lock. 
* SA queries containing LID out of range did not return ERR_REQ_INVALID
(o15.0.1.6) + - Create BAD MGID=0xFF12A01B w/ link-local not set (o15.0.1.6) + - New MGID with invalid join state (o15.0.1.9) + - Retry of existing MGID - See JoinState update (o15.0.1.11) + - BAD RATE when connecting to existing MGID (o15.0.1.13) + - Partial JoinState delete request - removing FullMember (o15.0.1.14) + - Full Delete of a group (o15.0.1.14) + - Verify Delete by trying to Join deleted group (o15.0.1.14) + - BAD Delete of IPoIB membership (no prev join) (o15.0.1.15) + +* GUIDInfo Record: + - All GUIDInfoRecords in subnet are obtained + +* MultiPathRecord: + - Perform some compliant and noncompliant MultiPathRecord requests + - Validation is via status in responses and IB analyzer + +* PKeyTableRecord: + - Perform some compliant and noncompliant PKeyTableRecord queries + - Validation is via status in responses and IB analyzer + +* LinearForwardingTableRecord: + - Perform some compliant and noncompliant LinearForwardingTableRecord queries + - Validation is via status in responses and IB analyzer + +* Event Forwarding: Register for trap forwarding using reports + - Send a trap and wait for report + - Unregister non-existing + +* Trap 64/65 Flow: Register to Trap 64-65, create traps (by + disconnecting/connecting ports) and wait for report, then unregister. + +* Stress Test: send PortInfoRecord queries, both single and RMPP and + check for the rate of responses as well as their validity. + + +5.2 IB Management Simulator OpenSM Test Flows: + +The simulator provides ability to simulate the SM handling of virtual +topologies that are not limited to actual lab equipment availability. +OpenSM was simulated to bring up clusters of up to 10,000 nodes. Daily +regressions use smaller (16 and 128 nodes clusters). + +The following test flows are run on the IB management simulator: + +* Stability: + Up to 12 links from the fabric are randomly selected to drop packets + at drop rates up to 90%. The SM is required to succeed in bringing the + fabric up. 
The resulting routing is verified to be correct as well. + +* LID Manager: + Using LMC = 2 the fabric is initialized with LIDs. Faults such as + zero LID, Duplicated LID, non-aligned (to LMC) LIDs are + randomly assigned to various nodes and other errors are randomly + output to the guid2lid cache file. The SM sweep is run 5 times and + after each iteration a complete verification is made to ensure that all + LIDs that could possibly be maintained are kept, as well as that all nodes + were assigned a legal LID range. + +* Multicast Routing: + Nodes randomly join the 0xc000 group and eventually the + resulting routing is verified for completeness and adherence to + Up/Down routing rules. + +* osmtest: + The complete osmtest flow as described in the previous table is run on + the simulated fabrics. + +* Stress Test: + This flow merges fabric, LID and stability issues with continuous + PathRecord, ServiceRecord and Multicast Join/Leave activity to + stress the SM/SA during continuous sweeps. InformInfo Set/Delete/Get + were added to the test such both existing and non existing nodes + perform them in random order. + +5.3 OpenSM Regression + +Using a back-to-back or single switch connection, the following set of +tests is run nightly on the stacks described in table 2. The included +tests are: + +* Stress Testing: Flood the SA with queries from multiple channel + adapters to check the robustness of the entire stack up to the SA. + +* Dynamic Changes: Dynamic Topology changes, through randomly + dropping SMP packets, used to test OpenSM adaptation to an unstable + network & verify DB correctness. + +* Trap Injection: This flow injects traps to the SM and verifies that it + handles them gracefully. + +* SA Query Test: This test exhaustively checks the SA responses to all + possible single component mask. 
To do that the test examines the + entire set of records the SA can provide, classifies them by their + field values and then selects every field (using component mask and a + value) and verifies that the response matches the expected set of records. + A random selection using multiple component mask bits is also performed. + +5.4 Cluster testing: + +Cluster testing is usually run before a distribution release. It +involves real hardware setups of 16 to 32 nodes (or more if a beta site +is available). Each test is validated by running all-to-all ping through the IB +interface. The test procedure includes: + +* Cluster bringup + +* Hand-off between 2 or 3 SM's while performing: + - Node reboots + - Switch power cycles (disconnecting the SM's) + +* Unresponsive port detection and recovery + +* osmtest from multiple nodes + +* Trap injection and recovery + + +6 Qualification +---------------- + +Table 2 - Qualified IB Stacks +============================= + +Stack | Version +-----------------------------------------|-------------------------- +OFED | 1.1 +OFED | 1.0 +OpenIB Gen2 (IBG2 distribution) | 1.0 +OpenIB Gen1 (IBGD distribution) | 1.8.0 +VAPI (Mellanox InfiniBand HCA Driver) | 3.2 and later + +Table 3 - Qualified Devices and Corresponding Firmware +====================================================== + +Mellanox +Device | FW versions +--------|----------------------------------------------------------- +MT43132 | InfiniScale - fw-43132 5.2.0 (and later) +MT47396 | InfiniScale III - fw-47396 0.5.0 (and later) +MT23108 | InfiniHost - fw-23108 3.3.2 (and later) +MT25204 | InfiniHost III Lx - fw-25204 1.0.1i (and later) +MT25208 | InfiniHost III Ex (InfiniHost Mode) - fw-25208 4.6.2 (and later) +MT25208 | InfiniHost III Ex (MemFree Mode) - fw-25218 5.0.1 (and later) + +QLogic/PathScale +Device | Note +--------|----------------------------------------------------------- +iPath | QHT6040 (PathScale InfiniPath HT-460) +iPath | QHT6140 (PathScale InfiniPath HT-465) 
+iPath | QLE6140 (PathScale InfiniPath PE-880) + +Note: OpenSM does not run on an IBM Galaxy (eHCA) as it does not expose +QP0 and QP1. However, it does support it as a device on the subnet. diff --git a/doc/opensm_release_notes_openib-3.0.13.txt b/doc/opensm_release_notes_openib-3.0.13.txt new file mode 100644 index 0000000..b48f148 --- /dev/null +++ b/doc/opensm_release_notes_openib-3.0.13.txt @@ -0,0 +1,535 @@ + OpenSM Release Notes 3.0.13 + ============================= + +Version: OpenFabrics Enterprise Distribution (OFED) 1.2 +Repo: git://git.openfabrics.org/~ofed_1_2/management.git (release) + git://git.openfabrics.org/~halr/management.git (development) +Date: June 2007 + +1 Overview +---------- +This document describes the contents of the OpenSM OFED 1.2 release. +OpenSM is an InfiniBand compliant Subnet Manager and Administration, +and runs on top of OpenIB. The OpenSM version for this release +is openib-3.0.13 + +This document includes the following sections: +1 This Overview section (describing new features and software + dependencies) +2 Known Issues And Limitations +3 Unsupported IB compliance statements +4 Major Bug Fixes +5 Main Verification Flows +6 Qualified software stacks and devices + +1.1 Major New Features + +* Routing improvements + Two additional routing algorithms have been added in addition to + performance improvements to the existing routing algorithms. The + two new routing algorithms are FAT tree and LASH. See the + opensm man page for additional details. + +* SA Optional Record support now "virtually" complete + Includes SA InformInfo improvements and InformInfoRecord support in + addition to support for the remaining SA optional records + (MulticastForwardingTableRecord, SwitchInfoRecord). Also, SMInfoRecord + support was improved to include all SMs found. + +* SA database dump/restore + OpenSM now includes the ability to dump and restore the SA database. 
+ This allows for all SA registrations (multicast, services, and events) + to be saved and restored later. + + In verbose mode, OpenSM will dump SA DB (existing multicast groups, + services and InformInfo) into dump file which named "opensm-sa.dump" + and located under standard OpenSM dump directory (/var/log by default). + + If option -S is specified and SA DB dump file name is provided, OpenSM + will try to restore SA database from this file. And if restore is + successful, OpenSM won't ask for client reregistration at subnet bring-up. + +* Modular routing for multicast + In conjunction was SA database dump/restore, there is the ability to + dump and load switch lid matrices (min hops tables) which are used + for multicast route calculation. + +* IB router enablement + OpenSM now supports router ports properly (in terms of PortInfo handling). + There is also some experimental support for IB routers which is enabled + via the ROUTER_EXP compile flag. This support includes SA PathRecord and + MCMemberRecord support for off subnet GIDs. + +* Socket support added to console + OpenSM console now supports remote in addition to local access. + Remote access is currently via telnet. + +1.2 Minor New Features: + +* Change output format of DR path from hex to decimal port numbers + +* Log rotation + The OpenSM log can now be rotated while OpenSM is running (without + stopping and restarting OpenSM). This is accomplished via SIGUSR1. + +* Support scope for IPoIB multicast groups in partition config + +* Dump filename changed from subnet.lst to osm-subnet.lst + Default temp directory for non Windows platforms was previously changed + from /tmp to /var/log. + +* Add option for force SDR link speed + Add option to opensm.opts to force link speed. Currently, only forcing + to SDR link speed is supported. This option is not supported as a + command line option. 
+ +1.3 Library API Changes + + None + +1.4 Software Dependencies + +OpenSM depends on the installation of either OFED 1.2, OFED 1.1, +OFED 1.0, OpenIB gen2 (e.g. IBG2 distribution), OpenIB gen1 (e.g. IBGD +distribution), or Mellanox VAPI stacks. The qualified driver versions +are provided in Table 2, "Qualified IB Stacks". + +1.5 Supported Devices Firmware + +The main task of OpenSM is to initialize InfiniBand devices. The +qualified devices and their corresponding firmware versions +are listed in Table 3. + +2 Known Issues And Limitations +------------------------------ + +* No Service / Key associations: + There is no way to manage Service access by Keys. + +* No SM to SM SMDB synchronization: + Puts the burden of re-registering services, multicast groups, and + inform-info on the client application (or IB access layer core). + +* No "port down" event handling: + Changing the switch port through which OpenSM connects to the IB + fabric may cause incorrect operation. Please restart OpenSM whenever + such a connectivity change is made. + +* Changing connections during SM operation: + Under some conditions the SM can get confused by a change in + cabling (moving a cable from one switch port to the other) and + momentarily see this as having the same GUID appear connected + to two different IB ports. Under some conditions, when the SM fails to + get the corresponding change event it might mistakenly report this case + as a "duplicated GUID" case and abort. It is advisable to double-check + the syslog after each such change in connectivity and restart + OpenSM if it has exited. The same error ("duplicated GUID") will + also appear with a loopback plug. + +3 Unsupported IB Compliance Statements +-------------------------------------- +The following section lists all the IB compliance statements which +OpenSM does not support. Please refer to the IB specification for detailed +information regarding each compliance statement. 
+ +* C14-22 (Authentication): + M_Key M_KeyProtectBits and M_KeyLeasePeriod shall be set in one + SubnSet method. As a work-around, an OpenSM option is provided for + defining the protect bits. + +* C14-67 (Authentication): + On SubnGet(SMInfo) and SubnSet(SMInfo) - if M_Key is not zero then + the SM shall generate a SubnGetResp if the M_Key matches, or + silently drop the packet if M_Key does not match. + +* C15-0.1.23.4 (Authentication): + InformInfoRecords shall always be provided with the QPN set to 0, + except for the case of a trusted request, in which case the actual + subscriber QPN shall be returned. + +* o13-17.1.2 (Event-FWD): + If no permission to forward, the subscription should be removed and + no further forwarding should occur. + +* C14-24.1.1.5 and C14-62.1.1.22 (Initialization): + GUIDInfo - SM should enable assigning Port GUIDInfo. + +* C14-44 (Initialization): + If the SM discovers that it is missing an M_Key to update CA/RT/SW, + it should notify the higher level. + +* C14-62.1.1.12 (Initialization): + PortInfo:M_Key - Set the M_Key to a node based random value. + +* C14-62.1.1.13 (Initialization): + PortInfo:P_KeyProtectBits - set according to an optional policy. + +* C14-62.1.1.24 (Initialization): + SwitchInfo:DefaultPort - should be configured for random FDB. + +* C14-62.1.1.32 (Initialization): + RandomForwardingTable should be configured. + +* o15-0.1.12 (Multicast): + If the JoinState is SendOnlyNonMember = 1 (only), then the endport + should join as sender only. + +* o15-0.1.8 (Multicast): + If a request for creating an MCG with fields that cannot be met, + return ERR_REQ_INVALID (currently ignores SL and FlowLabelTClass). + +* C15-0.1.8.6 (SA-Query): + Respond to SubnAdmGetTraceTable - this is an optional attribute. + +* C15-0.1.13 Services: + Reject ServiceRecord create, modify or delete if the given + ServiceP_Key does not match the one included in the ServiceGID port + and the port that sent the request. 
+ +* C15-0.1.14 (Services): + Provide means to associate service name and ServiceKeys. + +4 Major Bug Fixes +----------------- + +The following is a list of bugs that were fixed. Note that other less critical +or visible bugs were also fixed. + +* osm_sminfo_rcv.c: Add SMInfo self query check. OpenSM can query + itself for SMInfo occasionally due to port moving during subnet + discovery process. Don't create remote SM entry in this case to + prevent deadlocks. + +* osm_ucast_updn.c: Two similar bugs in up/down routing fixed. + 8-bit integers were used as indexes when scanning subnet, which + in one case caused OpenSM to crash when ranking "path" is longer + than 256 switches, and in the other case, caused OpenSM to go into + an infinite loop when fabric has more than 256 roots. + +* osm_sm_state_mgr.c: In __osm_sm_state_mgr_send_master_sm_info_req, + handle master GUID port not found properly + +* osm_sa_multipath_record.c: In __osm_mpr_rcv_get_path_parms, return + IB_NOT_FOUND rather than IB_ERROR when can't route to LID from switch + +* osm_sa_path_record.c: In __osm_pr_rcv_get_path_parms, return IB_NOT_FOUND + rather than IB_ERROR when can't route to LID from switch + +* osm_vendor_ibumad.c: In osm_vendor_set_sm, set issmfd to + -1 on open error + +* osm_vendor_ibumad: Termination crash fix + When OpenSM is terminated, the umad_receiver thread is still running even after + the structures are destroyed and freed; this causes random (but easily + reproducible) crashes. The reason is that osm_vendor_delete() does not + care about thread termination. This patch adds the receiver thread + cancellation (by using pthread_cancel() and pthread_join()) and takes care to + keep all mutexes unlocked upon termination. There is also minor + termination code consolidation - osm_vendor_port_close() function.
+ +* osm_port_profile.h: Fix reinsertion issue in osm_port_prof_set_ignored_port + +* osm_matrix.h: Fix segfault with up/down and root nodes file + +* osm_sa_path_record.c: In osm_pr_rcv_process, fix endian of hop_limit + +* osm_vendor_ibumad.c: Close umad port in osm_vendor_delete + +* osm_sa_(multipath path)_record.c: Fix MultiPathRecord/PathRecord issues + with using MTU/rate/PktLife explicitly ignoring selectors + + OpenSM just uses the resulting path MTU/rate/pkt-life and fail the + query even though the selector might be allowing for selecting an + appropriate value. + + After this fix, the following results are obtained for a case of + path allowing maximal 2K MTU. + +In standard mode: +------------------------------------------------------------ +MTU greater than ... 256 (0x01) -> equal to ....... 2K +MTU less than ...... 256 (0x41) -> NO PATHS +MTU equal to ....... 256 (0x81) -> equal to ....... 256 +MTU largest possible 256 (0xc1) -> equal to ....... 2K +MTU greater than ... 512 (0x02) -> equal to ....... 2K +MTU less than ...... 512 (0x42) -> equal to ....... 256 +MTU equal to ....... 512 (0x82) -> equal to ....... 512 +MTU largest possible 512 (0xc2) -> equal to ....... 2K +MTU greater than ... 1K (0x03) -> equal to ....... 2K +MTU less than ...... 1K (0x43) -> equal to ....... 512 +MTU equal to ....... 1K (0x83) -> equal to ....... 1K +MTU largest possible 1K (0xc3) -> equal to ....... 2K +MTU greater than ... 2K (0x04) -> NO PATHS +MTU less than ...... 2K (0x44) -> equal to ....... 1K +MTU equal to ....... 2K (0x84) -> equal to ....... 2K +MTU largest possible 2K (0xc4) -> equal to ....... 2K +MTU greater than ... 4K (0x05) -> NO PATHS +MTU less than ...... 4K (0x45) -> equal to ....... 2K +MTU equal to ....... 4K (0x85) -> NO PATHS +MTU largest possible 4K (0xc5) -> equal to ....... 
2K +============================================================ + +With enable_quirks (when one of the ends is a Tavor device): +------------------------------------------------------------ +MTU greater than ... 256 (0x01) -> equal to ....... 1K +MTU less than ...... 256 (0x41) -> NO PATHS +MTU equal to ....... 256 (0x81) -> equal to ....... 256 +MTU largest possible 256 (0xc1) -> equal to ....... 2K +MTU greater than ... 512 (0x02) -> equal to ....... 1K +MTU less than ...... 512 (0x42) -> equal to ....... 256 +MTU equal to ....... 512 (0x82) -> equal to ....... 512 +MTU largest possible 512 (0xc2) -> equal to ....... 2K +MTU greater than ... 1K (0x03) -> NO PATHS +MTU less than ...... 1K (0x43) -> equal to ....... 512 +MTU equal to ....... 1K (0x83) -> equal to ....... 1K +MTU largest possible 1K (0xc3) -> equal to ....... 2K +MTU greater than ... 2K (0x04) -> NO PATHS +MTU less than ...... 2K (0x44) -> equal to ....... 1K +MTU equal to ....... 2K (0x84) -> equal to ....... 2K +MTU largest possible 2K (0xc4) -> equal to ....... 2K +MTU greater than ... 4K (0x05) -> NO PATHS +MTU less than ...... 4K (0x45) -> equal to ....... 1K +MTU equal to ....... 4K (0x85) -> NO PATHS +MTU largest possible 4K (0xc5) -> equal to ....... 2K +============================================================ + +* osm_pkey_rcv.c: rwlock double release fix + When the port is removed from subnet, but previously requested pkey + table block is received after this - the lock will be released twice. + This leads to deadlocks later when other MAD processor will try to + acquire the same lock. + +* osm_sa_informinfo.c: Fix InformInfoRecord searches + +* Better SA MCMemberRecord leave locking + Hold locked multicast group leave request (MCMember Record) processing. + This prevents kind of race with multicast group join request where + those requests can be reordered during processing. 
+ +* osm_sa_informinfo.c: Conformance changes for subscribe component + +* osm_sa_path_record.c: Handle LID 0 as error + +* Fix comparing InformInfo records + 1. The received InformInfo struct was modified before dumping it. + 2. The function that compares InformInfo structures was just + comparing the whole memory allocated for it, including reserved + fields. Fixed to compare more selectively. + + As for QPN, from the IB spec, table 119 InformInfo: + QPN : Ignored except when subscribe=0 (an unsubscribe + request). Queue pair to which Report()s were sent as + a result of a corresponding subscription. If no + subscription for this Report() with this QPN exists, + the request to unsubscribe performs no action and + produces GetResp() with status indicating an invalid + field value. + +* osm_trap_rcv.c: Reduce repeated trap messages so log doesn't fill + so quickly + +* osm_helper.c: Fix stack smashing detected problem in osm_dump_service_record + +* Fix permission on db files directory + When creating directory for db files (guid2lid) storing create it with + reasonable permissions (current 777 decimal = octal 01411) and don't do + it world writable. + +* Fix node_desc.description as string usages + +5 Main Verification Flows +------------------------- + +OpenSM verification is run using the following activities: +* osmtest - a stand-alone program +* ibmgtsim (IB management simulator) based - a set of flows that + simulate clusters, inject errors and verify OpenSM capability to + respond and bring up the network correctly. +* small cluster regression testing - where the SM is used on back to + back or single switch configurations. The regression includes + multiple OpenSM dedicated tests. +* cluster testing - when we run OpenSM to setup a large cluster, perform + hand-off, reboots and reconnects, verify routing correctness and SA + responsiveness at the ULP level (IPoIB and SDP). + +5.1 osmtest + +osmtest is an automated verification tool used for OpenSM +testing. 
Its verification flows are described by list below. + +* Inventory File: Obtain and verify all port info, node info, link and path + records parameters. + +* Service Record: + - Register new service + - Register another service (with a lease period) + - Register another service (with service p_key set to zero) + - Get all services by name + - Delete the first service + - Delete the third service + - Added bad flows of get/delete non valid service + - Add / Get same service with different data + - Add / Get / Delete by different component mask values (services + by Name & Key / Name & Data / Name & Id / Id only ) + +* Multicast Member Record: + - Query of existing Groups (IPoIB) + - BAD Join with insufficient comp mask (o15.0.1.3) + - Create given MGID=0 (o15.0.1.4) + - Create given MGID=0xFF12A01C,FE800000,00000000,12345678 (o15.0.1.4) + - Create BAD MGID=0xFA. (o15.0.1.6) + - Create BAD MGID=0xFF12A01B w/ link-local not set (o15.0.1.6) + - New MGID with invalid join state (o15.0.1.9) + - Retry of existing MGID - See JoinState update (o15.0.1.11) + - BAD RATE when connecting to existing MGID (o15.0.1.13) + - Partial JoinState delete request - removing FullMember (o15.0.1.14) + - Full Delete of a group (o15.0.1.14) + - Verify Delete by trying to Join deleted group (o15.0.1.14) + - BAD Delete of IPoIB membership (no prev join) (o15.0.1.15) + +* GUIDInfo Record: + - All GUIDInfoRecords in subnet are obtained + +* MultiPathRecord: + - Perform some compliant and noncompliant MultiPathRecord requests + - Validation is via status in responses and IB analyzer + +* PKeyTableRecord: + - Perform some compliant and noncompliant PKeyTableRecord queries + - Validation is via status in responses and IB analyzer + +* LinearForwardingTableRecord: + - Perform some compliant and noncompliant LinearForwardingTableRecord queries + - Validation is via status in responses and IB analyzer + +* Event Forwarding: Register for trap forwarding using reports + - Send a trap and wait for report 
+ - Unregister non-existing + +* Trap 64/65 Flow: Register to Trap 64-65, create traps (by + disconnecting/connecting ports) and wait for report, then unregister. + +* Stress Test: send PortInfoRecord queries, both single and RMPP and + check for the rate of responses as well as their validity. + + +5.2 IB Management Simulator OpenSM Test Flows: + +The simulator provides the ability to simulate the SM handling of virtual +topologies that are not limited to actual lab equipment availability. +OpenSM was simulated to bring up clusters of up to 10,000 nodes. Daily +regressions use smaller clusters (16 and 128 nodes). + +The following test flows are run on the IB management simulator: + +* Stability: + Up to 12 links from the fabric are randomly selected to drop packets + at drop rates up to 90%. The SM is required to succeed in bringing the + fabric up. The resulting routing is verified to be correct as well. + +* LID Manager: + Using LMC = 2 the fabric is initialized with LIDs. Faults such as + zero LID, Duplicated LID, non-aligned (to LMC) LIDs are + randomly assigned to various nodes and other errors are randomly + output to the guid2lid cache file. The SM sweep is run 5 times and + after each iteration a complete verification is made to ensure that all + LIDs that could possibly be maintained are kept, as well as that all nodes + were assigned a legal LID range. + +* Multicast Routing: + Nodes randomly join the 0xc000 group and eventually the + resulting routing is verified for completeness and adherence to + Up/Down routing rules. + +* osmtest: + The complete osmtest flow as described in the previous table is run on + the simulated fabrics. + +* Stress Test: + This flow merges fabric, LID and stability issues with continuous + PathRecord, ServiceRecord and Multicast Join/Leave activity to + stress the SM/SA during continuous sweeps. InformInfo Set/Delete/Get + were added to the test such that both existing and non-existing nodes + perform them in random order.
+ +5.3 OpenSM Regression + +Using a back-to-back or single switch connection, the following set of +tests is run nightly on the stacks described in table 2. The included +tests are: + +* Stress Testing: Flood the SA with queries from multiple channel + adapters to check the robustness of the entire stack up to the SA. + +* Dynamic Changes: Dynamic Topology changes, through randomly + dropping SMP packets, used to test OpenSM adaptation to an unstable + network & verify DB correctness. + +* Trap Injection: This flow injects traps to the SM and verifies that it + handles them gracefully. + +* SA Query Test: This test exhaustively checks the SA responses to all + possible single component mask. To do that the test examines the + entire set of records the SA can provide, classifies them by their + field values and then selects every field (using component mask and a + value) and verifies that the response matches the expected set of records. + A random selection using multiple component mask bits is also performed. + +5.4 Cluster testing: + +Cluster testing is usually run before a distribution release. It +involves real hardware setups of 16 to 32 nodes (or more if a beta site +is available). Each test is validated by running all-to-all ping through the IB +interface. 
The test procedure includes: + +* Cluster bringup + +* Hand-off between 2 or 3 SM's while performing: + - Node reboots + - Switch power cycles (disconnecting the SM's) + +* Unresponsive port detection and recovery + +* osmtest from multiple nodes + +* Trap injection and recovery + + +6 Qualification +---------------- + +Table 2 - Qualified IB Stacks +============================= + +Stack | Version +-----------------------------------------|-------------------------- +OFED | 1.2 +OFED | 1.1 +OFED | 1.0 +OpenIB Gen2 (IBG2 distribution) | 1.0 +OpenIB Gen1 (IBGD distribution) | 1.8.0 +VAPI (Mellanox InfiniBand HCA Driver) | 3.2 and later + +Table 3 - Qualified Devices and Corresponding Firmware +====================================================== + +Mellanox +Device | FW versions +--------|----------------------------------------------------------- +MT43132 | InfiniScale - fw-43132 5.2.0 (and later) +MT47396 | InfiniScale III - fw-47396 0.5.0 (and later) +MT23108 | InfiniHost - fw-23108 3.3.2 (and later) +MT25204 | InfiniHost III Lx - fw-25204 1.0.1i (and later) +MT25208 | InfiniHost III Ex (InfiniHost Mode) - fw-25208 4.6.2 (and later) +MT25208 | InfiniHost III Ex (MemFree Mode) - fw-25218 5.0.1 (and later) + +QLogic/PathScale +Device | Note +--------|----------------------------------------------------------- +iPath | QHT6040 (PathScale InfiniPath HT-460) +iPath | QHT6140 (PathScale InfiniPath HT-465) +iPath | QLE6140 (PathScale InfiniPath PE-880) + +Note: OpenSM does not run on an IBM Galaxy (eHCA) as it does not expose +QP0 and QP1. However, it does support it as a device on the subnet. 
+ diff --git a/doc/partition-config.txt b/doc/partition-config.txt new file mode 100644 index 0000000..8f3a368 --- /dev/null +++ b/doc/partition-config.txt @@ -0,0 +1,186 @@ +OpenSM Partition configuration +=============================== + +The default name of OpenSM partitions configuration file is +'$(OPENSM_CONFIG_DIR)/partitions.conf' (where $(OPENSM_CONFIG_DIR) is set at +configure time and defaults to ${sysconfdir}/opensm). The default may be +changed at run time by using the --Pconfig (-P) option with OpenSM. + +The default partition will be created by OpenSM unconditionally even +when partition configuration file does not exist or cannot be accessed. + +The default partition has P_Key value 0x7fff. OpenSM's port will always +have full membership in default partition. All other end ports will have +full membership if the partition configuration file is not found or cannot +be accessed, or limited membership if the file exists and can be accessed +but there is no rule for the Default partition. + +Effectively, this amounts to the same as if one of the following rules +below appear in the partition configuration file: +In the case of no rule for the Default partition: +Default=0x7fff : ALL=limited, SELF=full ; +In the case of no partition configuration file or file cannot be accessed: +Default=0x7fff : ALL=full ; + + +File Format +=========== + +Comments: + +Line content following a \'#\' character is a comment and is ignored by the +parser. + +General file format: + +<Partition Definition>:[<newline>]<Partition Properties>; + + Partition Definition: + [PartitionName][=PKey][,indx0][,ipoib_bc_flags][,defmember=full|limited|both] + + PartitionName - string, will be used with logging. When + omitted, empty string will be used. + PKey - P_Key value for this partition. Only low 15 + bits will be used. When omitted will be + autogenerated. + indx0 - indicates that this pkey should be inserted in + block 0 index 0. + ipoib_bc_flags - used to indicate/specify IPoIB capability of + this partition.
+ + defmember=full|limited|both - specifies default membership for + port guid list. Default is limited. + + ipoib_bc_flags: + ipoib_flag|[mgroup_flag]* + + ipoib_flag: + ipoib - indicates that this partition may be used for + IPoIB, as a result the IPoIB broadcast group will + be created with the mgroup_flag flags given, + if any. + + Partition Properties: + [<Port list>|<MCast Group>]* | <Port list> + + Port list: + <Port Specifier>[,<Port Specifier>] + + Port Specifier: + <PortGUID>[=[full|limited|both]] + + PortGUID - GUID of partition member EndPort. + Hexadecimal numbers should start from + 0x, decimal numbers are accepted too. + full or - indicates full and/or limited membership for + limited or this port. When omitted (or unrecognized) + both limited membership is assumed. Both + indicates both full and limited membership + for this port. + + MCast Group: + mgid=gid[,mgroup_flag]*<newline> + + - gid specified is verified to be a Multicast + address. IP groups are verified to match + the rate and mtu of the broadcast group. + The P_Key bits of the mgid for IP groups are + verified to either match the P_Key specified + by "Partition Definition" or if they are + 0x0000 the P_Key will be copied into those + bits. + + mgroup_flag: + rate=<val> - specifies rate for this MC group + (default is 3 (10 Gbps)) + mtu=<val> - specifies MTU for this MC group + (default is 4 (2048)) + sl=<val> - specifies SL for this MC group + (default is 0) + scope=<val> - specifies scope for this MC group + (default is 2 (link local)). Multiple scope + settings are permitted for a partition. + NOTE: This overwrites the scope nibble of the + specified mgid. Furthermore specifying + multiple scope settings will result in + multiple MC groups being created. + Q_Key=<val> - specifies the Q_Key for this MC group + (default: 0x0b1b for IP groups, 0 for other + groups) + WARNING: changing this for the broadcast + group may break IPoIB on client + nodes!!!
+ TClass=<val> - specifies tclass for this MC group + (default is 0) + FlowLabel=<val> - specifies FlowLabel for this MC group + (default is 0) + NOTE: All mgroup_flag flags MUST be separated by comma (,). + +Note that values for rate, mtu, and scope, for both partitions and multicast +groups, should be specified as defined in the IBTA specification (for example, +mtu=4 for 2048). + +There are several useful keywords for PortGUID definition: + + - 'ALL' means all end ports in this subnet. + - 'ALL_CAS' means all Channel Adapter end ports in this subnet. + - 'ALL_SWITCHES' means all Switch end ports in this subnet. + - 'ALL_ROUTERS' means all Router end ports in this subnet. + - 'SELF' means subnet manager's port. + +Empty list means no ports in this partition. + + + +Notes: +----- + +White space is permitted between delimiters ('=', ',',':',';'). + +PartitionName does not need to be unique, PKey does need to be unique. +If PKey is repeated then those partition configurations will be merged +and first PartitionName will be used (see also next note). + +It is possible to split partition configuration in more than one +definition, but then PKey should be explicitly specified (otherwise +different PKey values will be generated for those definitions).
+ + + +Examples: +-------- + + Default=0x7fff : ALL, SELF=full ; + Default=0x7fff : ALL, ALL_SWITCHES=full, SELF=full ; + + NewPartition , ipoib : 0x123456=full, 0x3456789034=limi, 0x2134af2306 ; + + YetAnotherOne = 0x300 : SELF=full ; + YetAnotherOne = 0x300 : ALL=limited ; + + ShareIO = 0x80 , defmember=full : 0x123451, 0x123452; + # 0x123453, 0x123454 will be limited + ShareIO = 0x80 : 0x123453, 0x123454, 0x123455=full; + # 0x123456, 0x123457 will be limited + ShareIO = 0x80 , defmember=limited : 0x123456, 0x123457, 0x123458=full; + ShareIO = 0x80 , defmember=full : 0x123459, 0x12345a; + ShareIO = 0x80 , defmember=full : 0x12345b, 0x12345c=limited, 0x12345d; + + # multicast groups added to default + Default=0x7fff,ipoib: + mgid=ff12:401b::0707,sl=1 # random IPv4 group + mgid=ff12:601b::16 # MLDv2-capable routers + mgid=ff12:401b::16 # IGMP + mgid=ff12:601b::2 # All routers + mgid=ff12::1,sl=1,Q_Key=0xDEADBEEF,rate=3,mtu=2 # random group + ALL=full; + + +Note: +---- + +The following rule is equivalent to how OpenSM used to run prior to the +partition manager: + +Default=0x7fff,ipoib:ALL=full; + diff --git a/doc/perf-manager-arch.txt b/doc/perf-manager-arch.txt new file mode 100644 index 0000000..3a078ab --- /dev/null +++ b/doc/perf-manager-arch.txt @@ -0,0 +1,181 @@ +Performance Manager +2/12/07 + +This document will describe an architecture and a phased plan +for an OpenFabrics OpenIB performance manager. + +Currently, there is no open source performance manager, only +a perfquery diagnostic tool which some have scripted into a +"poor man's" performance manager. + +The primary responsibilities of the performance manager are to: +1. Monitor subnet topology +2. Based on subnet topology, monitor performance and error counters. + Also, possibly monitor counters related to congestion. +3. Perform data reduction (various calculations (rates, histograms, etc.)) + on counters obtained +4. 
Log performance data and indicate "interesting" related events + + +Performance Manager Components +1. Determine subnet topology + Performance manager can determine the subnet topology by subscribing + for GID in and out of service events. Upon receipt of a GID in service + event, use GID to query SA for corresponding LID by using SubnAdmGet + NodeRecord with PortGUID specified. It would utilize the LID and NumPorts + returned and add this to the monitoring list. Note that the monitoring + list can be extended to be distributed with the manager "balancing" the + assignments of new GIDs to the set of known monitors. For GID out of + service events, the GID is removed from the monitoring list. + +2. Monitoring + Counters to be monitored include performance counters (data octets and + packets both receive and transmit) and error counters. These are all in + the mandatory PortCounters attribute. Future support will include the + optional 64 bit counters, PortExtendedCounters (as this is only known + to be supported on one IB device currently). In addition, one congestion + counter (PortXmitWait) will be monitored (on switch ports) initially. + + Polling rather than sampling will be used as the monitoring technique. The + polling rate is configurable from 1-65535 seconds (default TBD). + Note that with 32 bit counters, on 4x SDR links, byte counts can max out in + 16 seconds and on 4x DDR links in 8 seconds. The polling rate needs to + deal with this as accurate byte and packet rates are desired. Since IB + counters are sticky, the counters need to be reset when they get "close" + to max'ing out. This will result in some inaccuracy. When counters are + reset, the time of the reset will be tracked in the monitor and will be + queryable. Note that when the 64 bit counters are supported more generally, + the polling rate can be reduced. + + The performance manager will support parallel queries.
The level of + parallelism is configurable with a default of 64 queries outstanding + at one time. + + Configuration and dynamic adjustment of any performance manager "knobs" + will be supported. + + Also, there will be a console interface to obtain performance data. + It will be able to reset counters, report on specific nodes or + node types of interest (CAs only, switches only, all, ...). The + specifics are TBD. + +3. Data Reduction + For errors, rate rather than raw value will be calculated. Error + event is only indicated when rate exceeds a threshold. + For packet and byte counters, small changes will be aggregated + and only significant changes are updated. + Aggregated histograms (per node, all nodes (this is TBD))) for each + counter will be provided. Actual counters will also be written to files. + NodeGUID will be used to identify node. File formats are TBD. One + format to be supported might be CSV. + +4. Logging + "Interesting" events determined by the performance manager will be + logged as well as the performance data itself. Significant events + will be logged to syslog. There are some interesting scalability + issues relative to logging especially for the distributed model. + + Events will be based on rates which are configured as thresholds. + There will be configurable thresholds for the error counters with + reasonable defaults. Correlation of PerfManager and SM events is + interesting but not a mandatory requirement. + + +Performance Manager Scalability +Clearly as the polling rate goes up, the number of nodes which can be +monitored from a single performance management node decreases. There is +some evidence that a single dedicated management node may not be able to +monitor the largest clusters at a rapid rate. + +There are numerous PerfManager models which can be supported: +1. Integrated as thread(s) with OpenSM (run only when SM is master) +2. Standby SM +3. Standalone PerfManager (not running with master or standby SM) +4. 
Distributed PerfManager (most scalable approach) + +Note that these models are in order of implementation complexity and +hence "schedule". + +The simplest model is to run the PerfManager with the master SM. This has +the least scalability but is the simplest model. Note that in this model +the topology can be obtained without the GID in and out of service events +but this is needed for any of the other models to be supported. + +The next model is to run the PerfManager with a standby SM. Standbys are not +doing much currently (polling the master) so there is much idle CPU. +The downside of this approach is that if the standby takes over as master, +the PerfManager would need to be moved (or it becomes model 1). + +A totally separate standalone PerfManager would allow for a deployment +model which eliminates the downside of model 2 (standby SM). It could +still be built in a similar manner with model 2 with unneeded functions +(SM and SA) not included. The advantage of this model is that it could +be more readily usable with a vendor specific SM (switch based or otherwise). +Vendor specific SMs usually come with a built-in performance manager and +this assumes that there would be a way to disable that performance manager. +Model 2 can act like model 3 if a disable SM feature is supported in OpenSM +(command line/console). This will take the SM to not active. + +The most scalable model is a distributed PerfManager. One approach to +distribution is a hierarchical model where there is a PerfManager at the +top level with a number of PerfMonitors which are responsible for some +portion of the subnet. + +The separation of PerfManager from OpenSM brings up the following additional +issues: +1. What communication is needed between OpenSM and the PerfManager ? +2. Integration of interesting events with OpenSM log +(Does performance manager assume OpenSM ? Does it need to work with vendor +SMs ?) + +Hierarchical distribution brings up some additional issues: +1.
How is the hierarchy determined ? +2. How do the PerfManager and PerfMonitors find each other ? +3. How is the subnet divided amongst the PerfMonitors ? +4. Communication amongst the PerfManager and the PerfMonitors +(including communication failures) + +In terms of inter manager communication, there seem to be several +choices: +1. Use vendor specific MADs (which can be RMPP'd) and build on top of +this +2. Use RC QP communication and build on top of this +3. Use IPoIB which is much more powerful as sockets can then be utilized + +RC QP communication improves on the lower performance of the vendor +specific MAD approach but is not as powerful as the socket based approach. + +The only downside of IPoIB is that it requires multicast to be functioning. +It seems reasonable to require IPoIB across the management nodes. This +can either be a separate IPoIB subnet or a shared one with other endnodes +on the subnet. (If this communication is built on top of sockets, it +can be any IP subnet amongst the manager nodes). + +The first implementation phase will address models 1-3. Model 3 is optional +as it is similar to models 1 and 2 and may not be needed. + +Model 4 will be addressed in a subsequent implementation phase (and a future +version of this document). Model 4 can be built on the basis of models 1 and +2 where some SM, not necessarily master, is the PerfManager and the rest are +PerfMonitors. + + +Performance Manager Partition Membership +Note that as the performance manager needs to talk via GSI to the PMAs +in all the end nodes and GSI utilizes PKey sharing, partition membership +if invoked must account for this. + +The most straightforward deployment of the performance manager is +to have it be a member of the full default partition (P_Key 0xFFFF).
+ + +Performance Manager Redundancy +TBD (future version of this document) + + +Congestion Management +TBD (future version of this document) + + +QoS Management +TBD (future version of this document) diff --git a/doc/performance-manager-HOWTO.txt b/doc/performance-manager-HOWTO.txt new file mode 100644 index 0000000..d63d830 --- /dev/null +++ b/doc/performance-manager-HOWTO.txt @@ -0,0 +1,161 @@ +OpenSM Performance manager HOWTO +================================ + +Introduction +============ + +OpenSM now includes a performance manager which collects port counters from +the subnet and stores them internally in OpenSM. + +Some of the features of the performance manager are: + + 1) Collect port data and error counters per v1.2.1 spec and store in + 64 bit internal counts. + 2) Automatic reset of counters when they reach approximately 3/4 full. + (While not guaranteeing that counts will not be missed, this does + keep counts incrementing as best as possible given the current + spec limitations.) + 3) Basic warnings in the OpenSM log on "critical" errors like symbol + errors. + 4) Automatically detects "outside" resets of counters and adjusts to + continue collecting data. + 5) Can be run when OpenSM is in standby or inactive states in + addition to master state. + +Known issues are: + + 1) Data counters will be lost on high data rate links. Sweeping the + fabric fast enough for even a DDR link is not practical. + 2) Default partition support only. + + +Setup and Usage +=============== + +Using the Performance Manager consists of 3 steps: + + 1) compiling in support for the perfmgr (Optionally: the console + socket as well) + 2) enabling the perfmgr and console in opensm.conf + 3) retrieving data which has been collected. 
+ 3a) using console to "dump data" + 3b) using a plugin module to store the data to your own + "database" + +Step 1: Compile in support for the Performance Manager +------------------------------------------------------ + +At this time, it is really best to enable the console socket option as well. +OpenSM can be run in an "interactive" mode. But with the console socket +option turned on one can also make a connection to a running OpenSM. By +default, only "loopback" is enabled with the console with socket being a +compile time option. Regardless, please be aware of your network security +configuration, as the commands presented in the console can affect the +operation of your subnet. + + +Step 2: Enable the perfmgr and console in opensm.conf +----------------------------------------------------- + +Turning the Performance Manager on is pretty easy: set the following options in +the opensm.conf config file. (Default location is +/usr/local/etc/opensm/opensm.conf) + + # Turn it all on + perfmgr TRUE + + # redirection enable + perfmgr_redir TRUE + + # sweep time in seconds + perfmgr_sweep_time_s 180 + + # Max outstanding queries + perfmgr_max_outstanding_queries 500 + + # Ignore CAs on sweep + perfmgr_ignore_cas FALSE + + # Remove missing nodes from DB + perfmgr_rm_nodes TRUE + + # Log error counters to opensm.log + perfmgr_log_errors TRUE + + # Query PerfMgt Get(ClassPortInfo) for extended capabilities + # Extended capabilities include 64 bit extended counters + # and transmit wait support + perfmgr_query_cpi TRUE + + # Log xmit_wait errors + perfmgr_xmit_wait_log FALSE + + # If logging xmit_wait's; set threshold + perfmgr_xmit_wait_threshold 65535 + + # Dump file to dump the events to + event_db_dump_file /var/log/opensm_port_counters.log + +Also, enable the console socket and configure the port for it to listen to if +desired.
+ + # console [off|local|loopback|socket] + console socket + + # Telnet port for console (default 10000) + console_port 10000 + + "local" is only useful if you run OpenSM in the foreground. + + +Step 3: Retrieve data which has been collected +---------------------------------------------- + +Step 3a: Using console dump function +------------------------------------ + +The console command "perfmgr dump_counters" will dump counters to the file +specified in the opensm.conf file. In the example above +"/var/log/opensm_port_counters.log" + +Example output is below: + + +"SW1 wopr ISR9024D (MLX4 FW)" 0x8f10400411f56 port 1 (Since Mon May 12 13:27:14 2008) + symbol_err_cnt : 0 + link_err_recover : 0 + link_downed : 0 + rcv_err : 0 + rcv_rem_phys_err : 0 + rcv_switch_relay_err : 2 + xmit_discards : 0 + xmit_constraint_err : 0 + rcv_constraint_err : 0 + link_integrity_err : 0 + buf_overrun_err : 0 + vl15_dropped : 0 + xmit_data : 470435 + rcv_data : 405956 + xmit_pkts : 8954 + rcv_pkts : 6900 + unicast_xmit_pkts : 0 + unicast_rcv_pkts : 0 + multicast_xmit_pkts : 0 + multicast_rcv_pkts : 0 + + + +Step 3b: Using a plugin module +------------------------------ + +If you want a more automated method of retrieving the data OpenSM provides a +plugin interface to extend OpenSM. The header file is osm_event_plugin.h. +The functions you register with this interface will be called when data is +collected. You can then use that data as appropriate. + +An example plugin can be configured at compile time using the +"--enable-default-event-plugin" option on the configure line. This plugin is +very simple. It logs "events" received from the performance manager to a log +file. I don't recommend using this directly but rather use it as a template to +create your own plugin. 
+ diff --git a/doc/qos-config.txt b/doc/qos-config.txt new file mode 100644 index 0000000..ac7312f --- /dev/null +++ b/doc/qos-config.txt @@ -0,0 +1,44 @@ +Trivial low level QoS configuration proposition +=============================================== + +Basically there is a set of QoS related low-level configuration parameters. +All these parameter names are prefixed by the "qos_" string. Here is a full +list of these parameters: + + qos_max_vls - The maximum number of VLs that will be on the subnet + qos_high_limit - The limit of High Priority component of VL Arbitration + table (IBA 7.6.9) + qos_vlarb_low - Low priority VL Arbitration table (IBA 7.6.9) template + qos_vlarb_high - High priority VL Arbitration table (IBA 7.6.9) template + Both VL arbitration templates are pairs of VL and weight + qos_sl2vl - SL2VL Mapping table (IBA 7.6.6) template. It is a list + of VLs corresponding to SLs 0-15 (Note the VL15 used + here means drop this SL) + +Typical default values (hard-coded in OpenSM initialization) are: + + qos_max_vls 15 + qos_high_limit 0 + qos_vlarb_low 0:0,1:4,2:4,3:4,4:4,5:4,6:4,7:4,8:4,9:4,10:4,11:4,12:4,13:4,14:4 + qos_vlarb_high 0:4,1:0,2:0,3:0,4:0,5:0,6:0,7:0,8:0,9:0,10:0,11:0,12:0,13:0,14:0 + qos_sl2vl 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 + +The syntax is compatible with the rest of OpenSM configuration options and +values may be stored in OpenSM config file (cached options file). + +In addition to the above, we may define separate QoS configuration +parameters sets for various target types. As targets, we currently support +CAs, routers, switch external ports, and switch's enhanced port 0. The +names of such specialized parameters are prefixed by "qos_<type>_" +string. Here is a full list of the currently supported sets: + + qos_ca_ - QoS configuration parameters set for CAs. + qos_rtr_ - parameters set for routers. + qos_sw0_ - parameters set for switches' port 0. + qos_swe_ - parameters set for switches' external ports.
+ +Examples: + + qos_sw0_max_vls 2 + qos_ca_sl2vl 0,1,2,3,5,5,5,12,12,0, + qos_swe_high_limit 0 diff --git a/doc/todo b/doc/todo new file mode 100644 index 0000000..4bf7117 --- /dev/null +++ b/doc/todo @@ -0,0 +1,27 @@ +# OSM List of todo, open issues, and futures: + +1 041228 - Handle local events (local lid change, port state change, etc.) +2 041228 - SM port fail over to next port upon request ? +3 050912 - Handle busy status in SA client API/implementation +4 050912 - Handle o15-0.1.13 (SA ServiceRecord) as well as updates + to osmtest for this +5 051207 - Client reregistration is indicated before SA is + ready to accept subscriptions +6 060109 - Use LID routing for light sweep to guarantee trap + delivery path to the SM +7 061201 - Finer grained locking ? +8 061201 - Mapping multiple MGIDs on single MLID when characteristics + match (PKey, etc.) +9 070329 - Add ssh support into remote socket/console support +10 070329 - Add authentication for (at least remote) console +11 070413 - Add dynamic rate adjustment for multicast groups + + +Futures + +LID partitioning ? +Advanced failover +Upper layer management +Regression tests and automation +Additional pathing algorithms + diff --git a/gen_chlog.sh b/gen_chlog.sh new file mode 100755 index 0000000..b28d1de --- /dev/null +++ b/gen_chlog.sh @@ -0,0 +1,74 @@ +#!/bin/sh + +usage() +{ + echo "Usage: $0 [--spec]" + exit 2 +} + +if [ "$1" = "--spec" ] ; then + spec_format=1 +fi + +GIT_DIR=`git rev-parse --git-dir 2>/dev/null` + +test -z "$GIT_DIR" && usage + + +export GIT_DIR +export GIT_PAGER="" +export PAGER="" + + +# mkchlog FORMAT +# Print "git log" output for ./ grouped by release tag, newest first. +# FORMAT is the --pretty=format:... option handed to "git log". +mkchlog() +{ + format=$1 + + prev_tag="" + # Start empty so a value inherited from the environment cannot leak in. + all_vers="" + + for tag in `git tag -l '*' | sort -V` ; do + # Resolve the tag to its commit id: merge-base prints a commit id, so + # comparing it with "git describe" output (a tag name) never matched + # and every tag was skipped. + obj=`git rev-parse --verify "$tag^{commit}" 2>/dev/null` || continue + base=`git merge-base "$obj" HEAD` + if [ -z "$base" ] || [ "$base" != "$obj" ] ; then + continue + fi + all_vers="$prev_tag$tag $all_vers" + prev_tag=$tag..
+ done + + if [ -z "$prev_tag" ] ; then + all_vers=HEAD + else + all_vers="${prev_tag}HEAD $all_vers" + fi + + for ver in $all_vers ; do # unquoted on purpose: split the list on whitespace + log_out=`git log "$ver" -- ./` + if [ -z "$log_out" ] ; then + continue + fi + ver_name=${ver##*..} # newer end of an "old..new" range + echo "" + echo "** Version: $ver_name" + echo "" + git log --no-merges "${format}" "$ver" -- ./ + done +} + + +if [ -z "$spec_format" ] ; then + mkchlog --pretty=format:"%ad %an%n%H%n%n* %s%n" \ + | sed -e 's/^\* /\t* /' +else + echo "%changelog" + mkchlog --pretty=format:"- %ad %an: %s" + echo "" +fi diff --git a/gen_ver.sh b/gen_ver.sh new file mode 100755 index 0000000..db870a2 --- /dev/null +++ b/gen_ver.sh @@ -0,0 +1,35 @@ +#!/bin/sh +# +# This generates a version string which includes recent version as +# specified in correspondent sub project's configure.ac file, plus +# git revision abbreviation in the case if sub-project HEAD is different +# from recent tag, plus "_dirty" suffix if local uncommitted changes are +# in the sub project tree. +# + +usage() +{ + echo "Usage: $0" + exit 2 +} + +cd `dirname $0` + +packege=`basename \`pwd\`` +conf_file=configure.ac +version=`cat $conf_file | sed -ne '/AC_INIT.*.*/s/^AC_INIT.*, \(.*\),.*$/\1/p'` + +git diff --quiet $packege-$version..HEAD -- ./ > /dev/null 2>&1 +if [ $? -eq 1 ] ; then + abbr=`git rev-parse --short --verify HEAD 2>/dev/null` + if [ ! -z "$abbr" ] ; then + version="${version}_${abbr}" + fi +fi + +git diff-index --quiet HEAD -- ./> /dev/null 2>&1 +if [ $? -eq 1 ] ; then + version="${version}_dirty" +fi + +echo $version diff --git a/include/Makefile.am b/include/Makefile.am new file mode 100644 index 0000000..1df1abc --- /dev/null +++ b/include/Makefile.am @@ -0,0 +1,33 @@ + +SUBDIRS = .
+ +nobase_pkginclude_HEADERS = iba/ib_types.h iba/ib_cm_types.h + +EXTRA_DIST = \ + $(srcdir)/iba/ib_types.h \ + $(srcdir)/iba/ib_cm_types.h \ + $(srcdir)/vendor/osm_vendor_mlx_transport_anafa.h \ + $(srcdir)/vendor/osm_vendor_mlx.h \ + $(srcdir)/vendor/osm_vendor_mlx_sender.h \ + $(srcdir)/vendor/osm_vendor_ibumad.h \ + $(srcdir)/vendor/osm_vendor_mlx_defs.h \ + $(srcdir)/vendor/osm_vendor_mtl_transaction_mgr.h \ + $(srcdir)/vendor/osm_vendor_mlx_sar.h \ + $(srcdir)/vendor/osm_vendor_mlx_dispatcher.h \ + $(srcdir)/vendor/osm_vendor_umadt.h \ + $(srcdir)/vendor/osm_vendor_mlx_svc.h \ + $(srcdir)/vendor/osm_vendor_mlx_hca.h \ + $(srcdir)/vendor/osm_vendor_mlx_rmpp_ctx.h \ + $(srcdir)/vendor/osm_vendor_mlx_transport.h \ + $(srcdir)/vendor/osm_vendor_mlx_inout.h \ + $(srcdir)/vendor/osm_vendor_mtl_hca_guid.h \ + $(srcdir)/vendor/osm_vendor_test.h \ + $(srcdir)/vendor/osm_vendor_ts.h \ + $(srcdir)/vendor/osm_vendor_mlx_txn.h \ + $(srcdir)/vendor/osm_vendor_al.h \ + $(srcdir)/vendor/osm_vendor_mtl.h \ + $(srcdir)/vendor/osm_ts_useraccess.h \ + $(srcdir)/vendor/osm_umadt.h \ + $(srcdir)/vendor/osm_mtl_bind.h + +pkgincludedir = $(includedir)/infiniband diff --git a/include/complib/cl_atomic.h b/include/complib/cl_atomic.h new file mode 100644 index 0000000..ba21442 --- /dev/null +++ b/include/complib/cl_atomic.h @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of atomic manipulation functions. + */ + +#ifndef _CL_ATOMIC_H_ +#define _CL_ATOMIC_H_ + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/Atomic Operations +* NAME +* Atomic Operations +* +* DESCRIPTION +* The Atomic Operations functions allow callers to operate on +* 32-bit signed integers in an atomic fashion. 
+*********/ +/****f* Component Library: Atomic Operations/cl_atomic_inc +* NAME +* cl_atomic_inc +* +* DESCRIPTION +* The cl_atomic_inc function atomically increments a 32-bit signed +* integer and returns the incremented value. +* +* SYNOPSIS +*/ +int32_t cl_atomic_inc(IN atomic32_t * const p_value); +/* +* PARAMETERS +* p_value +* [in] Pointer to a 32-bit integer to increment. +* +* RETURN VALUE +* Returns the incremented value pointed to by p_value. +* +* NOTES +* The provided value is incremented and its value returned in one atomic +* operation. +* +* cl_atomic_inc maintains data consistency without requiring additional +* synchronization mechanisms in multi-threaded environments. +* +* SEE ALSO +* Atomic Operations, cl_atomic_dec, cl_atomic_add, cl_atomic_sub +*********/ + +/****f* Component Library: Atomic Operations/cl_atomic_dec +* NAME +* cl_atomic_dec +* +* DESCRIPTION +* The cl_atomic_dec function atomically decrements a 32-bit signed +* integer and returns the decremented value. +* +* SYNOPSIS +*/ +int32_t cl_atomic_dec(IN atomic32_t * const p_value); +/* +* PARAMETERS +* p_value +* [in] Pointer to a 32-bit integer to decrement. +* +* RETURN VALUE +* Returns the decremented value pointed to by p_value. +* +* NOTES +* The provided value is decremented and its value returned in one atomic +* operation. +* +* cl_atomic_dec maintains data consistency without requiring additional +* synchronization mechanisms in multi-threaded environments. +* +* SEE ALSO +* Atomic Operations, cl_atomic_inc, cl_atomic_add, cl_atomic_sub +*********/ + +/****f* Component Library: Atomic Operations/cl_atomic_add +* NAME +* cl_atomic_add +* +* DESCRIPTION +* The cl_atomic_add function atomically adds a value to a +* 32-bit signed integer and returns the resulting value. +* +* SYNOPSIS +*/ +int32_t +cl_atomic_add(IN atomic32_t * const p_value, IN const int32_t increment); +/* +* PARAMETERS +* p_value +* [in] Pointer to a 32-bit integer that will be added to. 
+* +* increment +* [in] Value by which to increment the integer pointed to by p_value. +* +* RETURN VALUE +* Returns the value pointed to by p_value after the addition. +* +* NOTES +* The provided increment is added to the value and the result returned in +* one atomic operation. +* +* cl_atomic_add maintains data consistency without requiring additional +* synchronization mechanisms in multi-threaded environments. +* +* SEE ALSO +* Atomic Operations, cl_atomic_inc, cl_atomic_dec, cl_atomic_sub +*********/ + +/****f* Component Library: Atomic Operations/cl_atomic_sub +* NAME +* cl_atomic_sub +* +* DESCRIPTION +* The cl_atomic_sub function atomically subtracts a value from a +* 32-bit signed integer and returns the resulting value. +* +* SYNOPSIS +*/ +int32_t +cl_atomic_sub(IN atomic32_t * const p_value, IN const int32_t decrement); +/* +* PARAMETERS +* p_value +* [in] Pointer to a 32-bit integer that will be subtracted from. +* +* decrement +* [in] Value by which to decrement the integer pointed to by p_value. +* +* RETURN VALUE +* Returns the value pointed to by p_value after the subtraction. +* +* NOTES +* The provided decrement is subtracted from the value and the result +* returned in one atomic operation. +* +* cl_atomic_sub maintains data consistency without requiring additional +* synchronization mechanisms in multi-threaded environments. +* +* SEE ALSO +* Atomic Operations, cl_atomic_inc, cl_atomic_dec, cl_atomic_add +*********/ + +END_C_DECLS +#endif /* _CL_ATOMIC_H_ */ diff --git a/include/complib/cl_atomic_osd.h b/include/complib/cl_atomic_osd.h new file mode 100644 index 0000000..822c19d --- /dev/null +++ b/include/complib/cl_atomic_osd.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation specific header files for atomic operations. 
+ */ + +#ifndef _CL_ATOMIC_OSD_H_ +#define _CL_ATOMIC_OSD_H_ +/* NOTE(review): both #include lines below lack a header name (presumably lost in extraction) - confirm against upstream. */ +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/* Lock shared by every helper below; each one holds it across its read-modify-write of *p_value. */ +extern cl_spinlock_t cl_atomic_spinlock; +/* Add 1 to *p_value while holding cl_atomic_spinlock; returns the value stored. */ +static inline int32_t cl_atomic_inc(IN atomic32_t * const p_value) +{ + int32_t new_val; + + cl_spinlock_acquire(&cl_atomic_spinlock); + new_val = *p_value + 1; + *p_value = new_val; + cl_spinlock_release(&cl_atomic_spinlock); + return (new_val); +} +/* Subtract 1 from *p_value while holding cl_atomic_spinlock; returns the value stored. */ +static inline int32_t cl_atomic_dec(IN atomic32_t * const p_value) +{ + int32_t new_val; + + cl_spinlock_acquire(&cl_atomic_spinlock); + new_val = *p_value - 1; + *p_value = new_val; + cl_spinlock_release(&cl_atomic_spinlock); + return (new_val); +} +/* Add increment to *p_value while holding cl_atomic_spinlock; returns the value stored. */ +static inline int32_t +cl_atomic_add(IN atomic32_t * const p_value, IN const int32_t increment) +{ + int32_t new_val; + + cl_spinlock_acquire(&cl_atomic_spinlock); + new_val = *p_value + increment; + *p_value = new_val; + cl_spinlock_release(&cl_atomic_spinlock); + return (new_val); +} +/* Subtract decrement from *p_value while holding cl_atomic_spinlock; returns the value stored. */ +static inline int32_t +cl_atomic_sub(IN atomic32_t * const p_value, IN const int32_t decrement) +{ + int32_t new_val; + + cl_spinlock_acquire(&cl_atomic_spinlock); + new_val = *p_value - decrement; + *p_value = new_val; + cl_spinlock_release(&cl_atomic_spinlock); + return (new_val); +} + +END_C_DECLS +#endif /* _CL_ATOMIC_OSD_H_ */ diff --git a/include/complib/cl_byteswap.h b/include/complib/cl_byteswap.h new file mode 100644 index 0000000..bef24b0 --- /dev/null +++ b/include/complib/cl_byteswap.h @@ -0,0 +1,524 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * provides byteswapping utilities. Basic functions are obtained from + * platform specific implementations from byteswap_osd.h. + */ + +#ifndef _CL_BYTESWAP_H_ +#define _CL_BYTESWAP_H_ + +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/Byte Swapping +* NAME +* Byte Swapping +* +* DESCRIPTION +* The byte swapping functions and macros allow swapping bytes from network +* byte order to host byte order. +* +* All data transmitted between systems should be in network byte order. 
+* In order to utilize such data, it must be converted to host byte order +* before use. +* +* SEE ALSO +* Functions: +* cl_ntoh16, cl_hton16, cl_ntoh32, cl_hton32, cl_ntoh64, cl_hton64, +* cl_ntoh +* +* Macros: +* CL_NTOH16, CL_HTON16, CL_NTOH32, CL_HTON32, CL_NTOH64, CL_HTON64 +*********/ +/* + * The byteswap_osd.h provides the following macros. + * __LITTLE_ENDIAN + * __BIG_ENDIAN + * __BYTE_ORDER + * + * If the platform provides byte swapping functions, byteswap_osd.h also + * provides the following macros. + * ntoh16, hton16 + * ntoh32, hton32 + * ntoh64, hton64 + */ +#ifndef __BYTE_ORDER +#error "__BYTE_ORDER macro undefined. Missing in endian.h?" +#endif +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define CPU_LE 1 +#define CPU_BE 0 +#else +#define CPU_LE 0 +#define CPU_BE 1 +#endif +/****d* Component Library: Byte Swapping/CL_NTOH16 +* NAME +* CL_NTOH16 +* +* DESCRIPTION +* The CL_NTOH16 macro converts a 16-bit value from network byte order to +* host byte order. The CL_NTOH16 macro will cause constant values to be +* swapped by the pre-processor. For variables, CL_NTOH16 is less efficient +* than the cl_ntoh16 function. +* +* SYNOPSIS +* CL_NTOH16( val ); +* +* PARAMETERS +* val +* [in] 16-bit value to swap from network byte order to host byte order. +* +* RESULT +* Value of val converted to host byte order. +* +* NOTES +* This macro is analogous to CL_HTON16. +* +* SEE ALSO +* Byte Swapping, CL_HTON16, CL_NTOH32, CL_NTOH64, +* cl_ntoh16, cl_ntoh32, cl_ntoh64, cl_ntoh +*********/ +/****d* Component Library: Byte Swapping/CL_HTON16 +* NAME +* CL_HTON16 +* +* DESCRIPTION +* The CL_HTON16 macro converts a 16-bit value from host byte order to +* network byte order. The CL_HTON16 macro will cause constant values to be +* swapped by the pre-processor. For variables, CL_HTON16 is less efficient +* than the cl_hton16 function. +* +* SYNOPSIS +* CL_HTON16( val ); +* +* PARAMETERS +* val +* [in] 16-bit value to swap from host byte order to network byte order. 
+* +* RESULT +* Value of val converted to network byte order. +* +* NOTES +* This macro is analogous to CL_NTOH16. +* +* SEE ALSO +* Byte Swapping, CL_NTOH16, CL_HTON32, CL_HTON64, +* cl_hton16, cl_hton32, cl_hton64, cl_ntoh +*********/ +#if CPU_LE +#define CL_NTOH16( x ) (uint16_t)( \ + (((uint16_t)(x) & 0x00FF) << 8) | \ + (((uint16_t)(x) & 0xFF00) >> 8) ) +#else +#define CL_NTOH16( x ) (x) +#endif +#define CL_HTON16 CL_NTOH16 +/****f* Component Library: Byte Swapping/cl_ntoh16 +* NAME +* cl_ntoh16 +* +* DESCRIPTION +* The cl_ntoh16 function converts a 16-bit value from network byte order to +* host byte order. +* +* SYNOPSIS +* uint16_t +* cl_ntoh16( +* IN const uint16_t val ); +* +* PARAMETERS +* val +* [in] Value to swap from network byte order to host byte order. +* +* RETURN VALUE +* Value of val converted to host byte order. +* +* NOTES +* This function is analogous to cl_hton16. +* +* SEE ALSO +* Byte Swapping, cl_hton16, cl_ntoh32, cl_ntoh64, cl_ntoh +*********/ +/****f* Component Library: Byte Swapping/cl_hton16 +* NAME +* cl_hton16 +* +* DESCRIPTION +* The cl_hton16 function converts a 16-bit value from host byte order to +* network byte order. +* +* SYNOPSIS +* uint16_t +* cl_hton16( +* IN const uint16_t val ); +* +* PARAMETERS +* val +* [in] Value to swap from host byte order to network byte order . +* +* RETURN VALUE +* Value of val converted to network byte order. +* +* NOTES +* This function is analogous to cl_ntoh16. +* +* SEE ALSO +* Byte Swapping, cl_ntoh16, cl_hton32, cl_hton64, cl_ntoh +*********/ +#ifndef cl_ntoh16 +#define cl_ntoh16 CL_NTOH16 +#define cl_hton16 CL_HTON16 +#endif +/****d* Component Library: Byte Swapping/CL_NTOH32 +* NAME +* CL_NTOH32 +* +* DESCRIPTION +* The CL_NTOH32 macro converts a 32-bit value from network byte order to +* host byte order. The CL_NTOH32 macro will cause constant values to be +* swapped by the pre-processor. For variables, CL_NTOH32 is less efficient +* than the cl_ntoh32 function. 
+* +* SYNOPSIS +* CL_NTOH32( val ); +* +* PARAMETERS +* val +* [in] 32-bit value to swap from network byte order to host byte order. +* +* RESULT +* Value of val converted to host byte order. +* +* NOTES +* This macro is analogous to CL_HTON32. +* +* SEE ALSO +* Byte Swapping, CL_HTON32, CL_NTOH16, CL_NTOH64, +* cl_ntoh16, cl_ntoh32, cl_ntoh64, cl_ntoh +*********/ +/****d* Component Library: Byte Swapping/CL_HTON32 +* NAME +* CL_HTON32 +* +* DESCRIPTION +* The CL_HTON32 macro converts a 32-bit value from host byte order to +* network byte order. The CL_HTON32 macro will cause constant values to be +* swapped by the pre-processor. For variables, CL_HTON32 is less efficient +* than the cl_hton32 function. +* +* SYNOPSIS +* CL_HTON32( val ); +* +* PARAMETERS +* val +* [in] 32-bit value to swap from host byte order to network byte order. +* +* RESULT +* Value of val converted to network byte order. +* +* NOTES +* This macro is analogous to CL_NTOH32. +* +* SEE ALSO +* Byte Swapping, CL_NTOH32, CL_HTON16, CL_HTON64, +* cl_hton16, cl_hton32, cl_hton64, cl_ntoh +*********/ +#if CPU_LE +#define CL_NTOH32( x ) (uint32_t)( \ + (((uint32_t)(x) & 0x000000FF) << 24) | \ + (((uint32_t)(x) & 0x0000FF00) << 8) | \ + (((uint32_t)(x) & 0x00FF0000) >> 8) | \ + (((uint32_t)(x) & 0xFF000000) >> 24) ) +#else +#define CL_NTOH32( x ) (x) +#endif +#define CL_HTON32 CL_NTOH32 +/****f* Component Library: Byte Swapping/cl_ntoh32 +* NAME +* cl_ntoh32 +* +* DESCRIPTION +* The cl_ntoh32 function converts a 32-bit value from network byte order to +* host byte order. +* +* SYNOPSIS +* uint32_t +* cl_ntoh32( +* IN const uint32_t val ); +* +* PARAMETERS +* val +* [in] Value to swap from network byte order to host byte order. +* +* RETURN VALUE +* Value of val converted in host byte order. +* +* NOTES +* This function is analogous to cl_hton32. 
+* +* SEE ALSO +* Byte Swapping, cl_hton32, cl_ntoh16, cl_ntoh64, cl_ntoh +*********/ +/****f* Component Library: Byte Swapping/cl_hton32 +* NAME +* cl_hton32 +* +* DESCRIPTION +* The cl_hton32 function converts a 32-bit value from host byte order to +* network byte order. +* +* SYNOPSIS +* uint32_t +* cl_hton32( +* IN const uint32_t val ); +* +* PARAMETERS +* val +* [in] Value to swap from host byte order to network byte order . +* +* RETURN VALUE +* Value of val converted to network byte order. +* +* NOTES +* This function is analogous to cl_ntoh32. +* +* SEE ALSO +* Byte Swapping, cl_ntoh32, cl_hton16, cl_hton64, cl_ntoh +*********/ +#ifndef cl_ntoh32 +#define cl_ntoh32 CL_NTOH32 +#define cl_hton32 CL_HTON32 +#endif +/****d* Component Library: Byte Swapping/CL_NTOH64 +* NAME +* CL_NTOH64 +* +* DESCRIPTION +* The CL_NTOH64 macro converts a 64-bit value from network byte order to +* host byte order. The CL_NTOH64 macro will cause constant values to be +* swapped by the pre-processor. For variables, CL_NTOH64 is less efficient +* than the cl_ntoh64 function. +* +* SYNOPSIS +* CL_NTOH64( val ); +* +* PARAMETERS +* val +* [in] 64-bit value to swap from network byte order to host byte order. +* +* RESULT +* Value of val converted to host byte order. +* +* NOTES +* This macro is analogous to CL_HTON64. +* +* SEE ALSO +* Byte Swapping, CL_HTON64, CL_NTOH16, CL_NTOH32, +* cl_ntoh16, cl_ntoh32, cl_ntoh64, cl_ntoh +*********/ +/****d* Component Library: Byte Swapping/CL_HTON64 +* NAME +* CL_HTON64 +* +* DESCRIPTION +* The CL_HTON64 macro converts a 64-bit value from host byte order to +* network byte order. The CL_HTON64 macro will cause constant values to be +* swapped by the pre-processor. For variables, CL_HTON64 is less efficient +* than the cl_hton64 function. +* +* SYNOPSIS +* CL_HTON64( val ); +* +* PARAMETERS +* val +* [in] 64-bit value to swap from host byte order to network byte order. +* +* RESULT +* Value of val converted to network byte order. 
+* +* NOTES +* This macro is analogous to CL_NTOH64. +* +* SEE ALSO +* Byte Swapping, CL_NTOH64, CL_HTON16, CL_HTON32, +* cl_hton16, cl_hton32, cl_hton64, cl_ntoh +*********/ +#if CPU_LE +#define CL_NTOH64( x ) (uint64_t)( \ + (((uint64_t)(x) & 0x00000000000000FFULL) << 56) | \ + (((uint64_t)(x) & 0x000000000000FF00ULL) << 40) | \ + (((uint64_t)(x) & 0x0000000000FF0000ULL) << 24) | \ + (((uint64_t)(x) & 0x00000000FF000000ULL) << 8 ) | \ + (((uint64_t)(x) & 0x000000FF00000000ULL) >> 8 ) | \ + (((uint64_t)(x) & 0x0000FF0000000000ULL) >> 24) | \ + (((uint64_t)(x) & 0x00FF000000000000ULL) >> 40) | \ + (((uint64_t)(x) & 0xFF00000000000000ULL) >> 56) ) +#else +#define CL_NTOH64( x ) (x) +#endif +#define CL_HTON64 CL_NTOH64 +/****f* Component Library: Byte Swapping/cl_ntoh64 +* NAME +* cl_ntoh64 +* +* DESCRIPTION +* The cl_ntoh64 function converts a 64-bit value from network byte order to +* host byte order. +* +* SYNOPSIS +* uint64_t +* cl_ntoh64( +* IN const uint64_t val ); +* +* PARAMETERS +* val +* [in] Value to swap from network byte order to host byte order. +* +* RETURN VALUE +* Value of val converted in host byte order. +* +* NOTES +* This function is analogous to cl_hton64. +* +* SEE ALSO +* Byte Swapping, cl_hton64, cl_ntoh16, cl_ntoh32, cl_ntoh +*********/ +/****f* Component Library: Byte Swapping/cl_hton64 +* NAME +* cl_hton64 +* +* DESCRIPTION +* The cl_hton64 function converts a 64-bit value from host byte order to +* network byte order. +* +* SYNOPSIS +* uint64_t +* cl_hton64( +* IN const uint64_t val ); +* +* PARAMETERS +* val +* [in] Value to swap from host byte order to network byte order . +* +* RETURN VALUE +* Value of val converted to network byte order. +* +* NOTES +* This function is analogous to cl_ntoh64. 
+* +* SEE ALSO +* Byte Swapping, cl_ntoh64, cl_hton16, cl_hton32, cl_ntoh +*********/ +#ifndef cl_ntoh64 +#define cl_ntoh64 CL_NTOH64 +#define cl_hton64 CL_HTON64 +#endif +/****f* Component Library: Byte Swapping/cl_ntoh +* NAME +* cl_ntoh +* +* DESCRIPTION +* The cl_ntoh function converts a value from network byte order to +* host byte order. +* +* SYNOPSIS +*/ +static inline void +cl_ntoh(OUT char *const p_dest, + IN const char *const p_src, IN const uint8_t size) +{ +#if CPU_LE /* little-endian host: reverse the order of the size bytes */ + uint8_t i; + char temp; + + if (p_src == p_dest) { + /* Swap in place if source and destination are the same. */ + for (i = 0; i < size / 2; i++) { + temp = p_dest[i]; + p_dest[i] = p_src[size - 1 - i]; + p_dest[size - 1 - i] = temp; + } + } else { + for (i = 0; i < size; i++) + p_dest[i] = p_src[size - 1 - i]; + } +#else /* !CPU_LE: no reordering, at most a plain copy */ + /* + * If the source and destination are not the same, copy the source to + * the destination. + */ + if (p_src != p_dest) + memcpy(p_dest, p_src, size); +#endif +} + +/* +* PARAMETERS +* p_dest +* [out] Pointer to a byte array to contain the converted value of p_src. +* +* p_src +* [in] Pointer to a byte array to be converted from network byte +* ordering. +* +* size +* [in] Number of bytes to swap. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* cl_ntoh can perform in place swapping if both p_src and p_dest point to +* the same buffer. +* +* SEE ALSO +* Byte Swapping, cl_ntoh16, cl_ntoh32, cl_ntoh64 +*********/ + +END_C_DECLS +#endif /* _CL_BYTESWAP_H_ */ diff --git a/include/complib/cl_byteswap_osd.h b/include/complib/cl_byteswap_osd.h new file mode 100644 index 0000000..72ff40e --- /dev/null +++ b/include/complib/cl_byteswap_osd.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Provides common macros for dealing with byte swapping issues. 
+ */ + +#ifndef _CL_BYTESWAP_OSD_H_ +#define _CL_BYTESWAP_OSD_H_ + +/* + * This provides defines __LITTLE_ENDIAN, __BIG_ENDIAN and __BYTE_ORDER + */ +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define cl_ntoh16(x) bswap_16(x) +#define cl_hton16(x) bswap_16(x) +#define cl_ntoh32(x) bswap_32(x) +#define cl_hton32(x) bswap_32(x) +#define cl_ntoh64(x) (uint64_t)bswap_64(x) +#define cl_hton64(x) (uint64_t)bswap_64(x) +#else /* Big Endian */ +#define cl_ntoh16(x) (x) +#define cl_hton16(x) (x) +#define cl_ntoh32(x) (x) +#define cl_hton32(x) (x) +#define cl_ntoh64(x) (x) +#define cl_hton64(x) (x) +#endif +END_C_DECLS +#endif /* _CL_BYTESWAP_OSD_H_ */ diff --git a/include/complib/cl_comppool.h b/include/complib/cl_comppool.h new file mode 100644 index 0000000..aef1573 --- /dev/null +++ b/include/complib/cl_comppool.h @@ -0,0 +1,589 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of the composite pool. + * The composite pool managers a pool of composite objects. A composite object is an object + * that is made of multiple sub objects. + * The pool can grow to meet demand, limited only by system memory. + */ + +#ifndef _CL_COMP_POOL_H_ +#define _CL_COMP_POOL_H_ + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/Composite Pool +* NAME +* Composite Pool +* +* DESCRIPTION +* The Composite Pool provides a self-contained and self-sustaining pool of +* user defined composite objects. +* +* A composite object is an object that is composed of one or more +* sub-objects, each of which needs to be treated separately for +* initialization. Objects can be retrieved from the pool as long as there +* is memory in the system. +* +* To aid in object oriented design, the composite pool provides the user +* the ability to specify callbacks that are invoked for each object for +* construction, initialization, and destruction. Constructor and destructor +* callback functions may not fail. 
+* +* A composite pool does not return memory to the system as the user returns +* objects to the pool. The only method of returning memory to the system is +* to destroy the pool. +* +* The composite pool functions operate on a cl_cpool_t structure which +* should be treated as opaque and should be manipulated only through the +* provided functions. +* +* SEE ALSO +* Structures: +* cl_cpool_t +* +* Callbacks: +* cl_pfn_cpool_init_t, cl_pfn_cpool_dtor_t +* +* Initialization/Destruction: +* cl_cpool_construct, cl_cpool_init, cl_cpool_destroy +* +* Manipulation: +* cl_cpool_get, cl_cpool_put, cl_cpool_grow +* +* Attributes: +* cl_is_cpool_inited, cl_cpool_count +*********/ +/****d* Component Library: Composite Pool/cl_pfn_cpool_init_t +* NAME +* cl_pfn_cpool_init_t +* +* DESCRIPTION +* The cl_pfn_cpool_init_t function type defines the prototype for +* functions used as initializers for objects being allocated by a +* composite pool. +* +* SYNOPSIS +*/ +typedef cl_status_t + (*cl_pfn_cpool_init_t) (IN void **const p_comp_array, + IN const uint32_t num_components, IN void *context); +/* +* PARAMETERS +* p_comp_array +* [in] Pointer to the first entry in an array of pointers, each of +* which points to a component that makes up a composite object. +* +* num_components +* [in] Number of components in the composite array. +* +* context +* [in] Context provided in a call to cl_cpool_init. +* +* RETURN VALUES +* Return CL_SUCCESS to indicate that initialization of the object +* was successful and that initialization of further objects may continue. +* +* Other cl_status_t values will be returned by cl_cpool_init +* and cl_cpool_grow. +* +* NOTES +* This function type is provided as function prototype reference for +* the function provided by the user as an optional parameter to the +* cl_cpool_init function.
+* +* The initializer is invoked once per allocated object, allowing the user +* to chain components to form a composite object and perform any necessary +* initialization. Returning a status other than CL_SUCCESS aborts a grow +* operation, initiated either through cl_cpool_init or cl_cpool_grow, and +* causes the initiating function to fail. Any non-CL_SUCCESS status will +* be returned by the function that initiated the grow operation. +* +* All memory for the requested number of components is pre-allocated. +* +* When later performing a cl_cpool_get call, the return value is a pointer +* to the first component. +* +* SEE ALSO +* Composite Pool, cl_cpool_init, cl_cpool_grow +*********/ + +/****d* Component Library: Composite Pool/cl_pfn_cpool_dtor_t +* NAME +* cl_pfn_cpool_dtor_t +* +* DESCRIPTION +* The cl_pfn_cpool_dtor_t function type defines the prototype for +* functions used as destructor for objects being deallocated by a +* composite pool. +* +* SYNOPSIS +*/ +typedef void + (*cl_pfn_cpool_dtor_t) (IN void *const p_object, IN void *context); +/* +* PARAMETERS +* p_object +* [in] Pointer to an object to destruct. +* +* context +* [in] Context provided in the call to cl_cpool_init. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* This function type is provided as function prototype reference for +* the function provided by the user as an optional parameter to the +* cl_cpool_init function. +* +* The destructor is invoked once per allocated object, allowing the user +* to perform any necessary cleanup. Users should not attempt to deallocate +* the memory for the composite object, as the composite pool manages +* object allocation and deallocation. +* +* SEE ALSO +* Composite Pool, cl_cpool_init +*********/ + +/****s* Component Library: Composite Pool/cl_cpool_t +* NAME +* cl_cpool_t +* +* DESCRIPTION +* Composite pool structure. 
+* +* The cl_cpool_t structure should be treated as opaque and should be +* manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct _cl_cpool { + cl_qcpool_t qcpool; + cl_pfn_cpool_init_t pfn_init; + cl_pfn_cpool_dtor_t pfn_dtor; + const void *context; +} cl_cpool_t; +/* +* FIELDS +* qcpool +* Quick composite pool that manages all objects. +* +* pfn_init +* Pointer to the user's initializer callback, used by the pool +* to translate the quick composite pool's initializer callback to +* a composite pool initializer callback. +* +* pfn_dtor +* Pointer to the user's destructor callback, used by the pool +* to translate the quick composite pool's destructor callback to +* a composite pool destructor callback. +* +* context +* User's provided context for callback functions, used by the pool +* when invoking callbacks. +* +* SEE ALSO +* Composite Pool +*********/ + +/****f* Component Library: Composite Pool/cl_cpool_construct +* NAME +* cl_cpool_construct +* +* DESCRIPTION +* The cl_cpool_construct function constructs a composite pool. +* +* SYNOPSIS +*/ +void cl_cpool_construct(IN cl_cpool_t * const p_pool); +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_cpool_t structure whose state to initialize. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling cl_cpool_init, cl_cpool_destroy, cl_is_cpool_inited. +* +* Calling cl_cpool_construct is a prerequisite to calling any other +* composite pool function except cl_cpool_init. +* +* SEE ALSO +* Composite Pool, cl_cpool_init, cl_cpool_destroy, cl_is_cpool_inited +*********/ + +/****f* Component Library: Composite Pool/cl_is_cpool_inited +* NAME +* cl_is_cpool_inited +* +* DESCRIPTION +* The cl_is_cpool_inited function returns whether a composite pool was +* successfully initialized. +* +* SYNOPSIS +*/ +static inline boolean_t cl_is_cpool_inited(IN const cl_cpool_t * const p_pool) +{ + /* CL_ASSERT that a non-null pointer is provided.
*/ + CL_ASSERT(p_pool); + return (cl_is_qcpool_inited(&p_pool->qcpool)); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_cpool_t structure whose initialization state +* to check. +* +* RETURN VALUES +* TRUE if the composite pool was initialized successfully. +* +* FALSE otherwise. +* +* NOTES +* Allows checking the state of a composite pool to determine if invoking +* member functions is appropriate. +* +* SEE ALSO +* Composite Pool +*********/ + +/****f* Component Library: Composite Pool/cl_cpool_init +* NAME +* cl_cpool_init +* +* DESCRIPTION +* The cl_cpool_init function initializes a composite pool for use. +* +* SYNOPSIS +*/ +cl_status_t +cl_cpool_init(IN cl_cpool_t * const p_pool, + IN const size_t min_size, + IN const size_t max_size, + IN const size_t grow_size, + IN size_t * const component_sizes, + IN const uint32_t num_components, + IN cl_pfn_cpool_init_t pfn_initializer OPTIONAL, + IN cl_pfn_cpool_dtor_t pfn_destructor OPTIONAL, + IN const void *const context); +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_cpool_t structure to initialize. +* +* min_size +* [in] Minimum number of objects that the pool should support. All +* necessary allocations to allow storing the minimum number of items +* are performed at initialization time, and all necessary callbacks +* successfully invoked. +* +* max_size +* [in] Maximum number of objects to which the pool is allowed to grow. +* A value of zero specifies no maximum. +* +* grow_size +* [in] Number of objects to allocate when incrementally growing the pool. +* A value of zero disables automatic growth. +* +* component_sizes +* [in] Pointer to the first entry in an array of sizes describing, +* in order, the sizes of the components that make up a composite object. +* +* num_components +* [in] Number of components that make up a composite object. +* +* pfn_initializer +* [in] Initialization callback to invoke for every new object when +* growing the pool.
This parameter may be NULL only if the objects +* stored in the composite pool consist of only one component. +* See the cl_pfn_cpool_init_t function type declaration for details +* about the callback function. +* +* pfn_destructor +* [in] Destructor callback to invoke for every object before memory for +* that object is freed. This parameter is optional and may be NULL. +* See the cl_pfn_cpool_dtor_t function type declaration for details +* about the callback function. +* +* context +* [in] Value to pass to the callback functions to provide context. +* +* RETURN VALUES +* CL_SUCCESS if the composite pool was initialized successfully. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory to initialize the +* composite pool. +* +* CL_INVALID_SETTING if a NULL initializer was provided for composite objects +* consisting of more than one component. Also returns CL_INVALID_SETTING if +* the maximum size is non-zero and less than the minimum size. +* +* Other cl_status_t value returned by optional initialization callback function +* specified by the pfn_initializer parameter. +* +* NOTES +* cl_cpool_init initializes, and if necessary, grows the pool to +* the capacity desired. +* +* SEE ALSO +* Composite Pool, cl_cpool_construct, cl_cpool_destroy, +* cl_cpool_get, cl_cpool_put, cl_cpool_grow, +* cl_cpool_count, cl_pfn_cpool_ctor_t, cl_pfn_cpool_init_t, +* cl_pfn_cpool_dtor_t +*********/ + +/****f* Component Library: Composite Pool/cl_cpool_destroy +* NAME +* cl_cpool_destroy +* +* DESCRIPTION +* The cl_cpool_destroy function destroys a composite pool. +* +* SYNOPSIS +*/ +static inline void cl_cpool_destroy(IN cl_cpool_t * const p_pool) +{ + CL_ASSERT(p_pool); + + cl_qcpool_destroy(&p_pool->qcpool); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_cpool_t structure to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* All memory allocated for composite objects is freed.
The destructor +* callback, if any, will be invoked for every allocated object. Further +* operations on the composite pool should not be attempted after +* cl_cpool_destroy is invoked. +* +* This function should only be called after a call to cl_cpool_construct. +* +* In a debug build, cl_cpool_destroy asserts that all objects are in +* the pool. +* +* SEE ALSO +* Composite Pool, cl_cpool_construct, cl_cpool_init +*********/ + +/****f* Component Library: Composite Pool/cl_cpool_count +* NAME +* cl_cpool_count +* +* DESCRIPTION +* The cl_cpool_count function returns the number of available objects +* in a composite pool. +* +* SYNOPSIS +*/ +static inline size_t cl_cpool_count(IN cl_cpool_t * const p_pool) +{ + CL_ASSERT(p_pool); + return (cl_qcpool_count(&p_pool->qcpool)); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_cpool_t structure for which the number of +* available objects is requested. +* +* RETURN VALUE +* Returns the number of objects available in the specified +* composite pool. +* +* SEE ALSO +* Composite Pool +*********/ + +/****f* Component Library: Composite Pool/cl_cpool_get +* NAME +* cl_cpool_get +* +* DESCRIPTION +* The cl_cpool_get function retrieves an object from a +* composite pool. +* +* SYNOPSIS +*/ +static inline void *cl_cpool_get(IN cl_cpool_t * const p_pool) +{ + cl_pool_obj_t *p_pool_obj; + + CL_ASSERT(p_pool); + + p_pool_obj = (cl_pool_obj_t *) cl_qcpool_get(&p_pool->qcpool); + if (!p_pool_obj) + return (NULL); + + CL_ASSERT(p_pool_obj->p_object); + return ((void *)p_pool_obj->p_object); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_cpool_t structure from which to retrieve +* an object. +* +* RETURN VALUES +* Returns a pointer to the first component of a composite object. +* +* Returns NULL if the pool is empty and can not be grown automatically. +* +* NOTES +* cl_cpool_get returns the object at the head of the pool. 
If the pool is +* empty, it is automatically grown to accommodate this request unless the +* grow_size parameter passed to the cl_cpool_init function was zero. +* +* SEE ALSO +* Composite Pool, cl_cpool_get_tail, cl_cpool_put, cl_cpool_grow, +* cl_cpool_count +*********/ + +/****f* Component Library: Composite Pool/cl_cpool_put +* NAME +* cl_cpool_put +* +* DESCRIPTION +* The cl_cpool_put function returns an object to a composite pool. +* +* SYNOPSIS +*/ +static inline void +cl_cpool_put(IN cl_cpool_t * const p_pool, IN void *const p_object) +{ + cl_pool_obj_t *p_pool_obj; + + CL_ASSERT(p_pool); + CL_ASSERT(p_object); + + /* Calculate the offset to the list object representing this object. */ + p_pool_obj = (cl_pool_obj_t *) + (((uint8_t *) p_object) - sizeof(cl_pool_obj_t)); + + /* good sanity check */ + CL_ASSERT(p_pool_obj->p_object == p_object); + + cl_qcpool_put(&p_pool->qcpool, &p_pool_obj->pool_item); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_cpool_t structure to which to return +* an object. +* +* p_object +* [in] Pointer to the first component of an object to return to the pool. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* cl_cpool_put places the returned object at the head of the pool. +* +* The object specified by the p_object parameter must have been +* retrieved from the pool by a previous call to cl_cpool_get. +* +* SEE ALSO +* Composite Pool, cl_cpool_put_tail, cl_cpool_get +*********/ + +/****f* Component Library: Composite Pool/cl_cpool_grow +* NAME +* cl_cpool_grow +* +* DESCRIPTION +* The cl_cpool_grow function grows a composite pool by +* the specified number of objects. +* +* SYNOPSIS +*/ +static inline cl_status_t +cl_cpool_grow(IN cl_cpool_t * const p_pool, IN const uint32_t obj_count) +{ + CL_ASSERT(p_pool); + return (cl_qcpool_grow(&p_pool->qcpool, obj_count)); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_cpool_t structure whose capacity to grow. 
+* +* obj_count +* [in] Number of objects by which to grow the pool. +* +* RETURN VALUES +* CL_SUCCESS if the composite pool grew successfully. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory to grow the +* composite pool. +* +* cl_status_t value returned by optional initialization callback function +* specified by the pfn_initializer parameter passed to the +* cl_cpool_init function. +* +* NOTES +* It is not necessary to call cl_cpool_grow if the pool is +* configured to grow automatically. +* +* SEE ALSO +* Composite Pool +*********/ + +END_C_DECLS +#endif /* _CL_COMP_POOL_H_ */ diff --git a/include/complib/cl_debug.h b/include/complib/cl_debug.h new file mode 100644 index 0000000..10f5e86 --- /dev/null +++ b/include/complib/cl_debug.h @@ -0,0 +1,596 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of functions for reporting debug output. + */ + +#ifndef _CL_DEBUG_H_ +#define _CL_DEBUG_H_ + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/Debug Output +* NAME +* Debug Output +* +* DESCRIPTION +* The debug output functions and macros send debug messages to the current +* debug target. +*********/ +/****f* Component Library: Debug Output/cl_break +* NAME +* cl_break +* +* DESCRIPTION +* The cl_break function halts execution. +* +* SYNOPSIS +* void +* cl_break(); +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* In a release build, cl_break has no effect. +*********/ +/****f* Component Library: Debug Output/cl_is_debug +* NAME +* cl_is_debug +* +* DESCRIPTION +* The cl_is_debug function returns TRUE if the complib was compiled +* in debug mode, and FALSE otherwise. +* +* SYNOPSIS +*/ +boolean_t cl_is_debug(void); +/* +* PARAMETERS +* None +* +* RETURN VALUE +* TRUE if compiled in debug version. FALSE otherwise. +* +* NOTES +* +*********/ + +#if defined( _DEBUG_ ) +#ifndef cl_dbg_out +/****f* Component Library: Debug Output/cl_dbg_out +* NAME +* cl_dbg_out +* +* DESCRIPTION +* The cl_dbg_out function sends a debug message to the debug target in +* debug builds only. 
+* +* SYNOPSIS +*/ +void cl_dbg_out(IN const char *const debug_message, IN ...); +/* +* PARAMETERS +* debug_message +* [in] ANSI string formatted identically as for a call to the standard C +* function printf. +* +* ... +* [in] Extra parameters for string formatting, as defined for the +* standard C function printf. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* In a release build, cl_dbg_out has no effect. +* +* The formatting of the debug_message string is the same as for printf +* +* cl_dbg_out sends the debug message to the current debug target. +* +* SEE ALSO +* Debug Output, cl_msg_out +*********/ +#endif +#else +static inline void cl_dbg_out(IN const char *const debug_message, IN ...) +{ + UNUSED_PARAM(debug_message); +} +#endif /* defined( _DEBUG_ ) */ + +#ifndef cl_msg_out +/****f* Component Library: Debug Output/cl_msg_out +* NAME +* cl_msg_out +* +* DESCRIPTION +* The cl_msg_out function sends a debug message to the message log target. +* +* SYNOPSIS +*/ +void cl_msg_out(IN const char *const message, IN ...); +/* +* PARAMETERS +* message +* [in] ANSI string formatted identically as for a call to the standard C +* function printf. +* +* ... +* [in] Extra parameters for string formatting, as defined for the +* standard C function printf. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* cl_msg_out is available in both debug and release builds. +* +* The formatting of the message string is the same as for printf +* +* cl_msg_out sends the message to the current message logging target. +* +* SEE ALSO +* Debug Output, cl_dbg_out +*********/ +#endif + +/****d* Component Library: Debug Output/Debug Levels +* NAME +* Debug Levels +* +* DESCRIPTION +* The debug output macros reserve the upper bit of the debug level to +* convey an error. 
+* +* SYNOPSIS +*/ +#define CL_DBG_DISABLE 0 +#define CL_DBG_ERROR 0x80000000 +#define CL_DBG_ALL 0xFFFFFFFF +/* +* VALUES +* CL_DBG_DISABLE +* Disable all debug output, including errors. +* +* CL_DBG_ERROR +* Enable error debug output. +* +* CL_DBG_ALL +* Enable all debug output. +* +* NOTES +* Users can define custom debug levels using the lower 31 bits of their +* debug level to control non-error debug output. Error messages are +* always displayed, regardless of the lower bit definition. +* +* When specifying the debug output desired for non-error messages +* (the CHK_LVL parameter in the debug output macros), users must define +* all bits whose output they are interested in. +* +* SEE ALSO +* Debug Output, CL_PRINT, CL_ENTER, CL_EXIT, CL_TRACE, CL_TRACE_EXIT +*********/ + +#if defined(_DEBUG_) + +/****d* Component Library: Debug Output/CL_PRINT +* NAME +* CL_PRINT +* +* DESCRIPTION +* The CL_PRINT macro sends a string to the current debug target if +* the requested debug level matches the current debug level. +* +* SYNOPSIS +* CL_PRINT( DBG_LVL, CHK_LVL, STRING ); +* +* PARAMETERS +* DBG_LVL +* [in] Debug level for the string to output +* +* CHK_LVL +* [in] Current debug level against which to check DBG_LVL +* +* STRING +* [in] String to send to the current debug target. The string includes +* parentheses in order to allow additional parameters. +* +* RETURN VALUE +* This macro does not return a value. +* +* EXAMPLE +* #define MY_FUNC_DBG_LVL 1 +* +* uint32_t my_dbg_lvl = CL_DBG_ALL; +* +* void +* my_func() +* { +* CL_PRINT( MY_FUNC_DBG_LVL, my_dbg_lvl, ("Hello %s!\n", "world") ); +* } +* +* RESULT +* Hello world! +* +* NOTES +* The requested string is printed only if all bits set in DBG_LVL are also +* set in CHK_LVL unless the most significant bit is set (indicating an +* error), in which case the lower bits are ignored. CHK_LVL may have +* additional bits set.
+* +* In multi-processor environments where the current processor can be +* determined, the zero-based number of the processor on which the output +* is generated is prepended to the output. +* +* SEE ALSO +* Debug Output, Debug Levels, CL_ENTER, CL_EXIT, CL_TRACE, CL_TRACE_EXIT +*********/ +#define CL_PRINT( DBG_LVL, CHK_LVL, STRING ) \ + { \ + if( (DBG_LVL) & CL_DBG_ERROR ) \ + cl_dbg_out STRING; \ + else if( ((DBG_LVL) & (CHK_LVL)) == (DBG_LVL) ) \ + cl_dbg_out STRING; \ + } + +/****d* Component Library: Debug Output/CL_ENTER +* NAME +* CL_ENTER +* +* DESCRIPTION +* The CL_ENTER macro marks the entrance into a function by sending a +* string to the current debug target if the requested debug level matches +* the current debug level. +* +* SYNOPSIS +* CL_ENTER( DBG_LVL, CHK_LVL ); +* +* PARAMETERS +* DBG_LVL +* [in] Debug level for the string to output +* +* CHK_LVL +* [in] Current debug level against which to check DBG_LVL +* +* RETURN VALUE +* This macro does not return a value. +* +* EXAMPLE +* #define __MODULE__ "my_module" +* #define MY_FUNC_DBG_LVL 1 +* +* uint32_t my_dbg_lvl = CL_DBG_ALL; +* +* void +* my_func() +* { +* CL_ENTER( MY_FUNC_DBG_LVL, my_dbg_lvl ); +* CL_EXIT( MY_FUNC_DBG_LVL, my_dbg_lvl ); +* } +* +* RESULT +* my_module:my_func() [ +* my_module:my_func() ] +* +* NOTES +* The function entrance notification is printed only if all bits set +* in DBG_LVL are also set in CHK_LVL. CHK_LVL may have additional bits set. +* +* If the __MODULE__ preprocessor keyword is defined, that keyword will be +* prepended to the function name, separated with a colon. +* +* In multi-processor environments where the current processor can be +* determined, the zero-based number of the processor on which the output +* is generated is prepended to the output.
+* +* SEE ALSO +* Debug Output, Debug Levels, CL_PRINT, CL_EXIT, CL_TRACE, CL_TRACE_EXIT +*********/ +#define CL_ENTER( DBG_LVL, CHK_LVL ) \ + CL_CHK_STK; \ + CL_PRINT( DBG_LVL, CHK_LVL, _CL_DBG_ENTER ); + +/****d* Component Library: Debug Output/CL_EXIT +* NAME +* CL_EXIT +* +* DESCRIPTION +* The CL_EXIT macro marks the exit from a function by sending a string +* to the current debug target if the requested debug level matches the +* current debug level. +* +* SYNOPSIS +* CL_EXIT( DBG_LVL, CHK_LVL ); +* +* PARAMETERS +* DBG_LVL +* [in] Debug level for the string to output +* +* CHK_LVL +* [in] Current debug level against which to check DBG_LVL +* +* RETURN VALUE +* This macro does not return a value. +* +* EXAMPLE +* #define __MODULE__ "my_module" +* #define MY_FUNC_DBG_LVL 1 +* +* uint32_t my_dbg_lvl = CL_DBG_ALL; +* +* void +* my_func() +* { +* CL_ENTER( MY_FUNC_DBG_LVL, my_dbg_lvl ); +* CL_EXIT( MY_FUNC_DBG_LVL, my_dbg_lvl ); +* } +* +* RESULT +* my_module:my_func() [ +* my_module:my_func() ] +* +* NOTES +* The exit notification is printed only if all bits set in DBG_LVL are also +* set in CHK_LVL. CHK_LVL may have additional bits set. +* +* The CL_EXIT macro must only be used after the CL_ENTER macro as it +* depends on that macro's implementation. +* +* If the __MODULE__ preprocessor keyword is defined, that keyword will be +* prepended to the function name, separated with a colon. +* +* In multi-processor environments where the current processor can be +* determined, the zero-based number of the processor on which the output +* is generated is prepended to the output.
+* +* SEE ALSO +* Debug Output, Debug Levels, CL_PRINT, CL_ENTER, CL_TRACE, CL_TRACE_EXIT +*********/ +#define CL_EXIT( DBG_LVL, CHK_LVL ) \ + CL_PRINT( DBG_LVL, CHK_LVL, _CL_DBG_EXIT ); + +/****d* Component Library: Debug Output/CL_TRACE +* NAME +* CL_TRACE +* +* DESCRIPTION +* The CL_TRACE macro sends a string to the current debug target if +* the requested debug level matches the current debug level. The +* output is prepended with the function name and, depending on the +* debug level requested, an indication of the severity of the message. +* +* SYNOPSIS +* CL_TRACE( DBG_LVL, CHK_LVL, STRING ); +* +* PARAMETERS +* DBG_LVL +* [in] Debug level for the string to output +* +* CHK_LVL +* [in] Current debug level against which to check DBG_LVL +* +* STRING +* [in] String to send to the current debug target. The string includes +* parentheses in order to allow additional parameters. +* +* RETURN VALUE +* This macro does not return a value. +* +* EXAMPLE +* #define __MODULE__ "my_module" +* #define MY_FUNC_DBG_LVL 1 +* +* uint32_t my_dbg_lvl = CL_DBG_ALL; +* +* void +* my_func() +* { +* CL_ENTER( MY_FUNC_DBG_LVL, my_dbg_lvl ); +* CL_TRACE( MY_FUNC_DBG_LVL, my_dbg_lvl, ("Hello %s!\n", "world") ); +* CL_EXIT( MY_FUNC_DBG_LVL, my_dbg_lvl ); +* } +* +* RESULT +* my_module:my_func() [ +* my_module:my_func(): Hello world! +* my_module:my_func() ] +* +* NOTES +* The requested string is printed only if all bits set in DBG_LVL are also +* set in CHK_LVL. CHK_LVL may have additional bits set. +* +* The CL_TRACE macro must only be used after the CL_ENTER macro as it +* depends on that macro's implementation. +* +* If the DBG_LVL has the upper bit set, the output will contain +* an "!ERROR!" statement between the function name and STRING. +* +* If the __MODULE__ preprocessor keyword is defined, that keyword will be +* prepended to the function name, separated with a colon.
+* +* In multi-processor environments where the current processor can be +* determined, the zero-based number of the processor on which the output +* is generated is prepended to the output. +* +* SEE ALSO +* Debug Output, Debug Levels, CL_PRINT, CL_ENTER, CL_EXIT, CL_TRACE_EXIT +*********/ +#define CL_TRACE( DBG_LVL, CHK_LVL, STRING ) \ +{ \ +switch( (DBG_LVL) & CL_DBG_ERROR ) \ +{ \ + case CL_DBG_ERROR: \ + CL_PRINT( (DBG_LVL), (CHK_LVL), _CL_DBG_ERROR ); \ + break; \ + default: \ + CL_PRINT( (DBG_LVL), (CHK_LVL), _CL_DBG_INFO ); \ + break; \ +} \ +CL_PRINT( (DBG_LVL), (CHK_LVL), STRING ); \ +} + +/****d* Component Library: Debug Output/CL_TRACE_EXIT +* NAME +* CL_TRACE_EXIT +* +* DESCRIPTION +* The CL_TRACE_EXIT macro combines the functionality of the CL_TRACE and +* CL_EXIT macros, in that order. +* +* SYNOPSIS +* CL_TRACE_EXIT( DBG_LVL, CHK_LVL, STRING ); +* +* PARAMETERS +* DBG_LVL +* [in] Debug level for the string to output +* +* CHK_LVL +* [in] Current debug level against which to check DBG_LVL +* +* STRING +* [in] String to send to the current debug target. The string includes +* parentheses in order to allow additional parameters. +* +* RETURN VALUE +* This macro does not return a value. +* +* EXAMPLE +* #define __MODULE__ "my_module" +* #define MY_FUNC_DBG_LVL 1 +* +* uint32_t my_dbg_lvl = CL_DBG_ALL; +* +* void +* my_func() +* { +* CL_ENTER( MY_FUNC_DBG_LVL, my_dbg_lvl ); +* CL_TRACE_EXIT( MY_FUNC_DBG_LVL, my_dbg_lvl, ("Hello %s!\n", "world") ); +* } +* +* RESULT +* my_module:my_func() [ +* my_module:my_func(): Hello world! +* my_module:my_func() ] +* +* NOTES +* The requested string is printed only if all bits set in DBG_LVL are also +* set in CHK_LVL. CHK_LVL may have additional bits set. +* +* The CL_TRACE_EXIT macro must only be used after the CL_ENTER macro as it +* depends on that macro's implementation. +* +* If the DBG_LVL has the upper bit set, the output will contain +* an "!ERROR!" statement between the function name and STRING.
+* +* If the __MODULE__ preprocessor keyword is defined, that keyword will be +* prepended to the function name, separated with a colon. +* +* In multi-processor environments where the current processor can be +* determined, the zero-based number of the processor on which the output +* is generated is prepended to the output. +* +* SEE ALSO +* Debug Output, Debug Levels, CL_PRINT, CL_ENTER, CL_EXIT, CL_TRACE +*********/ +#define CL_TRACE_EXIT( DBG_LVL, CHK_LVL, STRING ) \ + CL_TRACE( DBG_LVL, CHK_LVL, STRING ); \ + CL_EXIT( DBG_LVL, CHK_LVL ); + +#else /* defined(_DEBUG_) */ + +/* Define as NULL macros in a free build. */ +#define CL_PRINT( DBG_LVL, CHK_LVL, STRING ); +#define CL_ENTER( DBG_LVL, CHK_LVL ); +#define CL_EXIT( DBG_LVL, CHK_LVL ); +#define CL_TRACE( DBG_LVL, CHK_LVL, STRING ); +#define CL_TRACE_EXIT( DBG_LVL, CHK_LVL, STRING ); + +#endif /* defined(_DEBUG_) */ + +/****d* Component Library: Debug Output/64-bit Print Format +* NAME +* 64-bit Print Format +* +* DESCRIPTION +* The 64-bit print keywords allow users to use 64-bit values in debug or +* console output. +* +* Different platforms define 64-bit print formats differently. The 64-bit +* print formats exposed by the component library are supported in all +* platforms. +* +* VALUES +* PRId64 +* Print a 64-bit integer in signed decimal format. +* PRIx64 +* Print a 64-bit integer in hexadecimal format. +* PRIo64 +* Print a 64-bit integer in octal format. +* PRIu64 +* Print a 64-bit integer in unsigned decimal format. +* +* EXAMPLE +* uint64 MyVal = 2; +* // Print a 64-bit integer in hexadecimal format. +* cl_dbg_out( "MyVal: 0x%" PRIx64 "\n", MyVal ); +* +* NOTES +* Standard print flags to specify padding and precision can still be used +* following the '%' sign in the string preceding the 64-bit print keyword. +* +* The above keywords are strings and make use of compilers' string +* concatenation ability. 
+*********/ + +void complib_init(void) __attribute__ ((deprecated)); +cl_status_t complib_init_v2(void); + +void complib_exit(void); + +END_C_DECLS +#endif /* _CL_DEBUG_H_ */ diff --git a/include/complib/cl_debug_osd.h b/include/complib/cl_debug_osd.h new file mode 100644 index 0000000..31d6972 --- /dev/null +++ b/include/complib/cl_debug_osd.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Debug Macros. 
+ */ + +#ifndef _CL_DEBUG_OSD_H_ +#define _CL_DEBUG_OSD_H_ + +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +#if !defined(__MODULE__) +#define __MODULE__ "" +#define __MOD_DELIMITER__ "" +#else /* !defined(__MODULE__) */ +#define __MOD_DELIMITER__ ":" +#endif /* !defined(__MODULE__) */ +/* + * Define specifiers for print functions based on the platform + */ +#ifdef __IA64__ +#define PRIdSIZE_T "ld" +#else +#define PRIdSIZE_T "d" +#endif +#include +#include +#define cl_msg_out printf +#if defined( _DEBUG_ ) +#define cl_dbg_out printf +#else +#define cl_dbg_out foo +#endif /* _DEBUG_ */ +/* + * The following macros are used internally by the CL_ENTER, CL_TRACE, + * CL_TRACE_EXIT, and CL_EXIT macros. + */ +#define _CL_DBG_ENTER \ + ("%s%s%s() [\n", __MODULE__, __MOD_DELIMITER__, __func__) +#define _CL_DBG_EXIT \ + ("%s%s%s() ]\n", __MODULE__, __MOD_DELIMITER__, __func__) +#define _CL_DBG_INFO \ + ("%s%s%s(): ", __MODULE__, __MOD_DELIMITER__, __func__) +#define _CL_DBG_ERROR \ + ("%s%s%s() !ERROR!: ", __MODULE__, __MOD_DELIMITER__, __func__) +#define CL_CHK_STK +END_C_DECLS +#endif /* _CL_DEBUG_OSD_H_ */ diff --git a/include/complib/cl_dispatcher.h b/include/complib/cl_dispatcher.h new file mode 100644 index 0000000..25c811a --- /dev/null +++ b/include/complib/cl_dispatcher.h @@ -0,0 +1,622 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of dispatcher abstraction. + */ + +#ifndef _CL_DISPATCHER_H_ +#define _CL_DISPATCHER_H_ + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/Dispatcher +* NAME +* Dispatcher +* +* DESCRIPTION +* The Dispatcher provides a facility for message routing to +* asynchronous worker threads. +* +* The Dispatcher functions operate on a cl_dispatcher_t structure +* which should be treated as opaque and should be manipulated +* only through the provided functions. 
+* +* SEE ALSO +* Structures: +* cl_dispatcher_t +* +* Initialization/Destruction: +* cl_disp_construct, cl_disp_init, cl_disp_shutdown, cl_disp_destroy +* +* Manipulation: +* cl_disp_post, cl_disp_register, cl_disp_unregister +*********/ +/****s* Component Library: Dispatcher/cl_disp_msgid_t +* NAME +* cl_disp_msgid_t +* +* DESCRIPTION +* Defines the type of dispatcher messages. +* +* SYNOPSIS +*/ +typedef uint32_t cl_disp_msgid_t; +/**********/ + +/****s* Component Library: Dispatcher/CL_DISP_MSGID_NONE +* NAME +* CL_DISP_MSGID_NONE +* +* DESCRIPTION +* Defines a message value that means "no message". +* This value is used during registration by Dispatcher clients +* that do not wish to receive messages. +* +* No Dispatcher message is allowed to have this value. +* +* SYNOPSIS +*/ +#define CL_DISP_MSGID_NONE 0xFFFFFFFF +/**********/ + +/****s* Component Library: Dispatcher/CL_DISP_INVALID_HANDLE +* NAME +* CL_DISP_INVALID_HANDLE +* +* DESCRIPTION +* Defines the value of an invalid Dispatcher registration handle. +* +* SYNOPSIS +*/ +#define CL_DISP_INVALID_HANDLE ((cl_disp_reg_handle_t)0) +/*********/ + +/****f* Component Library: Dispatcher/cl_pfn_msgrcv_cb_t +* NAME +* cl_pfn_msgrcv_cb_t +* +* DESCRIPTION +* This typedef defines the prototype for client functions invoked +* by the Dispatcher. The Dispatcher calls the corresponding +* client function when delivering a message to the client. +* +* The client function must be reentrant if the user creates a +* Dispatcher with more than one worker thread. +* +* SYNOPSIS +*/ +typedef void + (*cl_pfn_msgrcv_cb_t) (IN void *context, IN void *p_data); +/* +* PARAMETERS +* context +* [in] Client specific context specified in a call to +* cl_disp_register +* +* p_data +* [in] Pointer to the client specific data payload +* of this message. +* +* RETURN VALUE +* This function does not return a value. 
+* +* NOTES +* This typedef provides a function prototype reference for +* the function provided by Dispatcher clients as a parameter +* to the cl_disp_register function. +* +* SEE ALSO +* Dispatcher, cl_disp_register +*********/ + +/****f* Component Library: Dispatcher/cl_pfn_msgdone_cb_t +* NAME +* cl_pfn_msgdone_cb_t +* +* DESCRIPTION +* This typedef defines the prototype for client functions invoked +* by the Dispatcher. The Dispatcher calls the corresponding +* client function after completing delivery of a message. +* +* The client function must be reentrant if the user creates a +* Dispatcher with more than one worker thread. +* +* SYNOPSIS +*/ +typedef void + (*cl_pfn_msgdone_cb_t) (IN void *context, IN void *p_data); +/* +* PARAMETERS +* context +* [in] Client specific context specified in a call to +* cl_disp_post +* +* p_data +* [in] Pointer to the client specific data payload +* of this message. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* This typedef provides a function prototype reference for +* the function provided by Dispatcher clients as a parameter +* to the cl_disp_post function. +* +* SEE ALSO +* Dispatcher, cl_disp_post +*********/ + +/****s* Component Library: Dispatcher/cl_dispatcher_t +* NAME +* cl_dispatcher_t +* +* DESCRIPTION +* Dispatcher structure. +* +* The Dispatcher is thread safe. +* +* The cl_dispatcher_t structure should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct _cl_dispatcher { + cl_spinlock_t lock; + cl_ptr_vector_t reg_vec; + cl_qlist_t reg_list; + cl_thread_pool_t worker_threads; + cl_qlist_t msg_fifo; + cl_qpool_t msg_pool; + uint64_t last_msg_queue_time_us; +} cl_dispatcher_t; +/* +* FIELDS +* lock +* Spinlock to guard internal structures. +* +* reg_vec +* Vector of registration info objects. Indexed by message msg_id. +* +* reg_list +* List of registration info objects. 
+* +* worker_threads +* Thread pool of worker threads to dispose of posted messages. +* +* msg_fifo +* FIFO of messages being processed by the Dispatcher. New +* messages are posted to the tail of the FIFO. Worker threads +* pull messages from the front. +* +* msg_pool +* Pool of message objects to be processed through the FIFO. +* +* last_msg_queue_time_us +* The time that the last message spent in the Q in usec +* +* SEE ALSO +* Dispatcher +*********/ + +/****s* Component Library: Dispatcher/cl_disp_reg_info_t +* NAME +* cl_disp_reg_info_t +* +* DESCRIPTION +* Defines the dispatcher registration object structure. +* +* The cl_disp_reg_info_t structure is for internal use by the +* Dispatcher only. +* +* SYNOPSIS +*/ +typedef struct _cl_disp_reg_info { + cl_list_item_t list_item; + cl_pfn_msgrcv_cb_t pfn_rcv_callback; + const void *context; + atomic32_t ref_cnt; + cl_disp_msgid_t msg_id; + cl_dispatcher_t *p_disp; +} cl_disp_reg_info_t; +/* +* FIELDS +* list_item +* List linkage. Must be first element in the structure!! +* +* pfn_rcv_callback +* Client's message receive callback. +* +* context +* Client's context for message receive callback. +* +* ref_cnt +* Reference count. +* +* msg_id +* Dispatcher message msg_id value for this registration object. +* +* p_disp +* Pointer to parent Dispatcher. +* +* SEE ALSO +*********/ + +/****s* Component Library: Dispatcher/cl_disp_msg_t +* NAME +* cl_disp_msg_t +* +* DESCRIPTION +* Defines the dispatcher message structure. +* +* The cl_disp_msg_t structure is for internal use by the +* Dispatcher only. +* +* SYNOPSIS +*/ +typedef struct _cl_disp_msg { + cl_pool_item_t item; + const void *p_data; + cl_disp_reg_info_t *p_src_reg; + cl_disp_reg_info_t *p_dest_reg; + cl_pfn_msgdone_cb_t pfn_xmt_callback; + uint64_t in_time; + const void *context; +} cl_disp_msg_t; +/* +* FIELDS +* item +* List & Pool linkage. Must be first element in the structure!! +* +* p_data +* Pointer to the data payload for this message. 
The payload +* is opaque to the Dispatcher. +* +* p_src_reg +* Pointer to the registration info of the sender. +* +* p_dest_reg +* Pointer to the registration info of the recipient. +* +* pfn_xmt_callback +* Client's message done callback. +* +* in_time +* The absolute time the message was inserted into the queue +* +* context +* Client's message done callback context. +* +* SEE ALSO +*********/ + +/****s* Component Library: Dispatcher/cl_disp_reg_handle_t +* NAME +* cl_disp_reg_handle_t +* +* DESCRIPTION +* Defines the Dispatcher registration handle. This handle +* should be treated as opaque by the client. +* +* SYNOPSIS +*/ +typedef const struct _cl_disp_reg_info *cl_disp_reg_handle_t; +/**********/ + +/****f* Component Library: Dispatcher/cl_disp_construct +* NAME +* cl_disp_construct +* +* DESCRIPTION +* This function constructs a Dispatcher object. +* +* SYNOPSIS +*/ +void cl_disp_construct(IN cl_dispatcher_t * const p_disp); +/* +* PARAMETERS +* p_disp +* [in] Pointer to a Dispatcher. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling cl_disp_init and cl_disp_destroy. +* +* SEE ALSO +* Dispatcher, cl_disp_init, cl_disp_destroy +*********/ + +/****f* Component Library: Dispatcher/cl_disp_init +* NAME +* cl_disp_init +* +* DESCRIPTION +* This function initializes a Dispatcher object. +* +* SYNOPSIS +*/ +cl_status_t +cl_disp_init(IN cl_dispatcher_t * const p_disp, + IN const uint32_t thread_count, IN const char *const name); +/* +* PARAMETERS +* p_disp +* [in] Pointer to a Dispatcher. +* +* thread_count +* [in] The number of worker threads to create in this Dispatcher. +* A value of 0 causes the Dispatcher to create one worker thread +* per CPU in the system. When the Dispatcher is created with +* only one thread, the Dispatcher guarantees to deliver posted +* messages in order. When the Dispatcher is created with more +* than one thread, messages may be delivered out of order. 
+* +* name +* [in] Name to associate with the threads. The name may be up to 16 +* characters, including a terminating null character. All threads +* created in the Dispatcher have the same name. +* +* RETURN VALUE +* CL_SUCCESS if the operation is successful. +* +* SEE ALSO +* Dispatcher, cl_disp_destroy, cl_disp_register, cl_disp_unregister, +* cl_disp_post +*********/ + +/****f* Component Library: Dispatcher/cl_disp_shutdown +* NAME +* cl_disp_shutdown +* +* DESCRIPTION +* This function shuts down a Dispatcher object: it unregisters all messages, +* clears the fifo and waits for the threads to exit. +* +* SYNOPSIS +*/ +void cl_disp_shutdown(IN cl_dispatcher_t * const p_disp); +/* +* PARAMETERS +* p_disp +* [in] Pointer to a Dispatcher. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* This function does not return until all worker threads +* have exited client callback functions and been successfully +* shut down. +* +* SEE ALSO +* Dispatcher, cl_disp_construct, cl_disp_init +*********/ + +/****f* Component Library: Dispatcher/cl_disp_destroy +* NAME +* cl_disp_destroy +* +* DESCRIPTION +* This function destroys a Dispatcher object. +* +* SYNOPSIS +*/ +void cl_disp_destroy(IN cl_dispatcher_t * const p_disp); +/* +* PARAMETERS +* p_disp +* [in] Pointer to a Dispatcher. +* +* RETURN VALUE +* This function does not return a value. +* +* SEE ALSO +* Dispatcher, cl_disp_construct, cl_disp_init +*********/ + +/****f* Component Library: Dispatcher/cl_disp_register +* NAME +* cl_disp_register +* +* DESCRIPTION +* This function registers a client with a Dispatcher object. +* +* SYNOPSIS +*/ +cl_disp_reg_handle_t +cl_disp_register(IN cl_dispatcher_t * const p_disp, + IN const cl_disp_msgid_t msg_id, + IN cl_pfn_msgrcv_cb_t pfn_callback OPTIONAL, + IN const void *const context); +/* +* PARAMETERS +* p_disp +* [in] Pointer to a Dispatcher. +* +* msg_id +* [in] Numeric message ID for which the client is registering. 
+* If the client does not wish to receive any messages, +* (a send-only client) then the caller should set this value +* to CL_DISP_MSGID_NONE. For efficiency, numeric message msg_id +* values should start with 0 and should be contiguous, or nearly so. +* +* pfn_callback +* [in] Message receive callback. The Dispatcher calls this +* function after receiving a posted message with the +* appropriate message msg_id value. Send-only clients may specify +* NULL for this value. +* +* context +* [in] Client context value passed to the cl_pfn_msgrcv_cb_t +* function. +* +* RETURN VALUE +* On success a Dispatcher registration handle. +* CL_DISP_INVALID_HANDLE otherwise. +* +* SEE ALSO +* Dispatcher, cl_disp_unregister, cl_disp_post +*********/ + +/****f* Component Library: Dispatcher/cl_disp_unregister +* NAME +* cl_disp_unregister +* +* DESCRIPTION +* This function unregisters a client from a Dispatcher. +* +* SYNOPSIS +*/ +void cl_disp_unregister(IN const cl_disp_reg_handle_t handle); +/* +* PARAMETERS +* handle +* [in] cl_disp_reg_handle_t value return by cl_disp_register. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* This function will not return until worker threads have exited +* the callback functions for this client. Do not invoke this +* function from a callback. +* +* SEE ALSO +* Dispatcher, cl_disp_register +*********/ + +/****f* Component Library: Dispatcher/cl_disp_post +* NAME +* cl_disp_post +* +* DESCRIPTION +* This function posts a message to a Dispatcher object. +* +* SYNOPSIS +*/ +cl_status_t +cl_disp_post(IN const cl_disp_reg_handle_t handle, + IN const cl_disp_msgid_t msg_id, + IN const void *const p_data, + IN cl_pfn_msgdone_cb_t pfn_callback OPTIONAL, + IN const void *const context); +/* +* PARAMETERS +* handle +* [in] cl_disp_reg_handle_t value return by cl_disp_register. +* +* msg_id +* [in] Numeric message msg_id value associated with this message. +* +* p_data +* [in] Data payload for this message. 
+* +* pfn_callback +* [in] Pointer to a cl_pfn_msgdone_cb_t function. +* The Dispatcher calls this function after the message has been +* processed by the recipient. +* The caller may pass NULL for this value, which indicates no +* message done callback is necessary. +* +* context +* [in] Client context value passed to the cl_pfn_msgdone_cb_t +* function. +* +* RETURN VALUE +* CL_SUCCESS if the message was successfully queued in the Dispatcher. +* +* NOTES +* The caller must not modify the memory pointed to by p_data until +* the Dispatcher call the pfn_callback function. +* +* SEE ALSO +* Dispatcher +*********/ + +/****f* Component Library: Dispatcher/cl_disp_get_queue_status +* NAME +* cl_disp_get_queue_status +* +* DESCRIPTION +* This function gets queue status of a Dispatcher object. +* +* SYNOPSIS +*/ +void +cl_disp_get_queue_status(IN const cl_disp_reg_handle_t handle, + OUT uint32_t * p_num_queued_msgs, + OUT uint64_t * p_last_msg_queue_time_ms); +/* +* PARAMETERS +* handle +* [in] cl_disp_reg_handle_t value return by cl_disp_register. +* +* p_num_queued_msgs +* [out] number of messages in the queue +* +* p_last_msg_queue_time_ms +* [out] pointer to a variable to hold the time the last popped up message +* spent in the queue +* +* RETURN VALUE +* This function does not return a value. The time the last popped up message stayed in the queue, in msec, is passed back through p_last_msg_queue_time_ms. +* +* NOTES +* External locking is not required. +* +* SEE ALSO +* Dispatcher +*********/ + +END_C_DECLS +#endif /* !defined(_CL_DISPATCHER_H_) */ diff --git a/include/complib/cl_event.h b/include/complib/cl_event.h new file mode 100644 index 0000000..10805fb --- /dev/null +++ b/include/complib/cl_event.h @@ -0,0 +1,279 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of event abstraction. + */ + +#ifndef _CL_EVENT_H_ +#define _CL_EVENT_H_ + +/* Indicates that waiting on an event should never timeout */ +#define EVENT_NO_TIMEOUT 0xFFFFFFFF + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/Event +* NAME +* Event +* +* DESCRIPTION +* The Event provides the ability to suspend and wakeup a thread. +* +* The event functions operates on a cl_event_t structure which should be +* treated as opaque and should be manipulated only through the provided +* functions. 
+* +* SEE ALSO +* Structures: +* cl_event_t +* +* Initialization/Destruction: +* cl_event_construct, cl_event_init, cl_event_destroy +* +* Manipulation: +* cl_event_signal, cl_event_reset, cl_event_wait_on +*********/ +/****f* Component Library: Event/cl_event_construct +* NAME +* cl_event_construct +* +* DESCRIPTION +* The cl_event_construct function constructs an event. +* +* SYNOPSIS +*/ +void cl_event_construct(IN cl_event_t * const p_event); +/* +* PARAMETERS +* p_event +* [in] Pointer to an cl_event_t structure to construct. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling cl_event_destroy without first calling cl_event_init. +* +* Calling cl_event_construct is a prerequisite to calling any other event +* function except cl_event_init. +* +* SEE ALSO +* Event, cl_event_init, cl_event_destroy +*********/ + +/****f* Component Library: Event/cl_event_init +* NAME +* cl_event_init +* +* DESCRIPTION +* The cl_event_init function initializes an event for use. +* +* SYNOPSIS +*/ +cl_status_t +cl_event_init(IN cl_event_t * const p_event, IN const boolean_t manual_reset); +/* +* PARAMETERS +* p_event +* [in] Pointer to an cl_event_t structure to initialize. +* +* manual_reset +* [in] If FALSE, indicates that the event resets itself after releasing +* a single waiter. If TRUE, the event remains in the signalled state +* until explicitly reset by a call to cl_event_reset. +* +* RETURN VALUES +* CL_SUCCESS if event initialization succeeded. +* +* CL_ERROR otherwise. +* +* NOTES +* Allows calling event manipulation functions, such as cl_event_signal, +* cl_event_reset, and cl_event_wait_on. +* +* The event is initially in a reset state. 
+* +* SEE ALSO +* Event, cl_event_construct, cl_event_destroy, cl_event_signal, +* cl_event_reset, cl_event_wait_on +*********/ + +/****f* Component Library: Event/cl_event_destroy +* NAME +* cl_event_destroy +* +* DESCRIPTION +* The cl_event_destroy function performs any necessary cleanup of an event. +* +* SYNOPSIS +*/ +void cl_event_destroy(IN cl_event_t * const p_event); + +/* +* PARAMETERS +* p_event +* [in] Pointer to an cl_event_t structure to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* This function should only be called after a call to cl_event_construct +* or cl_event_init. +* +* SEE ALSO +* Event, cl_event_construct, cl_event_init +*********/ + +/****f* Component Library: Event/cl_event_signal +* NAME +* cl_event_signal +* +* DESCRIPTION +* The cl_event_signal function sets an event to the signalled state and +* releases at most one waiting thread. +* +* SYNOPSIS +*/ +cl_status_t cl_event_signal(IN cl_event_t * const p_event); +/* +* PARAMETERS +* p_event +* [in] Pointer to an cl_event_t structure to set. +* +* RETURN VALUES +* CL_SUCCESS if the event was successfully signalled. +* +* CL_ERROR otherwise. +* +* NOTES +* For auto-reset events, the event is reset automatically once a wait +* operation is satisfied. +* +* Triggering the event multiple times does not guarantee that the same +* number of wait operations are satisfied. This is because events are +* either in a signalled or non-signalled state, and triggering an event +* that is already in the signalled state has no effect. +* +* SEE ALSO +* Event, cl_event_reset, cl_event_wait_on +*********/ + +/****f* Component Library: Event/cl_event_reset +* NAME +* cl_event_reset +* +* DESCRIPTION +* The cl_event_reset function sets an event to the non-signalled state. +* +* SYNOPSIS +*/ +cl_status_t cl_event_reset(IN cl_event_t * const p_event); +/* +* PARAMETERS +* p_event +* [in] Pointer to an cl_event_t structure to reset. 
+* +* RETURN VALUES +* CL_SUCCESS if the event was successfully reset. +* +* CL_ERROR otherwise. +* +* SEE ALSO +* Event, cl_event_signal, cl_event_wait_on +*********/ + +/****f* Component Library: Event/cl_event_wait_on +* NAME +* cl_event_wait_on +* +* DESCRIPTION +* The cl_event_wait_on function waits for the specified event to be +* triggered for a minimum amount of time. +* +* SYNOPSIS +*/ +cl_status_t +cl_event_wait_on(IN cl_event_t * const p_event, + IN const uint32_t wait_us, IN const boolean_t interruptible); +/* +* PARAMETERS +* p_event +* [in] Pointer to an cl_event_t structure on which to wait. +* +* wait_us +* [in] Number of microseconds to wait. +* +* interruptible +* [in] Indicates whether the wait operation can be interrupted +* by external signals. +* +* RETURN VALUES +* CL_SUCCESS if the wait operation succeeded in response to the event +* being set. +* +* CL_TIMEOUT if the specified time period elapses. +* +* CL_NOT_DONE if the wait was interrupted by an external signal. +* +* CL_ERROR if the wait operation failed. +* +* NOTES +* If wait_us is set to EVENT_NO_TIMEOUT, the function will wait until the +* event is triggered and never timeout. +* +* If the timeout value is zero, this function simply tests the state of +* the event. +* +* If the event is already on the signalled state at the time of the call +* to cl_event_wait_on, the call completes immediately with CL_SUCCESS. +* +* SEE ALSO +* Event, cl_event_signal, cl_event_reset +*********/ + +END_C_DECLS +#endif /* _CL_EVENT_H_ */ diff --git a/include/complib/cl_event_osd.h b/include/complib/cl_event_osd.h new file mode 100644 index 0000000..541ced0 --- /dev/null +++ b/include/complib/cl_event_osd.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of event object. + */ + +#ifndef _CL_EVENT_OSD_H_ +#define _CL_EVENT_OSD_H_ + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +#include /* usr/include */ +/* + * Linux user mode specific data structure for the event object. + * Users should not access these variables directly. 
+ */ +typedef struct _cl_event_t { + pthread_cond_t condvar; + pthread_mutex_t mutex; + boolean_t signaled; + boolean_t manual_reset; + cl_state_t state; +} cl_event_t; + +END_C_DECLS +#endif /* _CL_EVENT_OSD_H_ */ diff --git a/include/complib/cl_event_wheel.h b/include/complib/cl_event_wheel.h new file mode 100644 index 0000000..20fec0f --- /dev/null +++ b/include/complib/cl_event_wheel.h @@ -0,0 +1,452 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +/* + * Abstract: + * Declaration of event wheel abstraction. + */ + +#ifndef _CL_EVENT_WHEEL_H_ +#define _CL_EVENT_WHEEL_H_ + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/Event_Wheel +* NAME +* Event_Wheel +* +* DESCRIPTION +* The Event_Wheel provides a facility for registering delayed events +* and getting called once they time out. +* +* The Event_Wheel functions operate on a cl_event_wheel_t structure +* which should be treated as opaque and should be manipulated +* only through the provided functions. +* +* SEE ALSO +* Structures: +* cl_event_wheel_t +* +* Initialization/Destruction: +* cl_event_wheel_construct, cl_event_wheel_init, cl_event_wheel_destroy +* +* Manipulation: +* cl_event_wheel_reg, cl_event_wheel_unreg +* +*********/ +/****f* Component Library: Event_Wheel/cl_pfn_event_aged_cb_t +* NAME +* cl_pfn_event_aged_cb_t +* +* DESCRIPTION +* This typedef defines the prototype for client functions invoked +* by the Event_Wheel. The Event_Wheel calls the corresponding +* client function when the specific item has aged. +* +* SYNOPSIS +*/ +typedef uint64_t + (*cl_pfn_event_aged_cb_t) (IN uint64_t key, + IN uint32_t num_regs, IN void *context); +/* +* PARAMETERS +* key +* [in] The key used for registering the item in the call to +* cl_event_wheel_reg. +* +* num_regs +* [in] The number of times this event was registered (pushed in time). +* +* context +* [in] Client specific context specified in a call to +* cl_event_wheel_reg +* +* RETURN VALUE +* This function returns the absolute time the event should fire in [usec]. +* If lower than the current time, the event should be unregistered +* immediately.
+* +* NOTES +* This typedef provides a function prototype reference for +* the function provided by Event_Wheel clients as a parameter +* to the cl_event_wheel_reg function. +* +* SEE ALSO +* Event_Wheel, cl_event_wheel_reg +*********/ + +/****s* Component Library: Event_Wheel/cl_event_wheel_t +* NAME +* cl_event_wheel_t +* +* DESCRIPTION +* Event_Wheel structure. +* +* The Event_Wheel is thread safe. +* +* The cl_event_wheel_t structure should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct _cl_event_wheel { + cl_spinlock_t lock; + cl_spinlock_t *p_external_lock; + cl_qmap_t events_map; + boolean_t closing; + cl_qlist_t events_wheel; + cl_timer_t timer; +} cl_event_wheel_t; +/* +* FIELDS +* lock +* Spinlock to guard internal structures. +* +* p_external_lock +* Reference to external spinlock to guard internal structures +* if the event wheel is part of a larger object protected by its own lock +* +* events_map +* A Map holding all registered event items by their key. +* +* closing +* A flag indicating the event wheel is closing. This means that +* callbacks that are called when closing == TRUE should just be ignored. +* +* events_wheel +* A list of the events sorted by expiration time. +* +* timer +* The timer scheduling event time propagation. +* +* SEE ALSO +* Event_Wheel +*********/ + +/****s* Component Library: Event_Wheel/cl_event_wheel_reg_info_t +* NAME +* cl_event_wheel_reg_info_t +* +* DESCRIPTION +* Defines the event_wheel registration object structure. +* +* The cl_event_wheel_reg_info_t structure is for internal use by the +* Event_Wheel only. 
+* +* SYNOPSIS +*/ +typedef struct _cl_event_wheel_reg_info { + cl_map_item_t map_item; + cl_list_item_t list_item; + uint64_t key; + cl_pfn_event_aged_cb_t pfn_aged_callback; + uint64_t aging_time; + uint32_t num_regs; + void *context; +} cl_event_wheel_reg_info_t; +/* +* FIELDS +* map_item +* The map item of this event +* +* list_item +* The list item for the list sorted by aging time +* +* key +* The key by which one can find the event +* +* pfn_aged_callback +* The client's Event-Aged callback +* +* aging_time +* The delta time [msec] for which the event should age. +* +* num_regs +* The number of times the same event (key) was registered +* +* context +* Client's context for event-aged callback. +* +* SEE ALSO +*********/ + +/****f* Component Library: Event_Wheel/cl_event_wheel_construct +* NAME +* cl_event_wheel_construct +* +* DESCRIPTION +* This function constructs an Event_Wheel object. +* +* SYNOPSIS +*/ +void cl_event_wheel_construct(IN cl_event_wheel_t * const p_event_wheel); +/* +* PARAMETERS +* p_event_wheel +* [in] Pointer to an Event_Wheel. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling cl_event_wheel_init and cl_event_wheel_destroy. +* +* SEE ALSO +* Event_Wheel, cl_event_wheel_init, cl_event_wheel_destroy +*********/ + +/****f* Component Library: Event_Wheel/cl_event_wheel_init +* NAME +* cl_event_wheel_init +* +* DESCRIPTION +* This function initializes an Event_Wheel object. +* +* SYNOPSIS +*/ +cl_status_t +cl_event_wheel_init(IN cl_event_wheel_t * const p_event_wheel); + +/* +* PARAMETERS +* p_event_wheel +* [in] Pointer to an Event_Wheel. +* +* RETURN VALUE +* CL_SUCCESS if the operation is successful.
+* +* SEE ALSO +* Event_Wheel, cl_event_wheel_destroy, cl_event_wheel_reg, cl_event_wheel_unreg +* +*********/ + +/****f* Component Library: Event_Wheel/cl_event_wheel_init_ex +* NAME +* cl_event_wheel_init_ex +* +* DESCRIPTION +* This function initializes an Event_Wheel object with an external spinlock +* +* SYNOPSIS +*/ +cl_status_t +cl_event_wheel_init_ex(IN cl_event_wheel_t * const p_event_wheel, + IN cl_spinlock_t * p_external_lock); + +/* +* PARAMETERS +* p_event_wheel +* [in] Pointer to an Event_Wheel. +* +* p_external_lock +* [in] Reference to external spinlock to guard internal structures +* if the event wheel is part of a larger object protected by its own lock +* +* RETURN VALUE +* CL_SUCCESS if the operation is successful. +* +* SEE ALSO +* Event_Wheel, cl_event_wheel_destroy, cl_event_wheel_reg, cl_event_wheel_unreg +* +*********/ + +/****f* Component Library: Event_Wheel/cl_event_wheel_destroy +* NAME +* cl_event_wheel_destroy +* +* DESCRIPTION +* This function destroys an Event_Wheel object. +* +* SYNOPSIS +*/ +void cl_event_wheel_destroy(IN cl_event_wheel_t * const p_event_wheel); +/* +* PARAMETERS +* p_event_wheel +* [in] Pointer to an Event_Wheel. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* This function does not return until all client callback functions +* have successfully finished. +* +* SEE ALSO +* Event_Wheel, cl_event_wheel_construct, cl_event_wheel_init +*********/ + +/****f* Component Library: Event_Wheel/cl_event_wheel_dump +* NAME +* cl_event_wheel_dump +* +* DESCRIPTION +* This function dumps the details of an Event_Wheel object. +* +* SYNOPSIS +*/ +void cl_event_wheel_dump(IN cl_event_wheel_t * const p_event_wheel); +/* +* PARAMETERS +* p_event_wheel +* [in] Pointer to an Event_Wheel. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Note that this function should be called inside a lock of the event wheel! +* It doesn't acquire the lock by itself.
+* +* SEE ALSO +* Event_Wheel, cl_event_wheel_construct, cl_event_wheel_init +*********/ + +/****f* Component Library: Event_Wheel/cl_event_wheel_reg +* NAME +* cl_event_wheel_reg +* +* DESCRIPTION +* This function registers a client with an Event_Wheel object. +* +* SYNOPSIS +*/ +cl_status_t +cl_event_wheel_reg(IN cl_event_wheel_t * const p_event_wheel, + IN const uint64_t key, + IN const uint64_t aging_time_usec, + IN cl_pfn_event_aged_cb_t pfn_callback, + IN void *const context); +/* +* PARAMETERS +* p_event_wheel +* [in] Pointer to an Event_Wheel. +* +* key +* [in] The specific key by which events are registered. +* +* aging_time_usec +* [in] The absolute time this event should age in usec +* +* pfn_callback +* [in] Event Aging callback. The Event_Wheel calls this +* function after the time the event was registered for has come. +* +* context +* [in] Client context value passed to the cl_pfn_event_aged_cb_t +* function. +* +* RETURN VALUE +* CL_SUCCESS on success, or CL_ERROR otherwise. +* +* SEE ALSO +* Event_Wheel, cl_event_wheel_unreg +*********/ + +/****f* Component Library: Event_Wheel/cl_event_wheel_unreg +* NAME +* cl_event_wheel_unreg +* +* DESCRIPTION +* This function unregisters a client event from an Event_Wheel. +* +* SYNOPSIS +*/ +void +cl_event_wheel_unreg(IN cl_event_wheel_t * const p_event_wheel, + IN uint64_t key); +/* +* PARAMETERS +* p_event_wheel +* [in] Pointer to an Event_Wheel. +* +* key +* [in] The key used for registering the event +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* After the event has aged it is automatically removed from +* the event wheel. So it should only be invoked when the need arises +* to remove existing events before they age. +* +* SEE ALSO +* Event_Wheel, cl_event_wheel_reg +*********/ + +/****f* Component Library: Event_Wheel/cl_event_wheel_num_regs +* NAME +* cl_event_wheel_num_regs +* +* DESCRIPTION +* This function returns the number of times an event was registered.
+* +* SYNOPSIS +*/ +uint32_t +cl_event_wheel_num_regs(IN cl_event_wheel_t * const p_event_wheel, + IN uint64_t key); +/* +* PARAMETERS +* p_event_wheel +* [in] Pointer to an Event_Wheel. +* +* key +* [in] The key used for registering the event +* +* RETURN VALUE +* The number of times the event was registered. +* 0 if never registered or eventually aged. +* +* SEE ALSO +* Event_Wheel, cl_event_wheel_reg, cl_event_wheel_unreg +*********/ + +END_C_DECLS +#endif /* !defined(_CL_EVENT_WHEEL_H_) */ diff --git a/include/complib/cl_fleximap.h b/include/complib/cl_fleximap.h new file mode 100644 index 0000000..dfe4745 --- /dev/null +++ b/include/complib/cl_fleximap.h @@ -0,0 +1,943 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of flexi map, a binary tree where the caller always provides + * all necessary storage. + */ + +#ifndef _CL_FLEXIMAP_H_ +#define _CL_FLEXIMAP_H_ + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/Flexi Map +* NAME +* Flexi Map +* +* DESCRIPTION +* Flexi map implements a binary tree that stores user provided cl_fmap_item_t +* structures. Each item stored in a flexi map has a unique user defined +* key (duplicates are not allowed). Flexi map provides the ability to +* efficiently search for an item given a key. Flexi map allows user +* defined keys of any size. Storage for keys and a comparison function +* are provided by users to allow flexi map to store items with arbitrary +* key values. +* +* Flexi map does not allocate any memory, and can therefore not fail +* any operations due to insufficient memory. Flexi map can thus be useful +* in minimizing the error paths in code. +* +* Flexi map is not thread safe, and users must provide serialization when +* adding and removing items from the map. +* +* The flexi map functions operate on a cl_fmap_t structure which should +* be treated as opaque and should be manipulated only through the provided +* functions. 
+* +* SEE ALSO +* Structures: +* cl_fmap_t, cl_fmap_item_t +* +* Callbacks: +* cl_pfn_fmap_apply_t +* +* Item Manipulation: +* cl_fmap_key +* +* Initialization: +* cl_fmap_init +* +* Iteration: +* cl_fmap_end, cl_fmap_head, cl_fmap_tail, cl_fmap_next, cl_fmap_prev +* +* Manipulation: +* cl_fmap_insert, cl_fmap_get, cl_fmap_remove_item, cl_fmap_remove, +* cl_fmap_remove_all, cl_fmap_merge, cl_fmap_delta, cl_fmap_get_next +* +* Search: +* cl_fmap_apply_func +* +* Attributes: +* cl_fmap_count, cl_is_fmap_empty, +*********/ +/****s* Component Library: Flexi Map/cl_fmap_item_t +* NAME +* cl_fmap_item_t +* +* DESCRIPTION +* The cl_fmap_item_t structure is used by maps to store objects. +* +* The cl_fmap_item_t structure should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct _cl_fmap_item { + /* Must be first to allow casting. */ + cl_pool_item_t pool_item; + struct _cl_fmap_item *p_left; + struct _cl_fmap_item *p_right; + struct _cl_fmap_item *p_up; + cl_map_color_t color; + const void *p_key; +#ifdef _DEBUG_ + struct _cl_fmap *p_map; +#endif +} cl_fmap_item_t; +/* +* FIELDS +* pool_item +* Used to store the item in a doubly linked list, allowing more +* efficient map traversal. +* +* p_left +* Pointer to the map item that is a child to the left of the node. +* +* p_right +* Pointer to the map item that is a child to the right of the node. +* +* p_up +* Pointer to the map item that is the parent of the node. +* +* color +* Indicates whether a node is red or black in the map. +* +* p_key +* Pointer to the value that uniquely represents a node in a map. This +* pointer is set by calling cl_fmap_insert and can be retrieved by +* calling cl_fmap_key. +* +* NOTES +* None of the fields of this structure should be manipulated by users, as +* they are critical to the proper operation of the map in which they +* are stored.
+* +* To allow storing items in either a quick list, a quick pool, or a flexi +* map, the map implementation guarantees that the map item can be safely +* cast to a pool item used for storing an object in a quick pool, or cast +* to a list item used for storing an object in a quick list. This removes +* the need to embed a flexi map item, a list item, and a pool item in +* objects that need to be stored in a quick list, a quick pool, and a +* flexi map. +* +* SEE ALSO +* Flexi Map, cl_fmap_insert, cl_fmap_key, cl_pool_item_t, cl_list_item_t +*********/ + +/****d* Component Library: Flexi Map/cl_pfn_fmap_cmp_t +* NAME +* cl_pfn_fmap_cmp_t +* +* DESCRIPTION +* The cl_pfn_fmap_cmp_t function type defines the prototype for functions +* used to compare item keys in a flexi map. +* +* SYNOPSIS +*/ +typedef int + (*cl_pfn_fmap_cmp_t) (IN const void *const p_key1, + IN const void *const p_key2); +/* +* PARAMETERS +* p_key1 +* [in] Pointer to the first of two keys to compare. +* +* p_key2 +* [in] Pointer to the second of two keys to compare. +* +* RETURN VALUE +* Returns 0 if the keys match. +* Returns less than 0 if *p_key1 is less than *p_key2. +* Returns greater than 0 if *p_key1 is greater than *p_key2. +* +* NOTES +* This function type is provided as function prototype reference for the +* function provided by users as a parameter to the cl_fmap_init function. +* +* SEE ALSO +* Flexi Map, cl_fmap_init +*********/ + +/****s* Component Library: Flexi Map/cl_fmap_t +* NAME +* cl_fmap_t +* +* DESCRIPTION +* Flexi map structure. +* +* The cl_fmap_t structure should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct _cl_fmap { + cl_fmap_item_t root; + cl_fmap_item_t nil; + cl_state_t state; + size_t count; + cl_pfn_fmap_cmp_t pfn_compare; +} cl_fmap_t; +/* +* PARAMETERS +* root +* Map item that serves as root of the map. The root is set up to +* always have itself as parent. 
The left pointer is set to point +* to the item at the root. +* +* nil +* Map item that serves as terminator for all leaves, as well as +* providing the list item used as quick list for storing map items +* in a list for faster traversal. +* +* state +* State of the map, used to verify that operations are permitted. +* +* count +* Number of items in the map. +* +* pfn_compare +* Pointer to a compare function to invoke to compare the keys of +* items in the map. +* +* SEE ALSO +* Flexi Map, cl_pfn_fmap_cmp_t +*********/ + +/****d* Component Library: Flexi Map/cl_pfn_fmap_apply_t +* NAME +* cl_pfn_fmap_apply_t +* +* DESCRIPTION +* The cl_pfn_fmap_apply_t function type defines the prototype for +* functions used to iterate items in a flexi map. +* +* SYNOPSIS +*/ +typedef void + (*cl_pfn_fmap_apply_t) (IN cl_fmap_item_t * const p_map_item, + IN void *context); +/* +* PARAMETERS +* p_map_item +* [in] Pointer to a cl_fmap_item_t structure. +* +* context +* [in] Value passed to the callback function. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* This function type is provided as function prototype reference for the +* function provided by users as a parameter to the cl_fmap_apply_func +* function. +* +* SEE ALSO +* Flexi Map, cl_fmap_apply_func +*********/ + +/****f* Component Library: Flexi Map/cl_fmap_count +* NAME +* cl_fmap_count +* +* DESCRIPTION +* The cl_fmap_count function returns the number of items stored +* in a flexi map. +* +* SYNOPSIS +*/ +static inline size_t cl_fmap_count(IN const cl_fmap_t * const p_map) +{ + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + return (p_map->count); +} + +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_fmap_t structure whose item count to return. +* +* RETURN VALUE +* Returns the number of items stored in the map. 
+* +* SEE ALSO +* Flexi Map, cl_is_fmap_empty +*********/ + +/****f* Component Library: Flexi Map/cl_is_fmap_empty +* NAME +* cl_is_fmap_empty +* +* DESCRIPTION +* The cl_is_fmap_empty function returns whether a flexi map is empty. +* +* SYNOPSIS +*/ +static inline boolean_t cl_is_fmap_empty(IN const cl_fmap_t * const p_map) +{ + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + + return (p_map->count == 0); +} + +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_fmap_t structure to test for emptiness. +* +* RETURN VALUES +* TRUE if the flexi map is empty. +* +* FALSE otherwise. +* +* SEE ALSO +* Flexi Map, cl_fmap_count, cl_fmap_remove_all +*********/ + +/****f* Component Library: Flexi Map/cl_fmap_key +* NAME +* cl_fmap_key +* +* DESCRIPTION +* The cl_fmap_key function retrieves the key value of a map item. +* +* SYNOPSIS +*/ +static inline const void *cl_fmap_key(IN const cl_fmap_item_t * const p_item) +{ + CL_ASSERT(p_item); + return (p_item->p_key); +} + +/* +* PARAMETERS +* p_item +* [in] Pointer to a map item whose key value to return. +* +* RETURN VALUE +* Returns a pointer to the key value for the specified map item. +* The key value should not be modified to ensure proper flexi map operation. +* +* NOTES +* The key value is set in a call to cl_fmap_insert. +* +* SEE ALSO +* Flexi Map, cl_fmap_insert +*********/ + +/****f* Component Library: Flexi Map/cl_fmap_init +* NAME +* cl_fmap_init +* +* DESCRIPTION +* The cl_fmap_init function initializes a flexi map for use. +* +* SYNOPSIS +*/ +void cl_fmap_init(IN cl_fmap_t * const p_map, IN cl_pfn_fmap_cmp_t pfn_compare); +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_fmap_t structure to initialize. +* +* pfn_compare +* [in] Pointer to the compare function used to compare keys. +* See the cl_pfn_fmap_cmp_t function type declaration for details +* about the callback function. +* +* RETURN VALUES +* This function does not return a value.
+* +* NOTES +* Allows calling flexi map manipulation functions. +* +* SEE ALSO +* Flexi Map, cl_fmap_insert, cl_fmap_remove +*********/ + +/****f* Component Library: Flexi Map/cl_fmap_end +* NAME +* cl_fmap_end +* +* DESCRIPTION +* The cl_fmap_end function returns the end of a flexi map. +* +* SYNOPSIS +*/ +static inline const cl_fmap_item_t *cl_fmap_end(IN const cl_fmap_t * + const p_map) +{ + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + /* Nil is the end of the map. */ + return (&p_map->nil); +} + +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_fmap_t structure whose end to return. +* +* RETURN VALUE +* Pointer to the end of the map. +* +* NOTES +* cl_fmap_end is useful for determining the validity of map items returned +* by cl_fmap_head, cl_fmap_tail, cl_fmap_next, or cl_fmap_prev. If the +* map item pointer returned by any of these functions compares equal to the end, +* the end of the map was encountered. +* When using cl_fmap_head or cl_fmap_tail, this condition indicates that +* the map is empty. +* +* SEE ALSO +* Flexi Map, cl_fmap_head, cl_fmap_tail, cl_fmap_next, cl_fmap_prev +*********/ + +/****f* Component Library: Flexi Map/cl_fmap_head +* NAME +* cl_fmap_head +* +* DESCRIPTION +* The cl_fmap_head function returns the map item with the lowest key +* value stored in a flexi map. +* +* SYNOPSIS +*/ +static inline cl_fmap_item_t *cl_fmap_head(IN const cl_fmap_t * const p_map) +{ + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + return ((cl_fmap_item_t *) p_map->nil.pool_item.list_item.p_next); +} + +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_fmap_t structure whose item with the lowest +* key is returned. +* +* RETURN VALUES +* Pointer to the map item with the lowest key in the flexi map. +* +* Pointer to the map end if the flexi map was empty. +* +* NOTES +* cl_fmap_head does not remove the item from the map.
+* +* SEE ALSO +* Flexi Map, cl_fmap_tail, cl_fmap_next, cl_fmap_prev, cl_fmap_end, +* cl_fmap_item_t +*********/ + +/****f* Component Library: Flexi Map/cl_fmap_tail +* NAME +* cl_fmap_tail +* +* DESCRIPTION +* The cl_fmap_tail function returns the map item with the highest key +* value stored in a flexi map. +* +* SYNOPSIS +*/ +static inline cl_fmap_item_t *cl_fmap_tail(IN const cl_fmap_t * const p_map) +{ + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + return ((cl_fmap_item_t *) p_map->nil.pool_item.list_item.p_prev); +} + +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_fmap_t structure whose item with the highest key +* is returned. +* +* RETURN VALUES +* Pointer to the map item with the highest key in the flexi map. +* +* Pointer to the map end if the flexi map was empty. +* +* NOTES +* cl_fmap_tail does not remove the item from the map. +* +* SEE ALSO +* Flexi Map, cl_fmap_head, cl_fmap_next, cl_fmap_prev, cl_fmap_end, +* cl_fmap_item_t +*********/ + +/****f* Component Library: Flexi Map/cl_fmap_next +* NAME +* cl_fmap_next +* +* DESCRIPTION +* The cl_fmap_next function returns the map item with the next higher +* key value than a specified map item. +* +* SYNOPSIS +*/ +static inline cl_fmap_item_t *cl_fmap_next(IN const cl_fmap_item_t * + const p_item) +{ + CL_ASSERT(p_item); + return ((cl_fmap_item_t *) p_item->pool_item.list_item.p_next); +} + +/* +* PARAMETERS +* p_item +* [in] Pointer to a map item whose successor to return. +* +* RETURN VALUES +* Pointer to the map item with the next higher key value in a flexi map. +* +* Pointer to the map end if the specified item was the last item in +* the flexi map. +* +* SEE ALSO +* Flexi Map, cl_fmap_head, cl_fmap_tail, cl_fmap_prev, cl_fmap_end, +* cl_fmap_item_t +*********/ + +/****f* Component Library: Flexi Map/cl_fmap_prev +* NAME +* cl_fmap_prev +* +* DESCRIPTION +* The cl_fmap_prev function returns the map item with the next lower +* key value than a specified map item.
+* +* SYNOPSIS +*/ +static inline cl_fmap_item_t *cl_fmap_prev(IN const cl_fmap_item_t * + const p_item) +{ + CL_ASSERT(p_item); + return ((cl_fmap_item_t *) p_item->pool_item.list_item.p_prev); +} + +/* +* PARAMETERS +* p_item +* [in] Pointer to a map item whose predecessor to return. +* +* RETURN VALUES +* Pointer to the map item with the next lower key value in a flexi map. +* +* Pointer to the map end if the specified item was the first item in +* the flexi map. +* +* SEE ALSO +* Flexi Map, cl_fmap_head, cl_fmap_tail, cl_fmap_next, cl_fmap_end, +* cl_fmap_item_t +*********/ + +/****f* Component Library: Flexi Map/cl_fmap_insert +* NAME +* cl_fmap_insert +* +* DESCRIPTION +* The cl_fmap_insert function inserts a map item into a flexi map. +* +* SYNOPSIS +*/ +cl_fmap_item_t *cl_fmap_insert(IN cl_fmap_t * const p_map, + IN const void *const p_key, + IN cl_fmap_item_t * const p_item); +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_fmap_t structure into which to add the item. +* +* p_key +* [in] Pointer to the key value to assign to the item. Storage +* for the key must be persistent, as only the pointer is stored. +* Users are responsible for maintaining the validity of key +* pointers while they are in use. +* +* p_item +* [in] Pointer to a cl_fmap_item_t structure to insert into the flexi map. +* +* RETURN VALUE +* Pointer to the item in the map with the specified key. If insertion +* was successful, this is the pointer to the item. If an item with the +* specified key already exists in the map, the pointer to that item is +* returned. +* +* NOTES +* Insertion operations may cause the flexi map to rebalance. +* +* SEE ALSO +* Flexi Map, cl_fmap_remove, cl_fmap_item_t +*********/ + +/****f* Component Library: Flexi Map/cl_fmap_match +* NAME +* cl_fmap_match +* +* DESCRIPTION +* The cl_fmap_match function returns the map item matching a key.
+* +* SYNOPSIS +*/ +cl_fmap_item_t *cl_fmap_match(IN const cl_fmap_t * const p_map, + IN const void *const p_key, + IN cl_pfn_fmap_cmp_t pfn_compare); +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_fmap_t structure from which to retrieve the +* item with the specified key. +* +* p_key +* [in] Pointer to a key value used to search for the desired map item. +* +* pfn_compare +* [in] Pointer to a compare function to invoke to compare the +* keys of items in the map. Passing NULL here makes such call +* to be equivalent to using cl_fmap_get(). +* +* RETURN VALUES +* Pointer to the map item matching the desired key value. +* +* Pointer to the map end if there was no item matching the desired key +* value stored in the flexi map. +* +* SEE ALSO +* Flexi Map, cl_fmap_remove, cl_fmap_get +*********/ + +/****f* Component Library: Flexi Map/cl_fmap_get +* NAME +* cl_fmap_get +* +* DESCRIPTION +* The cl_fmap_get function returns the map item associated with a key. +* +* SYNOPSIS +*/ +cl_fmap_item_t *cl_fmap_get(IN const cl_fmap_t * const p_map, + IN const void *const p_key); +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_fmap_t structure from which to retrieve the +* item with the specified key. +* +* p_key +* [in] Pointer to a key value used to search for the desired map item. +* +* RETURN VALUES +* Pointer to the map item with the desired key value. +* +* Pointer to the map end if there was no item with the desired key value +* stored in the flexi map. +* +* NOTES +* cl_fmap_get does not remove the item from the flexi map. +* +* SEE ALSO +* Flexi Map, cl_fmap_remove, cl_fmap_get_next +*********/ + +/****f* Component Library: Flexi Map/cl_fmap_get_next +* NAME +* cl_fmap_get_next +* +* DESCRIPTION +* The cl_fmap_get_next function returns the first map item associated with +* a key > the key specified. 
+* +* SYNOPSIS +*/ +cl_fmap_item_t *cl_fmap_get_next(IN const cl_fmap_t * const p_map, + IN const void *const p_key); +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_fmap_t structure from which to retrieve the +* item with the specified key. +* +* p_key +* [in] Pointer to a key value used to search for the desired map item. +* +* RETURN VALUES +* Pointer to the first map item with a key > the desired key value. +* +* Pointer to the map end if there was no item with a key > the desired key +* value stored in the flexi map. +* +* NOTES +* cl_fmap_get_next does not remove the item from the flexi map. +* +* SEE ALSO +* Flexi Map, cl_fmap_remove, cl_fmap_get +*********/ + +/****f* Component Library: Flexi Map/cl_fmap_remove_item +* NAME +* cl_fmap_remove_item +* +* DESCRIPTION +* The cl_fmap_remove_item function removes the specified map item +* from a flexi map. +* +* SYNOPSIS +*/ +void +cl_fmap_remove_item(IN cl_fmap_t * const p_map, + IN cl_fmap_item_t * const p_item); +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_fmap_t structure from which to +* remove item. +* +* p_item +* [in] Pointer to a map item to remove from its flexi map. +* +* RETURN VALUES +* This function does not return a value. +* +* In a debug build, cl_fmap_remove_item asserts that the item being +* removed is in the specified map. +* +* NOTES +* Removes the map item pointed to by p_item from its flexi map. +* +* SEE ALSO +* Flexi Map, cl_fmap_remove, cl_fmap_remove_all, cl_fmap_insert +*********/ + +/****f* Component Library: Flexi Map/cl_fmap_remove +* NAME +* cl_fmap_remove +* +* DESCRIPTION +* The cl_fmap_remove function removes the map item with the specified key +* from a flexi map. +* +* SYNOPSIS +*/ +cl_fmap_item_t *cl_fmap_remove(IN cl_fmap_t * const p_map, + IN const void *const p_key); +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_fmap_t structure from which to remove the +* item with the specified key. 
+* +* p_key +* [in] Pointer to the key value used to search for the map item +* to remove. +* +* RETURN VALUES +* Pointer to the removed map item if it was found. +* +* Pointer to the map end if no item with the specified key exists in the +* flexi map. +* +* SEE ALSO +* Flexi Map, cl_fmap_remove_item, cl_fmap_remove_all, cl_fmap_insert +*********/ + +/****f* Component Library: Flexi Map/cl_fmap_remove_all +* NAME +* cl_fmap_remove_all +* +* DESCRIPTION +* The cl_fmap_remove_all function removes all items in a flexi map, +* leaving it empty. +* +* SYNOPSIS +*/ +static inline void cl_fmap_remove_all(IN cl_fmap_t * const p_map) +{ + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + + p_map->root.p_left = &p_map->nil; + p_map->nil.pool_item.list_item.p_next = &p_map->nil.pool_item.list_item; + p_map->nil.pool_item.list_item.p_prev = &p_map->nil.pool_item.list_item; + p_map->count = 0; +} + +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_fmap_t structure to empty. +* +* RETURN VALUES +* This function does not return a value. +* +* SEE ALSO +* Flexi Map, cl_fmap_remove, cl_fmap_remove_item +*********/ + +/****f* Component Library: Flexi Map/cl_fmap_merge +* NAME +* cl_fmap_merge +* +* DESCRIPTION +* The cl_fmap_merge function moves all items from one map to another, +* excluding duplicates. +* +* SYNOPSIS +*/ +void +cl_fmap_merge(OUT cl_fmap_t * const p_dest_map, + IN OUT cl_fmap_t * const p_src_map); +/* +* PARAMETERS +* p_dest_map +* [out] Pointer to a cl_fmap_t structure to which items should be added. +* +* p_src_map +* [in/out] Pointer to a cl_fmap_t structure whose items to add +* to p_dest_map. +* +* RETURN VALUES +* This function does not return a value. +* +* NOTES +* Items are evaluated based on their keys only. +* +* Upon return from cl_fmap_merge, the flexi map referenced by p_src_map +* contains all duplicate items. 
+* +* SEE ALSO +* Flexi Map, cl_fmap_delta +*********/ + +/****f* Component Library: Flexi Map/cl_fmap_delta +* NAME +* cl_fmap_delta +* +* DESCRIPTION +* The cl_fmap_delta function computes the differences between two maps. +* +* SYNOPSIS +*/ +void +cl_fmap_delta(IN OUT cl_fmap_t * const p_map1, + IN OUT cl_fmap_t * const p_map2, + OUT cl_fmap_t * const p_new, OUT cl_fmap_t * const p_old); +/* +* PARAMETERS +* p_map1 +* [in/out] Pointer to the first of two cl_fmap_t structures whose +* differences to compute. +* +* p_map2 +* [in/out] Pointer to the second of two cl_fmap_t structures whose +* differences to compute. +* +* p_new +* [out] Pointer to an empty cl_fmap_t structure that contains the +* items unique to p_map2 upon return from the function. +* +* p_old +* [out] Pointer to an empty cl_fmap_t structure that contains the +* items unique to p_map1 upon return from the function. +* +* RETURN VALUES +* This function does not return a value. +* +* NOTES +* Items are evaluated based on their keys. Items that exist in both +* p_map1 and p_map2 remain in their respective maps. Items that +* exist only in p_map1 are moved to p_old. Likewise, items that exist only +* in p_map2 are moved to p_new. This function can be useful in evaluating +* changes between two maps. +* +* Both maps pointed to by p_new and p_old must be empty on input. This +* requirement removes the possibility of failures. +* +* SEE ALSO +* Flexi Map, cl_fmap_merge +*********/ + +/****f* Component Library: Flexi Map/cl_fmap_apply_func +* NAME +* cl_fmap_apply_func +* +* DESCRIPTION +* The cl_fmap_apply_func function executes a specified function +* for every item stored in a flexi map. +* +* SYNOPSIS +*/ +void +cl_fmap_apply_func(IN const cl_fmap_t * const p_map, + IN cl_pfn_fmap_apply_t pfn_func, + IN const void *const context); +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_fmap_t structure. +* +* pfn_func +* [in] Function invoked for every item in the flexi map.
+* See the cl_pfn_fmap_apply_t function type declaration for +* details about the callback function. +* +* context +* [in] Value to pass to the callback functions to provide context. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* The function provided must not perform any map operations, as these +* would corrupt the flexi map. +* +* SEE ALSO +* Flexi Map, cl_pfn_fmap_apply_t +*********/ + +END_C_DECLS +#endif /* _CL_FLEXIMAP_H_ */ diff --git a/include/complib/cl_heap.h b/include/complib/cl_heap.h new file mode 100644 index 0000000..3781008 --- /dev/null +++ b/include/complib/cl_heap.h @@ -0,0 +1,748 @@ +/* + * Copyright (c) 2009-2015 ZIH, TU Dresden, Federal Republic of Germany. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of a d-ary heap. The caller must provide + * key/context and must provide a callback function to update + * the index of heap elements. Additionally, the caller can + * provide a compare function in case the minimum d-ary heap + * based on uint64_t keys is not sufficient. The + * heap allocates internal structures and can be resized, + * which will not relocate or change existing elements. + */ + +#ifndef _CL_HEAP_H_ +#define _CL_HEAP_H_ + +#include <complib/cl_types.h> + +#ifdef __cplusplus +#define BEGIN_C_DECLS extern "C" { +#define END_C_DECLS } +#else /* !__cplusplus */ +#define BEGIN_C_DECLS +#define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/d-ary (Min-)Heap +* NAME +* Heap +* +* DESCRIPTION +* The d-ary heap is stored implicitly in an array and parent/child nodes +* are accessed via index calculations rather than using allocated +* elements and pointer structures. It has been proven that implicit 4-ary +* heaps are in many use cases the most efficient implementation (e.g., +* see "A Back-to-Basics Empirical Study of Priority Queues" by +* Larkin et al.). +* +* The heap has to be initialized to a certain size by the caller, +* however a function to resize the heap is available. The resize function +* will not delete or relocate existing elements in the heap. +* +* Typical heap operations, such as insert, extract_[min | max], +* decrease_key, and delete, are provided and referencing an element, +* e.g., for deletion is done via indices. 
The implication is that +* the caller needs to be informed about index changes, e.g., after +* internal reordering through heap_[up | down], to always know the +* index of each element in the heap. Therefore, the caller has to +* provide a callback function, which forwards the context of an element +* and the new index in the heap back to the caller. The caller is +* responsible for updating its internal structures/indices accordingly. +* +* An implementation of heapify is omitted, because the caller should not +* be able to allocate and provide an unsorted array. All heapify +* operations with heap_up and heap_down are done internally after the +* caller manipulated the heap with the provided functions. +* +* Heaps are used extensively in some routing functions, such as [DF]SSSP +* routing. Therefore, the [DF]SSSP implementation can be used as a +* prototype to adapt and use the d-ary heap in other parts of OpenSM. +* +* The cl_heap_t structure should be treated as opaque and should be +* manipulated only through the provided functions. +* +* SEE ALSO +* Structures: +* cl_heap_t +* +* Callbacks: +* cl_pfn_heap_apply_index_update_t, cl_pfn_heap_compare_keys_t +* +* Initialization: +* cl_heap_construct, cl_heap_init, cl_heap_destroy +* +* Manipulation: +* cl_heap_insert, cl_heap_delete, cl_heap_extract_root, +* cl_heap_modify_key, cl_heap_resize +* +* Attributes: +* cl_heap_get_capacity, cl_heap_get_size, cl_heap_is_empty, +* cl_is_stored_in_heap, cl_is_heap_inited +*********/ +/****d* Component Library: Heap/cl_pfn_heap_apply_index_update_t +* NAME +* cl_pfn_heap_apply_index_update_t +* +* DESCRIPTION +* The cl_pfn_heap_apply_index_update_t function type defines the prototype +* to update the heap index, position of the element, in the user supplied +* context. 
The index is the only information the user needs to store +* somewhere in his/her structures and is essential for all manipulation +* operations on the heap (except resize), since these will change the +* position of elements through heap_up/heap_down. +* +* SYNOPSIS +*/ +typedef void + (*cl_pfn_heap_apply_index_update_t) (IN const void *context, + IN const size_t new_index); +/* +* PARAMETERS +* context +* [in] Pointer to the user supplied context which is associated +* with a heap element. +* +* new_index +* [in] The new index in the heap, i.e., position in the +* element_array of cl_heap_t. +* +* RETURN VALUES +* This callback function should not return any value. +* +* NOTES +* The function is necessary to update the indices for the caller, +* since the caller MUST keep track of the position of heap elements +* to make changes, such as decrease_key or delete. For a working +* reference implementation on how to define and handle this callback, +* please refer to the [DF]SSSP routing in opensm/osm_ucast_dfsssp.c. +* +* SEE ALSO +* Heap, cl_heap_modify_key, cl_heap_insert, cl_heap_delete, +* cl_heap_extract_root +*********/ + +/****d* Component Library: Heap/cl_pfn_heap_compare_keys_t +* NAME +* cl_pfn_heap_compare_keys_t +* +* DESCRIPTION +* The cl_pfn_heap_compare_keys_t function type defines the prototype +* to compare the keys of two heap elements. +* +* SYNOPSIS +*/ +typedef int + (*cl_pfn_heap_compare_keys_t) (IN const void *p_key_1, IN const void *p_key_2); +/* +* PARAMETERS +* p_key_1 +* [in] Pointer to the first key. +* +* p_key_2 +* [in] Pointer to the second key. +* +* RETURN VALUES +* The function should return an integer less than, equal to, or greater +* than zero indicating that key1 is "smaller", "equal" or "greater" than +* key2. 
+* +* NOTES +* If the user does not provide a compare function, then the default behavior +* is to assume all keys are uint64_t and the heap is a minimum d-ary +* heap, i.e., the smallest key is stored at the root node. +* +* SEE ALSO +* Heap +*********/ + +/****s* Component Library: Heap/cl_heap_t +* NAME +* cl_heap_t +* +* DESCRIPTION +* Heap structure. +* +* The cl_heap_t structure should be treated as opaque and should be +* manipulated only through the provided functions. +* +* SYNOPSIS +* +*/ +struct _cl_heap_elem; +typedef struct _cl_heap { + cl_state_t state; + uint8_t branching_factor; + struct _cl_heap_elem *element_array; + size_t size; + size_t capacity; + cl_pfn_heap_apply_index_update_t pfn_index_update; + cl_pfn_heap_compare_keys_t pfn_compare; +} cl_heap_t; +/* +* FIELDS +* state +* State of the heap. +* +* branching_factor +* Branching factor d for the d-ary heap, i.e., number of children +* per node. +* +* element_array +* Array of elements for the heap. Each element consists of a key +* and user supplied context pointer, i.e., usually very compact +* storage with 16 bytes per element. +* +* size +* Number of elements successfully inserted into the heap. +* +* capacity +* Total number of elements allocated. +* +* pfn_index_update +* User supplied function to update position indices for +* elements in the heap. +* +* pfn_compare +* User supplied comparison of element keys. +* +* SEE ALSO +* Heap, cl_pfn_heap_apply_index_update_t, cl_pfn_heap_compare_keys_t +*********/ + +/****f* Component Library: Heap/cl_heap_construct +* NAME +* cl_heap_construct +* +* DESCRIPTION +* The cl_heap_construct function constructs a d-ary heap. +* The result is a valid, but uninitialized, heap. The function +* cl_heap_construct does not allocate any memory. +* +* SYNOPSIS +*/ +void cl_heap_construct(IN cl_heap_t * const p_heap); +/* +* PARAMETERS +* p_heap +* [in] Pointer to a cl_heap_t structure to construct. +* +* RETURN VALUE +* This function does not return a value. 
+* +* NOTES +* Allows calling cl_heap_destroy without first calling cl_heap_init. +* Calling cl_heap_construct is a prerequisite to calling any other +* heap function except cl_heap_init. The caller must allocate the +* memory for the heap, i.e., p_heap==NULL results in an exception. +* +* SEE ALSO +* Heap, cl_heap_init, cl_heap_destroy +*********/ + +/****f* Component Library: Heap/cl_heap_init +* NAME +* cl_heap_init +* +* DESCRIPTION +* The cl_heap_init function initializes a d-ary heap for use. +* +* SYNOPSIS +*/ +cl_status_t +cl_heap_init(IN cl_heap_t * const p_heap, + IN const size_t max_size, + IN const uint8_t branching_factor, + IN cl_pfn_heap_apply_index_update_t pfn_index_update, + IN cl_pfn_heap_compare_keys_t pfn_compare OPTIONAL); +/* +* PARAMETERS +* p_heap +* [in] Pointer to a cl_heap_t structure to initialize. +* +* max_size +* [in] Total number of elements the heap should be able to store. +* +* branching_factor +* [in] Branching factor d for the d-ary heap, i.e., number of +* children. For example, using d=2 yields a binary heap. +* +* pfn_index_update +* [in] User supplied callback to inform the user about index +* changes of individual elements stored in the heap. +* +* pfn_compare +* [in] User supplied callback to compare two keys. This function +* pointer is optional, i.e., if pfn_compare=NULL, then an internal +* compare function is used to create a min-heap. +* +* RETURN VALUES +* CL_SUCCESS if the heap was initialized successfully. +* +* CL_INVALID_PARAMETER if max_size or branching_factor are less than or +* equal to zero, or if pfn_index_update is NULL. +* +* CL_INSUFFICIENT_MEMORY if the initialization failed. +* +* NOTES +* Can be called without calling cl_heap_construct first. Calling +* cl_heap_init on an already initialized heap will result in an internal +* call to cl_heap_destroy prior to the memory allocation of the +* element_array. 
+* +* SEE ALSO +* Heap, cl_pfn_heap_apply_index_update_t, cl_pfn_heap_compare_keys_t, +* cl_heap_construct +*********/ + +/****f* Component Library: Heap/cl_heap_destroy +* NAME +* cl_heap_destroy +* +* DESCRIPTION +* The cl_heap_destroy function destroys the heap. The heap is afterwards +* in the CL_UNINITIALIZED state. +* +* SYNOPSIS +*/ +void cl_heap_destroy(IN cl_heap_t * const p_heap); +/* +* PARAMETERS +* p_heap +* [in] Pointer to a cl_heap_t structure to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* cl_heap_destroy frees all memory allocated for the heap. The heap +* is left valid, but uninitialized, with a zero capacity and size, and +* must be re-initialized by calling cl_heap_init for further usage. +* +* This function should only be called after a call to cl_heap_construct +* or cl_heap_init. +* +* SEE ALSO +* Heap, cl_heap_construct, cl_heap_init +*********/ + +/****f* Component Library: Heap/cl_heap_modify_key +* NAME +* cl_heap_modify_key +* +* DESCRIPTION +* The cl_heap_modify_key function changes the key of an element in the +* heap identified by an index. The change could invalidate the heap +* property for the stored elements. Therefore, the cl_heap_modify_key +* function calls heap_[up | down] internally to reconstruct a valid +* heap. +* +* SYNOPSIS +*/ +cl_status_t cl_heap_modify_key(IN cl_heap_t * const p_heap, + IN const uint64_t key, IN const size_t index); +/* +* PARAMETERS +* p_heap +* [in] Pointer to a cl_heap_t structure to modify. +* +* key +* [in] The new key for an existing heap element. +* +* index +* [in] Index of the heap element in element_array, which should +* be modified. +* +* RETURN VALUE +* CL_SUCCESS if the key of a heap element was modified successfully. +* +* CL_INVALID_PARAMETER if the user supplied index is out of bounds. 
+* +* NOTES +* This function is similar to the common decrease_key for minimum heaps, +* however the naming scheme is more generic to support maximum heaps +* as well. +* +* This function should only be called after a call to cl_heap_init. +* +* SEE ALSO +* Heap, cl_heap_init +*********/ + +/****f* Component Library: Heap/cl_heap_insert +* NAME +* cl_heap_insert +* +* DESCRIPTION +* The cl_heap_insert function adds a new element to the existing heap +* and restores the heap property afterwards. +* +* SYNOPSIS +*/ +cl_status_t cl_heap_insert(IN cl_heap_t * const p_heap, IN const uint64_t key, + IN const void *const context); +/* +* PARAMETERS +* p_heap +* [in] Pointer to a cl_heap_t structure to modify. +* +* key +* [in] Initial key value to compare heap elements and reorder +* the element_array according to the heap property. +* +* context +* [in] User supplied pointer to a context which "represents" +* the heap element for the user. +* +* RETURN VALUE +* CL_SUCCESS if the new heap element was stored successfully. +* +* CL_INVALID_PARAMETER if the user has no association to the heap +* element, i.e., the supplied context is NULL. +* +* CL_INSUFFICIENT_RESOURCES if the heap is already full and no further +* elements can be stored. +* +* NOTES +* The user supplied context will be returned to the user in case the +* user decides to remove either the root of the heap or any element +* in the heap with cl_heap_extract_root or cl_heap_delete. +* Furthermore, the context will be forwarded to the user through +* cl_pfn_heap_apply_index_update_t to indicate for which element an index +* change in the heap has happened. +* +* SEE ALSO +* Heap, cl_pfn_heap_apply_index_update_t, cl_heap_get_capacity, +* cl_heap_delete, cl_heap_extract_root +*********/ + +/****f* Component Library: Heap/cl_heap_delete +* NAME +* cl_heap_delete +* +* DESCRIPTION +* The cl_heap_delete function removes an arbitrary element, referenced +* by index, from the heap. 
Afterwards, the heap property is restored. +* +* SYNOPSIS +*/ +void *cl_heap_delete(IN cl_heap_t * const p_heap, IN const size_t index); +/* +* PARAMETERS +* p_heap +* [in] Pointer to a cl_heap_t structure to modify. +* +* index +* [in] Index to an element in the element_array of the heap which +* should be deleted. +* +* RETURN VALUE +* The context pointer associated with the index (and key) in the heap, +* (i.e., the context initially supplied by the user through +* cl_heap_insert). If the index is invalid or the heap is empty, then +* NULL is returned. +* +* NOTES +* The function will move the element to a "out-of-bounds" position, i.e., +* relative to the size but not capacity of the heap, and update +* the index for the caller via cl_pfn_heap_apply_index_update_t. This +* ensures that later attempts to modify/delete this element can be +* intercepted. +* +* SEE ALSO +* Heap, cl_pfn_heap_apply_index_update_t, cl_heap_insert +*********/ + +/****f* Component Library: Heap/cl_heap_extract_root +* NAME +* cl_heap_extract_root +* +* DESCRIPTION +* The cl_heap_extract_root deletes the root of the heap and returns +* the stored context to the user. The naming scheme is generalized +* here to support minimum or maximum heaps with one function, e.g., +* the root refers to the element with the smallest key in a minimum +* heap. +* +* SYNOPSIS +*/ +void *cl_heap_extract_root(IN cl_heap_t * const p_heap); +/* +* PARAMETERS +* p_heap +* [in] Pointer to a cl_heap_t structure to modify. +* +* RETURN VALUE +* The context pointer associated with the smallest or largest key +* in a minimum or maximum heap, respectively. +* +* NOTES +* Internally, the cl_heap_extract_root function calls cl_heap_delete for +* the element with index zero. Therefore, refer to cl_heap_delete for +* further explanation. 
+* +* SEE ALSO +* Heap, cl_heap_delete +*********/ + +/****f* Component Library: Heap/cl_heap_resize +* NAME +* cl_heap_resize +* +* DESCRIPTION +* The cl_heap_resize function changes the capacity of an existing heap. +* The function will not delete or relocate existing elements in the heap, +* consequently changing the capacity to a value smaller than the current +* number of elements in the heap will result in an error. +* +* SYNOPSIS +*/ +cl_status_t cl_heap_resize(IN cl_heap_t * const p_heap, + IN const size_t new_size); +/* +* PARAMETERS +* p_heap +* [in] Pointer to a cl_heap_t structure to resize. +* +* new_size +* [in] Total number of elements allocated for the heap. +* +* RETURN VALUE +* CL_SUCCESS if the heap has been changed in size. +* +* CL_INVALID_PARAMETER if either new_size is less than or equal to zero, +* or if the requested size is smaller than the number of currently +* stored heap elements (to prevent loss of data). +* +* CL_INSUFFICIENT_MEMORY if resizing element_array failed due to an +* insufficient amount of memory. The data stored in the heap is not lost +* and can still be retrieved or modified. +* +* NOTES +* Resizing the heap to a zero capacity is not supported. Please, use the +* cl_heap_destroy function to free the memory allocated in cl_heap_t. +* +* SEE ALSO +* Heap, cl_heap_get_capacity, cl_heap_destroy +*********/ + +/****f* Component Library: Heap/cl_heap_get_capacity +* NAME +* cl_heap_get_capacity +* +* DESCRIPTION +* The cl_heap_get_capacity function returns the capacity of a heap. +* +* SYNOPSIS +*/ +static inline size_t cl_heap_get_capacity(IN const cl_heap_t * const p_heap) +{ + CL_ASSERT(p_heap); + CL_ASSERT(p_heap->state == CL_INITIALIZED); + + return (p_heap->capacity); +} + +/* +* PARAMETERS +* p_heap +* [in] Pointer to a cl_heap_t structure whose capacity to return. +* +* RETURN VALUE +* Total number of elements which can be stored in the heap. 
+* +* NOTES +* The capacity is the number of elements that the heap can store, and +* can be greater than the number of elements stored. To get the number of +* elements stored in the heap, use cl_heap_get_size. +* +* SEE ALSO +* Heap, cl_heap_get_size +*********/ + +/****f* Component Library: Heap/cl_heap_get_size +* NAME +* cl_heap_get_size +* +* DESCRIPTION +* The cl_heap_get_size function returns the number of elements stored +* in the heap. +* +* SYNOPSIS +*/ +static inline size_t cl_heap_get_size(IN const cl_heap_t * const p_heap) +{ + CL_ASSERT(p_heap); + CL_ASSERT(p_heap->state == CL_INITIALIZED); + + return (p_heap->size); +} + +/* +* PARAMETERS +* p_heap +* [in] Pointer to a cl_heap_t structure whose size to return. +* +* RETURN VALUE +* Number of elements in the heap. +* +* SEE ALSO +* Heap, cl_heap_resize, cl_heap_get_capacity +*********/ + +/****f* Component Library: Heap/cl_heap_is_empty +* NAME +* cl_heap_is_empty +* +* DESCRIPTION +* The cl_heap_is_empty function checks whether elements are stored in the +* heap, or not. +* +* SYNOPSIS +*/ +static inline boolean_t cl_heap_is_empty(IN const cl_heap_t * const p_heap) +{ + CL_ASSERT(p_heap); + CL_ASSERT(p_heap->state == CL_INITIALIZED); + + return (p_heap->size) ? FALSE : TRUE; +} + +/* +* PARAMETERS +* p_heap +* [in] Pointer to an initialized cl_heap_t structure. +* +* RETURN VALUES +* TRUE if no elements are stored in the heap, otherwise FALSE. +* +* SEE ALSO +* Heap, cl_heap_get_size +*********/ + +/****f* Component Library: Heap/cl_is_stored_in_heap +* NAME +* cl_is_stored_in_heap +* +* DESCRIPTION +* The function cl_is_stored_in_heap can be used by the caller to verify +* that a context, initially supplied via cl_heap_insert, is stored +* in the heap at a index known by the caller. 
+* +* SYNOPSIS +*/ +boolean_t cl_is_stored_in_heap(IN const cl_heap_t * const p_heap, + IN const void *const ctx, + IN const size_t index); +/* +* PARAMETERS +* p_heap +* [in] Pointer to an initialized cl_heap_t structure. +* +* ctx +* [in] Pointer to a context which "represents" the heap element +* initially stored with the cl_heap_insert function. +* +* index +* [in] Index to the element in the element_array. +* +* RETURN VALUES +* TRUE if the index is not out-of-bounds and the stored and requested +* contexts match, otherwise FALSE. +* +* SEE ALSO +* Heap, cl_heap_insert +*********/ + +/****f* Component Library: Heap/cl_is_heap_inited +* NAME +* cl_is_heap_inited +* +* DESCRIPTION +* The function cl_is_heap_inited can be used to verify that the state +* of a heap is CL_INITIALIZED. +* +* SYNOPSIS +*/ +static inline uint32_t cl_is_heap_inited(IN const cl_heap_t * const p_heap) +{ + CL_ASSERT(p_heap); + CL_ASSERT(cl_is_state_valid(p_heap->state)); + + return (p_heap->state == CL_INITIALIZED); +} + +/* +* PARAMETERS +* p_heap +* [in] Pointer to a cl_heap_t structure. +* +* RETURN VALUES +* TRUE if the heap is initialized, otherwise FALSE. +* +* NOTES +* Can be called for every allocated heap for which at least +* cl_heap_construct or cl_heap_init was called beforehand. +* +* SEE ALSO +* Heap, cl_heap_construct, cl_heap_init +*********/ + +/****f* Component Library: Heap/cl_verify_heap_property +* NAME +* cl_verify_heap_property +* +* DESCRIPTION +* The function cl_verify_heap_property validates the correctness of a +* given heap. +* +* SYNOPSIS +*/ +boolean_t cl_verify_heap_property(IN const cl_heap_t * const p_heap); + +/* +* PARAMETERS +* p_heap +* [in] Pointer to a cl_heap_t structure. +* +* RETURN VALUES +* TRUE if the heap complies with the heap property, otherwise FALSE. 
+* +* SEE ALSO +* Heap, cl_heap_construct, cl_heap_init +*********/ + +END_C_DECLS +#endif /* _CL_HEAP_H_ */ diff --git a/include/complib/cl_list.h b/include/complib/cl_list.h new file mode 100644 index 0000000..62f674f --- /dev/null +++ b/include/complib/cl_list.h @@ -0,0 +1,1284 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of list. 
+ */ + +#ifndef _CL_LIST_H_ +#define _CL_LIST_H_ + +#include <complib/cl_qlist.h> +#include <complib/cl_qpool.h> + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/List +* NAME +* List +* +* DESCRIPTION +* List stores objects in a doubly linked list. +* +* Unlike quick list, users pass pointers to the object being stored, rather +* than to a cl_list_item_t structure. Insertion operations on a list can +* fail, and callers should trap for such failures. +* +* Use quick list in situations where insertion failures cannot be tolerated. +* +* List is not thread safe, and users must provide serialization. +* +* The list functions operate on a cl_list_t structure which should be +* treated as opaque and should be manipulated only through the provided +* functions. +* +* SEE ALSO +* Types: +* cl_list_iterator_t +* +* Structures: +* cl_list_t +* +* Callbacks: +* cl_pfn_list_apply_t, cl_pfn_list_find_t +* +* Initialization/Destruction: +* cl_list_construct, cl_list_init, cl_list_destroy +* +* Iteration: +* cl_list_next, cl_list_prev, cl_list_head, cl_list_tail, +* cl_list_end +* +* Manipulation: +* cl_list_insert_head, cl_list_insert_tail, +* cl_list_insert_array_head, cl_list_insert_array_tail, +* cl_list_insert_prev, cl_list_insert_next, +* cl_list_remove_head, cl_list_remove_tail, +* cl_list_remove_object, cl_list_remove_item, cl_list_remove_all +* +* Search: +* cl_is_object_in_list, cl_list_find_from_head, cl_list_find_from_tail, +* cl_list_apply_func +* +* Attributes: +* cl_list_count, cl_is_list_empty, cl_is_list_inited +*********/ +/****s* Component Library: List/cl_list_t +* NAME +* cl_list_t +* +* DESCRIPTION +* List structure. +* +* The cl_list_t structure should be treated as opaque and should be +* manipulated only through the provided functions. 
+* +* SYNOPSIS +*/ +typedef struct _cl_list { + cl_qlist_t list; + cl_qpool_t list_item_pool; +} cl_list_t; +/* +* FIELDS +* list +* Quick list of items stored in the list. +* +* list_item_pool +* Quick pool of list objects for storing objects in the quick list. +* +* SEE ALSO +* List +*********/ + +/****d* Component Library: List/cl_list_iterator_t +* NAME +* cl_list_iterator_t +* +* DESCRIPTION +* Iterator type used to walk a list. +* +* SYNOPSIS +*/ +typedef const cl_list_item_t *cl_list_iterator_t; +/* +* NOTES +* The iterator should be treated as opaque to prevent corrupting the list. +* +* SEE ALSO +* List, cl_list_head, cl_list_tail, cl_list_next, cl_list_prev, +* cl_list_obj +*********/ + +/****d* Component Library: List/cl_pfn_list_apply_t +* NAME +* cl_pfn_list_apply_t +* +* DESCRIPTION +* The cl_pfn_list_apply_t function type defines the prototype for functions +* used to iterate objects in a list. +* +* SYNOPSIS +*/ +typedef void + (*cl_pfn_list_apply_t) (IN void *const p_object, IN void *context); +/* +* PARAMETERS +* p_object +* [in] Pointer to an object stored in a list. +* +* context +* [in] Context provided in a call to cl_list_apply_func. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* This function type is provided as function prototype reference for the +* function provided by users as a parameter to the cl_list_apply_func +* function. +* +* SEE ALSO +* List, cl_list_apply_func +*********/ + +/****d* Component Library: List/cl_pfn_list_find_t +* NAME +* cl_pfn_list_find_t +* +* DESCRIPTION +* The cl_pfn_list_find_t function type defines the prototype for functions +* used to find objects in a list. +* +* SYNOPSIS +*/ +typedef cl_status_t + (*cl_pfn_list_find_t) (IN const void *const p_object, IN void *context); +/* +* PARAMETERS +* p_object +* [in] Pointer to an object stored in a list. +* +* context +* [in] Context provided in a call to cl_list_find_from_head or cl_list_find_from_tail. 
+* +* RETURN VALUES +* Return CL_SUCCESS if the desired item was found. This stops list iteration. +* +* Return CL_NOT_FOUND to continue the list iteration. +* +* NOTES +* This function type is provided as function prototype reference for the +* function provided by users as a parameter to the cl_list_find_from_head +* and cl_list_find_from_tail functions. +* +* SEE ALSO +* List, cl_list_find_from_head, cl_list_find_from_tail +*********/ + +/****f* Component Library: List/cl_list_construct +* NAME +* cl_list_construct +* +* DESCRIPTION +* The cl_list_construct function constructs a list. +* +* SYNOPSIS +*/ +void cl_list_construct(IN cl_list_t * const p_list); +/* +* PARAMETERS +* p_list +* [in] Pointer to cl_list_t object whose state to initialize. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling cl_list_init, cl_list_destroy and cl_is_list_inited. +* +* Calling cl_list_construct is a prerequisite to calling any other +* list function except cl_list_init. +* +* SEE ALSO +* List, cl_list_init, cl_list_destroy, cl_is_list_inited +*********/ + +/****f* Component Library: List/cl_is_list_inited +* NAME +* cl_is_list_inited +* +* DESCRIPTION +* The cl_is_list_inited function returns whether a list was +* initialized successfully. +* +* SYNOPSIS +*/ +static inline boolean_t cl_is_list_inited(IN const cl_list_t * const p_list) +{ + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list); + /* + * The pool is the last thing initialized. If it is initialized, the + * list is initialized too. + */ + return (cl_is_qpool_inited(&p_list->list_item_pool)); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure whose initialization state +* to check. +* +* RETURN VALUES +* TRUE if the list was initialized successfully. +* +* FALSE otherwise. +* +* NOTES +* Allows checking the state of a list to determine if invoking +* member functions is appropriate. 
+* +* SEE ALSO +* List +*********/ + +/****f* Component Library: List/cl_list_init +* NAME +* cl_list_init +* +* DESCRIPTION +* The cl_list_init function initializes a list for use. +* +* SYNOPSIS +*/ +cl_status_t +cl_list_init(IN cl_list_t * const p_list, IN const size_t min_items); +/* +* PARAMETERS +* p_list +* [in] Pointer to cl_list_t structure to initialize. +* +* min_items +* [in] Minimum number of items that can be stored. All necessary +* allocations to allow storing the minimum number of items is performed +* at initialization time. +* +* RETURN VALUES +* CL_SUCCESS if the list was initialized successfully. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory for initialization. +* +* NOTES +* The list will always be able to store at least as many items as specified +* by the min_items parameter. +* +* SEE ALSO +* List, cl_list_construct, cl_list_destroy, cl_list_insert_head, +* cl_list_insert_tail, cl_list_remove_head, cl_list_remove_tail +*********/ + +/****f* Component Library: List/cl_list_destroy +* NAME +* cl_list_destroy +* +* DESCRIPTION +* The cl_list_destroy function destroys a list. +* +* SYNOPSIS +*/ +void cl_list_destroy(IN cl_list_t * const p_list); +/* +* PARAMETERS +* p_list +* [in] Pointer to cl_list_t structure to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* cl_list_destroy does not affect any of the objects stored in the list, +* but does release all memory allocated internally. Further operations +* should not be attempted on the list after cl_list_destroy is invoked. +* +* This function should only be called after a call to cl_list_construct +* or cl_list_init. +* +* In debug builds, cl_list_destroy asserts if the list is not empty. +* +* SEE ALSO +* List, cl_list_construct, cl_list_init +*********/ + +/****f* Component Library: List/cl_is_list_empty +* NAME +* cl_is_list_empty +* +* DESCRIPTION +* The cl_is_list_empty function returns whether a list is empty. 
+* +* SYNOPSIS +*/ +static inline boolean_t cl_is_list_empty(IN const cl_list_t * const p_list) +{ + CL_ASSERT(p_list); + CL_ASSERT(cl_is_qpool_inited(&p_list->list_item_pool)); + return (cl_is_qlist_empty(&p_list->list)); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure. +* +* RETURN VALUES +* TRUE if the specified list is empty. +* +* FALSE otherwise. +* +* SEE ALSO +* List, cl_list_count, cl_list_remove_all +*********/ + +/****f* Component Library: List/cl_list_insert_head +* NAME +* cl_list_insert_head +* +* DESCRIPTION +* The cl_list_insert_head function inserts an object at the head of a list. +* +* SYNOPSIS +*/ +static inline cl_status_t +cl_list_insert_head(IN cl_list_t * const p_list, IN const void *const p_object) +{ + cl_pool_obj_t *p_pool_obj; + + CL_ASSERT(p_list); + CL_ASSERT(cl_is_qpool_inited(&p_list->list_item_pool)); + + /* Get a list item to add to the list. */ + p_pool_obj = (cl_pool_obj_t *) cl_qpool_get(&p_list->list_item_pool); + if (!p_pool_obj) + return (CL_INSUFFICIENT_MEMORY); + + p_pool_obj->p_object = p_object; + cl_qlist_insert_head(&p_list->list, &p_pool_obj->pool_item.list_item); + return (CL_SUCCESS); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure into which to insert the object. +* +* p_object +* [in] Pointer to an object to insert into the list. +* +* RETURN VALUES +* CL_SUCCESS if the insertion was successful. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory for the insertion. +* +* NOTES +* Inserts the specified object at the head of the list. List insertion +* operations are guaranteed to work for the minimum number of items as +* specified in cl_list_init by the min_items parameter. 
+* +* SEE ALSO +* List, cl_list_insert_tail, cl_list_insert_array_head, +* cl_list_insert_array_tail, cl_list_insert_prev, cl_list_insert_next, +* cl_list_remove_head +*********/ + +/****f* Component Library: List/cl_list_insert_tail +* NAME +* cl_list_insert_tail +* +* DESCRIPTION +* The cl_list_insert_tail function inserts an object at the tail of a list. +* +* SYNOPSIS +*/ +static inline cl_status_t +cl_list_insert_tail(IN cl_list_t * const p_list, IN const void *const p_object) +{ + cl_pool_obj_t *p_pool_obj; + + CL_ASSERT(p_list); + CL_ASSERT(cl_is_qpool_inited(&p_list->list_item_pool)); + + /* Get a list item to add to the list. */ + p_pool_obj = (cl_pool_obj_t *) cl_qpool_get(&p_list->list_item_pool); + if (!p_pool_obj) + return (CL_INSUFFICIENT_MEMORY); + + p_pool_obj->p_object = p_object; + cl_qlist_insert_tail(&p_list->list, &p_pool_obj->pool_item.list_item); + return (CL_SUCCESS); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure into which to insert the object. +* +* p_object +* [in] Pointer to an object to insert into the list. +* +* RETURN VALUES +* CL_SUCCESS if the insertion was successful. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory for the insertion. +* +* NOTES +* Inserts the specified object at the tail of the list. List insertion +* operations are guaranteed to work for the minimum number of items as +* specified in cl_list_init by the min_items parameter. +* +* SEE ALSO +* List, cl_list_insert_head, cl_list_insert_array_head, +* cl_list_insert_array_tail, cl_list_insert_prev, cl_list_insert_next, +* cl_list_remove_tail +*********/ + +/****f* Component Library: List/cl_list_insert_array_head +* NAME +* cl_list_insert_array_head +* +* DESCRIPTION: +* The cl_list_insert_array_head function inserts an array of objects +* at the head of a list. 
+* +* SYNOPSIS +*/ +cl_status_t +cl_list_insert_array_head(IN cl_list_t * const p_list, + IN const void *const p_array, + IN uint32_t item_count, IN const uint32_t item_size); +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure into which to insert the objects. +* +* p_array +* [in] Pointer to the first object in an array. +* +* item_count +* [in] Number of objects in the array. +* +* item_size +* [in] Size of the objects added to the list. This is the stride in the +* array from one object to the next. +* +* RETURN VALUES +* CL_SUCCESS if the insertion was successful. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory for the insertion. +* +* NOTES +* Inserts all objects in the array to the head of the list, preserving the +* ordering of the objects. If not successful, no items are added. +* List insertion operations are guaranteed to work for the minimum number +* of items as specified in cl_list_init by the min_items parameter. +* +* SEE ALSO +* List, cl_list_insert_array_tail, cl_list_insert_head, cl_list_insert_tail, +* cl_list_insert_prev, cl_list_insert_next +*********/ + +/****f* Component Library: List/cl_list_insert_array_tail +* NAME +* cl_list_insert_array_tail +* +* DESCRIPTION +* The cl_list_insert_array_tail function inserts an array of objects +* at the tail of a list. +* +* SYNOPSIS +*/ +cl_status_t +cl_list_insert_array_tail(IN cl_list_t * const p_list, + IN const void *const p_array, + IN uint32_t item_count, IN const uint32_t item_size); +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure into which to insert the objects. +* +* p_array +* [in] Pointer to the first object in an array. +* +* item_count +* [in] Number of objects in the array. +* +* item_size +* [in] Size of the objects added to the list. This is the stride in the +* array from one object to the next. +* +* RETURN VALUES +* CL_SUCCESS if the insertion was successful. 
+* +* CL_INSUFFICIENT_MEMORY if there was not enough memory for the insertion. +* +* NOTES +* Inserts all objects in the array to the tail of the list, preserving the +* ordering of the objects. If not successful, no items are added. +* List insertion operations are guaranteed to work for the minimum number +* of items as specified in cl_list_init by the min_items parameter. +* +* SEE ALSO +* List, cl_list_insert_array_head, cl_list_insert_head, cl_list_insert_tail, +* cl_list_insert_prev, cl_list_insert_next +*********/ + +/****f* Component Library: List/cl_list_insert_next +* NAME +* cl_list_insert_next +* +* DESCRIPTION +* The cl_list_insert_next function inserts an object in a list after +* the object associated with a given iterator. +* +* SYNOPSIS +*/ +static inline cl_status_t +cl_list_insert_next(IN cl_list_t * const p_list, + IN cl_list_iterator_t iterator, + IN const void *const p_object) +{ + cl_pool_obj_t *p_pool_obj; + + CL_ASSERT(p_list); + CL_ASSERT(cl_is_qpool_inited(&p_list->list_item_pool)); + + /* Get a list item to add to the list. */ + p_pool_obj = (cl_pool_obj_t *) cl_qpool_get(&p_list->list_item_pool); + if (!p_pool_obj) + return (CL_INSUFFICIENT_MEMORY); + + p_pool_obj->p_object = p_object; + cl_qlist_insert_next(&p_list->list, (cl_list_item_t *) iterator, + &p_pool_obj->pool_item.list_item); + return (CL_SUCCESS); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure into which to insert the object. +* +* iterator +* [in] cl_list_iterator_t returned by a previous call to cl_list_head, +* cl_list_tail, cl_list_next, or cl_list_prev. +* +* p_object +* [in] Pointer to an object to insert into the list. +* +* RETURN VALUES +* CL_SUCCESS if the insertion was successful. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory for the insertion. 
+* +* SEE ALSO +* List, cl_list_insert_prev, cl_list_insert_head, cl_list_insert_tail, +* cl_list_insert_array_head, cl_list_insert_array_tail +*********/ + +/****f* Component Library: List/cl_list_insert_prev +* NAME +* cl_list_insert_prev +* +* DESCRIPTION +* The cl_list_insert_prev function inserts an object in a list before +* the object associated with a given iterator. +* +* SYNOPSIS +*/ +static inline cl_status_t +cl_list_insert_prev(IN cl_list_t * const p_list, + IN cl_list_iterator_t iterator, + IN const void *const p_object) +{ + cl_pool_obj_t *p_pool_obj; + + CL_ASSERT(p_list); + CL_ASSERT(cl_is_qpool_inited(&p_list->list_item_pool)); + + /* Get a list item to add to the list. */ + p_pool_obj = (cl_pool_obj_t *) cl_qpool_get(&p_list->list_item_pool); + if (!p_pool_obj) + return (CL_INSUFFICIENT_MEMORY); + + p_pool_obj->p_object = p_object; + cl_qlist_insert_prev(&p_list->list, (cl_list_item_t *) iterator, + &p_pool_obj->pool_item.list_item); + return (CL_SUCCESS); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure into which to insert the object. +* +* iterator +* [in] cl_list_iterator_t returned by a previous call to cl_list_head, +* cl_list_tail, cl_list_next, or cl_list_prev. +* +* p_object +* [in] Pointer to an object to insert into the list. +* +* RETURN VALUES +* CL_SUCCESS if the insertion was successful. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory for the insertion. +* +* SEE ALSO +* List, cl_list_insert_next, cl_list_insert_head, cl_list_insert_tail, +* cl_list_insert_array_head, cl_list_insert_array_tail +*********/ + +/****f* Component Library: List/cl_list_remove_head +* NAME +* cl_list_remove_head +* +* DESCRIPTION +* The cl_list_remove_head function removes an object from the head of a list. 
+* +* SYNOPSIS +*/ +static inline void *cl_list_remove_head(IN cl_list_t * const p_list) +{ + cl_pool_obj_t *p_pool_obj; + void *p_obj; + + CL_ASSERT(p_list); + CL_ASSERT(cl_is_qpool_inited(&p_list->list_item_pool)); + + /* See if the list is empty. */ + if (cl_is_qlist_empty(&p_list->list)) + return (NULL); + + /* Get the item at the head of the list. */ + p_pool_obj = (cl_pool_obj_t *) cl_qlist_remove_head(&p_list->list); + + p_obj = (void *)p_pool_obj->p_object; + /* Place the pool item back into the pool. */ + cl_qpool_put(&p_list->list_item_pool, &p_pool_obj->pool_item); + + return (p_obj); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure from which to remove an object. +* +* RETURN VALUES +* Returns the pointer to the object formerly at the head of the list. +* +* NULL if the list was empty. +* +* SEE ALSO +* List, cl_list_remove_tail, cl_list_remove_all, cl_list_remove_object, +* cl_list_remove_item, cl_list_insert_head +*********/ + +/****f* Component Library: List/cl_list_remove_tail +* NAME +* cl_list_remove_tail +* +* DESCRIPTION +* The cl_list_remove_tail function removes an object from the tail of a list. +* +* SYNOPSIS +*/ +static inline void *cl_list_remove_tail(IN cl_list_t * const p_list) +{ + cl_pool_obj_t *p_pool_obj; + void *p_obj; + + CL_ASSERT(p_list); + CL_ASSERT(cl_is_qpool_inited(&p_list->list_item_pool)); + + /* See if the list is empty. */ + if (cl_is_qlist_empty(&p_list->list)) + return (NULL); + + /* Get the item at the tail of the list. */ + p_pool_obj = (cl_pool_obj_t *) cl_qlist_remove_tail(&p_list->list); + + /* Read the object pointer before the item is handed back to the pool. */ + p_obj = (void *)p_pool_obj->p_object; + /* Place the pool item back into the pool. */ + cl_qpool_put(&p_list->list_item_pool, &p_pool_obj->pool_item); + + return (p_obj); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure from which to remove an object. +* +* RETURN VALUES +* Returns the pointer to the object formerly at the tail of the list. +* +* NULL if the list was empty.
+* +* SEE ALSO +* List, cl_list_remove_head, cl_list_remove_all, cl_list_remove_object, +* cl_list_remove_item, cl_list_insert_head +*********/ + +/****f* Component Library: List/cl_list_remove_all +* NAME +* cl_list_remove_all +* +* DESCRIPTION +* The cl_list_remove_all function removes all objects from a list, +* leaving it empty. +* +* SYNOPSIS +*/ +static inline void cl_list_remove_all(IN cl_list_t * const p_list) +{ + CL_ASSERT(p_list); + CL_ASSERT(cl_is_qpool_inited(&p_list->list_item_pool)); + + /* Return all the list items to the pool. */ + cl_qpool_put_list(&p_list->list_item_pool, &p_list->list); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure from which to remove all objects. +* +* RETURN VALUE +* This function does not return a value. +* +* SEE ALSO +* List, cl_list_remove_head, cl_list_remove_tail, cl_list_remove_object, +* cl_list_remove_item +*********/ + +/****f* Component Library: List/cl_list_remove_object +* NAME +* cl_list_remove_object +* +* DESCRIPTION +* The cl_list_remove_object function removes a specific object from a list. +* +* SYNOPSIS +*/ +cl_status_t +cl_list_remove_object(IN cl_list_t * const p_list, + IN const void *const p_object); +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure from which to remove the object. +* +* p_object +* [in] Pointer to an object to remove from the list. +* +* RETURN VALUES +* CL_SUCCESS if the object was removed. +* +* CL_NOT_FOUND if the object was not found in the list. +* +* NOTES +* Removes the first occurrence of an object from a list. +* +* SEE ALSO +* List, cl_list_remove_item, cl_list_remove_head, cl_list_remove_tail, +* cl_list_remove_all +*********/ + +/****f* Component Library: List/cl_list_remove_item +* NAME +* cl_list_remove_item +* +* DESCRIPTION +* The cl_list_remove_item function removes the object associated with a +* given iterator from a list.
+* +* SYNOPSIS +*/ +static inline void +cl_list_remove_item(IN cl_list_t * const p_list, IN cl_list_iterator_t iterator) +{ + CL_ASSERT(p_list); + CL_ASSERT(cl_is_qpool_inited(&p_list->list_item_pool)); + + cl_qlist_remove_item(&p_list->list, (cl_list_item_t *) iterator); + + /* Place the list item back into the pool. */ + cl_qpool_put(&p_list->list_item_pool, (cl_pool_item_t *) iterator); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure from which to remove the item. +* +* iterator +* [in] cl_list_iterator_t returned by a previous call to cl_list_head, +* cl_list_tail, cl_list_next, or cl_list_prev. +* +* RETURN VALUE +* This function does not return a value. +* +* SEE ALSO +* List, cl_list_remove_object, cl_list_remove_head, cl_list_remove_tail, +* cl_list_remove_all +*********/ + +/****f* Component Library: List/cl_is_object_in_list +* NAME +* cl_is_object_in_list +* +* DESCRIPTION +* The cl_is_object_in_list function returns whether an object +* is stored in a list. +* +* SYNOPSIS +*/ +boolean_t +cl_is_object_in_list(IN const cl_list_t * const p_list, + IN const void *const p_object); +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure in which to look for the object. +* +* p_object +* [in] Pointer to an object stored in a list. +* +* RETURN VALUES +* TRUE if p_object was found in the list. +* +* FALSE otherwise. +* +* SEE ALSO +* List +*********/ + +/****f* Component Library: List/cl_list_end +* NAME +* cl_list_end +* +* DESCRIPTION +* The cl_list_end function returns the list iterator for +* the end of a list. +* +* SYNOPSIS +*/ +static inline cl_list_iterator_t cl_list_end(IN const cl_list_t * const p_list) +{ + CL_ASSERT(p_list); + CL_ASSERT(cl_is_qpool_inited(&p_list->list_item_pool)); + + return (cl_qlist_end(&p_list->list)); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure for which the iterator for the +* end of the list is to be returned.
+* +* RETURN VALUE +* cl_list_iterator_t for the end of the list. +* +* NOTES +* Use cl_list_obj to retrieve the object associated with the +* returned cl_list_iterator_t. +* +* SEE ALSO +* List, cl_list_head, cl_list_tail, cl_list_next, cl_list_prev, +* cl_list_obj +*********/ + +/****f* Component Library: List/cl_list_head +* NAME +* cl_list_head +* +* DESCRIPTION +* The cl_list_head function returns a list iterator for +* the head of a list. +* +* SYNOPSIS +*/ +static inline cl_list_iterator_t cl_list_head(IN const cl_list_t * const p_list) +{ + CL_ASSERT(p_list); + CL_ASSERT(cl_is_qpool_inited(&p_list->list_item_pool)); + + return (cl_qlist_head(&p_list->list)); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure for which the iterator for the +* object at the head is to be returned. +* +* RETURN VALUES +* cl_list_iterator_t for the head of the list. +* +* cl_list_iterator_t for the end of the list if the list is empty. +* +* NOTES +* Use cl_list_obj to retrieve the object associated with the +* returned cl_list_iterator_t. +* +* SEE ALSO +* List, cl_list_tail, cl_list_next, cl_list_prev, cl_list_end, +* cl_list_obj +*********/ + +/****f* Component Library: List/cl_list_tail +* NAME +* cl_list_tail +* +* DESCRIPTION +* The cl_list_tail function returns a list iterator for +* the tail of a list. +* +* SYNOPSIS +*/ +static inline cl_list_iterator_t cl_list_tail(IN const cl_list_t * const p_list) +{ + CL_ASSERT(p_list); + CL_ASSERT(cl_is_qpool_inited(&p_list->list_item_pool)); + + return (cl_qlist_tail(&p_list->list)); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure for which the iterator for the +* object at the tail is to be returned. +* +* RETURN VALUES +* cl_list_iterator_t for the tail of the list. +* +* cl_list_iterator_t for the end of the list if the list is empty. +* +* NOTES +* Use cl_list_obj to retrieve the object associated with the +* returned cl_list_iterator_t.
+* +* SEE ALSO +* List, cl_list_head, cl_list_next, cl_list_prev, cl_list_end, +* cl_list_obj +*********/ + +/****f* Component Library: List/cl_list_next +* NAME +* cl_list_next +* +* DESCRIPTION +* The cl_list_next function returns a list iterator for the object stored +* in a list after the object associated with a given list iterator. +* +* SYNOPSIS +*/ +static inline cl_list_iterator_t cl_list_next(IN cl_list_iterator_t iterator) +{ + CL_ASSERT(iterator); + + return (cl_qlist_next(iterator)); +} + +/* +* PARAMETERS +* iterator +* [in] cl_list_iterator_t returned by a previous call to cl_list_head, +* cl_list_tail, cl_list_next, or cl_list_prev. +* +* RETURN VALUES +* cl_list_iterator_t for the object following the object associated with +* the list iterator specified by the iterator parameter. +* +* cl_list_iterator_t for the end of the list if the list is empty. +* +* NOTES +* Use cl_list_obj to retrieve the object associated with the +* returned cl_list_iterator_t. +* +* SEE ALSO +* List, cl_list_prev, cl_list_head, cl_list_tail, cl_list_end, +* cl_list_obj +*********/ + +/****f* Component Library: List/cl_list_prev +* NAME +* cl_list_prev +* +* DESCRIPTION +* The cl_list_prev function returns a list iterator for the object stored +* in a list before the object associated with a given list iterator. +* +* SYNOPSIS +*/ +static inline cl_list_iterator_t cl_list_prev(IN cl_list_iterator_t iterator) +{ + CL_ASSERT(iterator); + + return (cl_qlist_prev(iterator)); +} + +/* +* PARAMETERS +* iterator +* [in] cl_list_iterator_t returned by a previous call to cl_list_head, +* cl_list_tail, cl_list_next, or cl_list_prev. +* +* RETURN VALUES +* cl_list_iterator_t for the object preceding the object associated with +* the list iterator specified by the iterator parameter. +* +* cl_list_iterator_t for the end of the list if the list is empty. +* +* NOTES +* Use cl_list_obj to retrieve the object associated with the +* returned cl_list_iterator_t. 
+* +* SEE ALSO +* List, cl_list_next, cl_list_head, cl_list_tail, cl_list_end, +* cl_list_obj +*********/ + +/****f* Component Library: List/cl_list_obj +* NAME +* cl_list_obj +* +* DESCRIPTION +* The cl_list_obj function returns the object associated +* with a list iterator. +* +* SYNOPSIS +*/ +static inline void *cl_list_obj(IN cl_list_iterator_t iterator) +{ + CL_ASSERT(iterator); + + return ((void *)((cl_pool_obj_t *) iterator)->p_object); +} + +/* +* PARAMETERS +* iterator +* [in] cl_list_iterator_t returned by a previous call to cl_list_head, +* cl_list_tail, cl_list_next, or cl_list_prev whose object is requested. +* +* RETURN VALUE +* Pointer to the object associated with the list iterator specified +* by the iterator parameter. +* +* SEE ALSO +* List, cl_list_head, cl_list_tail, cl_list_next, cl_list_prev +*********/ + +/****f* Component Library: List/cl_list_find_from_head +* NAME +* cl_list_find_from_head +* +* DESCRIPTION +* The cl_list_find_from_head function uses a specified function +* to search for an object starting from the head of a list. +* +* SYNOPSIS +*/ +cl_list_iterator_t +cl_list_find_from_head(IN const cl_list_t * const p_list, + IN cl_pfn_list_find_t pfn_func, + IN const void *const context); +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure to search. +* +* pfn_func +* [in] Function invoked to determine if a match was found. +* See the cl_pfn_list_find_t function type declaration for details +* about the callback function. +* +* context +* [in] Value to pass to the callback functions to provide context. +* +* RETURN VALUES +* Returns the iterator for the object if found. +* +* Returns the iterator for the list end otherwise. +* +* NOTES +* cl_list_find_from_head does not remove the found object from +* the list. The iterator for the object is returned when the function +* provided by the pfn_func parameter returns CL_SUCCESS. 
The function +* specified by the pfn_func parameter must not perform any list +* operations as these would corrupt the list. +* +* SEE ALSO +* List, cl_list_find_from_tail, cl_list_apply_func_t, +* cl_pfn_list_find_t +*********/ + +/****f* Component Library: List/cl_list_find_from_tail +* NAME +* cl_list_find_from_tail +* +* DESCRIPTION +* The cl_list_find_from_tail function uses a specified function +* to search for an object starting from the tail of a list. +* +* SYNOPSIS +*/ +cl_list_iterator_t +cl_list_find_from_tail(IN const cl_list_t * const p_list, + IN cl_pfn_list_find_t pfn_func, + IN const void *const context); +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure to search. +* +* pfn_func +* [in] Function invoked to determine if a match was found. +* See the cl_pfn_list_find_t function type declaration for details +* about the callback function. +* +* context +* [in] Value to pass to the callback functions to provide context. +* +* RETURN VALUES +* Returns the iterator for the object if found. +* +* Returns the iterator for the list end otherwise. +* +* NOTES +* cl_list_find_from_tail does not remove the found object from +* the list. The iterator for the object is returned when the function +* provided by the pfn_func parameter returns CL_SUCCESS. The function +* specified by the pfn_func parameter must not perform any list +* operations as these would corrupt the list. +* +* SEE ALSO +* List, cl_list_find_from_head, cl_list_apply_func_t, +* cl_pfn_list_find_t +*********/ + +/****f* Component Library: List/cl_list_apply_func +* NAME +* cl_list_apply_func +* +* DESCRIPTION +* The cl_list_apply_func function executes a specified function for every +* object stored in a list. +* +* SYNOPSIS +*/ +void +cl_list_apply_func(IN const cl_list_t * const p_list, + IN cl_pfn_list_apply_t pfn_func, + IN const void *const context); +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure to iterate. 
+* +* pfn_func +* [in] Function invoked for every item in a list. +* See the cl_pfn_list_apply_t function type declaration for details +* about the callback function. +* +* context +* [in] Value to pass to the callback functions to provide context. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* cl_list_apply_func invokes the specified callback function for every +* object stored in the list, starting from the head. The function specified +* by the pfn_func parameter must not perform any list operations as these +* would corrupt the list. +* +* SEE ALSO +* List, cl_list_find_from_head, cl_list_find_from_tail, +* cl_pfn_list_apply_t +*********/ + +/****f* Component Library: List/cl_list_count +* NAME +* cl_list_count +* +* DESCRIPTION +* The cl_list_count function returns the number of objects stored in a list. +* +* SYNOPSIS +*/ +static inline size_t cl_list_count(IN const cl_list_t * const p_list) +{ + CL_ASSERT(p_list); + CL_ASSERT(cl_is_qpool_inited(&p_list->list_item_pool)); + + return (cl_qlist_count(&p_list->list)); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_list_t structure whose object to count. +* +* RETURN VALUES +* Number of objects stored in the specified list. +* +* SEE ALSO +* List +*********/ + +END_C_DECLS +#endif /* _CL_LIST_H_ */ diff --git a/include/complib/cl_log.h b/include/complib/cl_log.h new file mode 100644 index 0000000..45c7ec3 --- /dev/null +++ b/include/complib/cl_log.h @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of logging mechanisms. + */ + +#ifndef _CL_LOG_H_ +#define _CL_LOG_H_ + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/Log Provider +* NAME +* Log Provider +* +* DESCRIPTION +* The log provider allows users to log information in a system log instead of +* the console or debugger target. +**********/ +/****d* Component Library: Log Provider/cl_log_type_t +* NAME +* cl_log_type_t +* +* DESCRIPTION +* The cl_log_type_t enumerated type is used to differentiate between +* different types of log entries. 
+* +* SYNOPSIS +*/ +typedef enum _cl_log_type { + CL_LOG_INFO, + CL_LOG_WARN, + CL_LOG_ERROR +} cl_log_type_t; +/* +* VALUES +* CL_LOG_INFO +* Indicates a log entry is purely informational. +* +* CL_LOG_WARN +* Indicates a log entry is a warning but non-fatal. +* +* CL_LOG_ERROR +* Indicates a log entry is a fatal error. +* +* SEE ALSO +* Log Provider, cl_log_event +*********/ + +/****f* Component Library: Log Provider/cl_log_event +* NAME +* cl_log_event +* +* DESCRIPTION +* The cl_log_event function adds a new entry to the system log. +* +* SYNOPSIS +*/ +void +cl_log_event(IN const char *const name, + IN const cl_log_type_t type, + IN const char *const message, + IN const void *const p_data OPTIONAL, IN const uint32_t data_len); +/* +* PARAMETERS +* name +* [in] Pointer to an ANSI string containing the name of the source for +* the log entry. +* +* type +* [in] Defines the type of log entry to add to the system log. +* See the definition of cl_log_type_t for acceptable values. +* +* message +* [in] Pointer to an ANSI string containing the text for the log entry. +* The message should not be terminated with a new line, as the log +* provider appends a new line to all log entries. +* +* p_data +* [in] Optional pointer to data providing context for the log entry. +* At most 256 bytes of data can be successfully logged. +* +* data_len +* [in] Length of the buffer pointed to by the p_data parameter. Ignored +* if p_data is NULL. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* If the data length exceeds the maximum supported, the event is logged +* without its accompanying data. +* +* SEE ALSO +* Log Provider, cl_log_type_t +*********/ + +END_C_DECLS +#endif /* _CL_LOG_H_ */ diff --git a/include/complib/cl_map.h b/include/complib/cl_map.h new file mode 100644 index 0000000..5bf779c --- /dev/null +++ b/include/complib/cl_map.h @@ -0,0 +1,846 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. 
+ * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of map, a binary tree. + */ + +#ifndef _CL_MAP_H_ +#define _CL_MAP_H_ + +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/Map +* NAME +* Map +* +* DESCRIPTION +* Map implements a binary tree that stores user objects. 
Each item stored +* in a map has a unique 64-bit key (duplicates are not allowed). Map +* provides the ability to efficiently search for an item given a key. +* +* Map may allocate memory when inserting objects, and can therefore fail +* operations due to insufficient memory. Use quick map in situations +* where such insertion failures cannot be tolerated. +* +* Map is not thread safe, and users must provide serialization when adding +* and removing items from the map. +* +* The map functions operates on a cl_map_t structure which should be treated +* as opaque and should be manipulated only through the provided functions. +* +* SEE ALSO +* Types: +* cl_map_iterator_t +* +* Structures: +* cl_map_t, cl_map_item_t, cl_map_obj_t +* +* Item Manipulation: +* cl_map_obj, cl_map_key +* +* Initialization: +* cl_map_construct, cl_map_init, cl_map_destroy +* +* Iteration: +* cl_map_end, cl_map_head, cl_map_tail, cl_map_next, cl_map_prev +* +* Manipulation +* cl_map_insert, cl_map_get, cl_map_remove_item, cl_map_remove, +* cl_map_remove_all, cl_map_merge, cl_map_delta, cl_map_get_next +* +* Attributes: +* cl_map_count, cl_is_map_empty, cl_is_map_inited +*********/ +/****s* Component Library: Map/cl_map_t +* NAME +* cl_map_t +* +* DESCRIPTION +* Map structure. +* +* The cl_map_t structure should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct _cl_map { + cl_qmap_t qmap; + cl_qpool_t pool; +} cl_map_t; +/* +* FIELDS +* qmap +* Quick map object that maintains the map. +* +* pool +* Pool of cl_map_obj_t structures used to store user objects +* in the map. +* +* SEE ALSO +* Map, cl_map_obj_t +*********/ + +/****d* Component Library: Map/cl_map_iterator_t +* NAME +* cl_map_iterator_t +* +* DESCRIPTION +* Iterator type used to walk a map. +* +* SYNOPSIS +*/ +typedef const cl_map_item_t *cl_map_iterator_t; +/* +* NOTES +* The iterator should be treated as opaque to prevent corrupting the map.
+* +* SEE ALSO +* Map, cl_map_head, cl_map_tail, cl_map_next, cl_map_prev, cl_map_key +*********/ + +/****f* Component Library: Map/cl_map_count +* NAME +* cl_map_count +* +* DESCRIPTION +* The cl_map_count function returns the number of items stored +* in a map. +* +* SYNOPSIS +*/ +static inline size_t cl_map_count(IN const cl_map_t * const p_map) +{ + CL_ASSERT(p_map); + return (cl_qmap_count(&p_map->qmap)); +} + +/* +* PARAMETERS +* p_map +* [in] Pointer to a map whose item count to return. +* +* RETURN VALUE +* Returns the number of items stored in the map. +* +* SEE ALSO +* Map, cl_is_map_empty +*********/ + +/****f* Component Library: Map/cl_is_map_empty +* NAME +* cl_is_map_empty +* +* DESCRIPTION +* The cl_is_map_empty function returns whether a map is empty. +* +* SYNOPSIS +*/ +static inline boolean_t cl_is_map_empty(IN const cl_map_t * const p_map) +{ + CL_ASSERT(p_map); + return (cl_is_qmap_empty(&p_map->qmap)); +} + +/* +* PARAMETERS +* p_map +* [in] Pointer to a map to test for emptiness. +* +* RETURN VALUES +* TRUE if the map is empty. +* +* FALSE otherwise. +* +* SEE ALSO +* Map, cl_map_count, cl_map_remove_all +*********/ + +/****f* Component Library: Map/cl_map_key +* NAME +* cl_map_key +* +* DESCRIPTION +* The cl_map_key function retrieves the key value of a map item. +* +* SYNOPSIS +*/ +static inline uint64_t cl_map_key(IN const cl_map_iterator_t itor) +{ + return (cl_qmap_key(itor)); +} + +/* +* PARAMETERS +* itor +* [in] Iterator for the item whose key to return. +* +* RETURN VALUE +* Returns the 64-bit key value for the specified iterator. +* +* NOTES +* The iterator specified by the itor parameter must have been retrieved by +* a previous call to cl_map_head, cl_map_tail, cl_map_next, or cl_map_prev. +* +* The key value is set in a call to cl_map_insert.
+* +* SEE ALSO +* Map, cl_map_insert, cl_map_head, cl_map_tail, cl_map_next, cl_map_prev +*********/ + +/****f* Component Library: Map/cl_map_construct +* NAME +* cl_map_construct +* +* DESCRIPTION +* The cl_map_construct function constructs a map. +* +* SYNOPSIS +*/ +void cl_map_construct(IN cl_map_t * const p_map); +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_map_t structure to construct. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling cl_map_init, cl_map_destroy, and cl_is_map_inited. +* +* Calling cl_map_construct is a prerequisite to calling any other +* map function except cl_map_init. +* +* SEE ALSO +* Map, cl_map_init, cl_map_destroy, cl_is_map_inited +*********/ + +/****f* Component Library: Map/cl_is_map_inited +* NAME +* cl_is_map_inited +* +* DESCRIPTION +* The cl_is_map_inited function returns whether a map was +* successfully initialized. +* +* SYNOPSIS +*/ +static inline boolean_t cl_is_map_inited(IN const cl_map_t * const p_map) +{ + /* + * The map's pool of map items is the last thing initialized. + * We can therefore use it to test for initialization. + */ + return (cl_is_qpool_inited(&p_map->pool)); +} + +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_map_t structure whose initialization state +* to check. +* +* RETURN VALUES +* TRUE if the map was initialized successfully. +* +* FALSE otherwise. +* +* NOTES +* Allows checking the state of a map to determine if invoking +* member functions is appropriate. +* +* SEE ALSO +* Map +*********/ + +/****f* Component Library: Map/cl_map_init +* NAME +* cl_map_init +* +* DESCRIPTION +* The cl_map_init function initializes a map for use. +* +* SYNOPSIS +*/ +cl_status_t cl_map_init(IN cl_map_t * const p_map, IN const uint32_t min_items); +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_map_t structure to initialize. +* +* min_items +* [in] Minimum number of items that can be stored. 
All necessary +* allocations to allow storing the minimum number of items are +* performed at initialization time. +* +* RETURN VALUES +* CL_SUCCESS if the map was initialized successfully. +* +* NOTES +* Allows calling map manipulation functions. +* +* SEE ALSO +* Map, cl_map_destroy, cl_map_insert, cl_map_remove +*********/ + +/****f* Component Library: Map/cl_map_destroy +* NAME +* cl_map_destroy +* +* DESCRIPTION +* The cl_map_destroy function destroys a map. +* +* SYNOPSIS +*/ +void cl_map_destroy(IN cl_map_t * const p_map); +/* +* PARAMETERS +* p_map +* [in] Pointer to a map to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs any necessary cleanup of the specified map. Further +* operations should not be attempted on the map. cl_map_destroy does +* not affect any of the objects stored in the map. +* This function should only be called after a call to cl_map_construct. +* +* In debug builds, cl_map_destroy asserts that the map is empty. +* +* SEE ALSO +* Map, cl_map_construct, cl_map_init +*********/ + +/****f* Component Library: Map/cl_map_end +* NAME +* cl_map_end +* +* DESCRIPTION +* The cl_map_end function returns the iterator for the end of a map. +* +* SYNOPSIS +*/ +static inline cl_map_iterator_t cl_map_end(IN const cl_map_t * const p_map) +{ + CL_ASSERT(p_map); + return (cl_qmap_end(&p_map->qmap)); +} + +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_map_t structure whose end to return. +* +* RETURN VALUE +* Iterator for the end of the map. +* +* NOTES +* cl_map_end is useful for determining the validity of map items returned +* by cl_map_head, cl_map_tail, cl_map_next, or cl_map_prev. If the iterator +* returned by any of these functions compares equal to the end, the end of the map was +* encountered. +* When using cl_map_head or cl_map_tail, this condition indicates that +* the map is empty. 
+* +* SEE ALSO +* Map, cl_map_head, cl_map_tail, cl_map_next, cl_map_prev +*********/ + +/****f* Component Library: Map/cl_map_head +* NAME +* cl_map_head +* +* DESCRIPTION +* The cl_map_head function returns the map item with the lowest key +* value stored in a map. +* +* SYNOPSIS +*/ +static inline cl_map_iterator_t cl_map_head(IN const cl_map_t * const p_map) +{ + CL_ASSERT(p_map); + return (cl_qmap_head(&p_map->qmap)); +} + +/* +* PARAMETERS +* p_map +* [in] Pointer to a map whose item with the lowest key is returned. +* +* RETURN VALUES +* Iterator for the object with the lowest key in the map. +* +* Iterator for the map end if the map was empty. +* +* NOTES +* cl_map_head does not remove the object from the map. +* +* SEE ALSO +* Map, cl_map_tail, cl_map_next, cl_map_prev, cl_map_end +*********/ + +/****f* Component Library: Map/cl_map_tail +* NAME +* cl_map_tail +* +* DESCRIPTION +* The cl_map_tail function returns the map item with the highest key +* value stored in a map. +* +* SYNOPSIS +*/ +static inline cl_map_iterator_t cl_map_tail(IN const cl_map_t * const p_map) +{ + CL_ASSERT(p_map); + return (cl_qmap_tail(&p_map->qmap)); +} + +/* +* PARAMETERS +* p_map +* [in] Pointer to a map whose item with the highest key +* is returned. +* +* RETURN VALUES +* Iterator for the object with the highest key in the map. +* +* Iterator for the map end if the map was empty. +* +* NOTES +* cl_map_tail does not remove the object from the map. +* +* SEE ALSO +* Map, cl_map_head, cl_map_next, cl_map_prev, cl_map_end +*********/ + +/****f* Component Library: Map/cl_map_next +* NAME +* cl_map_next +* +* DESCRIPTION +* The cl_map_next function returns the map item with the next higher +* key value than a specified map item. +* +* SYNOPSIS +*/ +static inline cl_map_iterator_t cl_map_next(IN const cl_map_iterator_t itor) +{ + CL_ASSERT(itor); + return (cl_qmap_next(itor)); +} + +/* +* PARAMETERS +* itor +* [in] Iterator for an object in a map whose successor to return. 
+* +* RETURN VALUES +* Iterator for the object with the next higher key value in a map. +* +* Iterator for the map end if the specified object was the last item in +* the map. +* +* NOTES +* The iterator must have been retrieved by a previous call to cl_map_head, +* cl_map_tail, cl_map_next, or cl_map_prev. +* +* SEE ALSO +* Map, cl_map_head, cl_map_tail, cl_map_prev, cl_map_end +*********/ + +/****f* Component Library: Map/cl_map_prev +* NAME +* cl_map_prev +* +* DESCRIPTION +* The cl_map_prev function returns the map item with the next lower +* key value than a specified map item. +* +* SYNOPSIS +*/ +static inline cl_map_iterator_t cl_map_prev(IN const cl_map_iterator_t itor) +{ + CL_ASSERT(itor); + return (cl_qmap_prev(itor)); +} + +/* +* PARAMETERS +* itor +* [in] Iterator for an object in a map whose predecessor to return. +* +* RETURN VALUES +* Iterator for the object with the next lower key value in a map. +* +* Iterator for the map end if the specified object was the first item in +* the map. +* +* NOTES +* The iterator must have been retrieved by a previous call to cl_map_head, +* cl_map_tail, cl_map_next, or cl_map_prev. +* +* SEE ALSO +* Map, cl_map_head, cl_map_tail, cl_map_next, cl_map_end +*********/ + +/****f* Component Library: Map/cl_map_insert +* NAME +* cl_map_insert +* +* DESCRIPTION +* The cl_map_insert function inserts a map item into a map. +* +* SYNOPSIS +*/ +void *cl_map_insert(IN cl_map_t * const p_map, + IN const uint64_t key, IN const void *const p_object); +/* +* PARAMETERS +* p_map +* [in] Pointer to a map into which to add the item. +* +* key +* [in] Value to associate with the object. +* +* p_object +* [in] Pointer to an object to insert into the map. +* +* RETURN VALUES +* Pointer to the object in the map with the specified key after the call +* completes. +* +* NULL if there was not enough memory to insert the desired item. +* +* NOTES +* Insertion operations may cause the map to rebalance. 
+* +* If the map already contains an object with the specified key, +* that object will not be replaced and the pointer to that object is +* returned. +* +* SEE ALSO +* Map, cl_map_remove, cl_map_item_t +*********/ + +/****f* Component Library: Map/cl_map_get +* NAME +* cl_map_get +* +* DESCRIPTION +* The cl_map_get function returns the object associated with a key. +* +* SYNOPSIS +*/ +void *cl_map_get(IN const cl_map_t * const p_map, IN const uint64_t key); +/* +* PARAMETERS +* p_map +* [in] Pointer to a map from which to retrieve the object with +* the specified key. +* +* key +* [in] Key value used to search for the desired object. +* +* RETURN VALUES +* Pointer to the object with the desired key value. +* +* NULL if there was no item with the desired key value stored in +* the map. +* +* NOTES +* cl_map_get does not remove the item from the map. +* +* SEE ALSO +* Map, cl_map_remove, cl_map_get_next +*********/ + +/****f* Component Library: Map/cl_map_get_next +* NAME +* cl_map_get_next +* +* DESCRIPTION +* The cl_map_get_next function returns the first object associated with a +* key > the key specified. +* +* SYNOPSIS +*/ +void *cl_map_get_next(IN const cl_map_t * const p_map, IN const uint64_t key); +/* +* PARAMETERS +* p_map +* [in] Pointer to a map from which to retrieve the first object with +* a key > the specified key. +* +* key +* [in] Key value used to search for the desired object. +* +* RETURN VALUES +* Pointer to the first object with a key > the desired key value. +* +* NULL if there was no item with a key > the desired key +* value stored in the map. +* +* NOTES +* cl_map_get_next does not remove the item from the map. +* +* SEE ALSO +* Map, cl_map_remove, cl_map_get +*********/ + +/****f* Component Library: Map/cl_map_remove_item +* NAME +* cl_map_remove_item +* +* DESCRIPTION +* The cl_map_remove_item function removes the specified map item +* from a map. 
+* +* SYNOPSIS +*/ +void +cl_map_remove_item(IN cl_map_t * const p_map, IN const cl_map_iterator_t itor); +/* +* PARAMETERS +* p_map +* [in] Pointer to a map from which to remove the object associated +* with the specified iterator. +* +* itor +* [in] Iterator for an object to remove from its map. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Removes the object associated with the specified iterator from its map. +* +* The specified iterator is no longer valid after the call completes. +* +* The iterator must have been retrieved by a previous call to cl_map_head, +* cl_map_tail, cl_map_next, or cl_map_prev. +* +* SEE ALSO +* Map, cl_map_remove, cl_map_remove_all, cl_map_insert, cl_map_head, +* cl_map_tail, cl_map_next, cl_map_prev +*********/ + +/****f* Component Library: Map/cl_map_remove +* NAME +* cl_map_remove +* +* DESCRIPTION +* The cl_map_remove function removes the map item with the specified key +* from a map. +* +* SYNOPSIS +*/ +void *cl_map_remove(IN cl_map_t * const p_map, IN const uint64_t key); +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_map_t structure from which to remove the +* item with the specified key. +* +* key +* [in] Key value used to search for the object to remove. +* +* RETURN VALUES +* Pointer to the object associated with the specified key if +* it was found and removed. +* +* NULL if no object with the specified key exists in the map. +* +* SEE ALSO +* Map, cl_map_remove_item, cl_map_remove_all, cl_map_insert +*********/ + +/****f* Component Library: Map/cl_map_remove_all +* NAME +* cl_map_remove_all +* +* DESCRIPTION +* The cl_map_remove_all function removes all objects from a map, +* leaving it empty. +* +* SYNOPSIS +*/ +void cl_map_remove_all(IN cl_map_t * const p_map); +/* +* PARAMETERS +* p_map +* [in] Pointer to a map to empty. +* +* RETURN VALUE +* This function does not return a value. 
+* +* SEE ALSO +* Map, cl_map_remove, cl_map_remove_item +*********/ + +/****f* Component Library: Map/cl_map_obj +* NAME +* cl_map_obj +* +* DESCRIPTION +* The cl_map_obj function returns the object associated with an iterator. +* +* SYNOPSIS +*/ +static inline void *cl_map_obj(IN const cl_map_iterator_t itor) +{ + return (cl_qmap_obj(PARENT_STRUCT(itor, cl_map_obj_t, item))); +} + +/* +* PARAMETERS +* itor +* [in] Iterator whose object to return. +* +* RETURN VALUES +* Returns the value of the object pointer associated with the iterator. +* +* The iterator must have been retrieved by a previous call to cl_map_head, +* cl_map_tail, cl_map_next, or cl_map_prev. +* +* SEE ALSO +* Map, cl_map_head, cl_map_tail, cl_map_next, cl_map_prev +*********/ + +/****f* Component Library: Map/cl_map_merge +* NAME +* cl_map_merge +* +* DESCRIPTION +* The cl_map_merge function moves all items from one map to another, +* excluding duplicates. +* +* SYNOPSIS +*/ +cl_status_t +cl_map_merge(OUT cl_map_t * const p_dest_map, + IN OUT cl_map_t * const p_src_map); +/* +* PARAMETERS +* p_dest_map +* [out] Pointer to a cl_map_t structure to which items should be added. +* +* p_src_map +* [in/out] Pointer to a cl_map_t structure whose items to add +* to p_dest_map. +* +* RETURN VALUES +* CL_SUCCESS if the operation succeeded. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory for the operation +* to succeed. +* +* NOTES +* Items are evaluated based on their keys only. +* +* Upon return from cl_map_merge, the map referenced by p_src_map contains +* all duplicate items. +* +* SEE ALSO +* Map, cl_map_delta +*********/ + +/****f* Component Library: Map/cl_map_delta +* NAME +* cl_map_delta +* +* DESCRIPTION +* The cl_map_delta function computes the differences between two maps. 
+* +* SYNOPSIS +*/ +cl_status_t +cl_map_delta(IN OUT cl_map_t * const p_map1, + IN OUT cl_map_t * const p_map2, + OUT cl_map_t * const p_new, OUT cl_map_t * const p_old); +/* +* PARAMETERS +* p_map1 +* [in/out] Pointer to the first of two cl_map_t structures whose +* differences to compute. +* +* p_map2 +* [in/out] Pointer to the second of two cl_map_t structures whose +* differences to compute. +* +* p_new +* [out] Pointer to an empty cl_map_t structure that contains the +* items unique to p_map2 upon return from the function. +* +* p_old +* [out] Pointer to an empty cl_map_t structure that contains the +* items unique to p_map1 upon return from the function. +* +* RETURN VALUES +* CL_SUCCESS if the operation succeeded. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory for the operation +* to succeed. +* +* NOTES +* Items are evaluated based on their keys. Items that exist in both +* p_map1 and p_map2 remain in their respective maps. Items that +* exist only in p_map1 are moved to p_old. Likewise, items that exist only +* in p_map2 are moved to p_new. This function can be useful in evaluating +* changes between two maps. +* +* Both maps pointed to by p_new and p_old must be empty on input. +* +* Upon failure, all input maps are restored to their original state. +* +* SEE ALSO +* Map, cl_map_merge +*********/ + +END_C_DECLS +#endif /* _CL_MAP_H_ */ diff --git a/include/complib/cl_math.h b/include/complib/cl_math.h new file mode 100644 index 0000000..47489bd --- /dev/null +++ b/include/complib/cl_math.h @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Defines standard math related macros and functions. + */ + +#ifndef _CL_MATH_H_ +#define _CL_MATH_H_ + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****d* Component Library: Math/MAX +* NAME +* MAX +* +* DESCRIPTION +* The MAX macro returns the greater of two values. +* +* SYNOPSIS +* MAX( x, y ); +* +* PARAMETERS +* x +* [in] First of two values to compare. +* +* y +* [in] Second of two values to compare. +* +* RETURN VALUE +* Returns the greater of the x and y parameters. 
+* +* SEE ALSO +* MIN, ROUNDUP +*********/ +#ifndef MAX +#define MAX(x,y) ((x) > (y) ? (x) : (y)) +#endif +/****d* Component Library: Math/MIN +* NAME +* MIN +* +* DESCRIPTION +* The MIN macro returns the lesser of two values. +* +* SYNOPSIS +* MIN( x, y ); +* +* PARAMETERS +* x +* [in] First of two values to compare. +* +* y +* [in] Second of two values to compare. +* +* RETURN VALUE +* Returns the lesser of the x and y parameters. +* +* SEE ALSO +* MAX, ROUNDUP +*********/ +#ifndef MIN +#define MIN(x,y) ((x) < (y) ? (x) : (y)) +#endif +/****d* Component Library: Math/ROUNDUP +* NAME +* ROUNDUP +* +* DESCRIPTION +* The ROUNDUP macro rounds a value up to a given multiple. +* +* SYNOPSIS +* ROUNDUP( val, align ); +* +* PARAMETERS +* val +* [in] Value that is to be rounded up. The type of the value is +* indeterminate, but must be at most the size of a natural integer +* for the platform. +* +* align +* [in] Multiple to which the val parameter must be rounded up. +* +* RETURN VALUE +* Returns a value that is the input value specified by val rounded up to +* the nearest multiple of align. +* +* NOTES +* The value provided must be of a type at most the size of a natural integer. +*********/ +#ifndef ROUNDUP +#define ROUNDUP(val, align) \ + ((((val) / (align))*(align)) + (((val) % (align)) ? (align) : 0)) +#endif +END_C_DECLS +#endif /* _CL_MATH_H_ */ diff --git a/include/complib/cl_nodenamemap.h b/include/complib/cl_nodenamemap.h new file mode 100644 index 0000000..bfe290b --- /dev/null +++ b/include/complib/cl_nodenamemap.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2007 Lawrence Livermore National Lab + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _CL_NODE_NAME_MAP_H_ +#define _CL_NODE_NAME_MAP_H_ + +#include +#include +#include + +/* NOTE: this may modify the parameter "nodedesc". */ +char *clean_nodedesc(char *nodedesc); + +typedef struct _name_map_item { + cl_map_item_t item; + uint64_t guid; + char *name; +} name_map_item_t; + +typedef cl_qmap_t nn_map_t; + +/** + * Node name map interface. + * It is OK to pass NULL for the node_name_map[_fp] parameters. + */ +nn_map_t *open_node_name_map(const char *node_name_map); +void close_node_name_map(nn_map_t *map); +char *remap_node_name(nn_map_t *map, uint64_t target_guid, char *nodedesc); + /* NOTE: parameter "nodedesc" may be modified here. 
*/ +int parse_node_map(const char *file_name, + int (*create)(void *, uint64_t, char *), void *cxt); + +#endif /* _CL_NODE_NAME_MAP_H_ */ diff --git a/include/complib/cl_packoff.h b/include/complib/cl_packoff.h new file mode 100644 index 0000000..52ee381 --- /dev/null +++ b/include/complib/cl_packoff.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +/* + * Abstract: + * Turns off byte packing, which is necessary for passing information from + * system to system over a network to ensure no padding by the compiler has + * taken place. + */ + +#ifdef PACK_SUFFIX +#undef PACK_SUFFIX +#endif + +#ifdef _MSC_VER +#pragma pack (pop) +#endif diff --git a/include/complib/cl_packon.h b/include/complib/cl_packon.h new file mode 100644 index 0000000..e2e45b4 --- /dev/null +++ b/include/complib/cl_packon.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Turns on byte packing, which is necessary for passing information from + * system to system over a network to ensure no padding by the compiler has + * taken place. + */ + +/****h* Component Library/Structure Packing +* NAME +* Structure Packing +* +* DESCRIPTION +* The structure packing header files allow packing structures on byte +* boundaries. +* +* Structure packing should be used whenever a structure is transmitted +* between systems, as different platforms pad structures differently if +* they are not packed. Packing a structure that is not transmitted between +* systems can be detrimental to performance, as fields in the structure may +* not align properly for some platforms. Care must be taken when creating +* packed structures that the alignment rules for all platforms are followed. +* +* To pack a structure, include cl_packon.h before defining the structure, and +* include cl_packoff.h after the structure definition. Multiple structures +* can be packed between the two include statements if desired. +* +* The structure definition itself must use the PACK_SUFFIX keyword. +* +* EXAMPLE +* #include <complib/cl_packon.h> +* +* typedef struct _my_struct_t +* { +* uint64 large; +* uint32 medium; +* uint16 small; +* +* } PACK_SUFFIX my_struct_t; +* #include <complib/cl_packoff.h> +*********/ + +#ifndef PACK_SUFFIX +#define PACK_SUFFIX __attribute__((packed)) +#endif + +#ifdef _MSC_VER +#pragma pack (push, 1) +#endif diff --git a/include/complib/cl_passivelock.h b/include/complib/cl_passivelock.h new file mode 100644 index 0000000..0fad6ef --- /dev/null +++ b/include/complib/cl_passivelock.h @@ -0,0 +1,323 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. 
+ * Copyright (c) 2002-2010 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * This file contains the passive lock, which synchronizes passive threads. + * The passive lock allows multiple readers to access a resource + * simultaneously, exclusive from a single thread allowed writing. 
+ * Several writer threads are allowed - but only one can write at a given time + */ + +#ifndef _CL_PASSIVE_LOCK_H_ +#define _CL_PASSIVE_LOCK_H_ +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/Passive Lock +* NAME +* Passive Lock +* +* DESCRIPTION +* The Passive Lock provides synchronization between multiple threads that +* are sharing the lock with a single thread holding the lock exclusively. +* +* Passive lock works exclusively between threads and cannot be used in +* situations where the caller cannot be put into a waiting state. +* +* The passive lock functions operate on a cl_plock_t structure which should +* be treated as opaque and should be manipulated only through the provided +* functions. +* +* SEE ALSO +* Structures: +* cl_plock_t +* +* Initialization: +* cl_plock_construct, cl_plock_init, cl_plock_destroy +* +* Manipulation: +* cl_plock_acquire, cl_plock_excl_acquire, cl_plock_release +*********/ +/****s* Component Library: Passive Lock/cl_plock_t +* NAME +* cl_plock_t +* +* DESCRIPTION +* Passive Lock structure. +* +* The cl_plock_t structure should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct _cl_plock { + pthread_rwlock_t lock; + cl_state_t state; +} cl_plock_t; +/* +* FIELDS +* lock +* Pthread RWLOCK object +* +* state +* Records the current state of the lock, such as initialized, +* destroying, etc. +* +* SEE ALSO +* Passive Lock +*********/ + +/****f* Component Library: Passive Lock/cl_plock_construct +* NAME +* cl_plock_construct +* +* DESCRIPTION +* The cl_plock_construct function initializes the state of a +* passive lock. 
+* +* SYNOPSIS +*/ +static inline void cl_plock_construct(IN cl_plock_t * const p_lock) +{ + CL_ASSERT(p_lock); + + p_lock->state = CL_UNINITIALIZED; +} + +/* +* PARAMETERS +* p_lock +* [in] Pointer to a cl_plock_t structure whose state to initialize. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling cl_plock_destroy without first calling cl_plock_init. +* +* Calling cl_plock_construct is a prerequisite to calling any other +* passive lock function except cl_plock_init. +* +* SEE ALSO +* Passive Lock, cl_plock_init, cl_plock_destroy +*********/ + +/****f* Component Library: Passive Lock/cl_plock_destroy +* NAME +* cl_plock_destroy +* +* DESCRIPTION +* The cl_plock_destroy function performs any necessary cleanup +* of a passive lock. +* +* SYNOPSIS +*/ +static inline void cl_plock_destroy(IN cl_plock_t * const p_lock) +{ + CL_ASSERT(p_lock); + if (p_lock->state == CL_INITIALIZED) /* rwlock exists only after init */ + pthread_rwlock_destroy(&p_lock->lock); + p_lock->state = CL_DESTROYED; +} + +/* +* PARAMETERS +* p_lock +* [in] Pointer to a cl_plock_t structure to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* cl_plock_destroy destroys the underlying rwlock only when cl_plock_init +* succeeded; a lock that was merely constructed is just marked destroyed. +* +* This function must only be called if cl_plock_construct or +* cl_plock_init has been called. The passive lock must not be held +* when calling this function. +* +* SEE ALSO +* Passive Lock, cl_plock_construct, cl_plock_init +*********/ + +/****f* Component Library: Passive Lock/cl_plock_init +* NAME +* cl_plock_init +* +* DESCRIPTION +* The cl_plock_init function initializes a passive lock. 
+* +* SYNOPSIS +*/ +static inline cl_status_t cl_plock_init(IN cl_plock_t * const p_lock) +{ + cl_status_t status; + + CL_ASSERT(p_lock); + status = pthread_rwlock_init(&p_lock->lock, NULL); + if (status) + return CL_ERROR; + p_lock->state = CL_INITIALIZED; + return (CL_SUCCESS); +} + +/* +* PARAMETERS +* p_lock +* [in] Pointer to a cl_plock_t structure to initialize. +* +* RETURN VALUES +* CL_SUCCESS if the passive lock was initialized successfully. +* +* CL_ERROR otherwise. +* +* NOTES +* Allows calling cl_plock_acquire, cl_plock_release, +* cl_plock_excl_acquire +* +* SEE ALSO +* Passive Lock, cl_plock_construct, cl_plock_destroy, +* cl_plock_excl_acquire, cl_plock_acquire, cl_plock_release +*********/ + +/****f* Component Library: Passive Lock/cl_plock_acquire +* NAME +* cl_plock_acquire +* +* DESCRIPTION +* The cl_plock_acquire function acquires a passive lock for +* shared access. +* +* SYNOPSIS +*/ +static inline void cl_plock_acquire(IN cl_plock_t * const p_lock) +{ + cl_status_t __attribute__((unused)) status; + CL_ASSERT(p_lock); + CL_ASSERT(p_lock->state == CL_INITIALIZED); + + status = pthread_rwlock_rdlock(&p_lock->lock); + CL_ASSERT(status == 0); +} + +/* +* PARAMETERS +* p_lock +* [in] Pointer to a cl_plock_t structure to acquire. +* +* RETURN VALUE +* This function does not return a value. +* +* SEE ALSO +* Passive Lock, cl_plock_release, cl_plock_excl_acquire +*********/ + +/****f* Component Library: Passive Lock/cl_plock_excl_acquire +* NAME +* cl_plock_excl_acquire +* +* DESCRIPTION +* The cl_plock_excl_acquire function acquires exclusive access +* to a passive lock. +* +* SYNOPSIS +*/ +static inline void cl_plock_excl_acquire(IN cl_plock_t * const p_lock) +{ + cl_status_t __attribute__((unused)) status; + + CL_ASSERT(p_lock); + CL_ASSERT(p_lock->state == CL_INITIALIZED); + + status = pthread_rwlock_wrlock(&p_lock->lock); + CL_ASSERT(status == 0); +} + +/* +* PARAMETERS +* p_lock +* [in] Pointer to a cl_plock_t structure to acquire exclusively. 
+* +* RETURN VALUE +* This function does not return a value. +* +* SEE ALSO +* Passive Lock, cl_plock_release, cl_plock_acquire +*********/ + +/****f* Component Library: Passive Lock/cl_plock_release +* NAME +* cl_plock_release +* +* DESCRIPTION +* The cl_plock_release function releases a passive lock from +* shared or exclusive access. +* +* SYNOPSIS +*/ +static inline void cl_plock_release(IN cl_plock_t * const p_lock) +{ + cl_status_t __attribute__((unused)) status; + CL_ASSERT(p_lock); + CL_ASSERT(p_lock->state == CL_INITIALIZED); + + status = pthread_rwlock_unlock(&p_lock->lock); + CL_ASSERT(status == 0); +} + +/* +* PARAMETERS +* p_lock +* [in] Pointer to a cl_plock_t structure to release. +* +* RETURN VALUE +* This function does not return a value. +* +* SEE ALSO +* Passive Lock, cl_plock_acquire, cl_plock_excl_acquire +*********/ + +END_C_DECLS +#endif /* _CL_PASSIVE_LOCK_H_ */ diff --git a/include/complib/cl_pool.h b/include/complib/cl_pool.h new file mode 100644 index 0000000..f01de96 --- /dev/null +++ b/include/complib/cl_pool.h @@ -0,0 +1,561 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of the pool. + * The pool manages a pool of objects. + * The pool can grow to meet demand, limited only by system memory. + */ + +#ifndef _CL_POOL_H_ +#define _CL_POOL_H_ + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/Pool +* NAME +* Pool +* +* DESCRIPTION +* The pool provides a self-contained and self-sustaining pool +* of user defined objects. +* +* To aid in object oriented design, the pool provides the user +* the ability to specify callbacks that are invoked for each object for +* construction, initialization, and destruction. Constructor and destructor +* callback functions may not fail. +* +* A pool does not return memory to the system as the user returns +* objects to the pool. The only method of returning memory to the system is +* to destroy the pool. +* +* The Pool functions operate on a cl_pool_t structure which should be treated +* as opaque and should be manipulated only through the provided functions. 
+*
+* SEE ALSO
+*	Structures:
+*		cl_pool_t
+*
+*	Callbacks:
+*		cl_pfn_pool_init_t, cl_pfn_pool_dtor_t
+*
+*	Initialization/Destruction:
+*		cl_pool_construct, cl_pool_init, cl_pool_destroy
+*
+*	Manipulation:
+*		cl_pool_get, cl_pool_put, cl_pool_grow
+*
+*	Attributes:
+*		cl_is_pool_inited, cl_pool_count
+*********/
+/****d* Component Library: Pool/cl_pfn_pool_init_t
+* NAME
+*	cl_pfn_pool_init_t
+*
+* DESCRIPTION
+*	The cl_pfn_pool_init_t function type defines the prototype for
+*	functions used as initializers for objects being allocated by a
+*	pool.
+*
+* SYNOPSIS
+*/
+typedef cl_status_t
+    (*cl_pfn_pool_init_t) (IN void *const p_object, IN void *context);
+/*
+* PARAMETERS
+*	p_object
+*		[in] Pointer to an object to initialize.
+*
+*	context
+*		[in] Context provided in a call to cl_pool_init.
+*
+* RETURN VALUES
+*	Return CL_SUCCESS to indicate that initialization of the object
+*	was successful and initialization of further objects may continue.
+*
+*	Other cl_status_t values will be returned by cl_pool_init
+*	and cl_pool_grow.
+*
+* NOTES
+*	This function type is provided as function prototype reference for
+*	the function provided by the user as an optional parameter to the
+*	cl_pool_init function.
+*
+*	The initializer is invoked once per allocated object, allowing the user
+*	to trap initialization failures. Returning a status other than CL_SUCCESS
+*	aborts a grow operation, initiated either through cl_pool_init or
+*	cl_pool_grow, and causes the initiating function to fail.
+*	Any non-CL_SUCCESS status will be returned by the function that initiated
+*	the grow operation.
+*
+* SEE ALSO
+*	Pool, cl_pool_init, cl_pool_grow
+*********/
+
+/****d* Component Library: Pool/cl_pfn_pool_dtor_t
+* NAME
+*	cl_pfn_pool_dtor_t
+*
+* DESCRIPTION
+*	The cl_pfn_pool_dtor_t function type defines the prototype for
+*	functions used as destructors for objects being deallocated by a
+*	pool.
+*
+* SYNOPSIS
+*/
+typedef void
+    (*cl_pfn_pool_dtor_t) (IN void *const p_object, IN void *context);
+/*
+* PARAMETERS
+*	p_object
+*		[in] Pointer to an object to destruct.
+*
+*	context
+*		[in] Context provided in the call to cl_pool_init.
+*
+* RETURN VALUE
+*	This function does not return a value.
+*
+* NOTES
+*	This function type is provided as function prototype reference for
+*	the function provided by the user as an optional parameter to the
+*	cl_pool_init function.
+*
+*	The destructor is invoked once per allocated object, allowing the user
+*	to perform any necessary cleanup. Users should not attempt to deallocate
+*	the memory for the object, as the pool manages object
+*	allocation and deallocation.
+*
+* SEE ALSO
+*	Pool, cl_pool_init
+*********/
+
+/****s* Component Library: Pool/cl_pool_t
+* NAME
+*	cl_pool_t
+*
+* DESCRIPTION
+*	Pool structure.
+*
+*	The cl_pool_t structure should be treated as opaque and should be
+*	manipulated only through the provided functions.
+*
+* SYNOPSIS
+*/
+typedef struct _cl_pool {
+	cl_qcpool_t qcpool;
+	cl_pfn_pool_init_t pfn_init;
+	cl_pfn_pool_dtor_t pfn_dtor;
+	const void *context;
+} cl_pool_t;
+/*
+* FIELDS
+*	qcpool
+*		Quick composite pool that manages all objects.
+*
+*	pfn_init
+*		Pointer to the user's initializer callback, used by the pool
+*		to translate the quick composite pool's initializer callback to
+*		a pool initializer callback.
+*
+*	pfn_dtor
+*		Pointer to the user's destructor callback, used by the pool
+*		to translate the quick composite pool's destructor callback to
+*		a pool destructor callback.
+*
+*	context
+*		User-provided context for callback functions, used by the pool
+*		when invoking callbacks.
+*
+* SEE ALSO
+*	Pool
+*********/
+
+/****f* Component Library: Pool/cl_pool_construct
+* NAME
+*	cl_pool_construct
+*
+* DESCRIPTION
+*	The cl_pool_construct function constructs a pool.
+*
+* SYNOPSIS
+*/
+void cl_pool_construct(IN cl_pool_t * const p_pool);
+/*
+* PARAMETERS
+*	p_pool
+*		[in] Pointer to a cl_pool_t structure whose state to initialize.
+*
+* RETURN VALUE
+*	This function does not return a value.
+*
+* NOTES
+*	Allows calling cl_pool_init, cl_pool_destroy, and cl_is_pool_inited.
+*
+*	Calling cl_pool_construct is a prerequisite to calling any other
+*	pool function except cl_pool_init.
+*
+* SEE ALSO
+*	Pool, cl_pool_init, cl_pool_destroy, cl_is_pool_inited
+*********/
+
+/****f* Component Library: Pool/cl_is_pool_inited
+* NAME
+*	cl_is_pool_inited
+*
+* DESCRIPTION
+*	The cl_is_pool_inited function returns whether a pool was successfully
+*	initialized.
+*
+* SYNOPSIS
+*/
+static inline uint32_t cl_is_pool_inited(IN const cl_pool_t * const p_pool)
+{
+	/* CL_ASSERT that a non-null pointer is provided. */
+	CL_ASSERT(p_pool);
+	/* The answer is whatever the embedded quick composite pool reports. */
+	return (cl_is_qcpool_inited(&p_pool->qcpool));
+}
+
+/*
+* PARAMETERS
+*	p_pool
+*		[in] Pointer to a cl_pool_t structure whose initialization state
+*		to check.
+*
+* RETURN VALUES
+*	TRUE if the pool was initialized successfully.
+*
+*	FALSE otherwise.
+*
+* NOTES
+*	Allows checking the state of a pool to determine if invoking member
+*	functions is appropriate.
+*
+* SEE ALSO
+*	Pool
+*********/
+
+/****f* Component Library: Pool/cl_pool_init
+* NAME
+*	cl_pool_init
+*
+* DESCRIPTION
+*	The cl_pool_init function initializes a pool for use.
+*
+* SYNOPSIS
+*/
+cl_status_t
+cl_pool_init(IN cl_pool_t * const p_pool,
+	     IN const size_t min_count,
+	     IN const size_t max_count,
+	     IN const size_t grow_size,
+	     IN const size_t object_size,
+	     IN cl_pfn_pool_init_t pfn_initializer OPTIONAL,
+	     IN cl_pfn_pool_dtor_t pfn_destructor OPTIONAL,
+	     IN const void *const context);
+/*
+* PARAMETERS
+*	p_pool
+*		[in] Pointer to a cl_pool_t structure to initialize.
+*
+*	min_count
+*		[in] Minimum number of objects that the pool should support. All
+*		necessary allocations to allow storing the minimum number of items
+*		are performed at initialization time, and all necessary callbacks
+*		invoked.
+*
+*	max_count
+*		[in] Maximum number of objects to which the pool is allowed to grow.
+*		A value of zero specifies no maximum.
+*
+*	grow_size
+*		[in] Number of objects to allocate when incrementally growing the pool.
+*		A value of zero disables automatic growth.
+*
+*	object_size
+*		[in] Size, in bytes, of each object.
+*
+*	pfn_initializer
+*		[in] Initialization callback to invoke for every new object when
+*		growing the pool. This parameter is optional and may be NULL.
+*		See the cl_pfn_pool_init_t function type declaration for details
+*		about the callback function.
+*
+*	pfn_destructor
+*		[in] Destructor callback to invoke for every object before memory for
+*		that object is freed. This parameter is optional and may be NULL.
+*		See the cl_pfn_pool_dtor_t function type declaration for details
+*		about the callback function.
+*
+*	context
+*		[in] Value to pass to the callback functions to provide context.
+*
+* RETURN VALUES
+*	CL_SUCCESS if the pool was initialized successfully.
+*
+*	CL_INSUFFICIENT_MEMORY if there was not enough memory to initialize the
+*	pool.
+*
+*	CL_INVALID_SETTING if the maximum size is non-zero and less than the
+*	minimum size.
+*
+*	Other cl_status_t value returned by optional initialization callback function
+*	specified by the pfn_initializer parameter.
+*
+* NOTES
+*	cl_pool_init initializes, and if necessary, grows the pool to
+*	the capacity desired.
+*
+* SEE ALSO
+*	Pool, cl_pool_construct, cl_pool_destroy,
+*	cl_pool_get, cl_pool_put, cl_pool_grow,
+*	cl_pool_count, cl_pfn_pool_init_t, cl_pfn_pool_dtor_t
+*********/
+
+/****f* Component Library: Pool/cl_pool_destroy
+* NAME
+*	cl_pool_destroy
+*
+* DESCRIPTION
+*	The cl_pool_destroy function destroys a pool.
+* +* SYNOPSIS +*/ +static inline void cl_pool_destroy(IN cl_pool_t * const p_pool) +{ + CL_ASSERT(p_pool); + cl_qcpool_destroy(&p_pool->qcpool); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_pool_t structure to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* All memory allocated for objects is freed. The destructor callback, +* if any, will be invoked for every allocated object. Further operations +* on the pool should not be attempted after cl_pool_destroy +* is invoked. +* +* This function should only be called after a call to +* cl_pool_construct or cl_pool_init. +* +* In a debug build, cl_pool_destroy asserts that all objects are in +* the pool. +* +* SEE ALSO +* Pool, cl_pool_construct, cl_pool_init +*********/ + +/****f* Component Library: Pool/cl_pool_count +* NAME +* cl_pool_count +* +* DESCRIPTION +* The cl_pool_count function returns the number of available objects +* in a pool. +* +* SYNOPSIS +*/ +static inline size_t cl_pool_count(IN cl_pool_t * const p_pool) +{ + CL_ASSERT(p_pool); + return (cl_qcpool_count(&p_pool->qcpool)); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_pool_t structure for which the number of +* available objects is requested. +* +* RETURN VALUE +* Returns the number of objects available in the specified pool. +* +* SEE ALSO +* Pool +*********/ + +/****f* Component Library: Pool/cl_pool_get +* NAME +* cl_pool_get +* +* DESCRIPTION +* The cl_pool_get function retrieves an object from a pool. +* +* SYNOPSIS +*/ +static inline void *cl_pool_get(IN cl_pool_t * const p_pool) +{ + cl_pool_obj_t *p_pool_obj; + + CL_ASSERT(p_pool); + + p_pool_obj = (cl_pool_obj_t *) cl_qcpool_get(&p_pool->qcpool); + if (!p_pool_obj) + return (NULL); + + CL_ASSERT(p_pool_obj->p_object); + return ((void *)p_pool_obj->p_object); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_pool_t structure from which to retrieve +* an object. +* +* RETURN VALUES +* Returns a pointer to an object. 
+* +* Returns NULL if the pool is empty and can not be grown automatically. +* +* NOTES +* cl_pool_get returns the object at the head of the pool. If the pool is +* empty, it is automatically grown to accommodate this request unless the +* grow_size parameter passed to the cl_pool_init function was zero. +* +* SEE ALSO +* Pool, cl_pool_get_tail, cl_pool_put, cl_pool_grow, cl_pool_count +*********/ + +/****f* Component Library: Pool/cl_pool_put +* NAME +* cl_pool_put +* +* DESCRIPTION +* The cl_pool_put function returns an object to a pool. +* +* SYNOPSIS +*/ +static inline void +cl_pool_put(IN cl_pool_t * const p_pool, IN void *const p_object) +{ + cl_pool_obj_t *p_pool_obj; + + CL_ASSERT(p_pool); + CL_ASSERT(p_object); + + /* Calculate the offset to the list object representing this object. */ + p_pool_obj = (cl_pool_obj_t *) + (((uint8_t *) p_object) - sizeof(cl_pool_obj_t)); + + /* good sanity check */ + CL_ASSERT(p_pool_obj->p_object == p_object); + + cl_qcpool_put(&p_pool->qcpool, &p_pool_obj->pool_item); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_pool_t structure to which to return +* an object. +* +* p_object +* [in] Pointer to an object to return to the pool. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* cl_pool_put places the returned object at the head of the pool. +* +* The object specified by the p_object parameter must have been +* retrieved from the pool by a previous call to cl_pool_get. +* +* SEE ALSO +* Pool, cl_pool_put_tail, cl_pool_get +*********/ + +/****f* Component Library: Pool/cl_pool_grow +* NAME +* cl_pool_grow +* +* DESCRIPTION +* The cl_pool_grow function grows a pool by +* the specified number of objects. +* +* SYNOPSIS +*/ +static inline cl_status_t +cl_pool_grow(IN cl_pool_t * const p_pool, IN const size_t obj_count) +{ + CL_ASSERT(p_pool); + return (cl_qcpool_grow(&p_pool->qcpool, obj_count)); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_pool_t structure whose capacity to grow. 
+* +* obj_count +* [in] Number of objects by which to grow the pool. +* +* RETURN VALUES +* CL_SUCCESS if the pool grew successfully. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory to grow the +* pool. +* +* cl_status_t value returned by optional initialization callback function +* specified by the pfn_initializer parameter passed to the +* cl_pool_init function. +* +* NOTES +* It is not necessary to call cl_pool_grow if the pool is +* configured to grow automatically. +* +* SEE ALSO +* Pool +*********/ + +END_C_DECLS +#endif /* _CL_POOL_H_ */ diff --git a/include/complib/cl_ptr_vector.h b/include/complib/cl_ptr_vector.h new file mode 100644 index 0000000..93c217e --- /dev/null +++ b/include/complib/cl_ptr_vector.h @@ -0,0 +1,825 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * This file contains pointer vector definitions. Pointer Vector provides + * dynamically resizable array functionality. + */ + +#ifndef _CL_PTR_VECTOR_H_ +#define _CL_PTR_VECTOR_H_ + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/Pointer Vector +* NAME +* Pointer Vector +* +* DESCRIPTION +* The Pointer Vector is a self-sizing array of pointers. Like a traditional +* array, a pointer vector allows efficient constant time access to elements +* with a specified index. A pointer vector grows transparently as the +* user adds elements to the array. +* +* The cl_ptr_vector_t structure should be treated as opaque and should be +* manipulated only through the provided functions.
+*
+* SEE ALSO
+*	Structures:
+*		cl_ptr_vector_t
+*
+*	Callbacks:
+*		cl_pfn_ptr_vec_apply_t, cl_pfn_ptr_vec_find_t
+*
+*	Item Manipulation:
+*		cl_ptr_vector_set, cl_ptr_vector_obj
+*
+*	Initialization:
+*		cl_ptr_vector_construct, cl_ptr_vector_init, cl_ptr_vector_destroy
+*
+*	Manipulation:
+*		cl_ptr_vector_get_capacity, cl_ptr_vector_set_capacity,
+*		cl_ptr_vector_get_size, cl_ptr_vector_set_size, cl_ptr_vector_set_min_size
+*		cl_ptr_vector_get_ptr, cl_ptr_vector_get, cl_ptr_vector_at, cl_ptr_vector_set
+*
+*	Search:
+*		cl_ptr_vector_find_from_start, cl_ptr_vector_find_from_end
+*		cl_ptr_vector_apply_func
+*********/
+/****d* Component Library: Pointer Vector/cl_pfn_ptr_vec_apply_t
+* NAME
+*	cl_pfn_ptr_vec_apply_t
+*
+* DESCRIPTION
+*	The cl_pfn_ptr_vec_apply_t function type defines the prototype for
+*	functions used to iterate elements in a pointer vector.
+*
+* SYNOPSIS
+*/
+typedef void
+    (*cl_pfn_ptr_vec_apply_t) (IN const size_t index,
+			       IN void *const element, IN void *context);
+/*
+* PARAMETERS
+*	index
+*		[in] Index of the element.
+*
+*	element
+*		[in] Pointer to an element at the specified index in the pointer vector.
+*
+*	context
+*		[in] Context provided in a call to cl_ptr_vector_apply_func.
+*
+* RETURN VALUE
+*	This function does not return a value.
+*
+* NOTES
+*	This function type is provided as function prototype reference for
+*	the function passed by users as a parameter to the cl_ptr_vector_apply_func
+*	function.
+*
+* SEE ALSO
+*	Pointer Vector, cl_ptr_vector_apply_func
+*********/
+
+/****d* Component Library: Pointer Vector/cl_pfn_ptr_vec_find_t
+* NAME
+*	cl_pfn_ptr_vec_find_t
+*
+* DESCRIPTION
+*	The cl_pfn_ptr_vec_find_t function type defines the prototype for
+*	functions used to find elements in a pointer vector.
+*
+* SYNOPSIS
+*/
+typedef cl_status_t
+    (*cl_pfn_ptr_vec_find_t) (IN const size_t index,
+			      IN const void *const element, IN void *context);
+/*
+* PARAMETERS
+*	index
+*		[in] Index of the element.
+*
+*	element
+*		[in] Pointer to an element at the specified index in the
+*		pointer vector.
+*
+*	context
+*		[in] Context provided in a call to cl_ptr_vector_find_from_start or
+*		cl_ptr_vector_find_from_end.
+*
+* RETURN VALUES
+*	Return CL_SUCCESS if the element was found. This stops pointer vector
+*	iteration.
+*
+*	CL_NOT_FOUND to continue the pointer vector iteration.
+*
+* NOTES
+*	This function type is provided as function prototype reference for the
+*	function provided by users as a parameter to the
+*	cl_ptr_vector_find_from_start and cl_ptr_vector_find_from_end functions.
+*
+* SEE ALSO
+*	Pointer Vector, cl_ptr_vector_find_from_start, cl_ptr_vector_find_from_end
+*********/
+
+/****s* Component Library: Pointer Vector/cl_ptr_vector_t
+* NAME
+*	cl_ptr_vector_t
+*
+* DESCRIPTION
+*	Pointer Vector structure.
+*
+*	The cl_ptr_vector_t structure should be treated as opaque and should be
+*	manipulated only through the provided functions.
+*
+* SYNOPSIS
+*/
+typedef struct _cl_ptr_vector {
+	size_t size;
+	size_t grow_size;
+	size_t capacity;
+	const void **p_ptr_array;
+	cl_state_t state;
+} cl_ptr_vector_t;
+/*
+* FIELDS
+*	size
+*		Number of elements successfully initialized in the pointer vector.
+*
+*	grow_size
+*		Number of elements to allocate when growing.
+*
+*	capacity
+*		Total number of elements allocated.
+*
+*	p_ptr_array
+*		Internal array of pointers to elements.
+*
+*	state
+*		State of the pointer vector.
+*
+* SEE ALSO
+*	Pointer Vector
+*********/
+
+/****f* Component Library: Pointer Vector/cl_ptr_vector_construct
+* NAME
+*	cl_ptr_vector_construct
+*
+* DESCRIPTION
+*	The cl_ptr_vector_construct function constructs a pointer vector.
+*
+* SYNOPSIS
+*/
+void cl_ptr_vector_construct(IN cl_ptr_vector_t * const p_vector);
+/*
+* PARAMETERS
+*	p_vector
+*		[in] Pointer to a cl_ptr_vector_t structure to construct.
+*
+* RETURN VALUE
+*	This function does not return a value.
+*
+* NOTES
+*	Allows calling cl_ptr_vector_destroy without first calling
+*	cl_ptr_vector_init.
+*
+*	Calling cl_ptr_vector_construct is a prerequisite to calling any other
+*	pointer vector function except cl_ptr_vector_init.
+*
+* SEE ALSO
+*	Pointer Vector, cl_ptr_vector_init, cl_ptr_vector_destroy
+*********/
+
+/****f* Component Library: Pointer Vector/cl_ptr_vector_init
+* NAME
+*	cl_ptr_vector_init
+*
+* DESCRIPTION
+*	The cl_ptr_vector_init function initializes a pointer vector for use.
+*
+* SYNOPSIS
+*/
+cl_status_t
+cl_ptr_vector_init(IN cl_ptr_vector_t * const p_vector,
+		   IN const size_t min_size, IN const size_t grow_size);
+/*
+* PARAMETERS
+*	p_vector
+*		[in] Pointer to a cl_ptr_vector_t structure to initialize.
+*
+*	min_size
+*		[in] Initial number of elements.
+*
+*	grow_size
+*		[in] Number of elements to allocate when incrementally growing
+*		the pointer vector. A value of zero disables automatic growth.
+*
+* RETURN VALUES
+*	CL_SUCCESS if the pointer vector was initialized successfully.
+*
+*	CL_INSUFFICIENT_MEMORY if the initialization failed.
+*
+* SEE ALSO
+*	Pointer Vector, cl_ptr_vector_construct, cl_ptr_vector_destroy,
+*	cl_ptr_vector_set, cl_ptr_vector_get, cl_ptr_vector_at
+*********/
+
+/****f* Component Library: Pointer Vector/cl_ptr_vector_destroy
+* NAME
+*	cl_ptr_vector_destroy
+*
+* DESCRIPTION
+*	The cl_ptr_vector_destroy function destroys a pointer vector.
+*
+* SYNOPSIS
+*/
+void cl_ptr_vector_destroy(IN cl_ptr_vector_t * const p_vector);
+/*
+* PARAMETERS
+*	p_vector
+*		[in] Pointer to a cl_ptr_vector_t structure to destroy.
+*
+* RETURN VALUE
+*	This function does not return a value.
+*
+* NOTES
+*	cl_ptr_vector_destroy frees all memory allocated for the pointer vector.
+*
+*	This function should only be called after a call to cl_ptr_vector_construct
+*	or cl_ptr_vector_init.
+* +* SEE ALSO +* Pointer Vector, cl_ptr_vector_construct, cl_ptr_vector_init +*********/ + +/****f* Component Library: Pointer Vector/cl_ptr_vector_get_capacity +* NAME +* cl_ptr_vector_get_capacity +* +* DESCRIPTION +* The cl_ptr_vector_get_capacity function returns the capacity of +* a pointer vector. +* +* SYNOPSIS +*/ +static inline size_t +cl_ptr_vector_get_capacity(IN const cl_ptr_vector_t * const p_vector) +{ + CL_ASSERT(p_vector); + CL_ASSERT(p_vector->state == CL_INITIALIZED); + + return (p_vector->capacity); +} + +/* +* PARAMETERS +* p_vector +* [in] Pointer to a cl_ptr_vector_t structure whose capacity to return. +* +* RETURN VALUE +* Capacity, in elements, of the pointer vector. +* +* NOTES +* The capacity is the number of elements that the pointer vector can store, +* and can be greater than the number of elements stored. To get the number +* of elements stored in the pointer vector, use cl_ptr_vector_get_size. +* +* SEE ALSO +* Pointer Vector, cl_ptr_vector_set_capacity, cl_ptr_vector_get_size +*********/ + +/****f* Component Library: Pointer Vector/cl_ptr_vector_get_size +* NAME +* cl_ptr_vector_get_size +* +* DESCRIPTION +* The cl_ptr_vector_get_size function returns the size of a pointer vector. +* +* SYNOPSIS +*/ +static inline uint32_t +cl_ptr_vector_get_size(IN const cl_ptr_vector_t * const p_vector) +{ + CL_ASSERT(p_vector); + CL_ASSERT(p_vector->state == CL_INITIALIZED); + return ((uint32_t) p_vector->size); + +} + +/* +* PARAMETERS +* p_vector +* [in] Pointer to a cl_ptr_vector_t structure whose size to return. +* +* RETURN VALUE +* Size, in elements, of the pointer vector. +* +* SEE ALSO +* Pointer Vector, cl_ptr_vector_set_size, cl_ptr_vector_get_capacity +*********/ + +/****f* Component Library: Pointer Vector/cl_ptr_vector_get +* NAME +* cl_ptr_vector_get +* +* DESCRIPTION +* The cl_ptr_vector_get function returns the pointer stored in a +* pointer vector at a specified index. 
+*
+* SYNOPSIS
+*/
+static inline void *cl_ptr_vector_get(IN const cl_ptr_vector_t * const p_vector,
+				      IN const size_t index)
+{
+	const void *p_stored;
+
+	CL_ASSERT(p_vector);
+	CL_ASSERT(p_vector->state == CL_INITIALIZED);
+	CL_ASSERT(index < p_vector->size);
+
+	/* The array holds const pointers; callers receive a plain void *. */
+	p_stored = p_vector->p_ptr_array[index];
+	return (void *)p_stored;
+}
+
+/*
+* PARAMETERS
+*	p_vector
+*		[in] Pointer to a cl_ptr_vector_t structure from which to get an
+*		element.
+*
+*	index
+*		[in] Index of the element.
+*
+* RETURN VALUE
+*	Value of the pointer stored at the specified index.
+*
+* NOTES
+*	cl_ptr_vector_get provides constant access times regardless of the index.
+*
+*	Apart from a CL_ASSERT on the index, cl_ptr_vector_get does not perform
+*	boundary checking. Callers are responsible for providing an index that
+*	is within the range of the pointer vector.
+*
+* SEE ALSO
+*	Pointer Vector, cl_ptr_vector_at, cl_ptr_vector_set, cl_ptr_vector_get_size
+*********/
+
+/****f* Component Library: Pointer Vector/cl_ptr_vector_at
+* NAME
+*	cl_ptr_vector_at
+*
+* DESCRIPTION
+*	The cl_ptr_vector_at function copies an element stored in a pointer
+*	vector at a specified index, performing boundary checks.
+*
+* SYNOPSIS
+*/
+cl_status_t
+cl_ptr_vector_at(IN const cl_ptr_vector_t * const p_vector,
+		 IN const size_t index, OUT void **const p_element);
+/*
+* PARAMETERS
+*	p_vector
+*		[in] Pointer to a cl_ptr_vector_t structure from which to get a copy of
+*		an element.
+*
+*	index
+*		[in] Index of the element.
+*
+*	p_element
+*		[out] Pointer to storage for the pointer element. Contains a copy of
+*		the desired pointer upon successful completion of the call.
+*
+* RETURN VALUES
+*	CL_SUCCESS if an element was found at the specified index.
+*
+*	CL_INVALID_SETTING if the index was out of range.
+*
+* NOTES
+*	cl_ptr_vector_at provides constant time access regardless of
+*	the index, and performs boundary checking on the pointer vector.
+*
+*	Upon success, the p_element parameter contains a copy of the
+*	desired element.
+* +* SEE ALSO +* Pointer Vector, cl_ptr_vector_get +*********/ + +/****f* Component Library: Pointer Vector/cl_ptr_vector_set +* NAME +* cl_ptr_vector_set +* +* DESCRIPTION +* The cl_ptr_vector_set function sets the element at the specified index. +* +* SYNOPSIS +*/ +cl_status_t +cl_ptr_vector_set(IN cl_ptr_vector_t * const p_vector, + IN const size_t index, IN const void *const element); +/* +* PARAMETERS +* p_vector +* [in] Pointer to a cl_ptr_vector_t structure into which to store +* an element. +* +* index +* [in] Index of the element. +* +* element +* [in] Pointer to store in the pointer vector. +* +* RETURN VALUES +* CL_SUCCESS if the element was successfully set. +* +* CL_INSUFFICIENT_MEMORY if the pointer vector could not be resized to +* accommodate the new element. +* +* NOTES +* cl_ptr_vector_set grows the pointer vector as needed to accommodate +* the new element, unless the grow_size parameter passed into the +* cl_ptr_vector_init function was zero. +* +* SEE ALSO +* Pointer Vector, cl_ptr_vector_get +*********/ + +/****f* Component Library: Pointer Vector/cl_ptr_vector_insert +* NAME +* cl_ptr_vector_insert +* +* DESCRIPTION +* The cl_ptr_vector_insert function inserts an element into a pointer vector. +* +* SYNOPSIS +*/ +static inline cl_status_t +cl_ptr_vector_insert(IN cl_ptr_vector_t * const p_vector, + IN const void *const element, + OUT size_t * const p_index OPTIONAL) +{ + cl_status_t status; + + CL_ASSERT(p_vector); + CL_ASSERT(p_vector->state == CL_INITIALIZED); + + status = cl_ptr_vector_set(p_vector, p_vector->size, element); + if (status == CL_SUCCESS && p_index) + *p_index = p_vector->size - 1; + + return (status); +} + +/* +* PARAMETERS +* p_vector +* [in] Pointer to a cl_ptr_vector_t structure into which to store +* an element. +* +* element +* [in] Pointer to store in the pointer vector. +* +* p_index +* [out] Pointer to the index of the element. Valid only if +* insertion was successful. 
+* +* RETURN VALUES +* CL_SUCCESS if the element was successfully inserted. +* +* CL_INSUFFICIENT_MEMORY if the pointer vector could not be resized to +* accommodate the new element. +* +* NOTES +* cl_ptr_vector_insert places the new element at the end of +* the pointer vector. +* +* cl_ptr_vector_insert grows the pointer vector as needed to accommodate +* the new element, unless the grow_size parameter passed into the +* cl_ptr_vector_init function was zero. +* +* SEE ALSO +* Pointer Vector, cl_ptr_vector_remove, cl_ptr_vector_set +*********/ + +/****f* Component Library: Pointer Vector/cl_ptr_vector_remove +* NAME +* cl_ptr_vector_remove +* +* DESCRIPTION +* The cl_ptr_vector_remove function removes and returns the pointer stored +* in a pointer vector at a specified index. Items beyond the removed item +* are shifted down and the size of the pointer vector is decremented. +* +* SYNOPSIS +*/ +void *cl_ptr_vector_remove(IN cl_ptr_vector_t * const p_vector, + IN const size_t index); +/* +* PARAMETERS +* p_vector +* [in] Pointer to a cl_ptr_vector_t structure from which to get an +* element. +* +* index +* [in] Index of the element. +* +* RETURN VALUE +* Value of the pointer stored at the specified index. +* +* NOTES +* cl_ptr_vector_remove does not perform boundary checking. Callers are +* responsible for providing an index that is within the range of the pointer +* vector. +* +* SEE ALSO +* Pointer Vector, cl_ptr_vector_insert, cl_ptr_vector_get_size +*********/ + +/****f* Component Library: Pointer Vector/cl_ptr_vector_set_capacity +* NAME +* cl_ptr_vector_set_capacity +* +* DESCRIPTION +* The cl_ptr_vector_set_capacity function reserves memory in a +* pointer vector for a specified number of pointers. +* +* SYNOPSIS +*/ +cl_status_t +cl_ptr_vector_set_capacity(IN cl_ptr_vector_t * const p_vector, + IN const size_t new_capacity); +/* +* PARAMETERS +* p_vector +* [in] Pointer to a cl_ptr_vector_t structure whose capacity to set. 
+* +* new_capacity +* [in] Total number of elements for which the pointer vector should +* allocate memory. +* +* RETURN VALUES +* CL_SUCCESS if the capacity was successfully set. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory to satisfy the +* operation. The pointer vector is left unchanged. +* +* NOTES +* cl_ptr_vector_set_capacity increases the capacity of the pointer vector. +* It does not change the size of the pointer vector. If the requested +* capacity is less than the current capacity, the pointer vector is left +* unchanged. +* +* SEE ALSO +* Pointer Vector, cl_ptr_vector_get_capacity, cl_ptr_vector_set_size, +* cl_ptr_vector_set_min_size +*********/ + +/****f* Component Library: Pointer Vector/cl_ptr_vector_set_size +* NAME +* cl_ptr_vector_set_size +* +* DESCRIPTION +* The cl_ptr_vector_set_size function resizes a pointer vector, either +* increasing or decreasing its size. +* +* SYNOPSIS +*/ +cl_status_t +cl_ptr_vector_set_size(IN cl_ptr_vector_t * const p_vector, + IN const size_t size); +/* +* PARAMETERS +* p_vector +* [in] Pointer to a cl_ptr_vector_t structure whose size to set. +* +* size +* [in] Number of elements desired in the pointer vector. +* +* RETURN VALUES +* CL_SUCCESS if the size of the pointer vector was set successfully. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory to complete the +* operation. The pointer vector is left unchanged. +* +* NOTES +* cl_ptr_vector_set_size sets the pointer vector to the specified size. +* If size is smaller than the current size of the pointer vector, the size +* is reduced. +* +* This function can only fail if size is larger than the current capacity. 
+* +* SEE ALSO +* Pointer Vector, cl_ptr_vector_get_size, cl_ptr_vector_set_min_size, +* cl_ptr_vector_set_capacity +*********/ + +/****f* Component Library: Pointer Vector/cl_ptr_vector_set_min_size +* NAME +* cl_ptr_vector_set_min_size +* +* DESCRIPTION +* The cl_ptr_vector_set_min_size function resizes a pointer vector to a +* specified size if the pointer vector is smaller than the specified size. +* +* SYNOPSIS +*/ +cl_status_t +cl_ptr_vector_set_min_size(IN cl_ptr_vector_t * const p_vector, + IN const size_t min_size); +/* +* PARAMETERS +* p_vector +* [in] Pointer to a cl_ptr_vector_t structure whose minimum size to set. +* +* min_size +* [in] Minimum number of elements that the pointer vector should contain. +* +* RETURN VALUES +* CL_SUCCESS if the pointer vector size is greater than or equal to min_size. +* This could indicate that the pointer vector's capacity was increased to +* min_size or that the pointer vector was already of sufficient size. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory to resize the +* pointer vector. The pointer vector is left unchanged. +* +* NOTES +* If min_size is smaller than the current size of the pointer vector, +* the pointer vector is unchanged. The pointer vector is unchanged if the +* size could not be changed due to insufficient memory being available to +* perform the operation. +* +* SEE ALSO +* Pointer Vector, cl_ptr_vector_get_size, cl_ptr_vector_set_size, +* cl_ptr_vector_set_capacity +*********/ + +/****f* Component Library: Pointer Vector/cl_ptr_vector_apply_func +* NAME +* cl_ptr_vector_apply_func +* +* DESCRIPTION +* The cl_ptr_vector_apply_func function invokes a specified function for +* every element in a pointer vector. +* +* SYNOPSIS +*/ +void +cl_ptr_vector_apply_func(IN const cl_ptr_vector_t * const p_vector, + IN cl_pfn_ptr_vec_apply_t pfn_callback, + IN const void *const context); +/* +* PARAMETERS +* p_vector +* [in] Pointer to a cl_ptr_vector_t structure whose elements to iterate. 
+* +* pfn_callback +* [in] Function invoked for every element in the array. +* See the cl_pfn_ptr_vec_apply_t function type declaration for details +* about the callback function. +* +* context +* [in] Value to pass to the callback function. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* cl_ptr_vector_apply_func invokes the specified function for every element +* in the pointer vector, starting from the beginning of the pointer vector. +* +* SEE ALSO +* Pointer Vector, cl_ptr_vector_find_from_start, cl_ptr_vector_find_from_end, +* cl_pfn_ptr_vec_apply_t +*********/ + +/****f* Component Library: Pointer Vector/cl_ptr_vector_find_from_start +* NAME +* cl_ptr_vector_find_from_start +* +* DESCRIPTION +* The cl_ptr_vector_find_from_start function uses a specified function to +* search for elements in a pointer vector starting from the lowest index. +* +* SYNOPSIS +*/ +size_t +cl_ptr_vector_find_from_start(IN const cl_ptr_vector_t * const p_vector, + IN cl_pfn_ptr_vec_find_t pfn_callback, + IN const void *const context); +/* +* PARAMETERS +* p_vector +* [in] Pointer to a cl_ptr_vector_t structure to search. +* +* pfn_callback +* [in] Function invoked to determine if a match was found. +* See the cl_pfn_ptr_vec_find_t function type declaration for details +* about the callback function. +* +* context +* [in] Value to pass to the callback function. +* +* RETURN VALUES +* Index of the element, if found. +* +* Size of the pointer vector if the element was not found. +* +* NOTES +* cl_ptr_vector_find_from_start does not remove the found element from +* the pointer vector. The index of the element is returned when the function +* provided by the pfn_callback parameter returns CL_SUCCESS.
+* +* SEE ALSO +* Pointer Vector, cl_ptr_vector_find_from_end, cl_ptr_vector_apply_func, +* cl_pfn_ptr_vec_find_t +*********/ + +/****f* Component Library: Pointer Vector/cl_ptr_vector_find_from_end +* NAME +* cl_ptr_vector_find_from_end +* +* DESCRIPTION +* The cl_ptr_vector_find_from_end function uses a specified function to +* search for elements in a pointer vector starting from the highest index. +* +* SYNOPSIS +*/ +size_t +cl_ptr_vector_find_from_end(IN const cl_ptr_vector_t * const p_vector, + IN cl_pfn_ptr_vec_find_t pfn_callback, + IN const void *const context); +/* +* PARAMETERS +* p_vector +* [in] Pointer to a cl_ptr_vector_t structure to search. +* +* pfn_callback +* [in] Function invoked to determine if a match was found. +* See the cl_pfn_ptr_vec_find_t function type declaration for details +* about the callback function. +* +* context +* [in] Value to pass to the callback function. +* +* RETURN VALUES +* Index of the element, if found. +* +* Size of the pointer vector if the element was not found. +* +* NOTES +* cl_ptr_vector_find_from_end does not remove the found element from +* the pointer vector. The index of the element is returned when the function +* provided by the pfn_callback parameter returns CL_SUCCESS. +* +* SEE ALSO +* Pointer Vector, cl_ptr_vector_find_from_start, cl_ptr_vector_apply_func, +* cl_pfn_ptr_vec_find_t +*********/ + +END_C_DECLS +#endif /* _CL_PTR_VECTOR_H_ */ diff --git a/include/complib/cl_qcomppool.h b/include/complib/cl_qcomppool.h new file mode 100644 index 0000000..0400b00 --- /dev/null +++ b/include/complib/cl_qcomppool.h @@ -0,0 +1,738 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of the quick composite pool. The quick composite pool + * manages a pool of composite objects. A composite object is an object + * that is made of multiple sub objects. + * It can grow to meet demand, limited only by system memory. 
+ */ + +#ifndef _CL_QUICK_COMPOSITE_POOL_H_ +#define _CL_QUICK_COMPOSITE_POOL_H_ + +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/Quick Composite Pool +* NAME +* Quick Composite Pool +* +* DESCRIPTION +* The Quick Composite Pool provides a self-contained and self-sustaining +* pool of user defined composite objects. +* +* A composite object is an object that is composed of one or more +* sub-objects, each of which needs to be treated separately for +* initialization. Objects can be retrieved from the pool as long as there +* is memory in the system. +* +* To aid in object oriented design, the Quick Composite Pool provides users +* the ability to specify callbacks that are invoked for each object for +* construction, initialization, and destruction. Constructor and destructor +* callback functions may not fail. +* +* A Quick Composite Pool does not return memory to the system as the user +* returns objects to the pool. The only method of returning memory to the +* system is to destroy the pool. +* +* The Quick Composite Pool operates on cl_pool_item_t structures that +* describe composite objects. This provides for more efficient memory use. +* If using a cl_pool_item_t is not desired, the Composite Pool provides +* similar functionality but operates on opaque objects. +* +* The Quick Composite Pool functions operate on a cl_qcpool_t structure +* which should be treated as opaque and should be manipulated only through +* the provided functions.
+* +* SEE ALSO +* Structures: +* cl_qcpool_t, cl_pool_item_t +* +* Callbacks: +* cl_pfn_qcpool_init_t, cl_pfn_qcpool_dtor_t +* +* Initialization/Destruction: +* cl_qcpool_construct, cl_qcpool_init, cl_qcpool_destroy +* +* Manipulation: +* cl_qcpool_get, cl_qcpool_put, cl_qcpool_put_list, cl_qcpool_grow +* +* Attributes: +* cl_is_qcpool_inited, cl_qcpool_count +*********/ +/****s* Component Library: Quick Composite Pool/cl_pool_item_t +* NAME +* cl_pool_item_t +* +* DESCRIPTION +* The cl_pool_item_t structure is used by pools to store objects. +* +* SYNOPSIS +*/ +typedef struct _cl_pool_item { + cl_list_item_t list_item; +#ifdef _DEBUG_ + /* Pointer to the owner pool used for sanity checks. */ + struct _cl_qcpool *p_pool; +#endif +} cl_pool_item_t; +/* +* FIELDS +* list_item +* Used internally by the pool. Users should not use this field. +* +* p_pool +* Used internally by the pool in debug builds to check for consistency. +* +* NOTES +* The pool item structure is defined in such a way as to safely allow +* users to cast from a pool item to a list item for storing items +* retrieved from a quick pool in a quick list. +* +* SEE ALSO +* Quick Composite Pool, cl_list_item_t +*********/ + +/****i* Component Library: Quick List/cl_pool_obj_t +* NAME +* cl_pool_obj_t +* +* DESCRIPTION +* The cl_pool_obj_t structure is used by pools to store objects. +* +* SYNOPSIS +*/ +typedef struct _cl_pool_obj { + /* The pool item must be the first item to allow casting. */ + cl_pool_item_t pool_item; + const void *p_object; +} cl_pool_obj_t; +/* +* FIELDS +* pool_item +* Used internally by the pool. Users should not use this field. +* +* p_object +* Pointer to the user's object being stored in the pool. +* +* NOTES +* The pool object structure is used by non-quick pools to store object. 
+* +* SEE ALSO +* cl_pool_item_t +*********/ + +/****d* Component Library: Quick Composite Pool/cl_pfn_qcpool_init_t +* NAME +* cl_pfn_qcpool_init_t +* +* DESCRIPTION +* The cl_pfn_qcpool_init_t function type defines the prototype for +* functions used as initializer for objects being allocated by a +* quick composite pool. +* +* SYNOPSIS +*/ +typedef cl_status_t + (*cl_pfn_qcpool_init_t) (IN void **const p_comp_array, + IN const uint32_t num_components, + IN void *context, + OUT cl_pool_item_t ** const pp_pool_item); +/* +* PARAMETERS +* p_comp_array +* [in] Pointer to the first entry in an array of pointers, each of +* which points to a component that makes up a composite object. +* +* num_components +* [in] Number of components in the component array. +* +* context +* [in] Context provided in a call to cl_qcpool_init. +* +* pp_pool_item +* [out] Users should set this pointer to reference the cl_pool_item_t +* structure that represents the composite object. This pointer must +* not be NULL if the function returns CL_SUCCESS. +* +* RETURN VALUE +* Return CL_SUCCESS to indicate that initialization of the object +* was successful and that initialization of further objects may continue. +* +* Other cl_status_t values will be returned by cl_qcpool_init +* and cl_qcpool_grow. +* +* NOTES +* This function type is provided as function prototype reference for +* the function provided by the user as a parameter to the +* cl_qcpool_init function. +* +* The initializer is invoked once per allocated object, allowing the user +* to chain components to form a composite object and perform any necessary +* initialization. Returning a status other than CL_SUCCESS aborts a grow +* operation, initiated either through cl_qcpool_init or cl_qcpool_grow, +* and causes the initiating function to fail. Any non-CL_SUCCESS status +* will be returned by the function that initiated the grow operation. +* +* All memory for the requested number of components is pre-allocated. 
Users +* should include space in one of their components for the cl_pool_item_t +* structure that will represent the composite object to avoid having to +* allocate that structure in the initialization callback. Alternatively, +* users may specify an additional component for the cl_pool_item_t structure. +* +* When later performing a cl_qcpool_get call, the return value is a pointer +* to the cl_pool_item_t returned by this function in the pp_pool_item +* parameter. Users must set pp_pool_item to a valid pointer to the +* cl_pool_item_t representing the object if they return CL_SUCCESS. +* +* SEE ALSO +* Quick Composite Pool, cl_qcpool_init +*********/ + +/****d* Component Library: Quick Composite Pool/cl_pfn_qcpool_dtor_t +* NAME +* cl_pfn_qcpool_dtor_t +* +* DESCRIPTION +* The cl_pfn_qcpool_dtor_t function type defines the prototype for +* functions used as destructor for objects being deallocated by a +* quick composite pool. +* +* SYNOPSIS +*/ +typedef void + (*cl_pfn_qcpool_dtor_t) (IN const cl_pool_item_t * const p_pool_item, + IN void *context); +/* +* PARAMETERS +* p_pool_item +* [in] Pointer to a cl_pool_item_t structure representing an object. +* +* context +* [in] Context provided in a call to cl_qcpool_init. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* This function type is provided as function prototype reference for +* the function provided by the user as an optional parameter to the +* cl_qcpool_init function. +* +* The destructor is invoked once per allocated object, allowing the user +* to perform any necessary cleanup. Users should not attempt to deallocate +* the memory for the composite object, as the quick composite pool manages +* object allocation and deallocation. +* +* SEE ALSO +* Quick Composite Pool, cl_qcpool_init +*********/ + +/****s* Component Library: Quick Composite Pool/cl_qcpool_t +* NAME +* cl_qcpool_t +* +* DESCRIPTION +* Quick composite pool structure. 
+* +* The cl_qcpool_t structure should be treated as opaque and should be +* manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct _cl_qcpool { + uint32_t num_components; + size_t *component_sizes; + void **p_components; + size_t num_objects; + size_t max_objects; + size_t grow_size; + cl_pfn_qcpool_init_t pfn_init; + cl_pfn_qcpool_dtor_t pfn_dtor; + const void *context; + cl_qlist_t free_list; + cl_qlist_t alloc_list; + cl_state_t state; +} cl_qcpool_t; +/* +* FIELDS +* num_components +* Number of components per object. +* +* component_sizes +* Array of sizes, one for each component. +* +* p_components +* Array of pointers to components, used for the constructor callback. +* +* num_objects +* Number of objects managed by the pool +* +* max_objects +* Maximum number of objects allowed to be created in pool +* +* grow_size +* Number of objects to add when automatically growing the pool. +* +* pfn_init +* Pointer to the user's initializer callback to invoke when initializing +* new objects. +* +* pfn_dtor +* Pointer to the user's destructor callback to invoke before deallocating +* memory allocated for objects. +* +* context +* User's provided context for callback functions, used by the pool +* when invoking callbacks. +* +* free_list +* Quick list of objects available. +* +* alloc_list +* Quick list used to store information about allocations. +* +* state +* State of the pool. +* +* SEE ALSO +* Quick Composite Pool +*********/ + +/****f* Component Library: Quick Composite Pool/cl_qcpool_construct +* NAME +* cl_qcpool_construct +* +* DESCRIPTION +* The cl_qcpool_construct function constructs a quick composite pool. +* +* SYNOPSIS +*/ +void cl_qcpool_construct(IN cl_qcpool_t * const p_pool); +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_qcpool_t structure whose state to initialize. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling cl_qcpool_init, cl_qcpool_destroy, cl_is_qcpool_inited. 
+* +* Calling cl_qcpool_construct is a prerequisite to calling any other +* quick composite pool function except cl_qcpool_init. +* +* SEE ALSO +* Quick Composite Pool, cl_qcpool_init, cl_qcpool_destroy, +* cl_is_qcpool_inited +*********/ + +/****f* Component Library: Quick Composite Pool/cl_is_qcpool_inited +* NAME +* cl_is_qcpool_inited +* +* DESCRIPTION +* The cl_is_qcpool_inited function returns whether a quick composite pool was +* successfully initialized. +* +* SYNOPSIS +*/ +static inline uint32_t cl_is_qcpool_inited(IN const cl_qcpool_t * const p_pool) +{ + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_pool); + /* CL_ASSERT that the pool is not in some invalid state. */ + CL_ASSERT(cl_is_state_valid(p_pool->state)); + + return (p_pool->state == CL_INITIALIZED); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_qcpool_t structure to check. +* +* RETURN VALUES +* TRUE if the quick composite pool was initialized successfully. +* +* FALSE otherwise. +* +* NOTES +* Allows checking the state of a quick composite pool to determine if +* invoking member functions is appropriate. +* +* SEE ALSO +* Quick Composite Pool +*********/ + +/****f* Component Library: Quick Composite Pool/cl_qcpool_init +* NAME +* cl_qcpool_init +* +* DESCRIPTION +* The cl_qcpool_init function initializes a quick composite pool for use. +* +* SYNOPSIS +*/ +cl_status_t +cl_qcpool_init(IN cl_qcpool_t * const p_pool, + IN const size_t min_size, + IN const size_t max_size, + IN const size_t grow_size, + IN const size_t * const component_sizes, + IN const uint32_t num_components, + IN cl_pfn_qcpool_init_t pfn_initializer OPTIONAL, + IN cl_pfn_qcpool_dtor_t pfn_destructor OPTIONAL, + IN const void *const context); +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_qcpool_t structure to initialize. +* +* min_size +* [in] Minimum number of objects that the pool should support. 
All +* necessary allocations to allow storing the minimum number of items +* are performed at initialization time, and all necessary callbacks +* successfully invoked. +* +* max_size +* [in] Maximum number of objects to which the pool is allowed to grow. +* A value of zero specifies no maximum. +* +* grow_size +* [in] Number of objects to allocate when incrementally growing the pool. +* A value of zero disables automatic growth. +* +* component_sizes +* [in] Pointer to the first entry in an array of sizes describing, +* in order, the sizes of the components that make up a composite object. +* +* num_components +* [in] Number of components that make up a composite object. +* +* pfn_initializer +* [in] Initializer callback to invoke for every new object when growing +* the pool. This parameter may be NULL only if the objects stored in +* the quick composite pool consist of only one component. If NULL, the +* pool assumes the cl_pool_item_t structure describing objects is +* located at the head of each object. See the cl_pfn_qcpool_init_t +* function type declaration for details about the callback function. +* +* pfn_destructor +* [in] Destructor callback to invoke for every object before memory for +* that object is freed. This parameter is optional and may be NULL. +* See the cl_pfn_qcpool_dtor_t function type declaration for details +* about the callback function. +* +* context +* [in] Value to pass to the callback functions to provide context. +* +* RETURN VALUES +* CL_SUCCESS if the quick composite pool was initialized successfully. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory to initialize the +* quick composite pool. +* +* CL_INVALID_SETTING if a NULL constructor was provided for composite objects +* consisting of more than one component. Also returns CL_INVALID_SETTING if +* the maximum size is non-zero and less than the minimum size. 
+* +* Other cl_status_t value returned by optional initialization callback function +* specified by the pfn_initializer parameter. +* +* If initialization fails, the pool is left in a destroyed state. Callers +* may still safely call cl_qcpool_destroy. +* +* NOTES +* cl_qcpool_init initializes, and if necessary, grows the pool to +* the capacity desired. +* +* SEE ALSO +* Quick Composite Pool, cl_qcpool_construct, cl_qcpool_destroy, +* cl_qcpool_get, cl_qcpool_put, cl_qcpool_grow, +* cl_qcpool_count, cl_pfn_qcpool_init_t, cl_pfn_qcpool_dtor_t +*********/ + +/****f* Component Library: Quick Composite Pool/cl_qcpool_destroy +* NAME +* cl_qcpool_destroy +* +* DESCRIPTION +* The cl_qcpool_destroy function destroys a quick composite pool. +* +* SYNOPSIS +*/ +void cl_qcpool_destroy(IN cl_qcpool_t * const p_pool); +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_qcpool_t structure to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* All memory allocated for composite objects is freed. The destructor +* callback, if any, will be invoked for every allocated object. Further +* operations on the composite pool should not be attempted after +* cl_qcpool_destroy is invoked. +* +* This function should only be called after a call to +* cl_qcpool_construct or cl_qcpool_init. +* +* In a debug build, cl_qcpool_destroy asserts that all objects are in +* the pool. +* +* SEE ALSO +* Quick Composite Pool, cl_qcpool_construct, cl_qcpool_init +*********/ + +/****f* Component Library: Quick Composite Pool/cl_qcpool_count +* NAME +* cl_qcpool_count +* +* DESCRIPTION +* The cl_qcpool_count function returns the number of available objects +* in a quick composite pool. 
+* +* SYNOPSIS +*/ +static inline size_t cl_qcpool_count(IN cl_qcpool_t * const p_pool) +{ + CL_ASSERT(p_pool); + CL_ASSERT(p_pool->state == CL_INITIALIZED); + + return (cl_qlist_count(&p_pool->free_list)); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_qcpool_t structure for which the number of +* available objects is requested. +* +* RETURN VALUE +* Returns the number of objects available in the specified +* quick composite pool. +* +* SEE ALSO +* Quick Composite Pool +*********/ + +/****f* Component Library: Quick Composite Pool/cl_qcpool_get +* NAME +* cl_qcpool_get +* +* DESCRIPTION +* The cl_qcpool_get function retrieves an object from a +* quick composite pool. +* +* SYNOPSIS +*/ +cl_pool_item_t *cl_qcpool_get(IN cl_qcpool_t * const p_pool); +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_qcpool_t structure from which to retrieve +* an object. +* +* RETURN VALUES +* Returns a pointer to a cl_pool_item_t for a composite object. +* +* Returns NULL if the pool is empty and can not be grown automatically. +* +* NOTES +* cl_qcpool_get returns the object at the head of the pool. If the pool is +* empty, it is automatically grown to accommodate this request unless the +* grow_size parameter passed to the cl_qcpool_init function was zero. +* +* SEE ALSO +* Quick Composite Pool, cl_qcpool_get_tail, cl_qcpool_put, +* cl_qcpool_grow, cl_qcpool_count +*********/ + +/****f* Component Library: Quick Composite Pool/cl_qcpool_put +* NAME +* cl_qcpool_put +* +* DESCRIPTION +* The cl_qcpool_put function returns an object to a quick composite pool. +* +* SYNOPSIS +*/ +static inline void +cl_qcpool_put(IN cl_qcpool_t * const p_pool, + IN cl_pool_item_t * const p_pool_item) +{ + CL_ASSERT(p_pool); + CL_ASSERT(p_pool->state == CL_INITIALIZED); + CL_ASSERT(p_pool_item); + /* Make sure items being returned came from the specified pool. 
*/ + CL_ASSERT(p_pool_item->p_pool == p_pool); + + /* return this lil' doggy to the pool */ + cl_qlist_insert_head(&p_pool->free_list, &p_pool_item->list_item); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_qcpool_t structure to which to return +* an object. +* +* p_pool_item +* [in] Pointer to a cl_pool_item_t structure for the object +* being returned. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* cl_qcpool_put places the returned object at the head of the pool. +* +* The object specified by the p_pool_item parameter must have been +* retrieved from the pool by a previous call to cl_qcpool_get. +* +* SEE ALSO +* Quick Composite Pool, cl_qcpool_put_tail, cl_qcpool_get +*********/ + +/****f* Component Library: Quick Composite Pool/cl_qcpool_put_list +* NAME +* cl_qcpool_put_list +* +* DESCRIPTION +* The cl_qcpool_put_list function returns a list of objects to the head of +* a quick composite pool. +* +* SYNOPSIS +*/ +static inline void +cl_qcpool_put_list(IN cl_qcpool_t * const p_pool, IN cl_qlist_t * const p_list) +{ +#ifdef _DEBUG_ + cl_list_item_t *p_item; +#endif + + CL_ASSERT(p_pool); + CL_ASSERT(p_pool->state == CL_INITIALIZED); + CL_ASSERT(p_list); + +#ifdef _DEBUG_ + /* Check that all items in the list came from this pool. */ + p_item = cl_qlist_head(p_list); + while (p_item != cl_qlist_end(p_list)) { + CL_ASSERT(((cl_pool_item_t *) p_item)->p_pool == p_pool); + p_item = cl_qlist_next(p_item); + } +#endif + + cl_qlist_insert_list_head(&p_pool->free_list, p_list); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_qcpool_t structure to which to return +* a list of objects. +* +* p_list +* [in] Pointer to a cl_qlist_t structure for the list of objects +* being returned. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* cl_qcpool_put_list places the returned objects at the head of the pool. 
+* +* The objects in the list specified by the p_list parameter must have been +* retrieved from the pool by a previous call to cl_qcpool_get. +* +* SEE ALSO +* Quick Composite Pool, cl_qcpool_put, cl_qcpool_put_tail, cl_qcpool_get +*********/ + +/****f* Component Library: Quick Composite Pool/cl_qcpool_grow +* NAME +* cl_qcpool_grow +* +* DESCRIPTION +* The cl_qcpool_grow function grows a quick composite pool by +* the specified number of objects. +* +* SYNOPSIS +*/ +cl_status_t cl_qcpool_grow(IN cl_qcpool_t * const p_pool, IN size_t obj_count); +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_qcpool_t structure whose capacity to grow. +* +* obj_count +* [in] Number of objects by which to grow the pool. +* +* RETURN VALUES +* CL_SUCCESS if the quick composite pool grew successfully. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory to grow the +* quick composite pool. +* +* cl_status_t value returned by optional initialization callback function +* specified by the pfn_initializer parameter passed to the +* cl_qcpool_init function. +* +* NOTES +* It is not necessary to call cl_qcpool_grow if the pool is +* configured to grow automatically. +* +* SEE ALSO +* Quick Composite Pool +*********/ + +END_C_DECLS +#endif /* _CL_QUICK_COMPOSITE_POOL_H_ */ diff --git a/include/complib/cl_qlist.h b/include/complib/cl_qlist.h new file mode 100644 index 0000000..accbd98 --- /dev/null +++ b/include/complib/cl_qlist.h @@ -0,0 +1,1702 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of quick list. + */ + +#ifndef _CL_QUICK_LIST_H_ +#define _CL_QUICK_LIST_H_ + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/Quick List +* NAME +* Quick List +* +* DESCRIPTION +* Quick list implements a doubly linked list that stores user provided +* cl_list_item_t structures. +* Quick list does not allocate any memory, and can therefore not fail any +* operations. Quick list can therefore be useful in minimizing the error +* paths in code.
+* +* Quick list is not thread safe, and users must provide serialization when +* adding and removing items from the list. Note that it is possible to +* walk a quick list while simultaneously adding to it. +* +* The Quick List functions operate on a cl_qlist_t structure which should be +* treated as opaque and should be manipulated only through the provided +* functions. +* +* SEE ALSO +* Structures: +* cl_qlist_t, cl_list_item_t, cl_list_obj_t +* +* Callbacks: +* cl_pfn_qlist_apply_t, cl_pfn_qlist_find_t +* +* Item Manipulation: +* cl_qlist_set_obj, cl_qlist_obj +* +* Initialization: +* cl_qlist_init +* +* Iteration: +* cl_qlist_next, cl_qlist_prev, cl_qlist_head, cl_qlist_tail, +* cl_qlist_end +* +* Manipulation: +* cl_qlist_insert_head, cl_qlist_insert_tail, +* cl_qlist_insert_list_head, cl_qlist_insert_list_tail, +* cl_qlist_insert_array_head, cl_qlist_insert_array_tail, +* cl_qlist_insert_prev, cl_qlist_insert_next, +* cl_qlist_remove_head, cl_qlist_remove_tail, +* cl_qlist_remove_item, cl_qlist_remove_all +* +* Search: +* cl_is_item_in_qlist, cl_qlist_find_next, cl_qlist_find_prev, +* cl_qlist_find_from_head, cl_qlist_find_from_tail +* cl_qlist_apply_func, cl_qlist_move_items +* +* Attributes: +* cl_qlist_count, cl_is_qlist_empty +*********/ +/****s* Component Library: Quick List/cl_list_item_t +* NAME +* cl_list_item_t +* +* DESCRIPTION +* The cl_list_item_t structure is used by lists to store objects. +* +* SYNOPSIS +*/ +typedef struct _cl_list_item { + struct _cl_list_item *p_next; + struct _cl_list_item *p_prev; +#ifdef _DEBUG_ + struct _cl_qlist *p_list; +#endif +} cl_list_item_t; +/* +* FIELDS +* p_next +* Used internally by the list. Users should not use this field. +* +* p_prev +* Used internally by the list. Users should not use this field. 
+* +* SEE ALSO +* Quick List +*********/ + +#define cl_item_obj(item_ptr, obj_ptr, item_field) (typeof(obj_ptr)) \ + ((void *)item_ptr - (unsigned long)&((typeof(obj_ptr))0)->item_field) + + +/****s* Component Library: Quick List/cl_list_obj_t +* NAME +* cl_list_obj_t +* +* DESCRIPTION +* The cl_list_obj_t structure is used by lists to store objects. +* +* SYNOPSIS +*/ +typedef struct _cl_list_obj { + cl_list_item_t list_item; + const void *p_object; /* User's context */ +} cl_list_obj_t; +/* +* FIELDS +* list_item +* Used internally by the list. Users should not use this field. +* +* p_object +* User defined context. Users should not access this field directly. +* Use cl_qlist_set_obj and cl_qlist_obj to set and retrieve the value +* of this field. +* +* NOTES +* Users can use the cl_qlist_set_obj and cl_qlist_obj functions to store +* and retrieve context information in the list item. +* +* SEE ALSO +* Quick List, cl_qlist_set_obj, cl_qlist_obj, cl_list_item_t +*********/ + +/****s* Component Library: Quick List/cl_qlist_t +* NAME +* cl_qlist_t +* +* DESCRIPTION +* Quick list structure. +* +* The cl_qlist_t structure should be treated as opaque and should be +* manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct _cl_qlist { + cl_list_item_t end; + size_t count; + cl_state_t state; +} cl_qlist_t; +/* +* FIELDS +* end +* List item used to mark the end of the list. +* +* count +* Number of items in the list. +* +* state +* State of the quick list. +* +* SEE ALSO +* Quick List +*********/ + +/****d* Component Library: Quick List/cl_pfn_qlist_apply_t +* NAME +* cl_pfn_qlist_apply_t +* +* DESCRIPTION +* The cl_pfn_qlist_apply_t function type defines the prototype for functions +* used to iterate items in a quick list. +* +* SYNOPSIS +*/ +typedef void + (*cl_pfn_qlist_apply_t) (IN cl_list_item_t * const p_list_item, + IN void *context); +/* +* PARAMETERS +* p_list_item +* [in] Pointer to a cl_list_item_t structure. 
+* +* context +* [in] Value passed to the callback function. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* This function type is provided as function prototype reference for the +* function provided by users as a parameter to the cl_qlist_apply_func +* function. +* +* SEE ALSO +* Quick List, cl_qlist_apply_func +*********/ + +/****d* Component Library: Quick List/cl_pfn_qlist_find_t +* NAME +* cl_pfn_qlist_find_t +* +* DESCRIPTION +* The cl_pfn_qlist_find_t function type defines the prototype for functions +* used to find items in a quick list. +* +* SYNOPSIS +*/ +typedef cl_status_t + (*cl_pfn_qlist_find_t) (IN const cl_list_item_t * const p_list_item, + IN void *context); +/* +* PARAMETERS +* p_list_item +* [in] Pointer to a cl_list_item_t. +* +* context +* [in] Value passed to the callback function. +* +* RETURN VALUES +* Return CL_SUCCESS if the desired item was found. This stops list iteration. +* +* Return CL_NOT_FOUND to continue list iteration. +* +* NOTES +* This function type is provided as function prototype reference for the +* function provided by users as a parameter to the cl_qlist_find_from_head, +* cl_qlist_find_from_tail, cl_qlist_find_next, and cl_qlist_find_prev +* functions. +* +* SEE ALSO +* Quick List, cl_qlist_find_from_head, cl_qlist_find_from_tail, +* cl_qlist_find_next, cl_qlist_find_prev +*********/ + +/****i* Component Library: Quick List/__cl_primitive_insert +* NAME +* __cl_primitive_insert +* +* DESCRIPTION +* Add a new item in front of the specified item. This is a low level +* function for use internally by the queuing routines. +* +* SYNOPSIS +*/ +static inline void +__cl_primitive_insert(IN cl_list_item_t * const p_list_item, + IN cl_list_item_t * const p_new_item) +{ + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list_item); + /* CL_ASSERT that a non-null pointer is provided. 
*/ + CL_ASSERT(p_new_item); + + p_new_item->p_next = p_list_item; + p_new_item->p_prev = p_list_item->p_prev; + p_list_item->p_prev = p_new_item; + p_new_item->p_prev->p_next = p_new_item; +} + +/* +* PARAMETERS +* p_list_item +* [in] Pointer to cl_list_item_t to insert in front of +* +* p_new_item +* [in] Pointer to cl_list_item_t to add +* +* RETURN VALUE +* This function does not return a value. +*********/ + +/****i* Component Library: Quick List/__cl_primitive_remove +* NAME +* __cl_primitive_remove +* +* DESCRIPTION +* Remove an item from a list. This is a low level routine +* for use internally by the queuing routines. +* +* SYNOPSIS +*/ +static inline void __cl_primitive_remove(IN cl_list_item_t * const p_list_item) +{ + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list_item); + + /* set the back pointer */ + p_list_item->p_next->p_prev = p_list_item->p_prev; + /* set the next pointer */ + p_list_item->p_prev->p_next = p_list_item->p_next; + + /* if we're debugging, spruce up the pointers to help find bugs */ +#if defined( _DEBUG_ ) + if (p_list_item != p_list_item->p_next) { + p_list_item->p_next = NULL; + p_list_item->p_prev = NULL; + } +#endif /* defined( _DEBUG_ ) */ +} + +/* +* PARAMETERS +* p_list_item +* [in] Pointer to cl_list_item_t to remove +* +* RETURN VALUE +* This function does not return a value. +*********/ + +/* + * Declaration of quick list functions + */ + +/****f* Component Library: Quick List/cl_qlist_set_obj +* NAME +* cl_qlist_set_obj +* +* DESCRIPTION +* The cl_qlist_set_obj function sets the object stored in a list object. +* +* SYNOPSIS +*/ +static inline void +cl_qlist_set_obj(IN cl_list_obj_t * const p_list_obj, + IN const void *const p_object) +{ + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list_obj); + p_list_obj->p_object = p_object; +} + +/* +* PARAMETERS +* p_list_obj +* [in] Pointer to a cl_list_obj_t structure. +* +* p_object +* [in] User defined context. 
+* +* RETURN VALUE +* This function does not return a value. +* +* SEE ALSO +* Quick List, cl_qlist_obj +*********/ + +/****f* Component Library: Quick List/cl_qlist_obj +* NAME +* cl_qlist_obj +* +* DESCRIPTION +* The cl_qlist_obj function returns the object stored in a list object. +* +* SYNOPSIS +*/ +static inline void *cl_qlist_obj(IN const cl_list_obj_t * const p_list_obj) +{ + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list_obj); + + return ((void *)p_list_obj->p_object); +} + +/* +* PARAMETERS +* p_list_obj +* [in] Pointer to a cl_list_obj_t structure. +* +* RETURN VALUE +* Returns the value of the object pointer stored in the list object. +* +* SEE ALSO +* Quick List, cl_qlist_set_obj +*********/ + +static inline void __cl_qlist_reset(IN cl_qlist_t * const p_list) +{ + /* Point the end item to itself. */ + p_list->end.p_next = &p_list->end; + p_list->end.p_prev = &p_list->end; +#if defined( _DEBUG_ ) + p_list->end.p_list = p_list; +#endif + + /* Clear the count. */ + p_list->count = 0; +} + +/****f* Component Library: Quick List/cl_qlist_init +* NAME +* cl_qlist_init +* +* DESCRIPTION +* The cl_qlist_init function initializes a quick list. +* +* SYNOPSIS +*/ +static inline void cl_qlist_init(IN cl_qlist_t * const p_list) +{ + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list); + + p_list->state = CL_INITIALIZED; + + /* Reset the quick list data structure. */ + __cl_qlist_reset(p_list); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure to initialize. +* +* RETURN VALUES +* This function does not return a value. +* +* NOTES +* Allows calling quick list manipulation functions. 
+* +* SEE ALSO +* Quick List, cl_qlist_insert_head, cl_qlist_insert_tail, +* cl_qlist_remove_head, cl_qlist_remove_tail +*********/ + +/****f* Component Library: Quick List/cl_qlist_count +* NAME +* cl_qlist_count +* +* DESCRIPTION +* The cl_qlist_count function returns the number of list items stored +* in a quick list. +* +* SYNOPSIS +*/ +static inline uint32_t cl_qlist_count(IN const cl_qlist_t * const p_list) +{ + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list); + /* CL_ASSERT that the list was initialized. */ + CL_ASSERT(p_list->state == CL_INITIALIZED); + return ((uint32_t) p_list->count); + +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure. +* +* RETURN VALUE +* Number of items in the list. The value is read from the list's count +* field; the list itself is not traversed. +* +* SEE ALSO +* Quick List, cl_is_qlist_empty +*********/ + +/****f* Component Library: Quick List/cl_is_qlist_empty +* NAME +* cl_is_qlist_empty +* +* DESCRIPTION +* The cl_is_qlist_empty function returns whether a quick list is empty. +* +* SYNOPSIS +*/ +static inline boolean_t cl_is_qlist_empty(IN const cl_qlist_t * const p_list) +{ + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list); + /* CL_ASSERT that the list was initialized. */ + CL_ASSERT(p_list->state == CL_INITIALIZED); + + return (!cl_qlist_count(p_list)); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure. +* +* RETURN VALUES +* TRUE if the specified quick list is empty. +* +* FALSE otherwise. +* +* SEE ALSO +* Quick List, cl_qlist_count, cl_qlist_remove_all +*********/ + +/****f* Component Library: Quick List/cl_qlist_next +* NAME +* cl_qlist_next +* +* DESCRIPTION +* The cl_qlist_next function returns a pointer to the list item following +* a given list item in a quick list. +* +* SYNOPSIS +*/ +static inline cl_list_item_t *cl_qlist_next(IN const cl_list_item_t * + const p_list_item) +{ + /* CL_ASSERT that a non-null pointer is provided. 
*/ + CL_ASSERT(p_list_item); + /* CL_ASSERT that the list was initialized. */ + CL_ASSERT(p_list_item->p_list->state == CL_INITIALIZED); + + /* Return the next item. */ + return (p_list_item->p_next); +} + +/* +* PARAMETERS +* p_list_item +* [in] Pointer to the cl_list_item_t whose successor to return. +* +* Returns: +* Pointer to the list item following the list item specified by +* the p_list_item parameter in the quick list. +* +* Pointer to the list end if p_list_item was at the tail of the list. +* +* SEE ALSO +* Quick List, cl_qlist_head, cl_qlist_tail, cl_qlist_prev, cl_qlist_end, +* cl_list_item_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_prev +* NAME +* cl_qlist_prev +* +* DESCRIPTION +* The cl_qlist_prev function returns a pointer to the list item preceding +* a given list item in a quick list. +* +* SYNOPSIS +*/ +static inline cl_list_item_t *cl_qlist_prev(IN const cl_list_item_t * + const p_list_item) +{ + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list_item); + /* CL_ASSERT that the list was initialized. */ + CL_ASSERT(p_list_item->p_list->state == CL_INITIALIZED); + + /* Return the previous item. */ + return (p_list_item->p_prev); +} + +/* +* PARAMETERS +* p_list_item +* [in] Pointer to the cl_list_item_t whose predecessor to return. +* +* Returns: +* Pointer to the list item preceding the list item specified by +* the p_list_item parameter in the quick list. +* +* Pointer to the list end if p_list_item was at the head of the list. +* +* SEE ALSO +* Quick List, cl_qlist_head, cl_qlist_tail, cl_qlist_next, cl_qlist_end, +* cl_list_item_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_head +* NAME +* cl_qlist_head +* +* DESCRIPTION +* The cl_qlist_head function returns the list item at +* the head of a quick list. +* +* SYNOPSIS +*/ +static inline cl_list_item_t *cl_qlist_head(IN const cl_qlist_t * const p_list) +{ + /* CL_ASSERT that a non-null pointer is provided. 
*/ + CL_ASSERT(p_list); + /* CL_ASSERT that the list was initialized. */ + CL_ASSERT(p_list->state == CL_INITIALIZED); + + return (cl_qlist_next(&p_list->end)); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure. +* +* RETURN VALUES +* Pointer to the list item at the head of the quick list. +* +* Pointer to the list end if the list was empty. +* +* NOTES +* cl_qlist_head does not remove the item from the list. +* +* SEE ALSO +* Quick List, cl_qlist_tail, cl_qlist_next, cl_qlist_prev, cl_qlist_end, +* cl_list_item_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_tail +* NAME +* cl_qlist_tail +* +* DESCRIPTION +* The cl_qlist_tail function returns the list item at +* the tail of a quick list. +* +* SYNOPSIS +*/ +static inline cl_list_item_t *cl_qlist_tail(IN const cl_qlist_t * const p_list) +{ + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list); + /* CL_ASSERT that the list was initialized. */ + CL_ASSERT(p_list->state == CL_INITIALIZED); + + return (cl_qlist_prev(&p_list->end)); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure. +* +* RETURN VALUES +* Pointer to the list item at the tail of the quick list. +* +* Pointer to the list end if the list was empty. +* +* NOTES +* cl_qlist_tail does not remove the item from the list. +* +* SEE ALSO +* Quick List, cl_qlist_head, cl_qlist_next, cl_qlist_prev, cl_qlist_end, +* cl_list_item_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_end +* NAME +* cl_qlist_end +* +* DESCRIPTION +* The cl_qlist_end function returns the end of a quick list. +* +* SYNOPSIS +*/ +static inline const cl_list_item_t *cl_qlist_end(IN const cl_qlist_t * + const p_list) +{ + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list); + /* CL_ASSERT that the list was initialized. */ + CL_ASSERT(p_list->state == CL_INITIALIZED); + + return (&p_list->end); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure. 
+* +* RETURN VALUE +* Pointer to the end of the list. +* +* NOTES +* cl_qlist_end is useful for determining the validity of list items returned +* by cl_qlist_head, cl_qlist_tail, cl_qlist_next, cl_qlist_prev, as well as +* the cl_qlist_find functions. If the list item pointer returned by any of +* these functions compares equal to the end, the end of the list was encountered. +* When using cl_qlist_head or cl_qlist_tail, this condition indicates that +* the list is empty. +* +* SEE ALSO +* Quick List, cl_qlist_head, cl_qlist_tail, cl_qlist_next, cl_qlist_prev, +* cl_list_item_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_insert_head +* NAME +* cl_qlist_insert_head +* +* DESCRIPTION +* The cl_qlist_insert_head function inserts a list item at the +* head of a quick list. +* +* SYNOPSIS +*/ +static inline void +cl_qlist_insert_head(IN cl_qlist_t * const p_list, + IN cl_list_item_t * const p_list_item) +{ + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list); + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list_item); + /* CL_ASSERT that the list was initialized. */ + CL_ASSERT(p_list->state == CL_INITIALIZED); + + /* + * The list item must not already be part of the list. Note that this + * assertion may fail if an uninitialized list item happens to have its + * list pointer equal to the specified list. The chances of this + * happening are acceptable in light of the value of this check. + */ + CL_ASSERT(p_list_item->p_list != p_list); + +#if defined( _DEBUG_ ) + p_list_item->p_list = p_list; +#endif + + /* Insert before the head. */ + __cl_primitive_insert(cl_qlist_head(p_list), p_list_item); + + p_list->count++; +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure into which to insert the object. +* +* p_list_item +* [in] Pointer to a cl_list_item_t structure to add. +* +* RETURN VALUE +* This function does not return a value. 
+* +* NOTES +* In debug builds, cl_qlist_insert_head asserts that the specified list item +* is not already in the list. +* +* SEE ALSO +* Quick List, cl_qlist_insert_tail, cl_qlist_insert_list_head, +* cl_qlist_insert_list_tail, cl_qlist_insert_array_head, +* cl_qlist_insert_array_tail, cl_qlist_insert_prev, cl_qlist_insert_next, +* cl_qlist_remove_head, cl_list_item_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_insert_tail +* NAME +* cl_qlist_insert_tail +* +* DESCRIPTION +* The cl_qlist_insert_tail function inserts a list item at the tail +* of a quick list. +* +* SYNOPSIS +*/ +static inline void +cl_qlist_insert_tail(IN cl_qlist_t * const p_list, + IN cl_list_item_t * const p_list_item) +{ + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list); + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list_item); + /* CL_ASSERT that the list was initialized. */ + CL_ASSERT(p_list->state == CL_INITIALIZED); + + /* + * The list item must not already be part of the list. Note that this + * assertion may fail if an uninitialized list item happens to have its + * list pointer equal to the specified list. The chances of this + * happening are acceptable in light of the value of this check. + */ + CL_ASSERT(p_list_item->p_list != p_list); + +#if defined( _DEBUG_ ) + p_list_item->p_list = p_list; +#endif + + /* + * Put the new element in front of the end which is the same + * as being at the tail + */ + __cl_primitive_insert(&p_list->end, p_list_item); + + p_list->count++; +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure into which to insert the object. +* +* p_list_item +* [in] Pointer to cl_list_item_t structure to add. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* In debug builds, cl_qlist_insert_tail asserts that the specified list item +* is not already in the list. 
+* +* SEE ALSO +* Quick List, cl_qlist_insert_head, cl_qlist_insert_list_head, +* cl_qlist_insert_list_tail, cl_qlist_insert_array_head, +* cl_qlist_insert_array_tail, cl_qlist_insert_prev, cl_qlist_insert_next, +* cl_qlist_remove_tail, cl_list_item_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_insert_list_head +* NAME +* cl_qlist_insert_list_head +* +* DESCRIPTION +* The cl_qlist_insert_list_head function merges two quick lists by +* inserting one at the head of the other. +* +* SYNOPSIS +*/ +void +cl_qlist_insert_list_head(IN cl_qlist_t * const p_dest_list, + IN cl_qlist_t * const p_src_list); +/* +* PARAMETERS +* p_dest_list +* [in] Pointer to destination quicklist object. +* +* p_src_list +* [in] Pointer to quicklist to add. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Inserts all list items in the source list to the head of the +* destination list. The ordering of the list items is preserved. +* +* The list pointed to by the p_src_list parameter is empty when +* the call returns. +* +* SEE ALSO +* Quick List, cl_qlist_insert_list_tail, cl_qlist_insert_head, +* cl_qlist_insert_tail, cl_qlist_insert_array_head, +* cl_qlist_insert_array_tail, cl_qlist_insert_prev, cl_qlist_insert_next, +* cl_list_item_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_insert_list_tail +* NAME +* cl_qlist_insert_list_tail +* +* DESCRIPTION +* The cl_qlist_insert_list_tail function merges two quick lists by +* inserting one at the tail of the other. +* +* SYNOPSIS +*/ +void +cl_qlist_insert_list_tail(IN cl_qlist_t * const p_dest_list, + IN cl_qlist_t * const p_src_list); +/* +* PARAMETERS +* p_dest_list +* [in] Pointer to destination quicklist object +* +* p_src_list +* [in] Pointer to quicklist to add +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Inserts all list items in the source list to the tail of the +* destination list. The ordering of the list items is preserved. 
+* +* The list pointed to by the p_src_list parameter is empty when +* the call returns. +* +* SEE ALSO +* Quick List, cl_qlist_insert_list_head, cl_qlist_insert_head, +* cl_qlist_insert_tail, cl_qlist_insert_array_head, +* cl_qlist_insert_array_tail, cl_qlist_insert_prev, cl_qlist_insert_next, +* cl_list_item_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_insert_array_head +* NAME +* cl_qlist_insert_array_head +* +* DESCRIPTION +* The cl_qlist_insert_array_head function inserts an array of list items +* at the head of a quick list. +* +* SYNOPSIS +*/ +void +cl_qlist_insert_array_head(IN cl_qlist_t * const p_list, + IN cl_list_item_t * const p_array, + IN uint32_t item_count, IN const uint32_t item_size); +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure into which to insert +* the objects. +* +* p_array +* [in] Pointer to the first list item in an array of cl_list_item_t +* structures. +* +* item_count +* [in] Number of cl_list_item_t structures in the array. +* +* item_size +* [in] Size of the items added to the list. This is the stride in the +* array from one cl_list_item_t structure to the next. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Inserts all the list items in the array specified by the p_array parameter +* to the head of the quick list specified by the p_list parameter, +* preserving ordering of the list items. +* +* The array pointer passed into the function points to the cl_list_item_t +* in the first element of the caller's element array. There is no +* restriction on where the element is stored in the parent structure. 
+* +* SEE ALSO +* Quick List, cl_qlist_insert_array_tail, cl_qlist_insert_head, +* cl_qlist_insert_tail, cl_qlist_insert_list_head, cl_qlist_insert_list_tail, +* cl_qlist_insert_prev, cl_qlist_insert_next, cl_list_item_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_insert_array_tail +* NAME +* cl_qlist_insert_array_tail +* +* DESCRIPTION +* The cl_qlist_insert_array_tail function inserts an array of list items +* at the tail of a quick list. +* +* SYNOPSIS +*/ +void +cl_qlist_insert_array_tail(IN cl_qlist_t * const p_list, + IN cl_list_item_t * const p_array, + IN uint32_t item_count, IN const uint32_t item_size); +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure into which to insert +* the objects. +* +* p_array +* [in] Pointer to the first list item in an array of cl_list_item_t +* structures. +* +* item_count +* [in] Number of cl_list_item_t structures in the array. +* +* item_size +* [in] Size of the items added to the list. This is the stride in the +* array from one cl_list_item_t structure to the next. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Inserts all the list items in the array specified by the p_array parameter +* to the tail of the quick list specified by the p_list parameter, +* preserving ordering of the list items. +* +* The array pointer passed into the function points to the cl_list_item_t +* in the first element of the caller's element array. There is no +* restriction on where the element is stored in the parent structure. +* +* SEE ALSO +* Quick List, cl_qlist_insert_array_head, cl_qlist_insert_head, +* cl_qlist_insert_tail, cl_qlist_insert_list_head, cl_qlist_insert_list_tail, +* cl_qlist_insert_prev, cl_qlist_insert_next, cl_list_item_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_insert_prev +* NAME +* cl_qlist_insert_prev +* +* DESCRIPTION +* The cl_qlist_insert_prev function inserts a list item before a +* specified list item in a quick list. 
+* +* SYNOPSIS +*/ +static inline void +cl_qlist_insert_prev(IN cl_qlist_t * const p_list, + IN cl_list_item_t * const p_list_item, + IN cl_list_item_t * const p_new_item) +{ + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list); + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list_item); + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_new_item); + /* CL_ASSERT that the list was initialized. */ + CL_ASSERT(p_list->state == CL_INITIALIZED); + + /* + * The list item must not already be part of the list. Note that this + * assertion may fail if an uninitialized list item happens to have its + * list pointer equal to the specified list. The chances of this + * happening are acceptable in light of the value of this check. + */ + CL_ASSERT(p_new_item->p_list != p_list); + +#if defined( _DEBUG_ ) + p_new_item->p_list = p_list; +#endif + + __cl_primitive_insert(p_list_item, p_new_item); + + p_list->count++; +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure into which to add the new item. +* +* p_list_item +* [in] Pointer to a cl_list_item_t structure. +* +* p_new_item +* [in] Pointer to a cl_list_item_t structure to add to the quick list. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Inserts the new list item before the list item specified by p_list_item. +* +* SEE ALSO +* Quick List, cl_qlist_insert_next, cl_qlist_insert_head, +* cl_qlist_insert_tail, cl_qlist_insert_list_head, cl_qlist_insert_list_tail, +* cl_qlist_insert_array_head, cl_qlist_insert_array_tail, cl_list_item_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_insert_next +* NAME +* cl_qlist_insert_next +* +* DESCRIPTION +* The cl_qlist_insert_next function inserts a list item after a specified +* list item in a quick list. 
+* +* SYNOPSIS +*/ +static inline void +cl_qlist_insert_next(IN cl_qlist_t * const p_list, + IN cl_list_item_t * const p_list_item, + IN cl_list_item_t * const p_new_item) +{ + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list); + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list_item); + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_new_item); + /* CL_ASSERT that the list was initialized. */ + CL_ASSERT(p_list->state == CL_INITIALIZED); + + /* + * The list item must not already be part of the list. Note that this + * assertion may fail if an uninitialized list item happens to have its + * list pointer equal to the specified list. The chances of this + * happening are acceptable in light of the value of this check. + */ + CL_ASSERT(p_new_item->p_list != p_list); + +#if defined( _DEBUG_ ) + p_new_item->p_list = p_list; +#endif + + __cl_primitive_insert(cl_qlist_next(p_list_item), p_new_item); + + p_list->count++; +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure into which to add the new item. +* +* p_list_item +* [in] Pointer to a cl_list_item_t structure. +* +* p_new_item +* [in] Pointer to a cl_list_item_t structure to add to the quick list. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Inserts the new list item after the list item specified by p_list_item. +* The list item specified by p_list_item must be in the quick list. +* +* SEE ALSO +* Quick List, cl_qlist_insert_prev, cl_qlist_insert_head, +* cl_qlist_insert_tail, cl_qlist_insert_list_head, cl_qlist_insert_list_tail, +* cl_qlist_insert_array_head, cl_qlist_insert_array_tail, cl_list_item_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_remove_head +* NAME +* cl_qlist_remove_head +* +* DESCRIPTION +* The cl_qlist_remove_head function removes and returns the list item +* at the head of a quick list. 
+* +* SYNOPSIS +*/ +static inline cl_list_item_t *cl_qlist_remove_head(IN cl_qlist_t * const p_list) +{ + cl_list_item_t *p_item; + + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list); + /* CL_ASSERT that the list was initialized. */ + CL_ASSERT(p_list->state == CL_INITIALIZED); + + p_item = cl_qlist_head(p_list); + /* CL_ASSERT that the list item is part of the list. */ + CL_ASSERT(p_item->p_list == p_list); + + if (p_item == cl_qlist_end(p_list)) + return (p_item); + +#if defined( _DEBUG_ ) + /* Clear the item's link to the list. */ + p_item->p_list = NULL; +#endif + + __cl_primitive_remove(p_item); + + p_list->count--; + + return (p_item); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure. +* +* RETURN VALUES +* Returns a pointer to the list item formerly at the head of the quick list. +* +* Pointer to the list end if the list was empty. +* +* SEE ALSO +* Quick List, cl_qlist_remove_tail, cl_qlist_remove_all, cl_qlist_remove_item, +* cl_qlist_end, cl_qlist_head, cl_list_item_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_remove_tail +* NAME +* cl_qlist_remove_tail +* +* DESCRIPTION +* The cl_qlist_remove_tail function removes and returns the list item +* at the tail of a quick list. +* +* SYNOPSIS +*/ +static inline cl_list_item_t *cl_qlist_remove_tail(IN cl_qlist_t * const p_list) +{ + cl_list_item_t *p_item; + + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list); + /* CL_ASSERT that the list was initialized. */ + CL_ASSERT(p_list->state == CL_INITIALIZED); + + p_item = cl_qlist_tail(p_list); + /* CL_ASSERT that the list item is part of the list. */ + CL_ASSERT(p_item->p_list == p_list); + + if (p_item == cl_qlist_end(p_list)) + return (p_item); + +#if defined( _DEBUG_ ) + /* Clear the item's link to the list. 
*/ + p_item->p_list = NULL; +#endif + + __cl_primitive_remove(p_item); + + p_list->count--; + + return (p_item); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure. +* +* RETURN VALUES +* Returns a pointer to the list item formerly at the tail of the quick list. +* +* Pointer to the list end if the list was empty. +* +* SEE ALSO +* Quick List, cl_qlist_remove_head, cl_qlist_remove_all, cl_qlist_remove_item, +* cl_qlist_end, cl_qlist_tail, cl_list_item_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_remove_item +* NAME +* cl_qlist_remove_item +* +* DESCRIPTION +* The cl_qlist_remove_item function removes a specific list item from a quick list. +* +* SYNOPSIS +*/ +static inline void +cl_qlist_remove_item(IN cl_qlist_t * const p_list, + IN cl_list_item_t * const p_list_item) +{ + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list); + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list_item); + /* CL_ASSERT that the list was initialized. */ + CL_ASSERT(p_list->state == CL_INITIALIZED); + /* CL_ASSERT that the list item is part of the list. */ + CL_ASSERT(p_list_item->p_list == p_list); + + if (p_list_item == cl_qlist_end(p_list)) + return; + +#if defined( _DEBUG_ ) + /* Clear the item's link to the list. */ + p_list_item->p_list = NULL; +#endif + + __cl_primitive_remove(p_list_item); + + p_list->count--; +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure from which to remove the item. +* +* p_list_item +* [in] Pointer to a cl_list_item_t structure to remove. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Removes the list item pointed to by the p_list_item parameter from +* its list. 
+* +* SEE ALSO +* Quick List, cl_qlist_remove_head, cl_qlist_remove_tail, cl_qlist_remove_all, +* cl_list_item_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_remove_all +* NAME +* cl_qlist_remove_all +* +* DESCRIPTION +* The cl_qlist_remove_all function removes all items from a quick list. +* +* SYNOPSIS +*/ +static inline void cl_qlist_remove_all(IN cl_qlist_t * const p_list) +{ +#if defined( _DEBUG_ ) + cl_list_item_t *p_list_item; + + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list); + /* CL_ASSERT that the list was initialized. */ + CL_ASSERT(p_list->state == CL_INITIALIZED); + p_list_item = cl_qlist_head(p_list); + while (p_list_item != cl_qlist_end(p_list)) { + p_list_item = cl_qlist_next(p_list_item); + cl_qlist_prev(p_list_item)->p_list = NULL; + } +#endif + + __cl_qlist_reset(p_list); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure. +* +* RETURN VALUE +* This function does not return a value. +* +* SEE ALSO +* Quick List, cl_qlist_remove_head, cl_qlist_remove_tail, +* cl_qlist_remove_item, cl_list_item_t +*********/ + +/****f* Component Library: Quick List/cl_is_item_in_qlist +* NAME +* cl_is_item_in_qlist +* +* DESCRIPTION +* The cl_is_item_in_qlist function checks for the presence of a +* list item in a quick list. +* +* SYNOPSIS +*/ +boolean_t +cl_is_item_in_qlist(IN const cl_qlist_t * const p_list, + IN const cl_list_item_t * const p_list_item); +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure. +* +* p_list_item +* [in] Pointer to the cl_list_item_t to find. +* +* RETURN VALUES +* TRUE if the list item was found in the quick list. +* +* FALSE otherwise. +* +* SEE ALSO +* Quick List, cl_qlist_remove_item, cl_list_item_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_find_next +* NAME +* cl_qlist_find_next +* +* DESCRIPTION +* The cl_qlist_find_next function invokes a specified function to +* search for an item, starting from a given list item. 
+* +* SYNOPSIS +*/ +cl_list_item_t *cl_qlist_find_next(IN const cl_qlist_t * const p_list, + IN const cl_list_item_t * const p_list_item, + IN cl_pfn_qlist_find_t pfn_func, + IN const void *const context); +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure in which to search. +* +* p_list_item +* [in] Pointer to a cl_list_item_t structure from which to start the search. +* +* pfn_func +* [in] Function invoked to determine if a match was found. +* See the cl_pfn_qlist_find_t function type declaration for details +* about the callback function. +* +* context +* [in] Value to pass to the callback functions to provide context if a +* callback function is provided, or value compared to the quick list's +* list items. +* +* Returns: +* Pointer to the list item, if found. +* +* p_list_item if not found. +* +* NOTES +* cl_qlist_find_next does not remove list items from the list. +* The list item is returned when the function specified by the pfn_func +* parameter returns CL_SUCCESS. The list item from which the search starts is +* excluded from the search. +* +* The function provided by the pfn_func must not perform any list operations, +* as these would corrupt the list. +* +* SEE ALSO +* Quick List, cl_qlist_find_prev, cl_qlist_find_from_head, +* cl_qlist_find_from_tail, cl_qlist_end, cl_qlist_apply_func, +* cl_qlist_move_items, cl_list_item_t, cl_pfn_qlist_find_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_find_prev +* NAME +* cl_qlist_find_prev +* +* DESCRIPTION +* The cl_qlist_find_prev function invokes a specified function to +* search backward for an item, starting from a given list item. +* +* SYNOPSIS +*/ +cl_list_item_t *cl_qlist_find_prev(IN const cl_qlist_t * const p_list, + IN const cl_list_item_t * const p_list_item, + IN cl_pfn_qlist_find_t pfn_func, + IN const void *const context); +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure in which to search. 
+* +* p_list_item +* [in] Pointer to a cl_list_item_t structure from which to start the search. +* +* pfn_func +* [in] Function invoked to determine if a match was found. +* See the cl_pfn_qlist_find_t function type declaration for details +* about the callback function. +* +* context +* [in] Value to pass to the callback functions to provide context if a +* callback function is provided, or value compared to the quick list's +* list items. +* +* Returns: +* Pointer to the list item, if found. +* +* p_list_item if not found. +* +* NOTES +* cl_qlist_find_prev does not remove list items from the list. +* The list item is returned when the function specified by the pfn_func +* parameter returns CL_SUCCESS. The list item from which the search starts is +* excluded from the search. +* +* The function provided by the pfn_func must not perform any list operations, +* as these would corrupt the list. +* +* SEE ALSO +* Quick List, cl_qlist_find_next, cl_qlist_find_from_head, +* cl_qlist_find_from_tail, cl_qlist_end, cl_qlist_apply_func, +* cl_qlist_move_items, cl_list_item_t, cl_pfn_qlist_find_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_find_from_head +* NAME +* cl_qlist_find_from_head +* +* DESCRIPTION +* The cl_qlist_find_from_head function invokes a specified function to +* search for an item, starting at the head of a quick list. +* +* SYNOPSIS +*/ +static inline cl_list_item_t *cl_qlist_find_from_head(IN const cl_qlist_t * + const p_list, + IN cl_pfn_qlist_find_t + pfn_func, + IN const void *const + context) +{ + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list); + /* CL_ASSERT that the list was initialized. */ + CL_ASSERT(p_list->state == CL_INITIALIZED); + /* CL_ASSERT that a find function is provided. */ + CL_ASSERT(pfn_func); + + return (cl_qlist_find_next(p_list, cl_qlist_end(p_list), pfn_func, + context)); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure. 
+* +* pfn_func +* [in] Function invoked to determine if a match was found. +* See the cl_pfn_qlist_find_t function type declaration for details +* about the callback function. +* +* context +* [in] Value to pass to the callback functions to provide context if a +* callback function is provided, or value compared to the quick list's +* list items. +* +* Returns: +* Pointer to the list item, if found. +* +* Pointer to the list end otherwise +* +* NOTES +* cl_qlist_find_from_head does not remove list items from the list. +* The list item is returned when the function specified by the pfn_func +* parameter returns CL_SUCCESS. +* +* The function provided by the pfn_func parameter must not perform any list +* operations, as these would corrupt the list. +* +* SEE ALSO +* Quick List, cl_qlist_find_from_tail, cl_qlist_find_next, cl_qlist_find_prev, +* cl_qlist_end, cl_qlist_apply_func, cl_qlist_move_items, cl_list_item_t, +* cl_pfn_qlist_find_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_find_from_tail +* NAME +* cl_qlist_find_from_tail +* +* DESCRIPTION +* The cl_qlist_find_from_tail function invokes a specified function to +* search for an item, starting at the tail of a quick list. +* +* SYNOPSIS +*/ +static inline cl_list_item_t *cl_qlist_find_from_tail(IN const cl_qlist_t * + const p_list, + IN cl_pfn_qlist_find_t + pfn_func, + IN const void *const + context) +{ + /* CL_ASSERT that a non-null pointer is provided. */ + CL_ASSERT(p_list); + /* CL_ASSERT that the list was initialized. */ + CL_ASSERT(p_list->state == CL_INITIALIZED); + /* CL_ASSERT that a find function is provided. */ + CL_ASSERT(pfn_func); + + return (cl_qlist_find_prev(p_list, cl_qlist_end(p_list), pfn_func, + context)); +} + +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure. +* +* pfn_func +* [in] Function invoked to determine if a match was found. +* See the cl_pfn_qlist_find_t function type declaration for details +* about the callback function. 
+* +* context +* [in] Value to pass to the callback functions to provide context if a +* callback function is provided, or value compared to the quick list's +* list items. +* +* Returns: +* Pointer to the list item, if found. +* +* Pointer to the list end otherwise +* +* NOTES +* cl_qlist_find_from_tail does not remove list items from the list. +* The list item is returned when the function specified by the pfn_func +* parameter returns CL_SUCCESS. +* +* The function provided by the pfn_func parameter must not perform any list +* operations, as these would corrupt the list. +* +* SEE ALSO +* Quick List, cl_qlist_find_from_head, cl_qlist_find_next, cl_qlist_find_prev, +* cl_qlist_apply_func, cl_qlist_end, cl_qlist_move_items, cl_list_item_t, +* cl_pfn_qlist_find_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_apply_func +* NAME +* cl_qlist_apply_func +* +* DESCRIPTION +* The cl_qlist_apply_func function executes a specified function +* for every list item stored in a quick list. +* +* SYNOPSIS +*/ +void +cl_qlist_apply_func(IN const cl_qlist_t * const p_list, + IN cl_pfn_qlist_apply_t pfn_func, + IN const void *const context); +/* +* PARAMETERS +* p_list +* [in] Pointer to a cl_qlist_t structure. +* +* pfn_func +* [in] Function invoked for every item in the quick list. +* See the cl_pfn_qlist_apply_t function type declaration for details +* about the callback function. +* +* context +* [in] Value to pass to the callback functions to provide context. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* The function provided must not perform any list operations, as these +* would corrupt the quick list. 
+* +* SEE ALSO +* Quick List, cl_qlist_find_from_head, cl_qlist_find_from_tail, +* cl_qlist_move_items, cl_pfn_qlist_apply_t +*********/ + +/****f* Component Library: Quick List/cl_qlist_move_items +* NAME +* cl_qlist_move_items +* +* DESCRIPTION +* The cl_qlist_move_items function moves list items from one list to +* another based on the return value of a user supplied function. +* +* SYNOPSIS +*/ +void +cl_qlist_move_items(IN cl_qlist_t * const p_src_list, + IN cl_qlist_t * const p_dest_list, + IN cl_pfn_qlist_find_t pfn_func, + IN const void *const context); +/* +* PARAMETERS +* p_src_list +* [in] Pointer to a cl_qlist_t structure from which +* list items are removed. +* +* p_dest_list +* [in] Pointer to a cl_qlist_t structure to which the source +* list items are added. +* +* pfn_func +* [in] Function invoked to determine if a match was found. +* See the cl_pfn_qlist_find_t function type declaration for details +* about the callback function. +* +* context +* [in] Value to pass to the callback functions to provide context. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* If the function specified by the pfn_func parameter returns CL_SUCCESS, +* the related list item is removed from p_src_list and inserted at the tail +* of the p_dest_list. +* +* The cl_qlist_move_items function continues iterating through p_src_list +* from the last item moved, allowing multiple items to be located and moved +* in a single list iteration. +* +* The function specified by pfn_func must not perform any list operations, +* as these would corrupt the list. +* +* SEE ALSO +* Quick List, cl_qlist_find_from_head, cl_qlist_find_from_tail, +* cl_qlist_apply_func, cl_pfn_qlist_find_t +*********/ + +END_C_DECLS +#endif /* _CL_QUICK_LIST_H_ */ diff --git a/include/complib/cl_qmap.h b/include/complib/cl_qmap.h new file mode 100644 index 0000000..7d86fe7 --- /dev/null +++ b/include/complib/cl_qmap.h @@ -0,0 +1,975 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. 
All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of quick map, a binary tree where the caller always provides + * all necessary storage. 
+ */ + +#ifndef _CL_QMAP_H_ +#define _CL_QMAP_H_ + +#include <complib/cl_qpool.h> + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/Quick Map +* NAME +* Quick Map +* +* DESCRIPTION +* Quick map implements a binary tree that stores user provided cl_map_item_t +* structures. Each item stored in a quick map has a unique 64-bit key +* (duplicates are not allowed). Quick map provides the ability to +* efficiently search for an item given a key. +* +* Quick map does not allocate any memory, and can therefore not fail +* any operations due to insufficient memory. Quick map can thus be useful +* in minimizing the error paths in code. +* +* Quick map is not thread safe, and users must provide serialization when +* adding and removing items from the map. +* +* The quick map functions operate on a cl_qmap_t structure which should be +* treated as opaque and should be manipulated only through the provided +* functions. +* +* SEE ALSO +* Structures: +* cl_qmap_t, cl_map_item_t, cl_map_obj_t +* +* Callbacks: +* cl_pfn_qmap_apply_t +* +* Item Manipulation: +* cl_qmap_set_obj, cl_qmap_obj, cl_qmap_key +* +* Initialization: +* cl_qmap_init +* +* Iteration: +* cl_qmap_end, cl_qmap_head, cl_qmap_tail, cl_qmap_next, cl_qmap_prev +* +* Manipulation: +* cl_qmap_insert, cl_qmap_get, cl_qmap_remove_item, cl_qmap_remove, +* cl_qmap_remove_all, cl_qmap_merge, cl_qmap_delta, cl_qmap_get_next +* +* Search: +* cl_qmap_apply_func +* +* Attributes: +* cl_qmap_count, cl_is_qmap_empty, +*********/ +/****i* Component Library: Quick Map/cl_map_color_t +* NAME +* cl_map_color_t +* +* DESCRIPTION +* The cl_map_color_t enumerated type is used to note the color of +* nodes in a map. +* +* SYNOPSIS +*/ +typedef enum _cl_map_color { + CL_MAP_RED, + CL_MAP_BLACK +} cl_map_color_t; +/* +* VALUES +* CL_MAP_RED +* The node in the map is red.
+* +* CL_MAP_BLACK +* The node in the map is black. +* +* SEE ALSO +* Quick Map, cl_map_item_t +*********/ + +/****s* Component Library: Quick Map/cl_map_item_t +* NAME +* cl_map_item_t +* +* DESCRIPTION +* The cl_map_item_t structure is used by maps to store objects. +* +* The cl_map_item_t structure should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct _cl_map_item { + /* Must be first to allow casting. */ + cl_pool_item_t pool_item; + struct _cl_map_item *p_left; + struct _cl_map_item *p_right; + struct _cl_map_item *p_up; + cl_map_color_t color; + uint64_t key; +#ifdef _DEBUG_ + struct _cl_qmap *p_map; +#endif +} cl_map_item_t; +/* +* FIELDS +* pool_item +* Used to store the item in a doubly linked list, allowing more +* efficient map traversal. +* +* p_left +* Pointer to the map item that is a child to the left of the node. +* +* p_right +* Pointer to the map item that is a child to the right of the node. +* +* p_up +* Pointer to the map item that is the parent of the node. +* +* color +* Indicates whether a node is red or black in the map. +* +* key +* Value that uniquely represents a node in a map. This value is +* set by calling cl_qmap_insert and can be retrieved by calling +* cl_qmap_key. +* +* NOTES +* None of the fields of this structure should be manipulated by users, as +* they are critical to the proper operation of the map in which they +* are stored. +* +* To allow storing items in either a quick list, a quick pool, or a quick +* map, the map implementation guarantees that the map item can be safely +* cast to a pool item used for storing an object in a quick pool, or cast +* to a list item used for storing an object in a quick list. This removes +* the need to embed a map item, a list item, and a pool item in objects +* that need to be stored in a quick list, a quick pool, and a quick map.
+* +* SEE ALSO +* Quick Map, cl_qmap_insert, cl_qmap_key, cl_pool_item_t, cl_list_item_t +*********/ + +/****s* Component Library: Quick Map/cl_map_obj_t +* NAME +* cl_map_obj_t +* +* DESCRIPTION +* The cl_map_obj_t structure is used to store objects in maps. +* +* The cl_map_obj_t structure should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct _cl_map_obj { + cl_map_item_t item; + const void *p_object; +} cl_map_obj_t; +/* +* FIELDS +* item +* Map item used internally by the map to store an object. +* +* p_object +* User defined context. Users should not access this field directly. +* Use cl_qmap_set_obj and cl_qmap_obj to set and retrieve the value +* of this field. +* +* NOTES +* None of the fields of this structure should be manipulated by users, as +* they are critical to the proper operation of the map in which they +* are stored. +* +* Use cl_qmap_set_obj and cl_qmap_obj to set and retrieve the object +* stored in a map item, respectively. +* +* SEE ALSO +* Quick Map, cl_qmap_set_obj, cl_qmap_obj, cl_map_item_t +*********/ + +/****s* Component Library: Quick Map/cl_qmap_t +* NAME +* cl_qmap_t +* +* DESCRIPTION +* Quick map structure. +* +* The cl_qmap_t structure should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct _cl_qmap { + cl_map_item_t root; + cl_map_item_t nil; + cl_state_t state; + size_t count; +} cl_qmap_t; +/* +* FIELDS +* root +* Map item that serves as root of the map. The root is set up to +* always have itself as parent. The left pointer is set to point +* to the item at the root. +* +* nil +* Map item that serves as terminator for all leaves, as well as +* providing the list item used as quick list for storing map items +* in a list for faster traversal. +* +* state +* State of the map, used to verify that operations are permitted. +* +* count +* Number of items in the map.
+* +* SEE ALSO +* Quick Map +*********/ + +/****d* Component Library: Quick Map/cl_pfn_qmap_apply_t +* NAME +* cl_pfn_qmap_apply_t +* +* DESCRIPTION +* The cl_pfn_qmap_apply_t function type defines the prototype for +* functions used to iterate items in a quick map. +* +* SYNOPSIS +*/ +typedef void + (*cl_pfn_qmap_apply_t) (IN cl_map_item_t * const p_map_item, IN void *context); +/* +* PARAMETERS +* p_map_item +* [in] Pointer to a cl_map_item_t structure. +* +* context +* [in] Value passed to the callback function. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* This function type is provided as function prototype reference for the +* function provided by users as a parameter to the cl_qmap_apply_func +* function. +* +* SEE ALSO +* Quick Map, cl_qmap_apply_func +*********/ + +/****f* Component Library: Quick Map/cl_qmap_count +* NAME +* cl_qmap_count +* +* DESCRIPTION +* The cl_qmap_count function returns the number of items stored +* in a quick map. +* +* SYNOPSIS +*/ +static inline uint32_t cl_qmap_count(IN const cl_qmap_t * const p_map) +{ + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + return ((uint32_t) p_map->count); +} + +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_qmap_t structure whose item count to return. +* +* RETURN VALUE +* Returns the number of items stored in the map. +* +* SEE ALSO +* Quick Map, cl_is_qmap_empty +*********/ + +/****f* Component Library: Quick Map/cl_is_qmap_empty +* NAME +* cl_is_qmap_empty +* +* DESCRIPTION +* The cl_is_qmap_empty function returns whether a quick map is empty. +* +* SYNOPSIS +*/ +static inline boolean_t cl_is_qmap_empty(IN const cl_qmap_t * const p_map) +{ + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + + return (p_map->count == 0); +} + +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_qmap_t structure to test for emptiness. +* +* RETURN VALUES +* TRUE if the quick map is empty. +* +* FALSE otherwise. 
+* +* SEE ALSO +* Quick Map, cl_qmap_count, cl_qmap_remove_all +*********/ + +/****f* Component Library: Quick Map/cl_qmap_set_obj +* NAME +* cl_qmap_set_obj +* +* DESCRIPTION +* The cl_qmap_set_obj function sets the object stored in a map object. +* +* SYNOPSIS +*/ +static inline void +cl_qmap_set_obj(IN cl_map_obj_t * const p_map_obj, + IN const void *const p_object) +{ + CL_ASSERT(p_map_obj); + p_map_obj->p_object = p_object; +} + +/* +* PARAMETERS +* p_map_obj +* [in] Pointer to a map object structure whose object pointer +* is to be set. +* +* p_object +* [in] User defined context. +* +* RETURN VALUE +* This function does not return a value. +* +* SEE ALSO +* Quick Map, cl_qmap_obj +*********/ + +/****f* Component Library: Quick Map/cl_qmap_obj +* NAME +* cl_qmap_obj +* +* DESCRIPTION +* The cl_qmap_obj function returns the object stored in a map object. +* +* SYNOPSIS +*/ +static inline void *cl_qmap_obj(IN const cl_map_obj_t * const p_map_obj) +{ + CL_ASSERT(p_map_obj); + return ((void *)p_map_obj->p_object); +} + +/* +* PARAMETERS +* p_map_obj +* [in] Pointer to a map object structure whose object pointer to return. +* +* RETURN VALUE +* Returns the value of the object pointer stored in the map object. +* +* SEE ALSO +* Quick Map, cl_qmap_set_obj +*********/ + +/****f* Component Library: Quick Map/cl_qmap_key +* NAME +* cl_qmap_key +* +* DESCRIPTION +* The cl_qmap_key function retrieves the key value of a map item. +* +* SYNOPSIS +*/ +static inline uint64_t cl_qmap_key(IN const cl_map_item_t * const p_item) +{ + CL_ASSERT(p_item); + return (p_item->key); +} + +/* +* PARAMETERS +* p_item +* [in] Pointer to a map item whose key value to return. +* +* RETURN VALUE +* Returns the 64-bit key value for the specified map item. +* +* NOTES +* The key value is set in a call to cl_qmap_insert.
+* +* SEE ALSO +* Quick Map, cl_qmap_insert +*********/ + +/****f* Component Library: Quick Map/cl_qmap_init +* NAME +* cl_qmap_init +* +* DESCRIPTION +* The cl_qmap_init function initializes a quick map for use. +* +* SYNOPSIS +*/ +void cl_qmap_init(IN cl_qmap_t * const p_map); +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_qmap_t structure to initialize. +* +* RETURN VALUES +* This function does not return a value. +* +* NOTES +* Allows calling quick map manipulation functions. +* +* SEE ALSO +* Quick Map, cl_qmap_insert, cl_qmap_remove +*********/ + +/****f* Component Library: Quick Map/cl_qmap_end +* NAME +* cl_qmap_end +* +* DESCRIPTION +* The cl_qmap_end function returns the end of a quick map. +* +* SYNOPSIS +*/ +static inline const cl_map_item_t *cl_qmap_end(IN const cl_qmap_t * const p_map) +{ + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + /* Nil is the end of the map. */ + return (&p_map->nil); +} + +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_qmap_t structure whose end to return. +* +* RETURN VALUE +* Pointer to the end of the map. +* +* NOTES +* cl_qmap_end is useful for determining the validity of map items returned +* by cl_qmap_head, cl_qmap_tail, cl_qmap_next, or cl_qmap_prev. If the +* map item pointer returned by any of these functions compares to the end, +* the end of the map was encountered. +* When using cl_qmap_head or cl_qmap_tail, this condition indicates that +* the map is empty. +* +* SEE ALSO +* Quick Map, cl_qmap_head, cl_qmap_tail, cl_qmap_next, cl_qmap_prev +*********/ + +/****f* Component Library: Quick Map/cl_qmap_head +* NAME +* cl_qmap_head +* +* DESCRIPTION +* The cl_qmap_head function returns the map item with the lowest key +* value stored in a quick map.
+* +* SYNOPSIS +*/ +static inline cl_map_item_t *cl_qmap_head(IN const cl_qmap_t * const p_map) +{ + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + return ((cl_map_item_t *) p_map->nil.pool_item.list_item.p_next); +} + +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_qmap_t structure whose item with the lowest +* key is returned. +* +* RETURN VALUES +* Pointer to the map item with the lowest key in the quick map. +* +* Pointer to the map end if the quick map was empty. +* +* NOTES +* cl_qmap_head does not remove the item from the map. +* +* SEE ALSO +* Quick Map, cl_qmap_tail, cl_qmap_next, cl_qmap_prev, cl_qmap_end, +* cl_map_item_t +*********/ + +/****f* Component Library: Quick Map/cl_qmap_tail +* NAME +* cl_qmap_tail +* +* DESCRIPTION +* The cl_qmap_tail function returns the map item with the highest key +* value stored in a quick map. +* +* SYNOPSIS +*/ +static inline cl_map_item_t *cl_qmap_tail(IN const cl_qmap_t * const p_map) +{ + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + return ((cl_map_item_t *) p_map->nil.pool_item.list_item.p_prev); +} + +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_qmap_t structure whose item with the +* highest key is returned. +* +* RETURN VALUES +* Pointer to the map item with the highest key in the quick map. +* +* Pointer to the map end if the quick map was empty. +* +* NOTES +* cl_qmap_tail does not remove the item from the map. +* +* SEE ALSO +* Quick Map, cl_qmap_head, cl_qmap_next, cl_qmap_prev, cl_qmap_end, +* cl_map_item_t +*********/ + +/****f* Component Library: Quick Map/cl_qmap_next +* NAME +* cl_qmap_next +* +* DESCRIPTION +* The cl_qmap_next function returns the map item with the next higher +* key value than a specified map item.
+* +* SYNOPSIS +*/ +static inline cl_map_item_t *cl_qmap_next(IN const cl_map_item_t * const p_item) +{ + CL_ASSERT(p_item); + return ((cl_map_item_t *) p_item->pool_item.list_item.p_next); +} + +/* +* PARAMETERS +* p_item +* [in] Pointer to a map item whose successor to return. +* +* RETURN VALUES +* Pointer to the map item with the next higher key value in a quick map. +* +* Pointer to the map end if the specified item was the last item in +* the quick map. +* +* SEE ALSO +* Quick Map, cl_qmap_head, cl_qmap_tail, cl_qmap_prev, cl_qmap_end, +* cl_map_item_t +*********/ + +/****f* Component Library: Quick Map/cl_qmap_prev +* NAME +* cl_qmap_prev +* +* DESCRIPTION +* The cl_qmap_prev function returns the map item with the next lower +* key value than a specified map item. +* +* SYNOPSIS +*/ +static inline cl_map_item_t *cl_qmap_prev(IN const cl_map_item_t * const p_item) +{ + CL_ASSERT(p_item); + return ((cl_map_item_t *) p_item->pool_item.list_item.p_prev); +} + +/* +* PARAMETERS +* p_item +* [in] Pointer to a map item whose predecessor to return. +* +* RETURN VALUES +* Pointer to the map item with the next lower key value in a quick map. +* +* Pointer to the map end if the specified item was the first item in +* the quick map. +* +* SEE ALSO +* Quick Map, cl_qmap_head, cl_qmap_tail, cl_qmap_next, cl_qmap_end, +* cl_map_item_t +*********/ + +/****f* Component Library: Quick Map/cl_qmap_insert +* NAME +* cl_qmap_insert +* +* DESCRIPTION +* The cl_qmap_insert function inserts a map item into a quick map. +* NOTE: Only if such a key does not already exist in the map !!!! +* +* SYNOPSIS +*/ +cl_map_item_t *cl_qmap_insert(IN cl_qmap_t * const p_map, + IN const uint64_t key, + IN cl_map_item_t * const p_item); +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_qmap_t structure into which to add the item. +* +* key +* [in] Value to assign to the item. +* +* p_item +* [in] Pointer to a cl_map_item_t structure to insert into the quick map.
+* +* RETURN VALUE +* Pointer to the item in the map with the specified key. If insertion +* was successful, this is the pointer to the item. If an item with the +* specified key already exists in the map, the pointer to that item is +* returned - but the new key is NOT inserted... +* +* NOTES +* Insertion operations may cause the quick map to rebalance. +* +* SEE ALSO +* Quick Map, cl_qmap_remove, cl_map_item_t +*********/ + +/****f* Component Library: Quick Map/cl_qmap_get +* NAME +* cl_qmap_get +* +* DESCRIPTION +* The cl_qmap_get function returns the map item associated with a key. +* +* SYNOPSIS +*/ +cl_map_item_t *cl_qmap_get(IN const cl_qmap_t * const p_map, + IN const uint64_t key); +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_qmap_t structure from which to retrieve the +* item with the specified key. +* +* key +* [in] Key value used to search for the desired map item. +* +* RETURN VALUES +* Pointer to the map item with the desired key value. +* +* Pointer to the map end if there was no item with the desired key value +* stored in the quick map. +* +* NOTES +* cl_qmap_get does not remove the item from the quick map. +* +* SEE ALSO +* Quick Map, cl_qmap_get_next, cl_qmap_remove +*********/ + +/****f* Component Library: Quick Map/cl_qmap_get_next +* NAME +* cl_qmap_get_next +* +* DESCRIPTION +* The cl_qmap_get_next function returns the first map item associated with a +* key > the key specified. +* +* SYNOPSIS +*/ +cl_map_item_t *cl_qmap_get_next(IN const cl_qmap_t * const p_map, + IN const uint64_t key); +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_qmap_t structure from which to retrieve the +* first item with a key > the specified key. +* +* key +* [in] Key value used to search for the desired map item. +* +* RETURN VALUES +* Pointer to the first map item with a key > the desired key value. +* +* Pointer to the map end if there was no item with a key > the desired key +* value stored in the quick map. 
+* +* NOTES +* cl_qmap_get_next does not remove the item from the quick map. +* +* SEE ALSO +* Quick Map, cl_qmap_get, cl_qmap_remove +*********/ + +/****f* Component Library: Quick Map/cl_qmap_remove_item +* NAME +* cl_qmap_remove_item +* +* DESCRIPTION +* The cl_qmap_remove_item function removes the specified map item +* from a quick map. +* +* SYNOPSIS +*/ +void +cl_qmap_remove_item(IN cl_qmap_t * const p_map, + IN cl_map_item_t * const p_item); +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_qmap_t structure from which to +* remove item. +* +* p_item +* [in] Pointer to a map item to remove from its quick map. +* +* RETURN VALUES +* This function does not return a value. +* +* In a debug build, cl_qmap_remove_item asserts that the item being removed +* is in the specified map. +* +* NOTES +* Removes the map item pointed to by p_item from its quick map. +* +* SEE ALSO +* Quick Map, cl_qmap_remove, cl_qmap_remove_all, cl_qmap_insert +*********/ + +/****f* Component Library: Quick Map/cl_qmap_remove +* NAME +* cl_qmap_remove +* +* DESCRIPTION +* The cl_qmap_remove function removes the map item with the specified key +* from a quick map. +* +* SYNOPSIS +*/ +cl_map_item_t *cl_qmap_remove(IN cl_qmap_t * const p_map, + IN const uint64_t key); +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_qmap_t structure from which to remove the item +* with the specified key. +* +* key +* [in] Key value used to search for the map item to remove. +* +* RETURN VALUES +* Pointer to the removed map item if it was found. +* +* Pointer to the map end if no item with the specified key exists in the +* quick map. +* +* SEE ALSO +* Quick Map, cl_qmap_remove_item, cl_qmap_remove_all, cl_qmap_insert +*********/ + +/****f* Component Library: Quick Map/cl_qmap_remove_all +* NAME +* cl_qmap_remove_all +* +* DESCRIPTION +* The cl_qmap_remove_all function removes all items in a quick map, +* leaving it empty. 
+* +* SYNOPSIS +*/ +static inline void cl_qmap_remove_all(IN cl_qmap_t * const p_map) +{ + CL_ASSERT(p_map); + CL_ASSERT(p_map->state == CL_INITIALIZED); + + p_map->root.p_left = &p_map->nil; + p_map->nil.pool_item.list_item.p_next = &p_map->nil.pool_item.list_item; + p_map->nil.pool_item.list_item.p_prev = &p_map->nil.pool_item.list_item; + p_map->count = 0; +} + +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_qmap_t structure to empty. +* +* RETURN VALUES +* This function does not return a value. +* +* SEE ALSO +* Quick Map, cl_qmap_remove, cl_qmap_remove_item +*********/ + +/****f* Component Library: Quick Map/cl_qmap_merge +* NAME +* cl_qmap_merge +* +* DESCRIPTION +* The cl_qmap_merge function moves all items from one map to another, +* excluding duplicates. +* +* SYNOPSIS +*/ +void +cl_qmap_merge(OUT cl_qmap_t * const p_dest_map, + IN OUT cl_qmap_t * const p_src_map); +/* +* PARAMETERS +* p_dest_map +* [out] Pointer to a cl_qmap_t structure to which items should be added. +* +* p_src_map +* [in/out] Pointer to a cl_qmap_t structure whose items to add +* to p_dest_map. +* +* RETURN VALUES +* This function does not return a value. +* +* NOTES +* Items are evaluated based on their keys only. +* +* Upon return from cl_qmap_merge, the quick map referenced by p_src_map +* contains all duplicate items. +* +* SEE ALSO +* Quick Map, cl_qmap_delta +*********/ + +/****f* Component Library: Quick Map/cl_qmap_delta +* NAME +* cl_qmap_delta +* +* DESCRIPTION +* The cl_qmap_delta function computes the differences between two maps. +* +* SYNOPSIS +*/ +void +cl_qmap_delta(IN OUT cl_qmap_t * const p_map1, + IN OUT cl_qmap_t * const p_map2, + OUT cl_qmap_t * const p_new, OUT cl_qmap_t * const p_old); +/* +* PARAMETERS +* p_map1 +* [in/out] Pointer to the first of two cl_qmap_t structures whose +* differences to compute. +* +* p_map2 +* [in/out] Pointer to the second of two cl_qmap_t structures whose +* differences to compute. 
+* +* p_new +* [out] Pointer to an empty cl_qmap_t structure that contains the +* items unique to p_map2 upon return from the function. +* +* p_old +* [out] Pointer to an empty cl_qmap_t structure that contains the +* items unique to p_map1 upon return from the function. +* +* RETURN VALUES +* This function does not return a value. +* +* NOTES +* Items are evaluated based on their keys. Items that exist in both +* p_map1 and p_map2 remain in their respective maps. Items that +* exist only in p_map1 are moved to p_old. Likewise, items that exist only +* in p_map2 are moved to p_new. This function can be useful in evaluating +* changes between two maps. +* +* Both maps pointed to by p_new and p_old must be empty on input. This +* requirement removes the possibility of failures. +* +* SEE ALSO +* Quick Map, cl_qmap_merge +*********/ + +/****f* Component Library: Quick Map/cl_qmap_apply_func +* NAME +* cl_qmap_apply_func +* +* DESCRIPTION +* The cl_qmap_apply_func function executes a specified function +* for every item stored in a quick map. +* +* SYNOPSIS +*/ +void +cl_qmap_apply_func(IN const cl_qmap_t * const p_map, + IN cl_pfn_qmap_apply_t pfn_func, + IN const void *const context); +/* +* PARAMETERS +* p_map +* [in] Pointer to a cl_qmap_t structure. +* +* pfn_func +* [in] Function invoked for every item in the quick map. +* See the cl_pfn_qmap_apply_t function type declaration for +* details about the callback function. +* +* context +* [in] Value to pass to the callback functions to provide context. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* The function provided must not perform any map operations, as these +* would corrupt the quick map.
+* +* SEE ALSO +* Quick Map, cl_pfn_qmap_apply_t +*********/ + +END_C_DECLS +#endif /* _CL_QMAP_H_ */ diff --git a/include/complib/cl_qpool.h b/include/complib/cl_qpool.h new file mode 100644 index 0000000..d0d5ad8 --- /dev/null +++ b/include/complib/cl_qpool.h @@ -0,0 +1,611 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of the quick pool. + * The quick pool manages a pool of objects. 
+ * The pool can grow to meet demand, limited only by system memory. + */ + +#ifndef _CL_QUICK_POOL_H_ +#define _CL_QUICK_POOL_H_ + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/Quick Pool +* NAME +* Quick Pool +* +* DESCRIPTION +* The quick pool provides a self-contained and self-sustaining pool +* of user defined objects. +* +* To aid in object oriented design, the quick pool provides the user +* the ability to specify callbacks that are invoked for each object for +* construction, initialization, and destruction. Constructor and destructor +* callback functions may not fail. +* +* A quick pool does not return memory to the system as the user returns +* objects to the pool. The only method of returning memory to the system is +* to destroy the pool. +* +* The quick pool operates on cl_pool_item_t structures that describe +* objects. This can provide for more efficient memory use and operation. +* If using a cl_pool_item_t is not desired, the Pool provides similar +* functionality but operates on opaque objects. +* +* The quick pool functions operate on a cl_qpool_t structure which should +* be treated as opaque and should be manipulated only through the provided +* functions.
+* +* SEE ALSO +* Structures: +* cl_qpool_t, cl_pool_item_t +* +* Callbacks: +* cl_pfn_qpool_init_t, cl_pfn_qpool_dtor_t +* +* Initialization/Destruction: +* cl_qpool_construct, cl_qpool_init, cl_qpool_destroy +* +* Manipulation: +* cl_qpool_get, cl_qpool_put, cl_qpool_put_list, cl_qpool_grow +* +* Attributes: +* cl_is_qpool_inited, cl_qpool_count +*********/ +/****d* Component Library: Quick Pool/cl_pfn_qpool_init_t +* NAME +* cl_pfn_qpool_init_t +* +* DESCRIPTION +* The cl_pfn_qpool_init_t function type defines the prototype for +* functions used as constructor for objects being allocated by a +* quick pool. +* +* SYNOPSIS +*/ +typedef cl_status_t + (*cl_pfn_qpool_init_t) (IN void *const p_object, + IN void *context, + OUT cl_pool_item_t ** const pp_pool_item); +/* +* PARAMETERS +* p_object +* [in] Pointer to an object to initialize. +* +* context +* [in] Context provided in a call to cl_qpool_init. +* +* pp_pool_item +* [out] Users should set this pointer to reference the cl_pool_item_t +* structure that represents the composite object. This pointer must +* not be NULL if the function returns CL_SUCCESS. +* +* RETURN VALUES +* Return CL_SUCCESS to indicate that initialization of the object +* was successful and that initialization of further objects may continue. +* +* Other cl_status_t values will be returned by cl_qpool_init +* and cl_qpool_grow. +* +* NOTES +* This function type is provided as function prototype reference for +* the function provided by the user as an optional parameter to the +* cl_qpool_init function. +* +* The initializer is invoked once per allocated object, allowing the user +* to perform any necessary initialization. Returning a status other than +* CL_SUCCESS aborts a grow operation, initiated either through cl_qpool_init +* or cl_qpool_grow, causing the initiating function to fail. +* Any non-CL_SUCCESS status will be returned by the function that initiated +* the grow operation. +* +* All memory for the object is pre-allocated.
Users should include space in +* their objects for the cl_pool_item_t structure that will represent the +* object to avoid having to allocate that structure in the initialization +* callback. +* +* When later performing a cl_qpool_get call, the return value is a pointer +* to the cl_pool_item_t returned by this function in the pp_pool_item +* parameter. Users must set pp_pool_item to a valid pointer to the +* cl_pool_item_t representing the object if they return CL_SUCCESS. +* +* SEE ALSO +* Quick Pool, cl_qpool_init +*********/ + +/****d* Component Library: Quick Pool/cl_pfn_qpool_dtor_t +* NAME +* cl_pfn_qpool_dtor_t +* +* DESCRIPTION +* The cl_pfn_qpool_dtor_t function type defines the prototype for +* functions used as destructor for objects being deallocated by a +* quick pool. +* +* SYNOPSIS +*/ +typedef void + (*cl_pfn_qpool_dtor_t) (IN const cl_pool_item_t * const p_pool_item, + IN void *context); +/* +* PARAMETERS +* p_pool_item +* [in] Pointer to a cl_pool_item_t structure representing an object. +* +* context +* [in] Context provided in a call to cl_qpool_init. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* This function type is provided as function prototype reference for +* the function provided by the user as an optional parameter to the +* cl_qpool_init function. +* +* The destructor is invoked once per allocated object, allowing the user +* to perform any necessary cleanup. Users should not attempt to deallocate +* the memory for the object, as the quick pool manages object +* allocation and deallocation. +* +* SEE ALSO +* Quick Pool, cl_qpool_init +*********/ + +/****s* Component Library: Quick Pool/cl_qpool_t +* NAME +* cl_qpool_t +* +* DESCRIPTION +* Quick pool structure. +* +* The cl_qpool_t structure should be treated as opaque and should be +* manipulated only through the provided functions.
+* +* SYNOPSIS +*/ +typedef struct _cl_qpool { + cl_qcpool_t qcpool; + cl_pfn_qpool_init_t pfn_init; + cl_pfn_qpool_dtor_t pfn_dtor; + const void *context; +} cl_qpool_t; +/* +* FIELDS +* qcpool +* Quick composite pool that manages all objects. +* +* pfn_init +* Pointer to the user's initializer callback, used by the pool +* to translate the quick composite pool's initializer callback to +* a quick pool initializer callback. +* +* pfn_dtor +* Pointer to the user's destructor callback, used by the pool +* to translate the quick composite pool's destructor callback to +* a quick pool destructor callback. +* +* context +* User's provided context for callback functions, used by the pool +* when invoking callbacks. +* +* SEE ALSO +* Quick Pool +*********/ + +/****f* Component Library: Quick Pool/cl_qpool_construct +* NAME +* cl_qpool_construct +* +* DESCRIPTION +* The cl_qpool_construct function constructs a quick pool. +* +* SYNOPSIS +*/ +void cl_qpool_construct(IN cl_qpool_t * const p_pool); +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_qpool_t structure whose state to initialize. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling cl_qpool_init, cl_qpool_destroy, cl_is_qpool_inited. +* +* Calling cl_qpool_construct is a prerequisite to calling any other +* quick pool function except cl_qpool_init. +* +* SEE ALSO +* Quick Pool, cl_qpool_init, cl_qpool_destroy, cl_is_qpool_inited. +*********/ + +/****f* Component Library: Quick Pool/cl_is_qpool_inited +* NAME +* cl_is_qpool_inited +* +* DESCRIPTION +* The cl_is_qpool_inited function returns whether a quick pool was +* successfully initialized. +* +* SYNOPSIS +*/ +static inline uint32_t cl_is_qpool_inited(IN const cl_qpool_t * const p_pool) +{ + /* CL_ASSERT that a non-null pointer is provided.
*/ + CL_ASSERT(p_pool); + return (cl_is_qcpool_inited(&p_pool->qcpool)); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_qpool_t structure whose initialization state +* to check. +* +* RETURN VALUES +* TRUE if the quick pool was initialized successfully. +* +* FALSE otherwise. +* +* NOTES +* Allows checking the state of a quick pool to determine if +* invoking member functions is appropriate. +* +* SEE ALSO +* Quick Pool +*********/ + +/****f* Component Library: Quick Pool/cl_qpool_init +* NAME +* cl_qpool_init +* +* DESCRIPTION +* The cl_qpool_init function initializes a quick pool for use. +* +* SYNOPSIS +*/ +cl_status_t +cl_qpool_init(IN cl_qpool_t * const p_pool, + IN const size_t min_size, + IN const size_t max_size, + IN const size_t grow_size, + IN const size_t object_size, + IN cl_pfn_qpool_init_t pfn_initializer OPTIONAL, + IN cl_pfn_qpool_dtor_t pfn_destructor OPTIONAL, + IN const void *const context); +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_qpool_t structure to initialize. +* +* min_size +* [in] Minimum number of objects that the pool should support. All +* necessary allocations to allow storing the minimum number of items +* are performed at initialization time, and all necessary callbacks +* successfully invoked. +* +* max_size +* [in] Maximum number of objects to which the pool is allowed to grow. +* A value of zero specifies no maximum. +* +* grow_size +* [in] Number of objects to allocate when incrementally growing the pool. +* A value of zero disables automatic growth. +* +* object_size +* [in] Size, in bytes, of each object. +* +* pfn_initializer +* [in] Initialization callback to invoke for every new object when +* growing the pool. This parameter is optional and may be NULL. If NULL, +* the pool assumes the cl_pool_item_t structure describing objects is +* located at the head of each object. See the cl_pfn_qpool_init_t +* function type declaration for details about the callback function. 
+* +* pfn_destructor +* [in] Destructor callback to invoke for every object before memory for +* that object is freed. This parameter is optional and may be NULL. +* See the cl_pfn_qpool_dtor_t function type declaration for details +* about the callback function. +* +* context +* [in] Value to pass to the callback functions to provide context. +* +* RETURN VALUES +* CL_SUCCESS if the quick pool was initialized successfully. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory to initialize the +* quick pool. +* +* CL_INVALID_SETTING if the maximum size is non-zero and less than the +* minimum size. +* +* Other cl_status_t value returned by optional initialization callback function +* specified by the pfn_initializer parameter. +* +* NOTES +* cl_qpool_init initializes, and if necessary, grows the pool to +* the capacity desired. +* +* SEE ALSO +* Quick Pool, cl_qpool_construct, cl_qpool_destroy, +* cl_qpool_get, cl_qpool_put, cl_qpool_grow, +* cl_qpool_count, cl_pfn_qpool_init_t, +* cl_pfn_qpool_dtor_t +*********/ + +/****f* Component Library: Quick Pool/cl_qpool_destroy +* NAME +* cl_qpool_destroy +* +* DESCRIPTION +* The cl_qpool_destroy function destroys a quick pool. +* +* SYNOPSIS +*/ +static inline void cl_qpool_destroy(IN cl_qpool_t * const p_pool) +{ + CL_ASSERT(p_pool); + cl_qcpool_destroy(&p_pool->qcpool); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_qpool_t structure to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* All memory allocated for objects is freed. The destructor callback, +* if any, will be invoked for every allocated object. Further operations +* on the pool should not be attempted after cl_qpool_destroy +* is invoked. +* +* This function should only be called after a call to +* cl_qpool_construct or cl_qpool_init. +* +* In a debug build, cl_qpool_destroy asserts that all objects are in +* the pool.
+* +* SEE ALSO +* Quick Pool, cl_qpool_construct, cl_qpool_init +*********/ + +/****f* Component Library: Quick Pool/cl_qpool_count +* NAME +* cl_qpool_count +* +* DESCRIPTION +* The cl_qpool_count function returns the number of available objects +* in a quick pool. +* +* SYNOPSIS +*/ +static inline size_t cl_qpool_count(IN cl_qpool_t * const p_pool) +{ + CL_ASSERT(p_pool); + return (cl_qcpool_count(&p_pool->qcpool)); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_qpool_t structure for which the number of +* available objects is requested. +* +* RETURN VALUE +* Returns the number of objects available in the specified quick pool. +* +* SEE ALSO +* Quick Pool +*********/ + +/****f* Component Library: Quick Pool/cl_qpool_get +* NAME +* cl_qpool_get +* +* DESCRIPTION +* The cl_qpool_get function retrieves an object from a +* quick pool. +* +* SYNOPSIS +*/ +static inline cl_pool_item_t *cl_qpool_get(IN cl_qpool_t * const p_pool) +{ + CL_ASSERT(p_pool); + return (cl_qcpool_get(&p_pool->qcpool)); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_qpool_t structure from which to retrieve +* an object. +* +* RETURN VALUES +* Returns a pointer to a cl_pool_item_t for an object. +* +* Returns NULL if the pool is empty and can not be grown automatically. +* +* NOTES +* cl_qpool_get returns the object at the head of the pool. If the pool is +* empty, it is automatically grown to accommodate this request unless the +* grow_size parameter passed to the cl_qpool_init function was zero. +* +* SEE ALSO +* Quick Pool, cl_qpool_get_tail, cl_qpool_put, cl_qpool_grow, cl_qpool_count +*********/ + +/****f* Component Library: Quick Pool/cl_qpool_put +* NAME +* cl_qpool_put +* +* DESCRIPTION +* The cl_qpool_put function returns an object to the head of a quick pool. 
+* +* SYNOPSIS +*/ +static inline void +cl_qpool_put(IN cl_qpool_t * const p_pool, + IN cl_pool_item_t * const p_pool_item) +{ + CL_ASSERT(p_pool); + cl_qcpool_put(&p_pool->qcpool, p_pool_item); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_qpool_t structure to which to return +* an object. +* +* p_pool_item +* [in] Pointer to a cl_pool_item_t structure for the object +* being returned. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* cl_qpool_put places the returned object at the head of the pool. +* +* The object specified by the p_pool_item parameter must have been +* retrieved from the pool by a previous call to cl_qpool_get. +* +* SEE ALSO +* Quick Pool, cl_qpool_put_tail, cl_qpool_get +*********/ + +/****f* Component Library: Quick Pool/cl_qpool_put_list +* NAME +* cl_qpool_put_list +* +* DESCRIPTION +* The cl_qpool_put_list function returns a list of objects to the head +* of a quick pool. +* +* SYNOPSIS +*/ +static inline void +cl_qpool_put_list(IN cl_qpool_t * const p_pool, IN cl_qlist_t * const p_list) +{ + CL_ASSERT(p_pool); + cl_qcpool_put_list(&p_pool->qcpool, p_list); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_qpool_t structure to which to return +* a list of objects. +* +* p_list +* [in] Pointer to a cl_qlist_t structure for the list of objects +* being returned. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* cl_qpool_put_list places the returned objects at the head of the pool. +* +* The objects in the list specified by the p_list parameter must have been +* retrieved from the pool by a previous call to cl_qpool_get. +* +* SEE ALSO +* Quick Pool, cl_qpool_put, cl_qpool_put_tail, cl_qpool_get +*********/ + +/****f* Component Library: Quick Pool/cl_qpool_grow +* NAME +* cl_qpool_grow +* +* DESCRIPTION +* The cl_qpool_grow function grows a quick pool by +* the specified number of objects. 
+* +* SYNOPSIS +*/ +static inline cl_status_t +cl_qpool_grow(IN cl_qpool_t * const p_pool, IN const size_t obj_count) +{ + CL_ASSERT(p_pool); + return (cl_qcpool_grow(&p_pool->qcpool, obj_count)); +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to a cl_qpool_t structure whose capacity to grow. +* +* obj_count +* [in] Number of objects by which to grow the pool. +* +* RETURN VALUES +* CL_SUCCESS if the quick pool grew successfully. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory to grow the +* quick pool. +* +* cl_status_t value returned by optional initialization callback function +* specified by the pfn_initializer parameter passed to the +* cl_qpool_init function. +* +* NOTES +* It is not necessary to call cl_qpool_grow if the pool is +* configured to grow automatically. +* +* SEE ALSO +* Quick Pool +*********/ + +END_C_DECLS +#endif /* _CL_QUICK_POOL_H_ */ diff --git a/include/complib/cl_spinlock.h b/include/complib/cl_spinlock.h new file mode 100644 index 0000000..4de6f26 --- /dev/null +++ b/include/complib/cl_spinlock.h @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of spin lock object. + */ + +#ifndef _CL_SPINLOCK_H_ +#define _CL_SPINLOCK_H_ + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Public/Spinlock +* NAME +* Spinlock +* +* DESCRIPTION +* Spinlock provides synchronization between threads for exclusive access to +* a resource. +* +* The spinlock functions manipulate a cl_spinlock_t structure which should +* be treated as opaque and should be manipulated only through the provided +* functions. +* +* SEE ALSO +* Structures: +* cl_spinlock_t +* +* Initialization: +* cl_spinlock_construct, cl_spinlock_init, cl_spinlock_destroy +* +* Manipulation: +* cl_spinlock_acquire, cl_spinlock_release +*********/ +/****f* Component Library: Spinlock/cl_spinlock_construct +* NAME +* cl_spinlock_construct +* +* DESCRIPTION +* The cl_spinlock_construct function initializes the state of a +* spin lock. +* +* SYNOPSIS +*/ +void cl_spinlock_construct(IN cl_spinlock_t * const p_spinlock); +/* +* PARAMETERS +* p_spinlock +* [in] Pointer to a spin lock structure whose state to initialize.
+* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling cl_spinlock_destroy without first calling +* cl_spinlock_init. +* +* Calling cl_spinlock_construct is a prerequisite to calling any other +* spin lock function except cl_spinlock_init. +* +* SEE ALSO +* Spinlock, cl_spinlock_init, cl_spinlock_destroy +*********/ + +/****f* Component Library: Spinlock/cl_spinlock_init +* NAME +* cl_spinlock_init +* +* DESCRIPTION +* The cl_spinlock_init function initializes a spin lock for use. +* +* SYNOPSIS +*/ +cl_status_t cl_spinlock_init(IN cl_spinlock_t * const p_spinlock); +/* +* PARAMETERS +* p_spinlock +* [in] Pointer to a spin lock structure to initialize. +* +* RETURN VALUES +* CL_SUCCESS if initialization succeeded. +* +* CL_ERROR if initialization failed. Callers should call +* cl_spinlock_destroy to clean up any resources allocated during +* initialization. +* +* NOTES +* Initialize the spin lock structure. Allows calling cl_spinlock_acquire +* and cl_spinlock_release. +* +* SEE ALSO +* Spinlock, cl_spinlock_construct, cl_spinlock_destroy, +* cl_spinlock_acquire, cl_spinlock_release +*********/ + +/****f* Component Library: Spinlock/cl_spinlock_destroy +* NAME +* cl_spinlock_destroy +* +* DESCRIPTION +* The cl_spinlock_destroy function performs all necessary cleanup of a +* spin lock. +* +* SYNOPSIS +*/ +void cl_spinlock_destroy(IN cl_spinlock_t * const p_spinlock); +/* +* PARAMETERS +* p_spinlock +* [in] Pointer to a spin lock structure to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs any necessary cleanup of a spin lock. This function must only +* be called if either cl_spinlock_construct or cl_spinlock_init has been +* called. +* +* SEE ALSO +* Spinlock, cl_spinlock_construct, cl_spinlock_init +*********/ + +/****f* Component Library: Spinlock/cl_spinlock_acquire +* NAME +* cl_spinlock_acquire +* +* DESCRIPTION +* The cl_spinlock_acquire function acquires a spin lock.
+* This version of lock does not prevent an interrupt from +* occurring on the processor on which the code is being +* executed. +* +* SYNOPSIS +*/ +void cl_spinlock_acquire(IN cl_spinlock_t * const p_spinlock); +/* +* PARAMETERS +* p_spinlock +* [in] Pointer to a spin lock structure to acquire. +* +* RETURN VALUE +* This function does not return a value. +* +* SEE ALSO +* Spinlock, cl_spinlock_release +*********/ + +/****f* Component Library: Spinlock/cl_spinlock_release +* NAME +* cl_spinlock_release +* +* DESCRIPTION +* The cl_spinlock_release function releases a spin lock object. +* +* SYNOPSIS +*/ +void cl_spinlock_release(IN cl_spinlock_t * const p_spinlock); +/* +* PARAMETERS +* p_spinlock +* [in] Pointer to a spin lock structure to release. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Releases a spin lock after a call to cl_spinlock_acquire. +* +* SEE ALSO +* Spinlock, cl_spinlock_acquire +*********/ + +END_C_DECLS +#endif /* _CL_SPINLOCK_H_ */ diff --git a/include/complib/cl_spinlock_osd.h b/include/complib/cl_spinlock_osd.h new file mode 100644 index 0000000..beb6405 --- /dev/null +++ b/include/complib/cl_spinlock_osd.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of spin lock object. + */ + +#ifndef _CL_SPINLOCK_OSD_H_ +#define _CL_SPINLOCK_OSD_H_ + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +#include +#include /* usr/include/ */ +typedef struct _cl_spinlock_t { + pthread_mutex_t mutex; + cl_state_t state; +} cl_spinlock_t; + +END_C_DECLS +#endif /* _CL_SPINLOCK_OSD_H_ */ diff --git a/include/complib/cl_thread.h b/include/complib/cl_thread.h new file mode 100644 index 0000000..0a622a1 --- /dev/null +++ b/include/complib/cl_thread.h @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. 
All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of thread abstraction and thread related operations. + */ + +#ifndef _CL_THREAD_H_ +#define _CL_THREAD_H_ + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****i* Component Library/Thread +* NAME +* Thread +* +* DESCRIPTION +* The Thread provides a separate thread of execution. 
+* +* The cl_thread_t structure should be treated as opaque and should be +* manipulated only through the provided functions. +*********/ +/****d* Component Library: Thread/cl_pfn_thread_callback_t +* NAME +* cl_pfn_thread_callback_t +* +* DESCRIPTION +* The cl_pfn_thread_callback_t function type defines the prototype +* for functions invoked by thread objects +* +* SYNOPSIS +*/ +typedef void (*cl_pfn_thread_callback_t) (IN void *context); +/* +* PARAMETERS +* context +* [in] Value specified in a call to cl_thread_init. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* This function type is provided as function prototype reference for +* the function provided by users as a parameter to cl_thread_init. +* +* SEE ALSO +* Thread Pool +*********/ + +/****i* Component Library: Thread/cl_thread_t +* NAME +* cl_thread_t +* +* DESCRIPTION +* Thread structure. +* +* The cl_thread_t structure should be treated as opaque and should be +* manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct _cl_thread { + cl_thread_osd_t osd; + cl_pfn_thread_callback_t pfn_callback; + const void *context; + char name[16]; +} cl_thread_t; +/* +* FIELDS +* osd +* Implementation specific structure for managing thread information. +* +* pfn_callback +* Callback function for the thread to invoke. +* +* context +* Context to pass to the thread callback function. +* +* name +* Name to assign to the thread. +* +* SEE ALSO +* Thread +*********/ + +/****i* Component Library: Thread/cl_thread_construct +* NAME +* cl_thread_construct +* +* DESCRIPTION +* The cl_thread_construct function initializes the state of a thread. +* +* SYNOPSIS +*/ +void cl_thread_construct(IN cl_thread_t * const p_thread); +/* +* PARAMETERS +* p_thread +* [in] Pointer to a cl_thread_t structure whose state to initialize. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling cl_thread_destroy without first calling cl_thread_init. 
+* +* Calling cl_thread_construct is a prerequisite to calling any other +* thread function except cl_thread_init. +* +* SEE ALSO +* Thread, cl_thread_init, cl_thread_destroy +*********/ + +/****i* Component Library: Thread/cl_thread_init +* NAME +* cl_thread_init +* +* DESCRIPTION +* The cl_thread_init function creates a new thread of execution. +* +* SYNOPSIS +*/ +cl_status_t +cl_thread_init(IN cl_thread_t * const p_thread, + IN cl_pfn_thread_callback_t pfn_callback, + IN const void *const context, IN const char *const name); +/* +* PARAMETERS +* p_thread +* [in] Pointer to a cl_thread_t structure to initialize. +* +* pfn_callback +* [in] Address of a function to be invoked by a thread. +* See the cl_pfn_thread_callback_t function type definition for +* details about the callback function. +* +* context +* [in] Value to pass to the callback function. +* +* name +* [in] Name to associate with the thread. The name may be up to 16 +* characters, including a terminating null character. +* +* RETURN VALUES +* CL_SUCCESS if thread creation succeeded. +* +* CL_ERROR if thread creation failed. +* +* NOTES +* The thread created with cl_thread_init will invoke the callback +* specified by the callback parameter with context as single parameter. +* +* The callback function is invoked once, and the thread exits when the +* callback returns. +* +* It is invalid to call cl_thread_destroy from the callback function, +* as doing so will result in a deadlock. +* +* SEE ALSO +* Thread, cl_thread_construct, cl_thread_destroy, cl_thread_suspend, +* cl_thread_stall, cl_pfn_thread_callback_t +*********/ + +/****i* Component Library: Thread/cl_thread_destroy +* NAME +* cl_thread_destroy +* +* DESCRIPTION +* The cl_thread_destroy function performs any necessary cleanup to free +* resources associated with the specified thread. +* +* SYNOPSIS +*/ +void cl_thread_destroy(IN cl_thread_t * const p_thread); +/* +* PARAMETERS +* p_thread +* [in] Pointer to a cl_thread_t structure to destroy. 
+* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* This function blocks until the thread exits and must not be called by the +* thread itself. Callers must therefore ensure that such a blocking call is +* possible from the context of the call. +* +* This function must only be called after a call to cl_thread_construct or +* cl_thread_init. +* +* SEE ALSO +* Thread, cl_thread_construct, cl_thread_init +*********/ + +/****f* Component Library: Thread/cl_thread_suspend +* NAME +* cl_thread_suspend +* +* DESCRIPTION +* The cl_thread_suspend function suspends the calling thread for a minimum +* of the specified number of milliseconds. +* +* SYNOPSIS +*/ +void cl_thread_suspend(IN const uint32_t pause_ms); +/* +* PARAMETERS +* pause_ms +* [in] Number of milliseconds to suspend the calling thread. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* This function should only be called if it is valid for the caller's thread +* to enter a wait state. For stalling a thread that cannot enter a wait +* state, callers should use cl_thread_stall. +* +* SEE ALSO +* Thread, cl_thread_stall +*********/ + +/****f* Component Library: Thread/cl_thread_stall +* NAME +* cl_thread_stall +* +* DESCRIPTION +* The cl_thread_stall function stalls the calling thread for a minimum of +* the specified number of microseconds. +* +* SYNOPSIS +*/ +void cl_thread_stall(IN const uint32_t pause_us); +/* +* PARAMETERS +* pause_us +* [in] Number of microseconds to stall the calling thread. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* The cl_thread_stall function performs a busy wait for the specified +* number of microseconds. Care should be taken when using this function as +* it does not relinquish its quantum of operation. For longer wait +* operations, users should call cl_thread_suspend if possible. 
+* +* SEE ALSO +* Thread, cl_thread_suspend +*********/ + +/****f* Component Library: Thread/cl_proc_count +* NAME +* cl_proc_count +* +* DESCRIPTION +* The cl_proc_count function returns the number of processors in the system. +* +* SYNOPSIS +*/ +int cl_proc_count(void); +/* +* RETURN VALUE +* Returns the number of processors in the system. +*********/ + +/****i* Component Library: Thread/cl_is_current_thread +* NAME +* cl_is_current_thread +* +* DESCRIPTION +* The cl_is_current_thread function compares the calling thread to the +* specified thread and returns whether they are the same. +* +* SYNOPSIS +*/ +boolean_t cl_is_current_thread(IN const cl_thread_t * const p_thread); +/* +* PARAMETERS +* p_thread +* [in] Pointer to a cl_thread_t structure to compare to the +* caller's thread. +* +* RETURN VALUES +* TRUE if the thread specified by the p_thread parameter is the +* calling thread. +* +* FALSE otherwise. +* +* SEE ALSO +* Thread, cl_thread_init +*********/ + +/****f* Component Library: Thread/cl_is_blockable +* NAME +* cl_is_blockable +* +* DESCRIPTION +* The cl_is_blockable function indicates if the current caller context is +* blockable. +* +* SYNOPSIS +*/ +boolean_t cl_is_blockable(void); +/* +* RETURN VALUE +* TRUE +* Current caller context can be blocked, i.e. it is safe to perform +* a sleep, or call a down operation on a semaphore. +* +*********/ + +END_C_DECLS +#endif /* _CL_THREAD_H_ */ diff --git a/include/complib/cl_thread_osd.h b/include/complib/cl_thread_osd.h new file mode 100644 index 0000000..ad7df90 --- /dev/null +++ b/include/complib/cl_thread_osd.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of thread abstraction and thread related operations. + */ + +#ifndef _CL_THREAD_OSD_H_ +#define _CL_THREAD_OSD_H_ + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +#include +#include +#include +/* Linux user mode thread object structure definition. 
*/ +typedef struct _cl_thread_osd_t { + pthread_t id; + cl_state_t state; +} cl_thread_osd_t; + +static inline boolean_t cl_is_blockable(void) +{ + return TRUE; +} + +END_C_DECLS +#endif /* _CL_THREAD_OSD_H_ */ diff --git a/include/complib/cl_threadpool.h b/include/complib/cl_threadpool.h new file mode 100644 index 0000000..3ee6cf4 --- /dev/null +++ b/include/complib/cl_threadpool.h @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +/* + * Abstract: + * Declaration of thread pool. + */ + +#ifndef _CL_THREAD_POOL_H_ +#define _CL_THREAD_POOL_H_ + +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/Thread Pool +* NAME +* Thread Pool +* +* DESCRIPTION +* The Thread Pool manages a user specified number of threads. +* +* Each thread in the thread pool waits for a user initiated signal before +* invoking a user specified callback function. All threads in the thread +* pool invoke the same callback function. +* +* The thread pool functions operate on a cl_thread_pool_t structure which +* should be treated as opaque, and should be manipulated only through the +* provided functions. +* +* SEE ALSO +* Structures: +* cl_thread_pool_t +* +* Initialization: +* cl_thread_pool_init, cl_thread_pool_destroy +* +* Manipulation +* cl_thread_pool_signal +*********/ +/****s* Component Library: Thread Pool/cl_thread_pool_t +* NAME +* cl_thread_pool_t +* +* DESCRIPTION +* Thread pool structure. +* +* The cl_thread_pool_t structure should be treated as opaque, and should be +* manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct _cl_thread_pool { + void (*pfn_callback) (void *); + void *context; + unsigned running_count; + unsigned events; + pthread_cond_t cond; + pthread_mutex_t mutex; + pthread_t *tid; +} cl_thread_pool_t; +/* +* FIELDS +* pfn_callback +* Callback function for the thread to invoke. +* +* context +* Context to pass to the thread callback function. +* +* running_count +* Number of threads running. +* +* events +* events counter +* +* mutex +* mutex for cond variable protection +* +* cond +* conditional variable to signal an event to thread +* +* tid +* array of allocated thread ids. 
+* +* SEE ALSO +* Thread Pool +*********/ + +/****f* Component Library: Thread Pool/cl_thread_pool_init +* NAME +* cl_thread_pool_init +* +* DESCRIPTION +* The cl_thread_pool_init function creates the threads to be +* managed by a thread pool. +* +* SYNOPSIS +*/ +cl_status_t +cl_thread_pool_init(IN cl_thread_pool_t * const p_thread_pool, + IN unsigned count, + IN void (*pfn_callback) (void *), + IN void *context, IN const char *const name); +/* +* PARAMETERS +* p_thread_pool +* [in] Pointer to a thread pool structure to initialize. +* +* count +* [in] Number of threads to be managed by the thread pool. +* +* pfn_callback +* [in] Address of a function to be invoked by a thread. +* See the cl_pfn_thread_callback_t function type definition for +* details about the callback function. +* +* context +* [in] Value to pass to the callback function. +* +* name +* [in] Name to associate with the threads. The name may be up to 16 +* characters, including a terminating null character. All threads +* created in the pool have the same name. +* +* RETURN VALUES +* CL_SUCCESS if the thread pool creation succeeded. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory to initialize +* the thread pool. +* +* CL_ERROR if the threads could not be created. +* +* NOTES +* cl_thread_pool_init creates and starts the specified number of threads. +* If count is zero, the thread pool creates as many threads as there +* are processors in the system. +* +* SEE ALSO +* Thread Pool, cl_thread_pool_destroy, +* cl_thread_pool_signal, cl_pfn_thread_callback_t +*********/ + +/****f* Component Library: Thread Pool/cl_thread_pool_destroy +* NAME +* cl_thread_pool_destroy +* +* DESCRIPTION +* The cl_thread_pool_destroy function performs any necessary cleanup +* for a thread pool. +* +* SYNOPSIS +*/ +void cl_thread_pool_destroy(IN cl_thread_pool_t * const p_thread_pool); +/* +* PARAMETERS +* p_thread_pool +* [in] Pointer to a thread pool structure to destroy. 
+* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* This function blocks until all threads exit, and must therefore not +* be called from any of the thread pool's threads. Because of its blocking +* nature, callers of cl_thread_pool_destroy must ensure that entering a wait +* state is valid from the calling thread context. +* +* This function should only be called after a call to +* cl_thread_pool_init. +* +* SEE ALSO +* Thread Pool, cl_thread_pool_init +*********/ + +/****f* Component Library: Thread Pool/cl_thread_pool_signal +* NAME +* cl_thread_pool_signal +* +* DESCRIPTION +* The cl_thread_pool_signal function signals a single thread of +* the thread pool to invoke the thread pool's callback function. +* +* SYNOPSIS +*/ +cl_status_t cl_thread_pool_signal(IN cl_thread_pool_t * const p_thread_pool); +/* +* PARAMETERS +* p_thread_pool +* [in] Pointer to a thread pool structure to signal. +* +* RETURN VALUES +* CL_SUCCESS if the thread pool was successfully signalled. +* +* CL_ERROR otherwise. +* +* NOTES +* Each call to this function wakes up at most one waiting thread in +* the thread pool. +* +* If all threads are running, cl_thread_pool_signal has no effect. +* +* SEE ALSO +* Thread Pool +*********/ + +END_C_DECLS +#endif /* _CL_THREAD_POOL_H_ */ diff --git a/include/complib/cl_timer.h b/include/complib/cl_timer.h new file mode 100644 index 0000000..194e374 --- /dev/null +++ b/include/complib/cl_timer.h @@ -0,0 +1,349 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of timer abstraction. + */ + +#ifndef _CL_TIMER_H_ +#define _CL_TIMER_H_ + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* Component Library/Timer +* NAME +* Timer +* +* DESCRIPTION +* The Timer provides the ability to schedule a function to be invoked at +* a given time in the future. +* +* The timer callback function must not perform any blocking operations. +* +* The timer functions operate on a cl_timer_t structure which should be +* treated as opaque and should be manipulated only through the provided +* functions. 
+* +* SEE ALSO +* Structures: +* cl_timer_t +* +* Callbacks: +* cl_pfn_timer_callback_t +* +* Initialization: +* cl_timer_construct, cl_timer_init, cl_timer_destroy +* +* Manipulation: +* cl_timer_start, cl_timer_stop +*********/ +/****d* Component Library: Timer/cl_pfn_timer_callback_t +* NAME +* cl_pfn_timer_callback_t +* +* DESCRIPTION +* The cl_pfn_timer_callback_t function type defines the prototype for +* functions used to notify users of a timer expiration. +* +* SYNOPSIS +*/ +typedef void (*cl_pfn_timer_callback_t) (IN void *context); +/* +* PARAMETERS +* context +* [in] Value specified in a previous call to cl_timer_init. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* This function type is provided as function prototype reference for the +* function provided by users as a parameter to the cl_timer_init function. +* +* SEE ALSO +* Timer, cl_timer_init +*********/ + +/* + * This include file defines the timer structure, and depends on the timer + * callback definition. + */ +#include + +/****f* Component Library: Timer/cl_timer_construct +* NAME +* cl_timer_construct +* +* DESCRIPTION +* The cl_timer_construct function initializes the state of a timer. +* +* SYNOPSIS +*/ +void cl_timer_construct(IN cl_timer_t * const p_timer); +/* +* PARAMETERS +* p_timer +* [in] Pointer to a cl_timer_t structure whose state to initialize. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling cl_timer_destroy without first calling cl_timer_init. +* +* Calling cl_timer_construct is a prerequisite to calling any other +* timer function except cl_timer_init. +* +* SEE ALSO +* Timer, cl_timer_init, cl_timer_destroy +*********/ + +/****f* Component Library: Timer/cl_timer_init +* NAME +* cl_timer_init +* +* DESCRIPTION +* The cl_timer_init function initializes a timer for use. 
+* +* SYNOPSIS +*/ +cl_status_t +cl_timer_init(IN cl_timer_t * const p_timer, + IN cl_pfn_timer_callback_t pfn_callback, + IN const void *const context); +/* +* PARAMETERS +* p_timer +* [in] Pointer to a cl_timer_t structure to initialize. +* +* pfn_callback +* [in] Address of a callback function to be invoked when a timer expires. +* See the cl_pfn_timer_callback_t function type definition for details +* about the callback function. +* +* context +* [in] Value to pass to the callback function. +* +* RETURN VALUES +* CL_SUCCESS if the timer was successfully initialized. +* +* CL_ERROR otherwise. +* +* NOTES +* Allows calling cl_timer_start and cl_timer_stop. +* +* SEE ALSO +* Timer, cl_timer_construct, cl_timer_destroy, cl_timer_start, +* cl_timer_stop, cl_pfn_timer_callback_t +*********/ + +/****f* Component Library: Timer/cl_timer_destroy +* NAME +* cl_timer_destroy +* +* DESCRIPTION +* The cl_timer_destroy function performs any necessary cleanup of a timer. +* +* SYNOPSIS +*/ +void cl_timer_destroy(IN cl_timer_t * const p_timer); +/* +* PARAMETERS +* p_timer +* [in] Pointer to a cl_timer_t structure to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* cl_timer_destroy cancels any pending callbacks. +* +* This function should only be called after a call to cl_timer_construct +* or cl_timer_init. +* +* SEE ALSO +* Timer, cl_timer_construct, cl_timer_init +*********/ + +/****f* Component Library: Timer/cl_timer_start +* NAME +* cl_timer_start +* +* DESCRIPTION +* The cl_timer_start function sets a timer to expire after a given interval. +* +* SYNOPSIS +*/ +cl_status_t +cl_timer_start(IN cl_timer_t * const p_timer, IN const uint32_t time_ms); +/* +* PARAMETERS +* p_timer +* [in] Pointer to a cl_timer_t structure to schedule. +* +* time_ms +* [in] Time, in milliseconds, before the timer should expire. +* +* RETURN VALUES +* CL_SUCCESS if the timer was successfully scheduled. +* +* CL_ERROR otherwise. 
+* +* NOTES +* cl_timer_start implicitly stops the timer before being scheduled. +* +* The interval specified by the time_ms parameter is a minimum interval. +* The timer is guaranteed to expire no sooner than the desired interval, but +* may take longer to expire. +* +* SEE ALSO +* Timer, cl_timer_stop, cl_timer_trim +*********/ + +/****f* Component Library: Timer/cl_timer_stop +* NAME +* cl_timer_stop +* +* DESCRIPTION +* The cl_timer_stop function stops a pending timer from expiring. +* +* SYNOPSIS +*/ +void cl_timer_stop(IN cl_timer_t * const p_timer); +/* +* PARAMETERS +* p_timer +* [in] Pointer to a cl_timer_t structure. +* +* RETURN VALUE +* This function does not return a value. +* +* SEE ALSO +* Timer, cl_timer_start, cl_timer_trim +*********/ + +/****f* Component Library: Timer/cl_timer_trim +* NAME +* cl_timer_trim +* +* DESCRIPTION +* The cl_timer_trim function pulls in the absolute expiration +* time of a timer if the current expiration time exceeds the specified +* interval. +* +* In other words, it sets the timer to expire after the given +* interval if that interval is less than the current timer expiration. +* +* SYNOPSIS +*/ +cl_status_t +cl_timer_trim(IN cl_timer_t * const p_timer, IN const uint32_t time_ms); +/* +* PARAMETERS +* p_timer +* [in] Pointer to a cl_timer_t structure to schedule. +* +* time_ms +* [in] Maximum time, in milliseconds, before the timer should expire. +* +* RETURN VALUES +* CL_SUCCESS if the timer was successfully scheduled. +* +* CL_ERROR otherwise. +* +* NOTES +* cl_timer_trim has no effect if the time interval is greater than the +* remaining time when the timer is set. +* +* If the new interval time is less than the remaining time, cl_timer_trim +* implicitly stops the timer before resetting it. +* +* If the timer is reset, it is guaranteed to expire no sooner than the +* new interval, but may take longer to expire. 
+* +* SEE ALSO +* Timer, cl_timer_start, cl_timer_stop +*********/ + +/****f* Component Library: Time Stamp/cl_get_time_stamp +* NAME +* cl_get_time_stamp +* +* DESCRIPTION +* The cl_get_time_stamp function returns the current time stamp in +* microseconds since the system was booted. +* +* SYNOPSIS +*/ +uint64_t cl_get_time_stamp(void); +/* +* RETURN VALUE +* Time elapsed, in microseconds, since the system was booted. +* +* SEE ALSO +* Timer, cl_get_time_stamp_sec +*********/ + +/****f* Component Library: Time Stamp/cl_get_time_stamp_sec +* NAME +* cl_get_time_stamp_sec +* +* DESCRIPTION +* The cl_get_time_stamp_sec function returns the current time stamp in +* seconds since the system was booted. +* +* SYNOPSIS +*/ +uint32_t cl_get_time_stamp_sec(void); +/* +* RETURN VALUE +* Time elapsed, in seconds, since the system was booted. +* +* SEE ALSO +* Timer, cl_get_time_stamp +*********/ + +END_C_DECLS +#endif /* _CL_TIMER_H_ */ diff --git a/include/complib/cl_timer_osd.h b/include/complib/cl_timer_osd.h new file mode 100644 index 0000000..ed36fea --- /dev/null +++ b/include/complib/cl_timer_osd.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of timer object. + */ + +#ifndef _CL_TIMER_OSD_H_ +#define _CL_TIMER_OSD_H_ + +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +#include +#include +typedef enum _cl_timer_state { + CL_TIMER_IDLE, + CL_TIMER_QUEUED, + CL_TIMER_RUNNING +} cl_timer_state_t; + +typedef struct _cl_timer_t { + cl_list_item_t list_item; + cl_timer_state_t timer_state; + cl_state_t state; + cl_pfn_timer_callback_t pfn_callback; + const void *context; + pthread_cond_t cond; + struct timespec timeout; +} cl_timer_t; + +/* Internal functions to create the timer provider. */ +cl_status_t __cl_timer_prov_create(void); + +/* Internal function to destroy the timer provider. */ +void __cl_timer_prov_destroy(void); + +END_C_DECLS +#endif /* _CL_TIMER_OSD_H_ */ diff --git a/include/complib/cl_types.h b/include/complib/cl_types.h new file mode 100644 index 0000000..70af7f8 --- /dev/null +++ b/include/complib/cl_types.h @@ -0,0 +1,423 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2010 Mellanox Technologies LTD. All rights reserved. 
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Defines standard return codes, keywords, macros, and debug levels. 
+ */ + +#ifdef __WIN__ +#pragma warning(disable : 4996) +#endif + +#ifndef _CL_TYPES_H_ +#define _CL_TYPES_H_ + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +#include +#include +typedef uint16_t net16_t; +typedef uint32_t net32_t; +typedef uint64_t net64_t; + +/* explicit cast of void* to uint32_t */ +#ifndef ASSERT_VOIDP2UINTN +#if __WORDSIZE == 64 +#define ASSERT_VOIDP2UINTN(var) \ + CL_ASSERT( (intptr_t)var <= 0xffffffffffffffffL ) +#else /* __WORDSIZE == 64 */ +#if __WORDSIZE == 32 + /* need to cast carefully to avoid the warning of un-needed check */ +#define ASSERT_VOIDP2UINTN(var) \ + CL_ASSERT( (intptr_t)var <= 0x100000000ULL ) +#else /* __WORDSIZE == 32 */ +#error "Need to know WORDSIZE to tell how to cast to unsigned long int" +#endif /* __WORDSIZE == 32 */ +#endif /* __WORDSIZE == 64 */ +#endif + +/* explicit casting of void* to long */ +#ifndef CAST_P2LONG +#define CAST_P2LONG(var) ((intptr_t)(var)) +#endif + +/****d* Component Library: Pointer Manipulation/offsetof +* NAME +* offsetof +* +* DESCRIPTION +* The offsetof macro returns the offset of a member within a structure. +* +* SYNOPSIS +* uintptr_t +* offsetof( +* IN TYPE, +* IN MEMBER ); +* +* PARAMETERS +* TYPE +* [in] Name of the structure containing the specified member. +* +* MEMBER +* [in] Name of the member whose offset in the specified structure +* is to be returned. +* +* RETURN VALUE +* Number of bytes from the beginning of the structure to the +* specified member. +* +* SEE ALSO +* PARENT_STRUCT +*********/ +#ifndef offsetof +#define offsetof(TYPE, MEMBER) ((uintptr_t) &((TYPE *)0)->MEMBER) +#endif + +/****d* Component Library: Pointer Manipulation/PARENT_STRUCT +* NAME +* PARENT_STRUCT +* +* DESCRIPTION +* The PARENT_STRUCT macro returns a pointer to a structure +* given a name and pointer to one of its members. 
+* +* SYNOPSIS +* PARENT_TYPE* +* PARENT_STRUCT( +* IN void* const p_member, +* IN PARENT_TYPE, +* IN MEMBER_NAME ); +* +* PARAMETERS +* p_member +* [in] Pointer to the MEMBER_NAME member of a PARENT_TYPE structure. +* +* PARENT_TYPE +* [in] Name of the structure containing the specified member. +* +* MEMBER_NAME +* [in] Name of the member whose address is passed in the p_member +* parameter. +* +* RETURN VALUE +* Pointer to a structure of type PARENT_TYPE whose MEMBER_NAME member is +* located at p_member. +* +* SEE ALSO +* offsetof +*********/ +#define PARENT_STRUCT(p_member, PARENT_TYPE, MEMBER_NAME) \ + ((PARENT_TYPE*)((uint8_t*)(p_member) - offsetof(PARENT_TYPE, MEMBER_NAME))) + +/****d* Component Library/Parameter Keywords +* NAME +* Parameter Keywords +* +* DESCRIPTION +* The Parameter Keywords can be used to clarify the usage of function +* parameters to users. +* +* VALUES +* IN +* Designates that the parameter is used as input to a function. +* +* OUT +* Designates that the parameter's value will be set by the function. +* +* OPTIONAL +* Designates that the parameter is optional, and may be NULL. +* The OPTIONAL keyword, if used, follows the parameter name. +* +* EXAMPLE +* // Function declaration. +* void* +* my_func( +* IN void* const p_param1, +* OUT void** const p_handle OPTIONAL ); +* +* NOTES +* Multiple keywords can apply to a single parameter. The IN and OUT +* keywords precede the parameter type. The OPTIONAL +* keyword, if used, follows the parameter name. +*********/ +#ifndef IN +#define IN /* Function input parameter */ +#endif +#ifndef OUT +#define OUT /* Function output parameter */ +#endif +#ifndef OPTIONAL +#define OPTIONAL /* Optional function parameter - NULL if not used */ +#endif + +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function Returns And Completion Codes %% +%% %% +%% The text for any addition to this enumerated type must be added to the %% +%% string array defined in . 
%% +%% %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ + +/****d* Component Library/Data Types +* NAME +* Data Types +* +* DESCRIPTION +* The component library provides and uses explicitly sized types. +* +* VALUES +* char +* 8-bit, defined by compiler. +* +* void +* 0-bit, defined by compiler. +* +* int8_t +* 8-bit signed integer. +* +* uint8_t +* 8-bit unsigned integer. +* +* int16_t +* 16-bit signed integer. +* +* uint16_t +* 16-bit unsigned integer. +* +* net16_t +* 16-bit network byte order value. +* +* int32_t +* 32-bit signed integer. +* +* uint32_t +* 32-bit unsigned integer. +* +* net32_t +* 32-bit network byte order value. +* +* int64_t +* 64-bit signed integer. +* +* uint64_t +* 64-bit unsigned integer. +* +* net64_t +* 64-bit network byte order value. +* +* boolean_t +* integral sized. Set to TRUE or FALSE and used in logical expressions. +* +* NOTES +* Pointer types are not defined as these provide no value and can potentially +* lead to naming confusion. +*********/ + +/****d* Component Library: Data Types/cl_status_t +* NAME +* cl_status_t +* +* DESCRIPTION +* The cl_status_t return types are used by the component library to +* provide detailed function return values. 
+* +* SYNOPSIS +*/ +#define CL_SUCCESS 0 +#define CL_ERROR 1 +#define CL_INVALID_STATE 2 +#define CL_INVALID_OPERATION 3 +#define CL_INVALID_SETTING 4 +#define CL_INVALID_PARAMETER 5 +#define CL_INSUFFICIENT_RESOURCES 6 +#define CL_INSUFFICIENT_MEMORY 7 +#define CL_INVALID_PERMISSION 8 +#define CL_COMPLETED 9 +#define CL_NOT_DONE 10 +#define CL_PENDING 11 +#define CL_TIMEOUT 12 +#define CL_CANCELED 13 +#define CL_REJECT 14 +#define CL_OVERRUN 15 +#define CL_NOT_FOUND 16 +#define CL_UNAVAILABLE 17 +#define CL_BUSY 18 +#define CL_DISCONNECT 19 +#define CL_DUPLICATE 20 +#define CL_STATUS_COUNT 21 /* should be the last value */ + +typedef int cl_status_t; +/* +* SEE ALSO +* Data Types, CL_STATUS_MSG +*********/ + +/* Status values above converted to text for easier printing. */ +extern const char *cl_status_text[]; + +/****d* Component Library: Data Types/CL_STATUS_MSG +* NAME +* CL_STATUS_MSG +* +* DESCRIPTION +* The CL_STATUS_MSG macro returns a textual representation of +* a cl_status_t code. +* +* SYNOPSIS +* const char* +* CL_STATUS_MSG( +* IN cl_status_t errcode ); +* +* PARAMETERS +* errcode +* [in] cl_status_t code for which to return a text representation. +* +* RETURN VALUE +* Pointer to a string containing a textual representation of the errcode +* parameter. +* +* NOTES +* This macro checks that the cl_status_t value is below CL_STATUS_COUNT +* (no bits are masked off). If it is not, the string +* "invalid status code" is returned. 
+*
+* SEE ALSO
+*	cl_status_t
+*********/
+#define CL_STATUS_MSG( errcode )	/* unsigned cast also rejects negative codes */ \
+	(((unsigned int)(errcode) < CL_STATUS_COUNT) ? cl_status_text[(unsigned int)(errcode)] : "invalid status code")
+
+#if !defined( FALSE )
+#define FALSE 0
+#endif /* !defined( FALSE ) */
+
+#if !defined( TRUE )
+#define TRUE (!FALSE)
+#endif /* !defined( TRUE ) */
+
+/****d* Component Library: Unreferenced Parameters/UNUSED_PARAM
+* NAME
+*	UNUSED_PARAM
+*
+* DESCRIPTION
+*	The UNUSED_PARAM macro can be used to eliminate compiler warnings related
+*	to intentionally unused formal parameters in function implementations.
+*
+* SYNOPSIS
+*	UNUSED_PARAM( P )
+*
+* EXAMPLE
+*	void my_func( int32_t value )
+*	{
+*		UNUSED_PARAM( value );
+*	}
+*********/
+
+/****d* Component Library/Object States
+* NAME
+*	Object States
+*
+* DESCRIPTION
+*	The object states enumerated type defines the valid states of components.
+*
+* SYNOPSIS
+*/
+typedef enum _cl_state {
+	CL_UNINITIALIZED = 1,
+	CL_INITIALIZED,
+	CL_DESTROYING,
+	CL_DESTROYED
+} cl_state_t;
+/*
+* VALUES
+*	CL_UNINITIALIZED
+*		Indicates that initialization was not invoked successfully.
+*
+*	CL_INITIALIZED
+*		Indicates initialization was successful.
+*
+*	CL_DESTROYING
+*		Indicates that the object is undergoing destruction.
+*
+*	CL_DESTROYED
+*		Indicates that the object's destructor has already been called. Most
+*		objects set their final state to CL_DESTROYED before freeing the
+*		memory associated with the object.
+*********/
+
+/****d* Component Library: Object States/cl_is_state_valid
+* NAME
+*	cl_is_state_valid
+*
+* DESCRIPTION
+*	The cl_is_state_valid function returns whether a state has a valid value.
+*
+* SYNOPSIS
+*/
+static inline boolean_t cl_is_state_valid(IN const cl_state_t state)
+{
+	return ((state == CL_UNINITIALIZED) || (state == CL_INITIALIZED) ||
+		(state == CL_DESTROYING) || (state == CL_DESTROYED));
+}
+
+/*
+* PARAMETERS
+*	state
+*		State whose value to validate.
+*
+* RETURN VALUES
+*	TRUE if the specified state has a valid value.
+* +* FALSE otherwise. +* +* NOTES +* This function is used in debug builds to check for valid states. If an +* uninitialized object is passed, the memory for the state may cause the +* state to have an invalid value. +* +* SEE ALSO +* Object States +*********/ + +END_C_DECLS +#endif /* _DATA_TYPES_H_ */ diff --git a/include/complib/cl_types_osd.h b/include/complib/cl_types_osd.h new file mode 100644 index 0000000..2538913 --- /dev/null +++ b/include/complib/cl_types_osd.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2012 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * Abstract:
+ *	Defines sized datatypes for Linux User mode
+ *	exported sizes are int8_t, uint8_t, int16_t, uint16_t, int32_t, uint32_t
+ *	int64_t, uint64_t.
+ */
+
+#ifndef _CL_TYPES_OSD_H_
+#define _CL_TYPES_OSD_H_
+
+#ifdef __cplusplus
+# define BEGIN_C_DECLS extern "C" {
+# define END_C_DECLS }
+#else /* !__cplusplus */
+# define BEGIN_C_DECLS
+# define END_C_DECLS
+#endif /* __cplusplus */
+
+BEGIN_C_DECLS
+#if defined (_DEBUG_)
+#ifdef __IA64__
+#define cl_break() asm(" break 0")
+#else /* __IA64__ */
+#define cl_break() asm(" int $3")
+#endif /* __IA64__ */
+#else /* _DEBUG_ */
+#define cl_break
+#endif
+#include <inttypes.h>	/* NOTE(review): the three header names below were reconstructed (lost in extraction) - confirm */
+#include <assert.h>	/* assert(), used by CL_ASSERT in _DEBUG_ builds */
+#include <string.h>
+
+/*
+ * Branch prediction hints
+ */
+#if defined(HAVE_BUILTIN_EXPECT)
+#define PT(exp) __builtin_expect( ((uintptr_t)(exp)), 1 )
+#define PF(exp) __builtin_expect( ((uintptr_t)(exp)), 0 )
+#else
+#define PT(exp) (exp)
+#define PF(exp) (exp)
+#endif
+
+#if defined (_DEBUG_)
+#define CL_ASSERT assert
+#else /* _DEBUG_ */
+#define CL_ASSERT( __exp__ )
+#endif /* _DEBUG_ */
+/*
+ * Types not explicitly defined are native to the platform.
+ */
+typedef int boolean_t;
+typedef volatile int32_t atomic32_t;
+
+#ifndef NULL
+#define NULL ((void*)0)	/* parenthesized so the cast binds correctly inside larger expressions */
+#endif
+
+#define UNUSED_PARAM( P )
+
+END_C_DECLS
+#endif /* _CL_TYPES_OSD_H_ */
diff --git a/include/complib/cl_vector.h b/include/complib/cl_vector.h
new file mode 100644
index 0000000..f66172b
--- /dev/null
+++ b/include/complib/cl_vector.h
@@ -0,0 +1,945 @@
+/*
+ * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * Abstract:
+ *	This file contains vector definitions. Vector provides dynamically
+ *	resizable array functionality. Objects in a Vector are not relocated
+ *	when the array is resized.
+ */
+
+#ifndef _CL_VECTOR_H_
+#define _CL_VECTOR_H_
+
+#include <complib/cl_qlist.h>	/* NOTE(review): header name reconstructed (lost in extraction); cl_vector_t embeds a cl_qlist_t - confirm */
+
+#ifdef __cplusplus
+# define BEGIN_C_DECLS extern "C" {
+# define END_C_DECLS }
+#else /* !__cplusplus */
+# define BEGIN_C_DECLS
+# define END_C_DECLS
+#endif /* __cplusplus */
+
+BEGIN_C_DECLS
+/****h* Component Library/Vector
+* NAME
+*	Vector
+*
+* DESCRIPTION
+*	The Vector is a self-sizing array.
Like a traditional array, a vector
+*	allows efficient constant time access to elements with a specified index.
+*	A vector grows transparently as the user adds elements to the array.
+*
+*	As the vector grows in size, it does not relocate existing elements in
+*	memory. This allows using pointers to elements stored in a Vector.
+*
+*	Users can supply an initializer function that allows a vector to ensure
+*	that new items added to the vector are properly initialized. A vector
+*	calls the initializer function on a per object basis when growing the
+*	array. The initializer is optional.
+*
+*	The initializer function can fail, and returns a cl_status_t. The vector
+*	will call the destructor function, if provided, for an element that
+*	failed initialization. If an initializer fails, a vector does not call
+*	the initializer for objects in the remainder of the new memory allocation.
+*
+*	The cl_vector_t structure should be treated as opaque and should be
+*	manipulated only through the provided functions.
+*
+* SEE ALSO
+*	Structures:
+*		cl_vector_t
+*
+*	Callbacks:
+*		cl_pfn_vec_init_t, cl_pfn_vec_dtor_t, cl_pfn_vec_apply_t,
+*		cl_pfn_vec_find_t
+*
+*	Item Manipulation:
+*		cl_vector_set_obj, cl_vector_obj
+*
+*	Initialization:
+*		cl_vector_construct, cl_vector_init, cl_vector_destroy
+*
+*	Manipulation:
+*		cl_vector_get_capacity, cl_vector_set_capacity,
+*		cl_vector_get_size, cl_vector_set_size, cl_vector_set_min_size
+*		cl_vector_get_ptr, cl_vector_get, cl_vector_at, cl_vector_set
+*
+*	Search:
+*		cl_vector_find_from_start, cl_vector_find_from_end
+*		cl_vector_apply_func
+*********/
+/****d* Component Library: Vector/cl_pfn_vec_init_t
+* NAME
+*	cl_pfn_vec_init_t
+*
+* DESCRIPTION
+*	The cl_pfn_vec_init_t function type defines the prototype for functions
+*	used as initializer for elements being allocated by a vector.
+* +* SYNOPSIS +*/ +typedef cl_status_t + (*cl_pfn_vec_init_t) (IN void *const p_element, IN void *context); +/* +* PARAMETERS +* p_element +* [in] Pointer to an element being added to a vector. +* +* context +* [in] Context provided in a call to cl_vector_init. +* +* RETURN VALUES +* Return CL_SUCCESS to indicate that the element was initialized successfully. +* +* Other cl_status_t values will be returned by the cl_vector_init, +* cl_vector_set_size, and cl_vector_set_min_size functions. +* +* In situations where the vector's size needs to grows in order to satisfy +* a call to cl_vector_set, a non-successful status returned by the +* initializer callback causes the growth to stop. +* +* NOTES +* This function type is provided as function prototype reference for +* the initializer function provided by users as an optional parameter to +* the cl_vector_init function. +* +* SEE ALSO +* Vector, cl_vector_init +*********/ + +/****d* Component Library: Vector/cl_pfn_vec_dtor_t +* NAME +* cl_pfn_vec_dtor_t +* +* DESCRIPTION +* The cl_pfn_vec_dtor_t function type defines the prototype for functions +* used as destructor for elements being deallocated from a vector. +* +* SYNOPSIS +*/ +typedef void + (*cl_pfn_vec_dtor_t) (IN void *const p_element, IN void *context); +/* +* PARAMETERS +* p_element +* [in] Pointer to an element being deallocated from a vector. +* +* context +* [in] Context provided in a call to cl_vector_init. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* This function type is provided as function prototype reference for +* the destructor function provided by users as an optional parameter to +* the cl_vector_init function. +* +* SEE ALSO +* Vector, cl_vector_init +*********/ + +/****d* Component Library: Vector/cl_pfn_vec_apply_t +* NAME +* cl_pfn_vec_apply_t +* +* DESCRIPTION +* The cl_pfn_vec_apply_t function type defines the prototype for functions +* used to iterate elements in a vector. 
+* +* SYNOPSIS +*/ +typedef void + (*cl_pfn_vec_apply_t) (IN const size_t index, + IN void *const p_element, IN void *context); +/* +* PARAMETERS +* index +* [in] Index of the element. +* +* p_element +* [in] Pointer to an element at the specified index in the vector. +* +* context +* [in] Context provided in a call to cl_vector_apply_func. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* This function type is provided as function prototype reference for +* the function passed by users as a parameter to the cl_vector_apply_func +* function. +* +* SEE ALSO +* Vector, cl_vector_apply_func +*********/ + +/****d* Component Library: Vector/cl_pfn_vec_find_t +* NAME +* cl_pfn_vec_find_t +* +* DESCRIPTION +* The cl_pfn_vec_find_t function type defines the prototype for functions +* used to find elements in a vector. +* +* SYNOPSIS +*/ +typedef cl_status_t + (*cl_pfn_vec_find_t) (IN const size_t index, + IN const void *const p_element, IN void *context); +/* +* PARAMETERS +* index +* [in] Index of the element. +* +* p_element +* [in] Pointer to an element at the specified index in the vector. +* +* context +* [in] Context provided in a call to cl_vector_find_from_start or +* cl_vector_find_from_end. +* +* RETURN VALUES +* Return CL_SUCCESS if the element was found. This stops vector iteration. +* +* CL_NOT_FOUND to continue the vector iteration. +* +* NOTES +* This function type is provided as function prototype reference for the +* function provided by users as a parameter to the cl_vector_find_from_start +* and cl_vector_find_from_end functions. +* +* SEE ALSO +* Vector, cl_vector_find_from_start, cl_vector_find_from_end +*********/ + +/****i* Component Library: Vector/cl_pfn_vec_copy_t +* NAME +* cl_pfn_vec_copy_t +* +* DESCRIPTION +* The cl_pfn_vec_copy_t function type defines the prototype for functions +* used to copy elements in a vector. 
+*
+* SYNOPSIS
+*/
+typedef void
+ (*cl_pfn_vec_copy_t) (IN void *const p_dest,
+		       IN const void *const p_src, IN const size_t size);
+/*
+* PARAMETERS
+*	p_dest
+*		[in] Pointer to the destination buffer into which to copy p_src.
+*
+*	p_src
+*		[in] Pointer to the source buffer from which to copy.
+*
+*	size
+*		[in] Number of bytes to copy.
+*
+* RETURN VALUE
+*	This function does not return a value.
+*
+* SEE ALSO
+*	Vector
+*********/
+
+/****s* Component Library: Vector/cl_vector_t
+* NAME
+*	cl_vector_t
+*
+* DESCRIPTION
+*	Vector structure.
+*
+*	The cl_vector_t structure should be treated as opaque and should be
+*	manipulated only through the provided functions.
+*
+* SYNOPSIS
+*/
+typedef struct _cl_vector {
+	size_t size;
+	size_t grow_size;
+	size_t capacity;
+	size_t element_size;
+	cl_pfn_vec_init_t pfn_init;
+	cl_pfn_vec_dtor_t pfn_dtor;
+	cl_pfn_vec_copy_t pfn_copy;
+	const void *context;
+	cl_qlist_t alloc_list;
+	void **p_ptr_array;
+	cl_state_t state;
+} cl_vector_t;
+/*
+* FIELDS
+*	size
+*		Number of elements successfully initialized in the vector.
+*
+*	grow_size
+*		Number of elements to allocate when growing.
+*
+*	capacity
+*		Total number of elements allocated.
+*
+*	element_size
+*		Size of each element.
+*
+*	pfn_init
+*		User supplied element initializer.
+*
+*	pfn_dtor
+*		User supplied element destructor.
+*
+*	pfn_copy
+*		Copy operator.
+*
+*	context
+*		User context for callbacks.
+*
+*	alloc_list
+*		List of allocations.
+*
+*	p_ptr_array
+*		Internal array of pointers to elements.
+*
+*	state
+*		State of the vector.
+*
+* SEE ALSO
+*	Vector
+*********/
+
+/****f* Component Library: Vector/cl_vector_construct
+* NAME
+*	cl_vector_construct
+*
+* DESCRIPTION
+*	The cl_vector_construct function constructs a vector.
+*
+* SYNOPSIS
+*/
+void cl_vector_construct(IN cl_vector_t * const p_vector);
+/*
+* PARAMETERS
+*	p_vector
+*		[in] Pointer to a cl_vector_t structure to construct.
+*
+* RETURN VALUE
+*	This function does not return a value.
+* +* NOTES +* Allows calling cl_vector_destroy without first calling cl_vector_init. +* +* Calling cl_vector_construct is a prerequisite to calling any other +* vector function except cl_vector_init. +* +* SEE ALSO +* Vector, cl_vector_init, cl_vector_destroy +*********/ + +/****f* Component Library: Vector/cl_vector_init +* NAME +* cl_vector_init +* +* DESCRIPTION +* The cl_vector_init function initializes a vector for use. +* +* SYNOPSIS +*/ +cl_status_t +cl_vector_init(IN cl_vector_t * const p_vector, + IN const size_t min_size, + IN const size_t grow_size, + IN const size_t element_size, + IN cl_pfn_vec_init_t pfn_init OPTIONAL, + IN cl_pfn_vec_dtor_t pfn_dtor OPTIONAL, + IN const void *const context); +/* +* PARAMETERS +* p_vector +* [in] Pointer to a cl_vector_t structure to inititalize. +* +* min_size +* [in] Initial number of elements. +* +* grow_size +* [in] Number of elements to allocate when incrementally growing +* the vector. A value of zero disables automatic growth. +* +* element_size +* [in] Size of each element. +* +* pfn_init +* [in] Initializer callback to invoke for every new element. +* See the cl_pfn_vec_init_t function type declaration for details about +* the callback function. +* +* pfn_dtor +* [in] Destructor callback to invoke for elements being deallocated. +* See the cl_pfn_vec_dtor_t function type declaration for details about +* the callback function. +* +* context +* [in] Value to pass to the callback functions to provide context. +* +* RETURN VALUES +* CL_SUCCESS if the vector was initialized successfully. +* +* CL_INSUFFICIENT_MEMORY if the initialization failed. +* +* cl_status_t value returned by optional initializer function specified by +* the pfn_init parameter. +* +* NOTES +* The constructor and initializer functions, if any, are invoked for every +* new element in the array. 
+* +* SEE ALSO +* Vector, cl_vector_construct, cl_vector_destroy, cl_vector_set, +* cl_vector_get, cl_vector_get_ptr, cl_vector_at +*********/ + +/****f* Component Library: Vector/cl_vector_destroy +* NAME +* cl_vector_destroy +* +* DESCRIPTION +* The cl_vector_destroy function destroys a vector. +* +* SYNOPSIS +*/ +void cl_vector_destroy(IN cl_vector_t * const p_vector); +/* +* PARAMETERS +* p_vector +* [in] Pointer to a cl_vector_t structure to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* cl_vector_destroy frees all memory allocated for the vector. The vector +* is left initialized to a zero capacity and size. +* +* This function should only be called after a call to cl_vector_construct +* or cl_vector_init. +* +* SEE ALSO +* Vector, cl_vector_construct, cl_vector_init +*********/ + +/****f* Component Library: Vector/cl_vector_get_capacity +* NAME +* cl_vector_get_capacity +* +* DESCRIPTION +* The cl_vector_get_capacity function returns the capacity of a vector. +* +* SYNOPSIS +*/ +static inline size_t +cl_vector_get_capacity(IN const cl_vector_t * const p_vector) +{ + CL_ASSERT(p_vector); + CL_ASSERT(p_vector->state == CL_INITIALIZED); + + return (p_vector->capacity); +} + +/* +* PARAMETERS +* p_vector +* [in] Pointer to a cl_vector_t structure whose capacity to return. +* +* RETURN VALUE +* Capacity, in elements, of the vector. +* +* NOTES +* The capacity is the number of elements that the vector can store, and +* can be greater than the number of elements stored. To get the number of +* elements stored in the vector, use cl_vector_get_size. +* +* SEE ALSO +* Vector, cl_vector_set_capacity, cl_vector_get_size +*********/ + +/****f* Component Library: Vector/cl_vector_get_size +* NAME +* cl_vector_get_size +* +* DESCRIPTION +* The cl_vector_get_size function returns the size of a vector. 
+*
+* SYNOPSIS
+*/
+static inline size_t cl_vector_get_size(IN const cl_vector_t * const p_vector)
+{
+	CL_ASSERT(p_vector);
+	CL_ASSERT(p_vector->state == CL_INITIALIZED);
+
+	return (p_vector->size);
+}
+
+/*
+* PARAMETERS
+*	p_vector
+*		[in] Pointer to a cl_vector_t structure whose size to return.
+*
+* RETURN VALUE
+*	Size, in elements, of the vector.
+*
+* SEE ALSO
+*	Vector, cl_vector_set_size, cl_vector_get_capacity
+*********/
+
+/****f* Component Library: Vector/cl_vector_get_ptr
+* NAME
+*	cl_vector_get_ptr
+*
+* DESCRIPTION
+*	The cl_vector_get_ptr function returns a pointer to an element
+*	stored in a vector at a specified index.
+*
+* SYNOPSIS
+*/
+static inline void *cl_vector_get_ptr(IN const cl_vector_t * const p_vector,
+				      IN const size_t index)
+{
+	CL_ASSERT(p_vector);
+	CL_ASSERT(p_vector->state == CL_INITIALIZED);
+
+	return (p_vector->p_ptr_array[index]);
+}
+
+/*
+* PARAMETERS
+*	p_vector
+*		[in] Pointer to a cl_vector_t structure from which to get a
+*		pointer to an element.
+*
+*	index
+*		[in] Index of the element.
+*
+* RETURN VALUE
+*	Pointer to the element stored at specified index.
+*
+* NOTES
+*	cl_vector_get_ptr provides constant access times regardless of the index.
+*
+*	cl_vector_get_ptr does not perform boundary checking. Callers are
+*	responsible for providing an index that is within the range of the vector.
+*
+* SEE ALSO
+*	Vector, cl_vector_get, cl_vector_at, cl_vector_set, cl_vector_get_size
+*********/
+
+/****f* Component Library: Vector/cl_vector_get
+* NAME
+*	cl_vector_get
+*
+* DESCRIPTION
+*	The cl_vector_get function copies an element stored in a vector at a
+*	specified index.
+*
+* SYNOPSIS
+*/
+static inline void
+cl_vector_get(IN const cl_vector_t * const p_vector,
+	      IN const size_t index, OUT void *const p_element)
+{
+	void *p_src;
+
+	CL_ASSERT(p_vector);
+	CL_ASSERT(p_vector->state == CL_INITIALIZED);
+	CL_ASSERT(p_element);
+
+	/* Stored element is the source; pfn_copy takes (dest, src, size). */
+	p_src = cl_vector_get_ptr(p_vector, index);
+	p_vector->pfn_copy(p_element, p_src, p_vector->element_size);
+}
+
+/*
+* PARAMETERS
+*	p_vector
+*		[in] Pointer to a cl_vector_t structure from which to get a copy of
+*		an element.
+*
+*	index
+*		[in] Index of the element.
+*
+*	p_element
+*		[out] Pointer to storage for the element. Contains a copy of the
+*		desired element upon successful completion of the call.
+*
+* RETURN VALUE
+*	This function does not return a value.
+*
+* NOTES
+*	cl_vector_get provides constant time access regardless of the index.
+*
+*	cl_vector_get does not perform boundary checking on the vector, and
+*	callers are responsible for providing an index that is within the range
+*	of the vector. To access elements after performing boundary checks,
+*	use cl_vector_at.
+*
+*	The p_element parameter contains a copy of the desired element upon
+*	return from this function.
+*
+* SEE ALSO
+*	Vector, cl_vector_get_ptr, cl_vector_at
+*********/
+
+/****f* Component Library: Vector/cl_vector_at
+* NAME
+*	cl_vector_at
+*
+* DESCRIPTION
+*	The cl_vector_at function copies an element stored in a vector at a
+*	specified index, performing boundary checks.
+*
+* SYNOPSIS
+*/
+cl_status_t
+cl_vector_at(IN const cl_vector_t * const p_vector,
+	     IN const size_t index, OUT void *const p_element);
+/*
+* PARAMETERS
+*	p_vector
+*		[in] Pointer to a cl_vector_t structure from which to get a copy of
+*		an element.
+*
+*	index
+*		[in] Index of the element.
+*
+*	p_element
+*		[out] Pointer to storage for the element. Contains a copy of the
+*		desired element upon successful completion of the call.
+*
+* RETURN VALUES
+*	CL_SUCCESS if an element was found at the specified index.
+*
+*	CL_INVALID_SETTING if the index was out of range.
+*
+* NOTES
+*	cl_vector_at provides constant time access regardless of the index, and
+*	performs boundary checking on the vector.
+*
+*	Upon success, the p_element parameter contains a copy of the desired element.
+* +* SEE ALSO +* Vector, cl_vector_get, cl_vector_get_ptr +*********/ + +/****f* Component Library: Vector/cl_vector_set +* NAME +* cl_vector_set +* +* DESCRIPTION +* The cl_vector_set function sets the element at the specified index. +* +* SYNOPSIS +*/ +cl_status_t +cl_vector_set(IN cl_vector_t * const p_vector, + IN const size_t index, IN void *const p_element); +/* +* PARAMETERS +* p_vector +* [in] Pointer to a cl_vector_t structure into which to store +* an element. +* +* index +* [in] Index of the element. +* +* p_element +* [in] Pointer to an element to store in the vector. +* +* RETURN VALUES +* CL_SUCCESS if the element was successfully set. +* +* CL_INSUFFICIENT_MEMORY if the vector could not be resized to accommodate +* the new element. +* +* NOTES +* cl_vector_set grows the vector as needed to accommodate the new element, +* unless the grow_size parameter passed into the cl_vector_init function +* was zero. +* +* SEE ALSO +* Vector, cl_vector_get +*********/ + +/****f* Component Library: Vector/cl_vector_set_capacity +* NAME +* cl_vector_set_capacity +* +* DESCRIPTION +* The cl_vector_set_capacity function reserves memory in a vector for a +* specified number of elements. +* +* SYNOPSIS +*/ +cl_status_t +cl_vector_set_capacity(IN cl_vector_t * const p_vector, + IN const size_t new_capacity); +/* +* PARAMETERS +* p_vector +* [in] Pointer to a cl_vector_t structure whose capacity to set. +* +* new_capacity +* [in] Total number of elements for which the vector should +* allocate memory. +* +* RETURN VALUES +* CL_SUCCESS if the capacity was successfully set. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory to satisfy the +* operation. The vector is left unchanged. +* +* NOTES +* cl_vector_set_capacity increases the capacity of the vector. It does +* not change the size of the vector. If the requested capacity is less +* than the current capacity, the vector is left unchanged. 
+* +* SEE ALSO +* Vector, cl_vector_get_capacity, cl_vector_set_size, +* cl_vector_set_min_size +*********/ + +/****f* Component Library: Vector/cl_vector_set_size +* NAME +* cl_vector_set_size +* +* DESCRIPTION +* The cl_vector_set_size function resizes a vector, either increasing or +* decreasing its size. +* +* SYNOPSIS +*/ +cl_status_t +cl_vector_set_size(IN cl_vector_t * const p_vector, IN const size_t size); +/* +* PARAMETERS +* p_vector +* [in] Pointer to a cl_vector_t structure whose size to set. +* +* size +* [in] Number of elements desired in the vector. +* +* RETURN VALUES +* CL_SUCCESS if the size of the vector was set successfully. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory to complete the +* operation. The vector is left unchanged. +* +* NOTES +* cl_vector_set_size sets the vector to the specified size. If size is +* smaller than the current size of the vector, the size is reduced. +* The destructor function, if any, will be invoked for all elements that +* are above size. Likewise, the constructor and initializer, if any, will +* be invoked for all new elements. +* +* This function can only fail if size is larger than the current capacity. +* +* SEE ALSO +* Vector, cl_vector_get_size, cl_vector_set_min_size, +* cl_vector_set_capacity +*********/ + +/****f* Component Library: Vector/cl_vector_set_min_size +* NAME +* cl_vector_set_min_size +* +* DESCRIPTION +* The cl_vector_set_min_size function resizes a vector to a specified size +* if the vector is smaller than the specified size. +* +* SYNOPSIS +*/ +cl_status_t +cl_vector_set_min_size(IN cl_vector_t * const p_vector, + IN const size_t min_size); +/* +* PARAMETERS +* p_vector +* [in] Pointer to a cl_vector_t structure whose minimum size to set. +* +* min_size +* [in] Minimum number of elements that the vector should contain. +* +* RETURN VALUES +* CL_SUCCESS if the vector size is greater than or equal to min_size. 
This +* could indicate that the vector's capacity was increased to min_size or +* that the vector was already of sufficient size. +* +* CL_INSUFFICIENT_MEMORY if there was not enough memory to resize the vector. +* The vector is left unchanged. +* +* NOTES +* If min_size is smaller than the current size of the vector, the vector is +* unchanged. The vector is unchanged if the size could not be changed due +* to insufficient memory being available to perform the operation. +* +* SEE ALSO +* Vector, cl_vector_get_size, cl_vector_set_size, cl_vector_set_capacity +*********/ + +/****f* Component Library: Vector/cl_vector_apply_func +* NAME +* cl_vector_apply_func +* +* DESCRIPTION +* The cl_vector_apply_func function invokes a specified function for every +* element in a vector. +* +* SYNOPSIS +*/ +void +cl_vector_apply_func(IN const cl_vector_t * const p_vector, + IN cl_pfn_vec_apply_t pfn_callback, + IN const void *const context); +/* +* PARAMETERS +* p_vector +* [in] Pointer to a cl_vector_t structure whose elements to iterate. +* +* pfn_callback +* [in] Function invoked for every element in the array. +* See the cl_pfn_vec_apply_t function type declaration for details +* about the callback function. +* +* context +* [in] Value to pass to the callback function. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* cl_vector_apply_func invokes the specified function for every element +* in the vector, starting from the beginning of the vector. +* +* SEE ALSO +* Vector, cl_vector_find_from_start, cl_vector_find_from_end, +* cl_pfn_vec_apply_t +*********/ + +/****f* Component Library: Vector/cl_vector_find_from_start +* NAME +* cl_vector_find_from_start +* +* DESCRIPTION +* The cl_vector_find_from_start function uses a specified function to +* search for elements in a vector starting from the lowest index. 
+*
+* SYNOPSIS
+*/
+size_t
+cl_vector_find_from_start(IN const cl_vector_t * const p_vector,
+			  IN cl_pfn_vec_find_t pfn_callback,
+			  IN const void *const context);
+/*
+* PARAMETERS
+*	p_vector
+*		[in] Pointer to a cl_vector_t structure to search.
+*
+*	pfn_callback
+*		[in] Function invoked to determine if a match was found.
+*		See the cl_pfn_vec_find_t function type declaration for details
+*		about the callback function.
+*
+*	context
+*		[in] Value to pass to the callback function.
+*
+* RETURN VALUES
+*	Index of the element, if found.
+*
+*	Size of the vector if the element was not found.
+*
+* NOTES
+*	cl_vector_find_from_start does not remove the found element from
+*	the vector. The index of the element is returned when the function
+*	provided by the pfn_callback parameter returns CL_SUCCESS.
+*
+* SEE ALSO
+*	Vector, cl_vector_find_from_end, cl_vector_apply_func, cl_pfn_vec_find_t
+*********/
+
+/****f* Component Library: Vector/cl_vector_find_from_end
+* NAME
+*	cl_vector_find_from_end
+*
+* DESCRIPTION
+*	The cl_vector_find_from_end function uses a specified function to search
+*	for elements in a vector starting from the highest index.
+*
+* SYNOPSIS
+*/
+size_t
+cl_vector_find_from_end(IN const cl_vector_t * const p_vector,
+			IN cl_pfn_vec_find_t pfn_callback,
+			IN const void *const context);
+/*
+* PARAMETERS
+*	p_vector
+*		[in] Pointer to a cl_vector_t structure to search.
+*
+*	pfn_callback
+*		[in] Function invoked to determine if a match was found.
+*		See the cl_pfn_vec_find_t function type declaration for details
+*		about the callback function.
+*
+*	context
+*		[in] Value to pass to the callback function.
+*
+* RETURN VALUES
+*	Index of the element, if found.
+*
+*	Size of the vector if the element was not found.
+*
+* NOTES
+*	cl_vector_find_from_end does not remove the found element from
+*	the vector. The index of the element is returned when the function
+*	provided by the pfn_callback parameter returns CL_SUCCESS.
+* +* SEE ALSO +* Vector, cl_vector_find_from_start, cl_vector_apply_func, +* cl_pfn_vec_find_t +*********/ + +END_C_DECLS +#endif /* _CL_VECTOR_H_ */ diff --git a/include/iba/ib_cm_types.h b/include/iba/ib_cm_types.h new file mode 100644 index 0000000..061f47b --- /dev/null +++ b/include/iba/ib_cm_types.h @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#if !defined(__IB_CM_TYPES_H__) +#define __IB_CM_TYPES_H__ + +#ifndef __WIN__ + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/* + * Defines known Communication management class versions + */ +#define IB_MCLASS_CM_VER_2 2 +#define IB_MCLASS_CM_VER_1 1 +/* + * Defines the size of user available data in communication management MADs + */ +#define IB_REQ_PDATA_SIZE_VER2 92 +#define IB_MRA_PDATA_SIZE_VER2 222 +#define IB_REJ_PDATA_SIZE_VER2 148 +#define IB_REP_PDATA_SIZE_VER2 196 +#define IB_RTU_PDATA_SIZE_VER2 224 +#define IB_LAP_PDATA_SIZE_VER2 168 +#define IB_APR_PDATA_SIZE_VER2 148 +#define IB_DREQ_PDATA_SIZE_VER2 220 +#define IB_DREP_PDATA_SIZE_VER2 224 +#define IB_SIDR_REQ_PDATA_SIZE_VER2 216 +#define IB_SIDR_REP_PDATA_SIZE_VER2 136 +#define IB_REQ_PDATA_SIZE_VER1 92 +#define IB_MRA_PDATA_SIZE_VER1 222 +#define IB_REJ_PDATA_SIZE_VER1 148 +#define IB_REP_PDATA_SIZE_VER1 204 +#define IB_RTU_PDATA_SIZE_VER1 224 +#define IB_LAP_PDATA_SIZE_VER1 168 +#define IB_APR_PDATA_SIZE_VER1 151 +#define IB_DREQ_PDATA_SIZE_VER1 220 +#define IB_DREP_PDATA_SIZE_VER1 224 +#define IB_SIDR_REQ_PDATA_SIZE_VER1 216 +#define IB_SIDR_REP_PDATA_SIZE_VER1 140 +#define IB_ARI_SIZE 72 // redefine +#define IB_APR_INFO_SIZE 72 +/****d* Access Layer/ib_rej_status_t +* NAME +* ib_rej_status_t +* +* DESCRIPTION +* Rejection reasons. 
+* +* SYNOPSIS +*/ +typedef ib_net16_t ib_rej_status_t; +/* +* SEE ALSO +* ib_cm_rej, ib_cm_rej_rec_t +* +* SOURCE +*/ +#define IB_REJ_INSUF_QP CL_HTON16(1) +#define IB_REJ_INSUF_EEC CL_HTON16(2) +#define IB_REJ_INSUF_RESOURCES CL_HTON16(3) +#define IB_REJ_TIMEOUT CL_HTON16(4) +#define IB_REJ_UNSUPPORTED CL_HTON16(5) +#define IB_REJ_INVALID_COMM_ID CL_HTON16(6) +#define IB_REJ_INVALID_COMM_INSTANCE CL_HTON16(7) +#define IB_REJ_INVALID_SID CL_HTON16(8) +#define IB_REJ_INVALID_XPORT CL_HTON16(9) +#define IB_REJ_STALE_CONN CL_HTON16(10) +#define IB_REJ_RDC_NOT_EXIST CL_HTON16(11) +#define IB_REJ_INVALID_GID CL_HTON16(12) +#define IB_REJ_INVALID_LID CL_HTON16(13) +#define IB_REJ_INVALID_SL CL_HTON16(14) +#define IB_REJ_INVALID_TRAFFIC_CLASS CL_HTON16(15) +#define IB_REJ_INVALID_HOP_LIMIT CL_HTON16(16) +#define IB_REJ_INVALID_PKT_RATE CL_HTON16(17) +#define IB_REJ_INVALID_ALT_GID CL_HTON16(18) +#define IB_REJ_INVALID_ALT_LID CL_HTON16(19) +#define IB_REJ_INVALID_ALT_SL CL_HTON16(20) +#define IB_REJ_INVALID_ALT_TRAFFIC_CLASS CL_HTON16(21) +#define IB_REJ_INVALID_ALT_HOP_LIMIT CL_HTON16(22) +#define IB_REJ_INVALID_ALT_PKT_RATE CL_HTON16(23) +#define IB_REJ_PORT_REDIRECT CL_HTON16(24) +#define IB_REJ_INVALID_MTU CL_HTON16(26) +#define IB_REJ_INSUFFICIENT_RESP_RES CL_HTON16(27) +#define IB_REJ_USER_DEFINED CL_HTON16(28) +#define IB_REJ_INVALID_RNR_RETRY CL_HTON16(29) +#define IB_REJ_DUPLICATE_LOCAL_COMM_ID CL_HTON16(30) +#define IB_REJ_INVALID_CLASS_VER CL_HTON16(31) +#define IB_REJ_INVALID_FLOW_LBL CL_HTON16(32) +#define IB_REJ_INVALID_ALT_FLOW_LBL CL_HTON16(33) + +#define IB_REJ_SERVICE_HANDOFF CL_HTON16(65535) +/******/ + +/****d* Access Layer/ib_apr_status_t +* NAME +* ib_apr_status_t +* +* DESCRIPTION +* Automatic path migration status information. 
+* +* SYNOPSIS +*/ +typedef uint8_t ib_apr_status_t; +/* +* SEE ALSO +* ib_cm_apr, ib_cm_apr_rec_t +* +* SOURCE + */ +#define IB_AP_SUCCESS 0 +#define IB_AP_INVALID_COMM_ID 1 +#define IB_AP_UNSUPPORTED 2 +#define IB_AP_REJECT 3 +#define IB_AP_REDIRECT 4 +#define IB_AP_IS_CURRENT 5 +#define IB_AP_INVALID_QPN_EECN 6 +#define IB_AP_INVALID_LID 7 +#define IB_AP_INVALID_GID 8 +#define IB_AP_INVALID_FLOW_LBL 9 +#define IB_AP_INVALID_TCLASS 10 +#define IB_AP_INVALID_HOP_LIMIT 11 +#define IB_AP_INVALID_PKT_RATE 12 +#define IB_AP_INVALID_SL 13 +/******/ + +/****d* Access Layer/ib_cm_cap_mask_t +* NAME +* ib_cm_cap_mask_t +* +* DESCRIPTION +* Capability mask values in ClassPortInfo. +* +* SYNOPSIS +*/ +#define IB_CM_RELIABLE_CONN_CAPABLE CL_HTON16(9) +#define IB_CM_RELIABLE_DGRM_CAPABLE CL_HTON16(10) +#define IB_CM_RDGRM_CAPABLE CL_HTON16(11) +#define IB_CM_UNRELIABLE_CONN_CAPABLE CL_HTON16(12) +#define IB_CM_SIDR_CAPABLE CL_HTON16(13) +/* +* SEE ALSO +* ib_cm_rep, ib_class_port_info_t +* +* SOURCE +* +*******/ + +/* + * Service ID resolution status + */ +typedef uint16_t ib_sidr_status_t; +#define IB_SIDR_SUCCESS 0 +#define IB_SIDR_UNSUPPORTED 1 +#define IB_SIDR_REJECT 2 +#define IB_SIDR_NO_QP 3 +#define IB_SIDR_REDIRECT 4 +#define IB_SIDR_UNSUPPORTED_VER 5 + +END_C_DECLS +#endif /* ndef __WIN__ */ +#endif /* __IB_CM_TYPES_H__ */ diff --git a/include/iba/ib_types.h b/include/iba/ib_types.h new file mode 100644 index 0000000..025f971 --- /dev/null +++ b/include/iba/ib_types.h @@ -0,0 +1,12101 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#if !defined(__IB_TYPES_H__) +#define __IB_TYPES_H__ + +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +#if defined( __WIN__ ) +#if defined( EXPORT_AL_SYMBOLS ) +#define OSM_EXPORT __declspec(dllexport) +#else +#define OSM_EXPORT __declspec(dllimport) +#endif +#define OSM_API __stdcall +#define OSM_CDECL __cdecl +#else +#define OSM_EXPORT extern +#define OSM_API +#define OSM_CDECL +#define __ptr64 +#endif +/****h* IBA Base/Constants +* NAME +* Constants +* +* DESCRIPTION +* The following constants are used throughout the IBA code base. 
+* +* Definitions are from the InfiniBand Architecture Specification v1.3.1 +* +*********/ +/****d* IBA Base: Constants/MAD_BLOCK_SIZE +* NAME +* MAD_BLOCK_SIZE +* +* DESCRIPTION +* Size of a non-RMPP MAD datagram. +* +* SOURCE +*/ +#define MAD_BLOCK_SIZE 256 +/**********/ +/****d* IBA Base: Constants/MAD_RMPP_HDR_SIZE +* NAME +* MAD_RMPP_HDR_SIZE +* +* DESCRIPTION +* Size of an RMPP header, including the common MAD header. +* +* SOURCE +*/ +#define MAD_RMPP_HDR_SIZE 36 +/**********/ +/****d* IBA Base: Constants/MAD_RMPP_DATA_SIZE +* NAME +* MAD_RMPP_DATA_SIZE +* +* DESCRIPTION +* Size of an RMPP transaction data section. +* +* SOURCE +*/ +#define MAD_RMPP_DATA_SIZE (MAD_BLOCK_SIZE - MAD_RMPP_HDR_SIZE) +/**********/ +/****d* IBA Base: Constants/MAD_BLOCK_GRH_SIZE +* NAME +* MAD_BLOCK_GRH_SIZE +* +* DESCRIPTION +* Size of a MAD datagram, including the GRH. +* +* SOURCE +*/ +#define MAD_BLOCK_GRH_SIZE 296 +/**********/ +/****d* IBA Base: Constants/IB_LID_PERMISSIVE +* NAME +* IB_LID_PERMISSIVE +* +* DESCRIPTION +* Permissive LID +* +* SOURCE +*/ +#define IB_LID_PERMISSIVE 0xFFFF +/**********/ +/****d* IBA Base: Constants/IB_DEFAULT_PKEY +* NAME +* IB_DEFAULT_PKEY +* +* DESCRIPTION +* P_Key value for the default partition. +* +* SOURCE +*/ +#define IB_DEFAULT_PKEY 0xFFFF +/**********/ +/****d* IBA Base: Constants/IB_QP1_WELL_KNOWN_Q_KEY +* NAME +* IB_QP1_WELL_KNOWN_Q_KEY +* +* DESCRIPTION +* Well-known Q_Key for QP1 privileged mode access (15.4.2). +* +* SOURCE +*/ +#define IB_QP1_WELL_KNOWN_Q_KEY CL_HTON32(0x80010000) +/*********/ +#define IB_QP0 0 +#define IB_QP1 CL_HTON32(1) +#define IB_QP_PRIVILEGED_Q_KEY CL_HTON32(0x80000000) +/****d* IBA Base: Constants/IB_LID_UCAST_START +* NAME +* IB_LID_UCAST_START +* +* DESCRIPTION +* Lowest valid unicast LID value. 
+* +* SOURCE +*/ +#define IB_LID_UCAST_START_HO 0x0001 +#define IB_LID_UCAST_START (CL_HTON16(IB_LID_UCAST_START_HO)) +/**********/ +/****d* IBA Base: Constants/IB_LID_UCAST_END +* NAME +* IB_LID_UCAST_END +* +* DESCRIPTION +* Highest valid unicast LID value. +* +* SOURCE +*/ +#define IB_LID_UCAST_END_HO 0xBFFF +#define IB_LID_UCAST_END (CL_HTON16(IB_LID_UCAST_END_HO)) +/**********/ +/****d* IBA Base: Constants/IB_LID_MCAST_START +* NAME +* IB_LID_MCAST_START +* +* DESCRIPTION +* Lowest valid multicast LID value. +* +* SOURCE +*/ +#define IB_LID_MCAST_START_HO 0xC000 +#define IB_LID_MCAST_START (CL_HTON16(IB_LID_MCAST_START_HO)) +/**********/ +/****d* IBA Base: Constants/IB_LID_MCAST_END +* NAME +* IB_LID_MCAST_END +* +* DESCRIPTION +* Highest valid multicast LID value. +* +* SOURCE +*/ +#define IB_LID_MCAST_END_HO 0xFFFE +#define IB_LID_MCAST_END (CL_HTON16(IB_LID_MCAST_END_HO)) +/**********/ +/****d* IBA Base: Constants/IB_DEFAULT_SUBNET_PREFIX +* NAME +* IB_DEFAULT_SUBNET_PREFIX +* +* DESCRIPTION +* Default subnet GID prefix. +* +* SOURCE +*/ +#define IB_DEFAULT_SUBNET_PREFIX (CL_HTON64(0xFE80000000000000ULL)) +#define IB_DEFAULT_SUBNET_PREFIX_HO (0xFE80000000000000ULL) +/**********/ +/****d* IBA Base: Constants/IB_NODE_NUM_PORTS_MAX +* NAME +* IB_NODE_NUM_PORTS_MAX +* +* DESCRIPTION +* Maximum number of ports in a single node (14.2.5.7). +* SOURCE +*/ +#define IB_NODE_NUM_PORTS_MAX 0xFE +/**********/ +/****d* IBA Base: Constants/IB_INVALID_PORT_NUM +* NAME +* IB_INVALID_PORT_NUM +* +* DESCRIPTION +* Value used to indicate an invalid port number (14.2.5.10). +* +* SOURCE +*/ +#define IB_INVALID_PORT_NUM 0xFF +/*********/ +/****d* IBA Base: Constants/IB_SUBNET_PATH_HOPS_MAX +* NAME +* IB_SUBNET_PATH_HOPS_MAX +* +* DESCRIPTION +* Maximum number of directed route switch hops in a subnet (14.2.1.2). 
+* +* SOURCE +*/ +#define IB_SUBNET_PATH_HOPS_MAX 64 +/*********/ +/****d* IBA Base: Constants/IB_HOPLIMIT_MAX +* NAME +* IB_HOPLIMIT_MAX +* +* DESCRIPTION +* Maximum number of router hops allowed. +* +* SOURCE +*/ +#define IB_HOPLIMIT_MAX 255 +/*********/ +/****d* IBA Base: Constants/IB_MC_SCOPE_* +* NAME +* IB_MC_SCOPE_* +* +* DESCRIPTION +* Scope component definitions from IBA 1.2.1 (Table 3 p. 148) +*/ +#define IB_MC_SCOPE_LINK_LOCAL 0x2 +#define IB_MC_SCOPE_SITE_LOCAL 0x5 +#define IB_MC_SCOPE_ORG_LOCAL 0x8 +#define IB_MC_SCOPE_GLOBAL 0xE +/*********/ +/****d* IBA Base: Constants/IB_PKEY_MAX_BLOCKS +* NAME +* IB_PKEY_MAX_BLOCKS +* +* DESCRIPTION +* Maximum number of PKEY blocks (14.2.5.7). +* +* SOURCE +*/ +#define IB_PKEY_MAX_BLOCKS 2048 +/*********/ +/****d* IBA Base: Constants/IB_MCAST_MAX_BLOCK_ID +* NAME +* IB_MCAST_MAX_BLOCK_ID +* +* DESCRIPTION +* Maximum number of Multicast port mask blocks +* +* SOURCE +*/ +#define IB_MCAST_MAX_BLOCK_ID 511 +/*********/ +/****d* IBA Base: Constants/IB_MCAST_BLOCK_ID_MASK_HO +* NAME +* IB_MCAST_BLOCK_ID_MASK_HO +* +* DESCRIPTION +* Mask (host order) to recover the Multicast block ID. +* +* SOURCE +*/ +#define IB_MCAST_BLOCK_ID_MASK_HO 0x000001FF +/*********/ +/****d* IBA Base: Constants/IB_MCAST_BLOCK_SIZE +* NAME +* IB_MCAST_BLOCK_SIZE +* +* DESCRIPTION +* Number of port mask entries in a multicast forwarding table block. +* +* SOURCE +*/ +#define IB_MCAST_BLOCK_SIZE 32 +/*********/ +/****d* IBA Base: Constants/IB_MCAST_MASK_SIZE +* NAME +* IB_MCAST_MASK_SIZE +* +* DESCRIPTION +* Number of port mask bits in each entry in the multicast forwarding table. +* +* SOURCE +*/ +#define IB_MCAST_MASK_SIZE 16 +/*********/ +/****d* IBA Base: Constants/IB_MCAST_POSITION_MASK_HO +* NAME +* IB_MCAST_POSITION_MASK_HO +* +* DESCRIPTION +* Mask (host order) to recover the multicast block position. 
+* +* SOURCE +*/ +#define IB_MCAST_POSITION_MASK_HO 0xF0000000 +/*********/ +/****d* IBA Base: Constants/IB_MCAST_POSITION_MAX +* NAME +* IB_MCAST_POSITION_MAX +* +* DESCRIPTION +* Maximum value for the multicast block position. +* +* SOURCE +*/ +#define IB_MCAST_POSITION_MAX 0xF +/*********/ +/****d* IBA Base: Constants/IB_MCAST_POSITION_SHIFT +* NAME +* IB_MCAST_POSITION_SHIFT +* +* DESCRIPTION +* Shift value to normalize the multicast block position value. +* +* SOURCE +*/ +#define IB_MCAST_POSITION_SHIFT 28 +/*********/ +/****d* IBA Base: Constants/IB_PKEY_ENTRIES_MAX +* NAME +* IB_PKEY_ENTRIES_MAX +* +* DESCRIPTION +* Maximum number of PKEY entries per port (14.2.5.7). +* +* SOURCE +*/ +#define IB_PKEY_ENTRIES_MAX (IB_PKEY_MAX_BLOCKS * IB_NUM_PKEY_ELEMENTS_IN_BLOCK) +/*********/ +/****d* IBA Base: Constants/IB_PKEY_BASE_MASK +* NAME +* IB_PKEY_BASE_MASK +* +* DESCRIPTION +* Masks for the base P_Key value given a P_Key Entry. +* +* SOURCE +*/ +#define IB_PKEY_BASE_MASK (CL_HTON16(0x7FFF)) +/*********/ +/****d* IBA Base: Constants/IB_PKEY_TYPE_MASK +* NAME +* IB_PKEY_TYPE_MASK +* +* DESCRIPTION +* Masks for the P_Key membership type given a P_Key Entry. 
+* +* SOURCE +*/ +#define IB_PKEY_TYPE_MASK (CL_HTON16(0x8000)) +/*********/ +/****d* IBA Base: Constants/IB_DEFAULT_PARTIAL_PKEY +* NAME +* IB_DEFAULT_PARTIAL_PKEY +* +* DESCRIPTION +* 0x7FFF in network order +* +* SOURCE +*/ +#define IB_DEFAULT_PARTIAL_PKEY (CL_HTON16(0x7FFF)) +/**********/ +/****d* IBA Base: Constants/IB_MCLASS_SUBN_LID +* NAME +* IB_MCLASS_SUBN_LID +* +* DESCRIPTION +* Subnet Management Class, Subnet Manager LID routed (13.4.4) +* +* SOURCE +*/ +#define IB_MCLASS_SUBN_LID 0x01 +/**********/ +/****d* IBA Base: Constants/IB_MCLASS_SUBN_DIR +* NAME +* IB_MCLASS_SUBN_DIR +* +* DESCRIPTION +* Subnet Management Class, Subnet Manager directed route (13.4.4) +* +* SOURCE +*/ +#define IB_MCLASS_SUBN_DIR 0x81 +/**********/ +/****d* IBA Base: Constants/IB_MCLASS_SUBN_ADM +* NAME +* IB_MCLASS_SUBN_ADM +* +* DESCRIPTION +* Management Class, Subnet Administration (13.4.4) +* +* SOURCE +*/ +#define IB_MCLASS_SUBN_ADM 0x03 +/**********/ +/****d* IBA Base: Constants/IB_MCLASS_PERF +* NAME +* IB_MCLASS_PERF +* +* DESCRIPTION +* Management Class, Performance Management (13.4.4) +* +* SOURCE +*/ +#define IB_MCLASS_PERF 0x04 +/**********/ +/****d* IBA Base: Constants/IB_MCLASS_BM +* NAME +* IB_MCLASS_BM +* +* DESCRIPTION +* Management Class, Baseboard Management (13.4.4) +* +* SOURCE +*/ +#define IB_MCLASS_BM 0x05 +/**********/ +/****d* IBA Base: Constants/IB_MCLASS_DEV_MGMT +* NAME +* IB_MCLASS_DEV_MGMT +* +* DESCRIPTION +* Management Class, Device Management (13.4.4) +* +* SOURCE +*/ +#define IB_MCLASS_DEV_MGMT 0x06 +/**********/ +/****d* IBA Base: Constants/IB_MCLASS_COMM_MGMT +* NAME +* IB_MCLASS_COMM_MGMT +* +* DESCRIPTION +* Management Class, Communication Management (13.4.4) +* +* SOURCE +*/ +#define IB_MCLASS_COMM_MGMT 0x07 +/**********/ +/****d* IBA Base: Constants/IB_MCLASS_SNMP +* NAME +* IB_MCLASS_SNMP +* +* DESCRIPTION +* Management Class, SNMP Tunneling (13.4.4) +* +* SOURCE +*/ +#define IB_MCLASS_SNMP 0x08 +/**********/ +/****d* IBA Base: 
Constants/IB_MCLASS_VENDOR_LOW_RANGE_MIN +* NAME +* IB_MCLASS_VENDOR_LOW_RANGE_MIN +* +* DESCRIPTION +* Management Class, Vendor Specific Low Range Start +* +* SOURCE +*/ +#define IB_MCLASS_VENDOR_LOW_RANGE_MIN 0x09 +/**********/ +/****d* IBA Base: Constants/IB_MCLASS_VENDOR_LOW_RANGE_MAX +* NAME +* IB_MCLASS_VENDOR_LOW_RANGE_MAX +* +* DESCRIPTION +* Management Class, Vendor Specific Low Range End +* +* SOURCE +*/ +#define IB_MCLASS_VENDOR_LOW_RANGE_MAX 0x0F +/**********/ +/****d* IBA Base: Constants/IB_MCLASS_DEV_ADM +* NAME +* IB_MCLASS_DEV_ADM +* +* DESCRIPTION +* Management Class, Device Administration +* +* SOURCE +*/ +#define IB_MCLASS_DEV_ADM 0x10 +/**********/ +/****d* IBA Base: Constants/IB_MCLASS_BIS +* NAME +* IB_MCLASS_BIS +* +* DESCRIPTION +* Management Class, BIS +* +* SOURCE +*/ +#define IB_MCLASS_BIS 0x12 +/**********/ +/****d* IBA Base: Constants/IB_MCLASS_CC +* NAME +* IB_MCLASS_CC +* +* DESCRIPTION +* Management Class, Congestion Control (A10.4.1) +* +* SOURCE +*/ +#define IB_MCLASS_CC 0x21 +/**********/ +/****d* IBA Base: Constants/IB_MCLASS_VENDOR_HIGH_RANGE_MIN +* NAME +* IB_MCLASS_VENDOR_HIGH_RANGE_MIN +* +* DESCRIPTION +* Management Class, Vendor Specific High Range Start +* +* SOURCE +*/ +#define IB_MCLASS_VENDOR_HIGH_RANGE_MIN 0x30 +/**********/ +/****d* IBA Base: Constants/IB_MCLASS_VENDOR_HIGH_RANGE_MAX +* NAME +* IB_MCLASS_VENDOR_HIGH_RANGE_MAX +* +* DESCRIPTION +* Management Class, Vendor Specific High Range End +* +* SOURCE +*/ +#define IB_MCLASS_VENDOR_HIGH_RANGE_MAX 0x4F +/**********/ +/****f* IBA Base: Types/ib_class_is_vendor_specific_low +* NAME +* ib_class_is_vendor_specific_low +* +* DESCRIPTION +* Indicates if the Class Code is a vendor specific class from +* the low range (bounds inclusive) +* +* SYNOPSIS +*/ +static inline boolean_t OSM_API +ib_class_is_vendor_specific_low(IN const uint8_t class_code) +{ + return ((class_code >= IB_MCLASS_VENDOR_LOW_RANGE_MIN) && + (class_code <= IB_MCLASS_VENDOR_LOW_RANGE_MAX)); +} + +/* +* PARAMETERS +*
class_code +* [in] The Management Datagram Class Code +* +* RETURN VALUE +* TRUE if the class is in the Low range of Vendor Specific MADs +* FALSE otherwise. +* +* NOTES +* +* SEE ALSO +* IB_MCLASS_VENDOR_LOW_RANGE_MIN, IB_MCLASS_VENDOR_LOW_RANGE_MAX +*********/ + +/****f* IBA Base: Types/ib_class_is_vendor_specific_high +* NAME +* ib_class_is_vendor_specific_high +* +* DESCRIPTION +* Indicates if the Class Code is a vendor specific class from +* the high range (bounds inclusive) +* +* SYNOPSIS +*/ +static inline boolean_t OSM_API +ib_class_is_vendor_specific_high(IN const uint8_t class_code) +{ + return ((class_code >= IB_MCLASS_VENDOR_HIGH_RANGE_MIN) && + (class_code <= IB_MCLASS_VENDOR_HIGH_RANGE_MAX)); +} + +/* +* PARAMETERS +* class_code +* [in] The Management Datagram Class Code +* +* RETURN VALUE +* TRUE if the class is in the High range of Vendor Specific MADs +* FALSE otherwise. +* +* NOTES +* +* SEE ALSO +* IB_MCLASS_VENDOR_HIGH_RANGE_MIN, IB_MCLASS_VENDOR_HIGH_RANGE_MAX +*********/ + +/****f* IBA Base: Types/ib_class_is_vendor_specific +* NAME +* ib_class_is_vendor_specific +* +* DESCRIPTION +* Indicates if the Class Code is a vendor specific class (low or high range) +* +* SYNOPSIS +*/ +static inline boolean_t OSM_API +ib_class_is_vendor_specific(IN const uint8_t class_code) +{ + return (ib_class_is_vendor_specific_low(class_code) || + ib_class_is_vendor_specific_high(class_code)); +} + +/* +* PARAMETERS +* class_code +* [in] The Management Datagram Class Code +* +* RETURN VALUE +* TRUE if the class is a Vendor Specific MAD +* FALSE otherwise.
+* +* NOTES +* +* SEE ALSO +* ib_class_is_vendor_specific_low, ib_class_is_vendor_specific_high +*********/ + +/****f* IBA Base: Types/ib_class_is_rmpp +* NAME +* ib_class_is_rmpp +* +* DESCRIPTION +* Indicates if the Class Code supports RMPP +* +* SYNOPSIS +*/ +static inline boolean_t OSM_API ib_class_is_rmpp(IN const uint8_t class_code) +{ + return ((class_code == IB_MCLASS_SUBN_ADM) || + (class_code == IB_MCLASS_DEV_MGMT) || + (class_code == IB_MCLASS_DEV_ADM) || + (class_code == IB_MCLASS_BIS) || + ib_class_is_vendor_specific_high(class_code)); +} + +/* +* PARAMETERS +* class_code +* [in] The Management Datagram Class Code +* +* RETURN VALUE +* TRUE if the class supports RMPP +* FALSE otherwise. +* +* NOTES +* TRUE only for SUBN_ADM, DEV_MGMT, DEV_ADM, BIS and the high vendor specific range. +*********/ + +/* + * MAD methods + */ + +/****d* IBA Base: Constants/IB_MAX_METHODS +* NAME +* IB_MAX_METHODS +* +* DESCRIPTION +* Total number of methods available to a class, not including the R-bit. +* +* SOURCE +*/ +#define IB_MAX_METHODS 128 +/**********/ + +/****d* IBA Base: Constants/IB_MAD_METHOD_RESP_MASK +* NAME +* IB_MAD_METHOD_RESP_MASK +* +* DESCRIPTION +* Response mask to extract 'R' bit from the method field.
(13.4.5) +* +* SOURCE +*/ +#define IB_MAD_METHOD_RESP_MASK 0x80 +/**********/ + +/****d* IBA Base: Constants/IB_MAD_METHOD_GET +* NAME +* IB_MAD_METHOD_GET +* +* DESCRIPTION +* Get() Method (13.4.5) +* +* SOURCE +*/ +#define IB_MAD_METHOD_GET 0x01 +/**********/ + +/****d* IBA Base: Constants/IB_MAD_METHOD_SET +* NAME +* IB_MAD_METHOD_SET +* +* DESCRIPTION +* Set() Method (13.4.5) +* +* SOURCE +*/ +#define IB_MAD_METHOD_SET 0x02 +/**********/ + +/****d* IBA Base: Constants/IB_MAD_METHOD_GET_RESP +* NAME +* IB_MAD_METHOD_GET_RESP +* +* DESCRIPTION +* GetResp() Method (13.4.5) +* +* SOURCE +*/ +#define IB_MAD_METHOD_GET_RESP 0x81 +/**********/ + +#define IB_MAD_METHOD_DELETE 0x15 + +/****d* IBA Base: Constants/IB_MAD_METHOD_GETTABLE +* NAME +* IB_MAD_METHOD_GETTABLE +* +* DESCRIPTION +* SubnAdmGetTable() Method (15.2.2) +* +* SOURCE +*/ +#define IB_MAD_METHOD_GETTABLE 0x12 +/**********/ + +/****d* IBA Base: Constants/IB_MAD_METHOD_GETTABLE_RESP +* NAME +* IB_MAD_METHOD_GETTABLE_RESP +* +* DESCRIPTION +* SubnAdmGetTableResp() Method (15.2.2) +* +* SOURCE +*/ +#define IB_MAD_METHOD_GETTABLE_RESP 0x92 + +/**********/ + +#define IB_MAD_METHOD_GETTRACETABLE 0x13 +#define IB_MAD_METHOD_GETMULTI 0x14 +#define IB_MAD_METHOD_GETMULTI_RESP 0x94 + +/****d* IBA Base: Constants/IB_MAD_METHOD_SEND +* NAME +* IB_MAD_METHOD_SEND +* +* DESCRIPTION +* Send() Method (13.4.5) +* +* SOURCE +*/ +#define IB_MAD_METHOD_SEND 0x03 +/**********/ + +/****d* IBA Base: Constants/IB_MAD_METHOD_TRAP +* NAME +* IB_MAD_METHOD_TRAP +* +* DESCRIPTION +* Trap() Method (13.4.5) +* +* SOURCE +*/ +#define IB_MAD_METHOD_TRAP 0x05 +/**********/ + +/****d* IBA Base: Constants/IB_MAD_METHOD_REPORT +* NAME +* IB_MAD_METHOD_REPORT +* +* DESCRIPTION +* Report() Method (13.4.5) +* +* SOURCE +*/ +#define IB_MAD_METHOD_REPORT 0x06 +/**********/ + +/****d* IBA Base: Constants/IB_MAD_METHOD_REPORT_RESP +* NAME +* IB_MAD_METHOD_REPORT_RESP +* +* DESCRIPTION +* ReportResp() Method (13.4.5) +* +* SOURCE +*/ +#define 
IB_MAD_METHOD_REPORT_RESP 0x86 +/**********/ + +/****d* IBA Base: Constants/IB_MAD_METHOD_TRAP_REPRESS +* NAME +* IB_MAD_METHOD_TRAP_REPRESS +* +* DESCRIPTION +* TrapRepress() Method (13.4.5) +* +* SOURCE +*/ +#define IB_MAD_METHOD_TRAP_REPRESS 0x07 +/**********/ + +/****d* IBA Base: Constants/IB_MAD_STATUS_BUSY +* NAME +* IB_MAD_STATUS_BUSY +* +* DESCRIPTION +* Temporarily busy, MAD discarded (13.4.7) +* +* SOURCE +*/ +#define IB_MAD_STATUS_BUSY (CL_HTON16(0x0001)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_STATUS_REDIRECT +* NAME +* IB_MAD_STATUS_REDIRECT +* +* DESCRIPTION +* QP Redirection required (13.4.7) +* +* SOURCE +*/ +#define IB_MAD_STATUS_REDIRECT (CL_HTON16(0x0002)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_STATUS_UNSUP_CLASS_VER +* NAME +* IB_MAD_STATUS_UNSUP_CLASS_VER +* +* DESCRIPTION +* Unsupported class version (13.4.7) +* +* SOURCE +*/ +#define IB_MAD_STATUS_UNSUP_CLASS_VER (CL_HTON16(0x0004)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_STATUS_UNSUP_METHOD +* NAME +* IB_MAD_STATUS_UNSUP_METHOD +* +* DESCRIPTION +* Unsupported method (13.4.7) +* +* SOURCE +*/ +#define IB_MAD_STATUS_UNSUP_METHOD (CL_HTON16(0x0008)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_STATUS_UNSUP_METHOD_ATTR +* NAME +* IB_MAD_STATUS_UNSUP_METHOD_ATTR +* +* DESCRIPTION +* Unsupported method/attribute combination (13.4.7) +* +* SOURCE +*/ +#define IB_MAD_STATUS_UNSUP_METHOD_ATTR (CL_HTON16(0x000C)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_STATUS_INVALID_FIELD +* NAME +* IB_MAD_STATUS_INVALID_FIELD +* +* DESCRIPTION +* Attribute contains one or more invalid fields (13.4.7) +* +* SOURCE +*/ +#define IB_MAD_STATUS_INVALID_FIELD (CL_HTON16(0x001C)) +/**********/ + +#define IB_MAD_STATUS_CLASS_MASK (CL_HTON16(0xFF00)) + +#define IB_SA_MAD_STATUS_SUCCESS (CL_HTON16(0x0000)) +#define IB_SA_MAD_STATUS_NO_RESOURCES (CL_HTON16(0x0100)) +#define IB_SA_MAD_STATUS_REQ_INVALID (CL_HTON16(0x0200)) +#define IB_SA_MAD_STATUS_NO_RECORDS 
(CL_HTON16(0x0300)) +#define IB_SA_MAD_STATUS_TOO_MANY_RECORDS (CL_HTON16(0x0400)) +#define IB_SA_MAD_STATUS_INVALID_GID (CL_HTON16(0x0500)) +#define IB_SA_MAD_STATUS_INSUF_COMPS (CL_HTON16(0x0600)) +#define IB_SA_MAD_STATUS_DENIED (CL_HTON16(0x0700)) +#define IB_SA_MAD_STATUS_PRIO_SUGGESTED (CL_HTON16(0x0800)) + +#define IB_DM_MAD_STATUS_NO_IOC_RESP (CL_HTON16(0x0100)) +#define IB_DM_MAD_STATUS_NO_SVC_ENTRIES (CL_HTON16(0x0200)) +#define IB_DM_MAD_STATUS_IOC_FAILURE (CL_HTON16(0x8000)) + +/****d* IBA Base: Constants/IB_MAD_ATTR_CLASS_PORT_INFO +* NAME +* IB_MAD_ATTR_CLASS_PORT_INFO +* +* DESCRIPTION +* ClassPortInfo attribute (13.4.8) +* +* SOURCE +*/ +#define IB_MAD_ATTR_CLASS_PORT_INFO (CL_HTON16(0x0001)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_NOTICE +* NAME +* IB_MAD_ATTR_NOTICE +* +* DESCRIPTION +* Notice attribute (13.4.8) +* +* SOURCE +*/ +#define IB_MAD_ATTR_NOTICE (CL_HTON16(0x0002)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_INFORM_INFO +* NAME +* IB_MAD_ATTR_INFORM_INFO +* +* DESCRIPTION +* InformInfo attribute (13.4.8) +* +* SOURCE +*/ +#define IB_MAD_ATTR_INFORM_INFO (CL_HTON16(0x0003)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_NODE_DESC +* NAME +* IB_MAD_ATTR_NODE_DESC +* +* DESCRIPTION +* NodeDescription attribute (14.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_NODE_DESC (CL_HTON16(0x0010)) + +/****d* IBA Base: Constants/IB_MAD_ATTR_PORT_SMPL_CTRL +* NAME +* IB_MAD_ATTR_PORT_SMPL_CTRL +* +* DESCRIPTION +* PortSamplesControl attribute (16.1.3) +* +* SOURCE +*/ +#define IB_MAD_ATTR_PORT_SMPL_CTRL (CL_HTON16(0x0010)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_NODE_INFO +* NAME +* IB_MAD_ATTR_NODE_INFO +* +* DESCRIPTION +* NodeInfo attribute (14.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_NODE_INFO (CL_HTON16(0x0011)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_PORT_SMPL_RSLT +* NAME +* IB_MAD_ATTR_PORT_SMPL_RSLT +* +* DESCRIPTION +* PortSamplesResult attribute (16.1.3) +* +* SOURCE 
+*/ +#define IB_MAD_ATTR_PORT_SMPL_RSLT (CL_HTON16(0x0011)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_SWITCH_INFO +* NAME +* IB_MAD_ATTR_SWITCH_INFO +* +* DESCRIPTION +* SwitchInfo attribute (14.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_SWITCH_INFO (CL_HTON16(0x0012)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_PORT_CNTRS +* NAME +* IB_MAD_ATTR_PORT_CNTRS +* +* DESCRIPTION +* PortCounters attribute (16.1.3) +* +* SOURCE +*/ +#define IB_MAD_ATTR_PORT_CNTRS (CL_HTON16(0x0012)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_PORT_CNTRS_EXT +* NAME +* IB_MAD_ATTR_PORT_CNTRS_EXT +* +* DESCRIPTION +* PortCountersExtended attribute (16.1.4) +* +* SOURCE +*/ +#define IB_MAD_ATTR_PORT_CNTRS_EXT (CL_HTON16(0x001D)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_PORT_XMIT_DATA_SL +* NAME +* IB_MAD_ATTR_PORT_XMIT_DATA_SL +* +* DESCRIPTION +* PortXmitDataSL attribute (A13.6.4) +* +* SOURCE +*/ +#define IB_MAD_ATTR_PORT_XMIT_DATA_SL (CL_HTON16(0x0036)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_PORT_RCV_DATA_SL +* NAME +* IB_MAD_ATTR_PORT_RCV_DATA_SL +* +* DESCRIPTION +* PortRcvDataSL attribute (A13.6.4) +* +* SOURCE +*/ +#define IB_MAD_ATTR_PORT_RCV_DATA_SL (CL_HTON16(0x0037)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_GUID_INFO +* NAME +* IB_MAD_ATTR_GUID_INFO +* +* DESCRIPTION +* GUIDInfo attribute (14.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_GUID_INFO (CL_HTON16(0x0014)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_PORT_INFO +* NAME +* IB_MAD_ATTR_PORT_INFO +* +* DESCRIPTION +* PortInfo attribute (14.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_PORT_INFO (CL_HTON16(0x0015)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_P_KEY_TABLE +* NAME +* IB_MAD_ATTR_P_KEY_TABLE +* +* DESCRIPTION +* PartitionTable attribute (14.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_P_KEY_TABLE (CL_HTON16(0x0016)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_SLVL_TABLE +* NAME +* 
IB_MAD_ATTR_SLVL_TABLE +* +* DESCRIPTION +* SL VL Mapping Table attribute (14.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_SLVL_TABLE (CL_HTON16(0x0017)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_VL_ARBITRATION +* NAME +* IB_MAD_ATTR_VL_ARBITRATION +* +* DESCRIPTION +* VL Arbitration Table attribute (14.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_VL_ARBITRATION (CL_HTON16(0x0018)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_LIN_FWD_TBL +* NAME +* IB_MAD_ATTR_LIN_FWD_TBL +* +* DESCRIPTION +* Switch linear forwarding table +* +* SOURCE +*/ +#define IB_MAD_ATTR_LIN_FWD_TBL (CL_HTON16(0x0019)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_RND_FWD_TBL +* NAME +* IB_MAD_ATTR_RND_FWD_TBL +* +* DESCRIPTION +* Switch random forwarding table +* +* SOURCE +*/ +#define IB_MAD_ATTR_RND_FWD_TBL (CL_HTON16(0x001A)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_MCAST_FWD_TBL +* NAME +* IB_MAD_ATTR_MCAST_FWD_TBL +* +* DESCRIPTION +* Switch multicast forwarding table +* +* SOURCE +*/ +#define IB_MAD_ATTR_MCAST_FWD_TBL (CL_HTON16(0x001B)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_NODE_RECORD +* NAME +* IB_MAD_ATTR_NODE_RECORD +* +* DESCRIPTION +* NodeRecord attribute (15.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_NODE_RECORD (CL_HTON16(0x0011)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_PORTINFO_RECORD +* NAME +* IB_MAD_ATTR_PORTINFO_RECORD +* +* DESCRIPTION +* PortInfoRecord attribute (15.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_PORTINFO_RECORD (CL_HTON16(0x0012)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_SWITCH_INFO_RECORD +* NAME +* IB_MAD_ATTR_SWITCH_INFO_RECORD +* +* DESCRIPTION +* SwitchInfoRecord attribute (15.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_SWITCH_INFO_RECORD (CL_HTON16(0x0014)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_LINK_RECORD +* NAME +* IB_MAD_ATTR_LINK_RECORD +* +* DESCRIPTION +* LinkRecord attribute (15.2.5) +* +* SOURCE +*/ +#define 
IB_MAD_ATTR_LINK_RECORD (CL_HTON16(0x0020)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_SM_INFO +* NAME +* IB_MAD_ATTR_SM_INFO +* +* DESCRIPTION +* SMInfo attribute (14.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_SM_INFO (CL_HTON16(0x0020)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_SMINFO_RECORD +* NAME +* IB_MAD_ATTR_SMINFO_RECORD +* +* DESCRIPTION +* SMInfoRecord attribute (15.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_SMINFO_RECORD (CL_HTON16(0x0018)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_GUIDINFO_RECORD +* NAME +* IB_MAD_ATTR_GUIDINFO_RECORD +* +* DESCRIPTION +* GuidInfoRecord attribute (15.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_GUIDINFO_RECORD (CL_HTON16(0x0030)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_VENDOR_DIAG +* NAME +* IB_MAD_ATTR_VENDOR_DIAG +* +* DESCRIPTION +* VendorDiag attribute (14.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_VENDOR_DIAG (CL_HTON16(0x0030)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_LED_INFO +* NAME +* IB_MAD_ATTR_LED_INFO +* +* DESCRIPTION +* LedInfo attribute (14.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_LED_INFO (CL_HTON16(0x0031)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO +* NAME +* IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO +* +* DESCRIPTION +* Vendor specific SM attribute (14.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO (CL_HTON16(0xFF90)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_SERVICE_RECORD +* NAME +* IB_MAD_ATTR_SERVICE_RECORD +* +* DESCRIPTION +* ServiceRecord attribute (15.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_SERVICE_RECORD (CL_HTON16(0x0031)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_LFT_RECORD +* NAME +* IB_MAD_ATTR_LFT_RECORD +* +* DESCRIPTION +* LinearForwardingTableRecord attribute (15.2.5.6) +* +* SOURCE +*/ +#define IB_MAD_ATTR_LFT_RECORD (CL_HTON16(0x0015)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_MFT_RECORD +* NAME +* 
IB_MAD_ATTR_MFT_RECORD +* +* DESCRIPTION +* MulticastForwardingTableRecord attribute (15.2.5.8) +* +* SOURCE +*/ +#define IB_MAD_ATTR_MFT_RECORD (CL_HTON16(0x0017)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_PKEY_TBL_RECORD +* NAME +* IB_MAD_ATTR_PKEY_TBL_RECORD +* +* DESCRIPTION +* PKEY Table Record attribute (15.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_PKEY_TBL_RECORD (CL_HTON16(0x0033)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_PATH_RECORD +* NAME +* IB_MAD_ATTR_PATH_RECORD +* +* DESCRIPTION +* PathRecord attribute (15.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_PATH_RECORD (CL_HTON16(0x0035)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_VLARB_RECORD +* NAME +* IB_MAD_ATTR_VLARB_RECORD +* +* DESCRIPTION +* VL Arbitration Table Record attribute (15.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_VLARB_RECORD (CL_HTON16(0x0036)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_SLVL_RECORD +* NAME +* IB_MAD_ATTR_SLVL_RECORD +* +* DESCRIPTION +* SLtoVL Mapping Table Record attribute (15.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_SLVL_RECORD (CL_HTON16(0x0013)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_MCMEMBER_RECORD +* NAME +* IB_MAD_ATTR_MCMEMBER_RECORD +* +* DESCRIPTION +* MCMemberRecord attribute (15.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_MCMEMBER_RECORD (CL_HTON16(0x0038)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_TRACE_RECORD +* NAME +* IB_MAD_ATTR_TRACE_RECORD +* +* DESCRIPTION +* TraceRecord attribute (15.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_TRACE_RECORD (CL_HTON16(0x0039)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_MULTIPATH_RECORD +* NAME +* IB_MAD_ATTR_MULTIPATH_RECORD +* +* DESCRIPTION +* MultiPathRecord attribute (15.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_MULTIPATH_RECORD (CL_HTON16(0x003A)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_SVC_ASSOCIATION_RECORD +* NAME +* IB_MAD_ATTR_SVC_ASSOCIATION_RECORD +* +* DESCRIPTION +* Service
Association Record attribute (15.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_SVC_ASSOCIATION_RECORD (CL_HTON16(0x003B)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_INFORM_INFO_RECORD +* NAME +* IB_MAD_ATTR_INFORM_INFO_RECORD +* +* DESCRIPTION +* InformInfo Record attribute (15.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_INFORM_INFO_RECORD (CL_HTON16(0x00F3)) + +/****d* IBA Base: Constants/IB_MAD_ATTR_IO_UNIT_INFO +* NAME +* IB_MAD_ATTR_IO_UNIT_INFO +* +* DESCRIPTION +* IOUnitInfo attribute (16.3.3) +* +* SOURCE +*/ +#define IB_MAD_ATTR_IO_UNIT_INFO (CL_HTON16(0x0010)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_IO_CONTROLLER_PROFILE +* NAME +* IB_MAD_ATTR_IO_CONTROLLER_PROFILE +* +* DESCRIPTION +* IOControllerProfile attribute (16.3.3) +* +* SOURCE +*/ +#define IB_MAD_ATTR_IO_CONTROLLER_PROFILE (CL_HTON16(0x0011)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_SERVICE_ENTRIES +* NAME +* IB_MAD_ATTR_SERVICE_ENTRIES +* +* DESCRIPTION +* ServiceEntries attribute (16.3.3) +* +* SOURCE +*/ +#define IB_MAD_ATTR_SERVICE_ENTRIES (CL_HTON16(0x0012)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_DIAGNOSTIC_TIMEOUT +* NAME +* IB_MAD_ATTR_DIAGNOSTIC_TIMEOUT +* +* DESCRIPTION +* DiagnosticTimeout attribute (16.3.3) +* +* SOURCE +*/ +#define IB_MAD_ATTR_DIAGNOSTIC_TIMEOUT (CL_HTON16(0x0020)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_PREPARE_TO_TEST +* NAME +* IB_MAD_ATTR_PREPARE_TO_TEST +* +* DESCRIPTION +* PrepareToTest attribute (16.3.3) +* +* SOURCE +*/ +#define IB_MAD_ATTR_PREPARE_TO_TEST (CL_HTON16(0x0021)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_TEST_DEVICE_ONCE +* NAME +* IB_MAD_ATTR_TEST_DEVICE_ONCE +* +* DESCRIPTION +* TestDeviceOnce attribute (16.3.3) +* +* SOURCE +*/ +#define IB_MAD_ATTR_TEST_DEVICE_ONCE (CL_HTON16(0x0022)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_TEST_DEVICE_LOOP +* NAME +* IB_MAD_ATTR_TEST_DEVICE_LOOP +* +* DESCRIPTION +* TestDeviceLoop attribute (16.3.3) +* 
+* SOURCE +*/ +#define IB_MAD_ATTR_TEST_DEVICE_LOOP (CL_HTON16(0x0023)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_DIAG_CODE +* NAME +* IB_MAD_ATTR_DIAG_CODE +* +* DESCRIPTION +* DiagCode attribute (16.3.3) +* +* SOURCE +*/ +#define IB_MAD_ATTR_DIAG_CODE (CL_HTON16(0x0024)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_SVC_ASSOCIATION_RECORD +* NAME +* IB_MAD_ATTR_SVC_ASSOCIATION_RECORD +* +* DESCRIPTION +* Service Association Record attribute (15.2.5) +* +* SOURCE +*/ +#define IB_MAD_ATTR_SVC_ASSOCIATION_RECORD (CL_HTON16(0x003B)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_CONG_INFO +* NAME +* IB_MAD_ATTR_CONG_INFO +* +* DESCRIPTION +* CongestionInfo attribute (A10.4.3) +* +* SOURCE +*/ +#define IB_MAD_ATTR_CONG_INFO (CL_HTON16(0x0011)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_CONG_KEY_INFO +* NAME +* IB_MAD_ATTR_CONG_KEY_INFO +* +* DESCRIPTION +* CongestionKeyInfo attribute (A10.4.3) +* +* SOURCE +*/ +#define IB_MAD_ATTR_CONG_KEY_INFO (CL_HTON16(0x0012)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_CONG_LOG +* NAME +* IB_MAD_ATTR_CONG_LOG +* +* DESCRIPTION +* CongestionLog attribute (A10.4.3) +* +* SOURCE +*/ +#define IB_MAD_ATTR_CONG_LOG (CL_HTON16(0x0013)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_SW_CONG_SETTING +* NAME +* IB_MAD_ATTR_SW_CONG_SETTING +* +* DESCRIPTION +* SwitchCongestionSetting attribute (A10.4.3) +* +* SOURCE +*/ +#define IB_MAD_ATTR_SW_CONG_SETTING (CL_HTON16(0x0014)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_SW_PORT_CONG_SETTING +* NAME +* IB_MAD_ATTR_SW_PORT_CONG_SETTING +* +* DESCRIPTION +* SwitchPortCongestionSetting attribute (A10.4.3) +* +* SOURCE +*/ +#define IB_MAD_ATTR_SW_PORT_CONG_SETTING (CL_HTON16(0x0015)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_CA_CONG_SETTING +* NAME +* IB_MAD_ATTR_CA_CONG_SETTING +* +* DESCRIPTION +* CACongestionSetting attribute (A10.4.3) +* +* SOURCE +*/ +#define IB_MAD_ATTR_CA_CONG_SETTING 
(CL_HTON16(0x0016)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_CC_TBL +* NAME +* IB_MAD_ATTR_CC_TBL +* +* DESCRIPTION +* CongestionControlTable attribute (A10.4.3) +* +* SOURCE +*/ +#define IB_MAD_ATTR_CC_TBL (CL_HTON16(0x0017)) +/**********/ + +/****d* IBA Base: Constants/IB_MAD_ATTR_TIME_STAMP +* NAME +* IB_MAD_ATTR_TIME_STAMP +* +* DESCRIPTION +* TimeStamp attribute (A10.4.3) +* +* SOURCE +*/ +#define IB_MAD_ATTR_TIME_STAMP (CL_HTON16(0x0018)) +/**********/ + +/****d* IBA Base: Constants/IB_NODE_TYPE_CA +* NAME +* IB_NODE_TYPE_CA +* +* DESCRIPTION +* Encoded generic node type used in MAD attributes (13.4.8.2) +* +* SOURCE +*/ +#define IB_NODE_TYPE_CA 0x01 +/**********/ + +/****d* IBA Base: Constants/IB_NODE_TYPE_SWITCH +* NAME +* IB_NODE_TYPE_SWITCH +* +* DESCRIPTION +* Encoded generic node type used in MAD attributes (13.4.8.2) +* +* SOURCE +*/ +#define IB_NODE_TYPE_SWITCH 0x02 +/**********/ + +/****d* IBA Base: Constants/IB_NODE_TYPE_ROUTER +* NAME +* IB_NODE_TYPE_ROUTER +* +* DESCRIPTION +* Encoded generic node type used in MAD attributes (13.4.8.2) +* +* SOURCE +*/ +#define IB_NODE_TYPE_ROUTER 0x03 +/**********/ + +/****d* IBA Base: Constants/IB_NOTICE_PRODUCER_TYPE_CA +* NAME +* IB_NOTICE_PRODUCER_TYPE_CA +* +* DESCRIPTION +* Encoded generic producer type used in Notice attribute (13.4.8.2) +* +* SOURCE +*/ +#define IB_NOTICE_PRODUCER_TYPE_CA (CL_HTON32(0x000001)) +/**********/ + +/****d* IBA Base: Constants/IB_NOTICE_PRODUCER_TYPE_SWITCH +* NAME +* IB_NOTICE_PRODUCER_TYPE_SWITCH +* +* DESCRIPTION +* Encoded generic producer type used in Notice attribute (13.4.8.2) +* +* SOURCE +*/ +#define IB_NOTICE_PRODUCER_TYPE_SWITCH (CL_HTON32(0x000002)) +/**********/ + +/****d* IBA Base: Constants/IB_NOTICE_PRODUCER_TYPE_ROUTER +* NAME +* IB_NOTICE_PRODUCER_TYPE_ROUTER +* +* DESCRIPTION +* Encoded generic producer type used in Notice attribute (13.4.8.2) +* +* SOURCE +*/ +#define IB_NOTICE_PRODUCER_TYPE_ROUTER (CL_HTON32(0x000003)) +/**********/ + 
+/****d* IBA Base: Constants/IB_NOTICE_PRODUCER_TYPE_CLASS_MGR +* NAME +* IB_NOTICE_PRODUCER_TYPE_CLASS_MGR +* +* DESCRIPTION +* Encoded generic producer type used in Notice attribute (13.4.8.2) +* +* SOURCE +*/ +#define IB_NOTICE_PRODUCER_TYPE_CLASS_MGR (CL_HTON32(0x000004)) +/**********/ + +/****d* IBA Base: Constants/IB_MTU_LEN_TYPE +* NAME +* IB_MTU_LEN_TYPE +* +* DESCRIPTION +* Encoded path MTU. +* 1: 256 +* 2: 512 +* 3: 1024 +* 4: 2048 +* 5: 4096 +* others: reserved +* +* SOURCE +*/ +#define IB_MTU_LEN_256 1 +#define IB_MTU_LEN_512 2 +#define IB_MTU_LEN_1024 3 +#define IB_MTU_LEN_2048 4 +#define IB_MTU_LEN_4096 5 + +#define IB_MIN_MTU IB_MTU_LEN_256 +#define IB_MAX_MTU IB_MTU_LEN_4096 + +/**********/ + +/****d* IBA Base: Constants/IB_PATH_SELECTOR_TYPE +* NAME +* IB_PATH_SELECTOR_TYPE +* +* DESCRIPTION +* Path selector. +* 0: greater than specified +* 1: less than specified +* 2: exactly the specified +* 3: largest available +* +* SOURCE +*/ +#define IB_PATH_SELECTOR_GREATER_THAN 0 +#define IB_PATH_SELECTOR_LESS_THAN 1 +#define IB_PATH_SELECTOR_EXACTLY 2 +#define IB_PATH_SELECTOR_LARGEST 3 +/**********/ + +/****d* IBA Base: Constants/IB_SMINFO_STATE_NOTACTIVE +* NAME +* IB_SMINFO_STATE_NOTACTIVE +* +* DESCRIPTION +* Encoded state value used in the SMInfo attribute. +* +* SOURCE +*/ +#define IB_SMINFO_STATE_NOTACTIVE 0 +/**********/ + +/****d* IBA Base: Constants/IB_SMINFO_STATE_DISCOVERING +* NAME +* IB_SMINFO_STATE_DISCOVERING +* +* DESCRIPTION +* Encoded state value used in the SMInfo attribute. +* +* SOURCE +*/ +#define IB_SMINFO_STATE_DISCOVERING 1 +/**********/ + +/****d* IBA Base: Constants/IB_SMINFO_STATE_STANDBY +* NAME +* IB_SMINFO_STATE_STANDBY +* +* DESCRIPTION +* Encoded state value used in the SMInfo attribute. +* +* SOURCE +*/ +#define IB_SMINFO_STATE_STANDBY 2 +/**********/ + +/****d* IBA Base: Constants/IB_SMINFO_STATE_MASTER +* NAME +* IB_SMINFO_STATE_MASTER +* +* DESCRIPTION +* Encoded state value used in the SMInfo attribute. 
+* +* SOURCE +*/ +#define IB_SMINFO_STATE_MASTER 3 +/**********/ + +/****d* IBA Base: Constants/IB_PATH_REC_SL_MASK +* NAME +* IB_PATH_REC_SL_MASK +* +* DESCRIPTION +* Mask for the sl field for path record +* +* SOURCE +*/ +#define IB_PATH_REC_SL_MASK 0x000F + +/****d* IBA Base: Constants/IB_MULTIPATH_REC_SL_MASK +* NAME +* IB_MULTIPATH_REC_SL_MASK +* +* DESCRIPTION +* Mask for the sl field for MultiPath record +* +* SOURCE +*/ +#define IB_MULTIPATH_REC_SL_MASK 0x000F + +/****d* IBA Base: Constants/IB_PATH_REC_QOS_CLASS_MASK +* NAME +* IB_PATH_REC_QOS_CLASS_MASK +* +* DESCRIPTION +* Mask for the QoS class field for path record +* +* SOURCE +*/ +#define IB_PATH_REC_QOS_CLASS_MASK 0xFFF0 + +/****d* IBA Base: Constants/IB_MULTIPATH_REC_QOS_CLASS_MASK +* NAME +* IB_MULTIPATH_REC_QOS_CLASS_MASK +* +* DESCRIPTION +* Mask for the QoS class field for MultiPath record +* +* SOURCE +*/ +#define IB_MULTIPATH_REC_QOS_CLASS_MASK 0xFFF0 + +/****d* IBA Base: Constants/IB_PATH_REC_SELECTOR_MASK +* NAME +* IB_PATH_REC_SELECTOR_MASK +* +* DESCRIPTION +* Mask for the selector field for path record MTU, rate, +* and packet lifetime. +* +* SOURCE +*/ +#define IB_PATH_REC_SELECTOR_MASK 0xC0 + +/****d* IBA Base: Constants/IB_MULTIPATH_REC_SELECTOR_MASK +* NAME +* IB_MULTIPATH_REC_SELECTOR_MASK +* +* DESCRIPTION +* Mask for the selector field for multipath record MTU, rate, +* and packet lifetime. +* +* SOURCE +*/ +#define IB_MULTIPATH_REC_SELECTOR_MASK 0xC0 +/**********/ + +/****d* IBA Base: Constants/IB_PATH_REC_BASE_MASK +* NAME +* IB_PATH_REC_BASE_MASK +* +* DESCRIPTION +* Mask for the base value field for path record MTU, rate, +* and packet lifetime. +* +* SOURCE +*/ +#define IB_PATH_REC_BASE_MASK 0x3F +/**********/ + +/****d* IBA Base: Constants/IB_MULTIPATH_REC_BASE_MASK +* NAME +* IB_MULTIPATH_REC_BASE_MASK +* +* DESCRIPTION +* Mask for the base value field for multipath record MTU, rate, +* and packet lifetime. 
+* +* SOURCE +*/ +#define IB_MULTIPATH_REC_BASE_MASK 0x3F +/**********/ + +/****h* IBA Base/Type Definitions +* NAME +* Type Definitions +* +* DESCRIPTION +* Definitions are from the InfiniBand Architecture Specification v1.2 +* +*********/ + +/****d* IBA Base: Types/ib_net16_t +* NAME +* ib_net16_t +* +* DESCRIPTION +* Defines the network ordered type for 16-bit values. +* +* SOURCE +*/ +typedef uint16_t ib_net16_t; +/**********/ + +/****d* IBA Base: Types/ib_net32_t +* NAME +* ib_net32_t +* +* DESCRIPTION +* Defines the network ordered type for 32-bit values. +* +* SOURCE +*/ +typedef uint32_t ib_net32_t; +/**********/ + +/****d* IBA Base: Types/ib_net64_t +* NAME +* ib_net64_t +* +* DESCRIPTION +* Defines the network ordered type for 64-bit values. +* +* SOURCE +*/ +typedef uint64_t ib_net64_t; +/**********/ + +/****d* IBA Base: Types/ib_gid_prefix_t +* NAME +* ib_gid_prefix_t +* +* DESCRIPTION +* Defines the network ordered type for the 64-bit subnet prefix of a GID. +* +* SOURCE +*/ +typedef ib_net64_t ib_gid_prefix_t; +/**********/ + +/****d* IBA Base: Constants/ib_link_states_t +* NAME +* ib_link_states_t +* +* DESCRIPTION +* Defines the link states of a port. +* +* SOURCE +*/ +#define IB_LINK_NO_CHANGE 0 +#define IB_LINK_DOWN 1 +#define IB_LINK_INIT 2 +#define IB_LINK_ARMED 3 +#define IB_LINK_ACTIVE 4 +#define IB_LINK_ACT_DEFER 5 +/**********/ + +static const char *const __ib_node_type_str[] = { + "UNKNOWN", + "Channel Adapter", + "Switch", + "Router" +}; + +/****f* IBA Base: Types/ib_get_node_type_str +* NAME +* ib_get_node_type_str +* +* DESCRIPTION +* Returns a string for the specified node type. +* Values above IB_NODE_TYPE_ROUTER map to the UNKNOWN string. +* 14.2.5.3 NodeInfo +* +* SYNOPSIS +*/ +static inline const char *OSM_API ib_get_node_type_str(IN uint8_t node_type) +{ + if (node_type > IB_NODE_TYPE_ROUTER) + node_type = 0; + return (__ib_node_type_str[node_type]); +} + +/* +* PARAMETERS +* node_type +* [in] Encoded node type as returned in the NodeInfo attribute. + +* RETURN VALUES +* Pointer to the node type string.
+* +* NOTES +* +* SEE ALSO +* ib_node_info_t +*********/ + +static const char *const __ib_producer_type_str[] = { + "UNKNOWN", + "Channel Adapter", + "Switch", + "Router", + "Class Manager" +}; + +/****f* IBA Base: Types/ib_get_producer_type_str +* NAME +* ib_get_producer_type_str +* +* DESCRIPTION +* Returns a string for the specified producer type +* 13.4.8.2 Notice +* 13.4.8.3 InformInfo +* +* SYNOPSIS +*/ +static inline const char *OSM_API +ib_get_producer_type_str(IN ib_net32_t producer_type) +{ + if (cl_ntoh32(producer_type) > + CL_NTOH32(IB_NOTICE_PRODUCER_TYPE_CLASS_MGR)) + producer_type = 0; + return (__ib_producer_type_str[cl_ntoh32(producer_type)]); +} + +/* +* PARAMETERS +* producer_type +* [in] Encoded producer type from the Notice attribute +* +* RETURN VALUES +* Pointer to the producer type string; the UNKNOWN entry is returned +* for values above IB_NOTICE_PRODUCER_TYPE_CLASS_MGR. +* +* NOTES +* +* SEE ALSO +* ib_notice_get_prod_type +*********/ + +static const char *const __ib_port_state_str[] = { + "No State Change (NOP)", + "DOWN", + "INIT", + "ARMED", + "ACTIVE", + "ACTDEFER", + "UNKNOWN" +}; + +/****f* IBA Base: Types/ib_get_port_state_str +* NAME +* ib_get_port_state_str +* +* DESCRIPTION +* Returns a string for the specified port state. +* States above IB_LINK_ACT_DEFER are reported as UNKNOWN. +* +* SYNOPSIS +*/ +static inline const char *OSM_API ib_get_port_state_str(IN uint8_t port_state) +{ + /* The table has one entry per IB_LINK_* state plus a final UNKNOWN +  * entry; clamp on the last named state so that entry is reachable. */ + if (port_state > IB_LINK_ACT_DEFER) + port_state = IB_LINK_ACT_DEFER + 1; + return (__ib_port_state_str[port_state]); +} + +/* +* PARAMETERS +* port_state +* [in] Encoded port state as returned in the PortInfo attribute. +* +* RETURN VALUES +* Pointer to the port state string; the UNKNOWN entry is returned +* for values above IB_LINK_ACT_DEFER. +* +* NOTES +* +* SEE ALSO +* ib_port_info_t +*********/ + +/****f* IBA Base: Types/ib_get_port_state_from_str +* NAME +* ib_get_port_state_from_str +* +* DESCRIPTION +* Returns the encoded port state for the specified port state string.
+* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_get_port_state_from_str(IN char *p_port_state_str) +{ + if (!strncmp(p_port_state_str, "No State Change (NOP)", 12)) + return (0); + else if (!strncmp(p_port_state_str, "DOWN", 4)) + return (1); + else if (!strncmp(p_port_state_str, "INIT", 4)) + return (2); + else if (!strncmp(p_port_state_str, "ARMED", 5)) + return (3); + else if (!strncmp(p_port_state_str, "ACTIVE", 6)) + return (4); + else if (!strncmp(p_port_state_str, "ACTDEFER", 8)) + return (5); + return (6); +} + +/* +* PARAMETERS +* p_port_state_str +* [in] A string matching one returned by ib_get_port_state_str +* +* RETURN VALUES +* The encoded port state: 0 (no state change), 1 (DOWN), 2 (INIT), +* 3 (ARMED), 4 (ACTIVE), 5 (ACTDEFER), or 6 when no name matches. +* +* NOTES +* Only a leading prefix of the string is compared (strncmp with a fixed +* length per name), so characters after the matched prefix are ignored. +* +* SEE ALSO +* ib_port_info_t +*********/ + +/****d* IBA Base: Constants/Join States +* NAME +* Join States +* +* DESCRIPTION +* Defines the join state flags for multicast group management. +* +* SOURCE +*/ +#define IB_JOIN_STATE_FULL 1 +#define IB_JOIN_STATE_NON 2 +#define IB_JOIN_STATE_SEND_ONLY 4 +#define IB_JOIN_STATE_SEND_ONLY_FULL 8 +/**********/ + +/****f* IBA Base: Types/ib_pkey_get_base +* NAME +* ib_pkey_get_base +* +* DESCRIPTION +* Returns the base P_Key value with the membership bit stripped. +* +* SYNOPSIS +*/ +static inline ib_net16_t OSM_API ib_pkey_get_base(IN const ib_net16_t pkey) +{ + return ((ib_net16_t) (pkey & IB_PKEY_BASE_MASK)); +} + +/* +* PARAMETERS +* pkey +* [in] P_Key value +* +* RETURN VALUE +* Returns the base P_Key value with the membership bit stripped. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_pkey_is_full_member +* NAME +* ib_pkey_is_full_member +* +* DESCRIPTION +* Indicates if the port is a full member of the partition. +* +* SYNOPSIS +*/ +static inline boolean_t OSM_API ib_pkey_is_full_member(IN const ib_net16_t pkey) +{ + return ((pkey & IB_PKEY_TYPE_MASK) == IB_PKEY_TYPE_MASK); +} + +/* +* PARAMETERS +* pkey +* [in] P_Key value +* +* RETURN VALUE +* TRUE if the port is a full member of the partition.
+* FALSE otherwise. +* +* NOTES +* +* SEE ALSO +* ib_pkey_get_base, ib_net16_t +*********/ + +/****f* IBA Base: Types/ib_pkey_is_invalid +* NAME +* ib_pkey_is_invalid +* +* DESCRIPTION +* Returns TRUE if the given P_Key is an invalid P_Key +* C10-116: the CI shall regard a P_Key as invalid if its low-order +* 15 bits are all zero... +* +* SYNOPSIS +*/ +static inline boolean_t OSM_API ib_pkey_is_invalid(IN const ib_net16_t pkey) +{ + return ib_pkey_get_base(pkey) == 0x0000 ? TRUE : FALSE; +} + +/* +* PARAMETERS +* pkey +* [in] P_Key value +* +* RETURN VALUE +* TRUE if the base P_Key (as returned by ib_pkey_get_base) is zero, +* FALSE otherwise. +* +* NOTES +* +* SEE ALSO +* ib_pkey_get_base +*********/ + +/****d* IBA Base: Types/ib_gid_t +* NAME +* ib_gid_t +* +* DESCRIPTION +* 16-byte GID, accessible as raw bytes or through the unicast and +* multicast layouts below. +* +* SYNOPSIS +*/ +typedef union _ib_gid { + uint8_t raw[16]; + struct _ib_gid_unicast { + ib_gid_prefix_t prefix; + ib_net64_t interface_id; + } unicast; + struct _ib_gid_multicast { + uint8_t header[2]; + uint8_t raw_group_id[14]; + } multicast; + struct _ib_gid_ip_multicast { + uint8_t header[2]; + ib_net16_t signature; + ib_net16_t p_key; + uint8_t group_id[10]; + } ip_multicast; +} ib_gid_t; +/* +* FIELDS +* raw +* GID represented as an unformatted byte array. +* +* unicast +* Typical unicast representation with subnet prefix and +* port GUID. +* +* multicast +* Representation for multicast use. +* +* ip_multicast +* Multicast representation split into header, signature, +* p_key and group_id members. +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_gid_is_multicast +* NAME +* ib_gid_is_multicast +* +* DESCRIPTION +* Returns a boolean indicating whether a GID is a multicast GID, +* i.e. whether its first raw byte is 0xFF. +* +* SYNOPSIS +*/ +static inline boolean_t OSM_API ib_gid_is_multicast(IN const ib_gid_t * p_gid) +{ + return (p_gid->raw[0] == 0xFF); +} + +/****f* IBA Base: Types/ib_gid_get_scope +* NAME +* ib_gid_get_scope +* +* DESCRIPTION +* Returns scope of (assumed) multicast GID.
+* +* SYNOPSIS +*/ +static inline uint8_t OSM_API ib_mgid_get_scope(IN const ib_gid_t * p_gid) +{ + return (p_gid->raw[1] & 0x0F); +} + +/****f* IBA Base: Types/ib_mgid_set_scope +* NAME +* ib_mgid_set_scope +* +* DESCRIPTION +* Sets scope of (assumed) multicast GID. +* Only the low 4 bits of raw[1] are replaced, using the low 4 bits +* of scope; the high 4 bits of raw[1] are preserved. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_mgid_set_scope(IN ib_gid_t * const p_gid, IN const uint8_t scope) +{ + p_gid->raw[1] &= 0xF0; + p_gid->raw[1] |= scope & 0x0F; +} + +/****f* IBA Base: Types/ib_gid_set_default +* NAME +* ib_gid_set_default +* +* DESCRIPTION +* Sets a GID to the default value: IB_DEFAULT_SUBNET_PREFIX as the +* unicast prefix, followed by the supplied interface_id. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_gid_set_default(IN ib_gid_t * const p_gid, IN const ib_net64_t interface_id) +{ + p_gid->unicast.prefix = IB_DEFAULT_SUBNET_PREFIX; + p_gid->unicast.interface_id = interface_id; +} + +/* +* PARAMETERS +* p_gid +* [in] Pointer to the GID object. +* +* interface_id +* [in] Manufacturer assigned EUI64 value of a port. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +* ib_gid_t +*********/ + +/****f* IBA Base: Types/ib_gid_get_subnet_prefix +* NAME +* ib_gid_get_subnet_prefix +* +* DESCRIPTION +* Gets the subnet prefix from a GID (the unicast prefix member). +* +* SYNOPSIS +*/ +static inline ib_net64_t OSM_API +ib_gid_get_subnet_prefix(IN const ib_gid_t * const p_gid) +{ + return (p_gid->unicast.prefix); +} + +/* +* PARAMETERS +* p_gid +* [in] Pointer to the GID object. +* +* RETURN VALUES +* 64-bit subnet prefix value. +* +* NOTES +* +* SEE ALSO +* ib_gid_t +*********/ + +/****f* IBA Base: Types/ib_gid_is_link_local +* NAME +* ib_gid_is_link_local +* +* DESCRIPTION +* Returns TRUE if the unicast GID scoping indicates link local, +* FALSE otherwise. +* The subnet prefix is masked with 0xFFC0000000000000 before being +* compared with IB_DEFAULT_SUBNET_PREFIX. +* +* SYNOPSIS +*/ +static inline boolean_t OSM_API +ib_gid_is_link_local(IN const ib_gid_t * const p_gid) +{ + return ((ib_gid_get_subnet_prefix(p_gid) & + CL_HTON64(0xFFC0000000000000ULL)) == IB_DEFAULT_SUBNET_PREFIX); +} + +/* +* PARAMETERS +* p_gid +* [in] Pointer to the GID object.
+* +* RETURN VALUES +* Returns TRUE if the unicast GID scoping indicates link local, +* FALSE otherwise. +* +* NOTES +* +* SEE ALSO +* ib_gid_t +*********/ + +/****f* IBA Base: Types/ib_gid_is_site_local +* NAME +* ib_gid_is_site_local +* +* DESCRIPTION +* Returns TRUE if the unicast GID scoping indicates site local, +* FALSE otherwise. +* The subnet prefix is masked with 0xFFFFFFFFFFFF0000 (the low 16 bits +* are ignored) and compared with 0xFEC0000000000000. +* +* SYNOPSIS +*/ +static inline boolean_t OSM_API +ib_gid_is_site_local(IN const ib_gid_t * const p_gid) +{ + return ((ib_gid_get_subnet_prefix(p_gid) & + CL_HTON64(0xFFFFFFFFFFFF0000ULL)) == + CL_HTON64(0xFEC0000000000000ULL)); +} + +/* +* PARAMETERS +* p_gid +* [in] Pointer to the GID object. +* +* RETURN VALUES +* Returns TRUE if the unicast GID scoping indicates site local, +* FALSE otherwise. +* +* NOTES +* +* SEE ALSO +* ib_gid_t +*********/ + +/****f* IBA Base: Types/ib_gid_get_guid +* NAME +* ib_gid_get_guid +* +* DESCRIPTION +* Gets the guid from a GID (the unicast interface_id member). +* +* SYNOPSIS +*/ +static inline ib_net64_t OSM_API +ib_gid_get_guid(IN const ib_gid_t * const p_gid) +{ + return (p_gid->unicast.interface_id); +} + +/* +* PARAMETERS +* p_gid +* [in] Pointer to the GID object. +* +* RETURN VALUES +* 64-bit GUID value. +* +* NOTES +* +* SEE ALSO +* ib_gid_t +*********/ + +/****s* IBA Base: Types/ib_path_rec_t +* NAME +* ib_path_rec_t +* +* DESCRIPTION +* Path records encapsulate the properties of a given +* route between two end-points on a subnet. +* +* SYNOPSIS +*/ +typedef struct _ib_path_rec { + ib_net64_t service_id; + ib_gid_t dgid; + ib_gid_t sgid; + ib_net16_t dlid; + ib_net16_t slid; + ib_net32_t hop_flow_raw; + uint8_t tclass; + uint8_t num_path; + ib_net16_t pkey; + ib_net16_t qos_class_sl; + uint8_t mtu; + uint8_t rate; + uint8_t pkt_life; + uint8_t preference; + uint8_t resv2[6]; +} ib_path_rec_t; +/* +* FIELDS +* service_id +* Service ID for QoS. +* +* dgid +* GID of destination port. +* +* sgid +* GID of source port. +* +* dlid +* LID of destination port. +* +* slid +* LID of source port.
+* +* hop_flow_raw +* Global routing parameters: hop count, flow label and raw bit. +* +* tclass +* Another global routing parameter. +* +* num_path +* Reversible path - 1 bit to say if path is reversible. +* num_path [6:0] In queries, maximum number of paths to return. +* In responses, undefined. +* +* pkey +* Partition key (P_Key) to use on this path. +* +* qos_class_sl +* QoS class and service level to use on this path. +* +* mtu +* MTU and MTU selector fields to use on this path +* +* rate +* Rate and rate selector fields to use on this path. +* +* pkt_life +* Packet lifetime +* +* preference +* Indicates the relative merit of this path versus other path +* records returned from the SA. Lower numbers are better. +* +* resv2 +* Reserved bytes. +* SEE ALSO +*********/ + +/* Path Record Component Masks */ +#define IB_PR_COMPMASK_SERVICEID_MSB (CL_HTON64(((uint64_t)1)<<0)) +#define IB_PR_COMPMASK_SERVICEID_LSB (CL_HTON64(((uint64_t)1)<<1)) +#define IB_PR_COMPMASK_DGID (CL_HTON64(((uint64_t)1)<<2)) +#define IB_PR_COMPMASK_SGID (CL_HTON64(((uint64_t)1)<<3)) +#define IB_PR_COMPMASK_DLID (CL_HTON64(((uint64_t)1)<<4)) +#define IB_PR_COMPMASK_SLID (CL_HTON64(((uint64_t)1)<<5)) +#define IB_PR_COMPMASK_RAWTRAFFIC (CL_HTON64(((uint64_t)1)<<6)) +#define IB_PR_COMPMASK_RESV0 (CL_HTON64(((uint64_t)1)<<7)) +#define IB_PR_COMPMASK_FLOWLABEL (CL_HTON64(((uint64_t)1)<<8)) +#define IB_PR_COMPMASK_HOPLIMIT (CL_HTON64(((uint64_t)1)<<9)) +#define IB_PR_COMPMASK_TCLASS (CL_HTON64(((uint64_t)1)<<10)) +#define IB_PR_COMPMASK_REVERSIBLE (CL_HTON64(((uint64_t)1)<<11)) +#define IB_PR_COMPMASK_NUMBPATH (CL_HTON64(((uint64_t)1)<<12)) +#define IB_PR_COMPMASK_PKEY (CL_HTON64(((uint64_t)1)<<13)) +#define IB_PR_COMPMASK_QOS_CLASS (CL_HTON64(((uint64_t)1)<<14)) +#define IB_PR_COMPMASK_SL (CL_HTON64(((uint64_t)1)<<15)) +#define IB_PR_COMPMASK_MTUSELEC (CL_HTON64(((uint64_t)1)<<16)) +#define IB_PR_COMPMASK_MTU (CL_HTON64(((uint64_t)1)<<17)) +#define IB_PR_COMPMASK_RATESELEC 
(CL_HTON64(((uint64_t)1)<<18)) +#define IB_PR_COMPMASK_RATE (CL_HTON64(((uint64_t)1)<<19)) +#define IB_PR_COMPMASK_PKTLIFETIMESELEC (CL_HTON64(((uint64_t)1)<<20)) +#define IB_PR_COMPMASK_PKTLIFETIME (CL_HTON64(((uint64_t)1)<<21)) + +#define IB_PR_COMPMASK_SERVICEID (IB_PR_COMPMASK_SERVICEID_MSB | \ + IB_PR_COMPMASK_SERVICEID_LSB) + +/* Link Record Component Masks */ +#define IB_LR_COMPMASK_FROM_LID (CL_HTON64(((uint64_t)1)<<0)) +#define IB_LR_COMPMASK_FROM_PORT (CL_HTON64(((uint64_t)1)<<1)) +#define IB_LR_COMPMASK_TO_PORT (CL_HTON64(((uint64_t)1)<<2)) +#define IB_LR_COMPMASK_TO_LID (CL_HTON64(((uint64_t)1)<<3)) + +/* VL Arbitration Record Masks */ +#define IB_VLA_COMPMASK_LID (CL_HTON64(((uint64_t)1)<<0)) +#define IB_VLA_COMPMASK_OUT_PORT (CL_HTON64(((uint64_t)1)<<1)) +#define IB_VLA_COMPMASK_BLOCK (CL_HTON64(((uint64_t)1)<<2)) + +/* SLtoVL Mapping Record Masks */ +#define IB_SLVL_COMPMASK_LID (CL_HTON64(((uint64_t)1)<<0)) +#define IB_SLVL_COMPMASK_IN_PORT (CL_HTON64(((uint64_t)1)<<1)) +#define IB_SLVL_COMPMASK_OUT_PORT (CL_HTON64(((uint64_t)1)<<2)) + +/* P_Key Table Record Masks */ +#define IB_PKEY_COMPMASK_LID (CL_HTON64(((uint64_t)1)<<0)) +#define IB_PKEY_COMPMASK_BLOCK (CL_HTON64(((uint64_t)1)<<1)) +#define IB_PKEY_COMPMASK_PORT (CL_HTON64(((uint64_t)1)<<2)) + +/* Switch Info Record Masks */ +#define IB_SWIR_COMPMASK_LID (CL_HTON64(((uint64_t)1)<<0)) +#define IB_SWIR_COMPMASK_RESERVED1 (CL_HTON64(((uint64_t)1)<<1)) + +/* LFT Record Masks */ +#define IB_LFTR_COMPMASK_LID (CL_HTON64(((uint64_t)1)<<0)) +#define IB_LFTR_COMPMASK_BLOCK (CL_HTON64(((uint64_t)1)<<1)) + +/* MFT Record Masks */ +#define IB_MFTR_COMPMASK_LID (CL_HTON64(((uint64_t)1)<<0)) +#define IB_MFTR_COMPMASK_POSITION (CL_HTON64(((uint64_t)1)<<1)) +#define IB_MFTR_COMPMASK_RESERVED1 (CL_HTON64(((uint64_t)1)<<2)) +#define IB_MFTR_COMPMASK_BLOCK (CL_HTON64(((uint64_t)1)<<3)) +#define IB_MFTR_COMPMASK_RESERVED2 (CL_HTON64(((uint64_t)1)<<4)) + +/* NodeInfo Record Masks */ +#define IB_NR_COMPMASK_LID 
(CL_HTON64(((uint64_t)1)<<0)) +#define IB_NR_COMPMASK_RESERVED1 (CL_HTON64(((uint64_t)1)<<1)) +#define IB_NR_COMPMASK_BASEVERSION (CL_HTON64(((uint64_t)1)<<2)) +#define IB_NR_COMPMASK_CLASSVERSION (CL_HTON64(((uint64_t)1)<<3)) +#define IB_NR_COMPMASK_NODETYPE (CL_HTON64(((uint64_t)1)<<4)) +#define IB_NR_COMPMASK_NUMPORTS (CL_HTON64(((uint64_t)1)<<5)) +#define IB_NR_COMPMASK_SYSIMAGEGUID (CL_HTON64(((uint64_t)1)<<6)) +#define IB_NR_COMPMASK_NODEGUID (CL_HTON64(((uint64_t)1)<<7)) +#define IB_NR_COMPMASK_PORTGUID (CL_HTON64(((uint64_t)1)<<8)) +#define IB_NR_COMPMASK_PARTCAP (CL_HTON64(((uint64_t)1)<<9)) +#define IB_NR_COMPMASK_DEVID (CL_HTON64(((uint64_t)1)<<10)) +#define IB_NR_COMPMASK_REV (CL_HTON64(((uint64_t)1)<<11)) +#define IB_NR_COMPMASK_PORTNUM (CL_HTON64(((uint64_t)1)<<12)) +#define IB_NR_COMPMASK_VENDID (CL_HTON64(((uint64_t)1)<<13)) +#define IB_NR_COMPMASK_NODEDESC (CL_HTON64(((uint64_t)1)<<14)) + +/* Service Record Component Masks Sec 15.2.5.14 Ver 1.1*/ +#define IB_SR_COMPMASK_SID (CL_HTON64(((uint64_t)1)<<0)) +#define IB_SR_COMPMASK_SGID (CL_HTON64(((uint64_t)1)<<1)) +#define IB_SR_COMPMASK_SPKEY (CL_HTON64(((uint64_t)1)<<2)) +#define IB_SR_COMPMASK_RES1 (CL_HTON64(((uint64_t)1)<<3)) +#define IB_SR_COMPMASK_SLEASE (CL_HTON64(((uint64_t)1)<<4)) +#define IB_SR_COMPMASK_SKEY (CL_HTON64(((uint64_t)1)<<5)) +#define IB_SR_COMPMASK_SNAME (CL_HTON64(((uint64_t)1)<<6)) +#define IB_SR_COMPMASK_SDATA8_0 (CL_HTON64(((uint64_t)1)<<7)) +#define IB_SR_COMPMASK_SDATA8_1 (CL_HTON64(((uint64_t)1)<<8)) +#define IB_SR_COMPMASK_SDATA8_2 (CL_HTON64(((uint64_t)1)<<9)) +#define IB_SR_COMPMASK_SDATA8_3 (CL_HTON64(((uint64_t)1)<<10)) +#define IB_SR_COMPMASK_SDATA8_4 (CL_HTON64(((uint64_t)1)<<11)) +#define IB_SR_COMPMASK_SDATA8_5 (CL_HTON64(((uint64_t)1)<<12)) +#define IB_SR_COMPMASK_SDATA8_6 (CL_HTON64(((uint64_t)1)<<13)) +#define IB_SR_COMPMASK_SDATA8_7 (CL_HTON64(((uint64_t)1)<<14)) +#define IB_SR_COMPMASK_SDATA8_8 (CL_HTON64(((uint64_t)1)<<15)) +#define IB_SR_COMPMASK_SDATA8_9 
(CL_HTON64(((uint64_t)1)<<16)) +#define IB_SR_COMPMASK_SDATA8_10 (CL_HTON64(((uint64_t)1)<<17)) +#define IB_SR_COMPMASK_SDATA8_11 (CL_HTON64(((uint64_t)1)<<18)) +#define IB_SR_COMPMASK_SDATA8_12 (CL_HTON64(((uint64_t)1)<<19)) +#define IB_SR_COMPMASK_SDATA8_13 (CL_HTON64(((uint64_t)1)<<20)) +#define IB_SR_COMPMASK_SDATA8_14 (CL_HTON64(((uint64_t)1)<<21)) +#define IB_SR_COMPMASK_SDATA8_15 (CL_HTON64(((uint64_t)1)<<22)) +#define IB_SR_COMPMASK_SDATA16_0 (CL_HTON64(((uint64_t)1)<<23)) +#define IB_SR_COMPMASK_SDATA16_1 (CL_HTON64(((uint64_t)1)<<24)) +#define IB_SR_COMPMASK_SDATA16_2 (CL_HTON64(((uint64_t)1)<<25)) +#define IB_SR_COMPMASK_SDATA16_3 (CL_HTON64(((uint64_t)1)<<26)) +#define IB_SR_COMPMASK_SDATA16_4 (CL_HTON64(((uint64_t)1)<<27)) +#define IB_SR_COMPMASK_SDATA16_5 (CL_HTON64(((uint64_t)1)<<28)) +#define IB_SR_COMPMASK_SDATA16_6 (CL_HTON64(((uint64_t)1)<<29)) +#define IB_SR_COMPMASK_SDATA16_7 (CL_HTON64(((uint64_t)1)<<30)) +#define IB_SR_COMPMASK_SDATA32_0 (CL_HTON64(((uint64_t)1)<<31)) +#define IB_SR_COMPMASK_SDATA32_1 (CL_HTON64(((uint64_t)1)<<32)) +#define IB_SR_COMPMASK_SDATA32_2 (CL_HTON64(((uint64_t)1)<<33)) +#define IB_SR_COMPMASK_SDATA32_3 (CL_HTON64(((uint64_t)1)<<34)) +#define IB_SR_COMPMASK_SDATA64_0 (CL_HTON64(((uint64_t)1)<<35)) +#define IB_SR_COMPMASK_SDATA64_1 (CL_HTON64(((uint64_t)1)<<36)) + +/* Port Info Record Component Masks */ +#define IB_PIR_COMPMASK_LID (CL_HTON64(((uint64_t)1)<<0)) +#define IB_PIR_COMPMASK_PORTNUM (CL_HTON64(((uint64_t)1)<<1)) +#define IB_PIR_COMPMASK_OPTIONS (CL_HTON64(((uint64_t)1)<<2)) +#define IB_PIR_COMPMASK_MKEY (CL_HTON64(((uint64_t)1)<<3)) +#define IB_PIR_COMPMASK_GIDPRE (CL_HTON64(((uint64_t)1)<<4)) +#define IB_PIR_COMPMASK_BASELID (CL_HTON64(((uint64_t)1)<<5)) +#define IB_PIR_COMPMASK_SMLID (CL_HTON64(((uint64_t)1)<<6)) +#define IB_PIR_COMPMASK_CAPMASK (CL_HTON64(((uint64_t)1)<<7)) +#define IB_PIR_COMPMASK_DIAGCODE (CL_HTON64(((uint64_t)1)<<8)) +#define IB_PIR_COMPMASK_MKEYLEASEPRD (CL_HTON64(((uint64_t)1)<<9)) 
+#define IB_PIR_COMPMASK_LOCALPORTNUM (CL_HTON64(((uint64_t)1)<<10)) +#define IB_PIR_COMPMASK_LINKWIDTHENABLED (CL_HTON64(((uint64_t)1)<<11)) +#define IB_PIR_COMPMASK_LNKWIDTHSUPPORT (CL_HTON64(((uint64_t)1)<<12)) +#define IB_PIR_COMPMASK_LNKWIDTHACTIVE (CL_HTON64(((uint64_t)1)<<13)) +#define IB_PIR_COMPMASK_LNKSPEEDSUPPORT (CL_HTON64(((uint64_t)1)<<14)) +#define IB_PIR_COMPMASK_PORTSTATE (CL_HTON64(((uint64_t)1)<<15)) +#define IB_PIR_COMPMASK_PORTPHYSTATE (CL_HTON64(((uint64_t)1)<<16)) +#define IB_PIR_COMPMASK_LINKDWNDFLTSTATE (CL_HTON64(((uint64_t)1)<<17)) +#define IB_PIR_COMPMASK_MKEYPROTBITS (CL_HTON64(((uint64_t)1)<<18)) +#define IB_PIR_COMPMASK_RESV2 (CL_HTON64(((uint64_t)1)<<19)) +#define IB_PIR_COMPMASK_LMC (CL_HTON64(((uint64_t)1)<<20)) +#define IB_PIR_COMPMASK_LINKSPEEDACTIVE (CL_HTON64(((uint64_t)1)<<21)) +#define IB_PIR_COMPMASK_LINKSPEEDENABLE (CL_HTON64(((uint64_t)1)<<22)) +#define IB_PIR_COMPMASK_NEIGHBORMTU (CL_HTON64(((uint64_t)1)<<23)) +#define IB_PIR_COMPMASK_MASTERSMSL (CL_HTON64(((uint64_t)1)<<24)) +#define IB_PIR_COMPMASK_VLCAP (CL_HTON64(((uint64_t)1)<<25)) +#define IB_PIR_COMPMASK_INITTYPE (CL_HTON64(((uint64_t)1)<<26)) +#define IB_PIR_COMPMASK_VLHIGHLIMIT (CL_HTON64(((uint64_t)1)<<27)) +#define IB_PIR_COMPMASK_VLARBHIGHCAP (CL_HTON64(((uint64_t)1)<<28)) +#define IB_PIR_COMPMASK_VLARBLOWCAP (CL_HTON64(((uint64_t)1)<<29)) +#define IB_PIR_COMPMASK_INITTYPEREPLY (CL_HTON64(((uint64_t)1)<<30)) +#define IB_PIR_COMPMASK_MTUCAP (CL_HTON64(((uint64_t)1)<<31)) +#define IB_PIR_COMPMASK_VLSTALLCNT (CL_HTON64(((uint64_t)1)<<32)) +#define IB_PIR_COMPMASK_HOQLIFE (CL_HTON64(((uint64_t)1)<<33)) +#define IB_PIR_COMPMASK_OPVLS (CL_HTON64(((uint64_t)1)<<34)) +#define IB_PIR_COMPMASK_PARENFIN (CL_HTON64(((uint64_t)1)<<35)) +#define IB_PIR_COMPMASK_PARENFOUT (CL_HTON64(((uint64_t)1)<<36)) +#define IB_PIR_COMPMASK_FILTERRAWIN (CL_HTON64(((uint64_t)1)<<37)) +#define IB_PIR_COMPMASK_FILTERRAWOUT (CL_HTON64(((uint64_t)1)<<38)) +#define IB_PIR_COMPMASK_MKEYVIO 
(CL_HTON64(((uint64_t)1)<<39)) +#define IB_PIR_COMPMASK_PKEYVIO (CL_HTON64(((uint64_t)1)<<40)) +#define IB_PIR_COMPMASK_QKEYVIO (CL_HTON64(((uint64_t)1)<<41)) +#define IB_PIR_COMPMASK_GUIDCAP (CL_HTON64(((uint64_t)1)<<42)) +#define IB_PIR_COMPMASK_CLIENTREREG (CL_HTON64(((uint64_t)1)<<43)) +#define IB_PIR_COMPMASK_RESV3 (CL_HTON64(((uint64_t)1)<<44)) +#define IB_PIR_COMPMASK_SUBNTO (CL_HTON64(((uint64_t)1)<<45)) +#define IB_PIR_COMPMASK_RESV4 (CL_HTON64(((uint64_t)1)<<46)) +#define IB_PIR_COMPMASK_RESPTIME (CL_HTON64(((uint64_t)1)<<47)) +#define IB_PIR_COMPMASK_LOCALPHYERR (CL_HTON64(((uint64_t)1)<<48)) +#define IB_PIR_COMPMASK_OVERRUNERR (CL_HTON64(((uint64_t)1)<<49)) +#define IB_PIR_COMPMASK_MAXCREDHINT (CL_HTON64(((uint64_t)1)<<50)) +#define IB_PIR_COMPMASK_RESV5 (CL_HTON64(((uint64_t)1)<<51)) +#define IB_PIR_COMPMASK_LINKRTLAT (CL_HTON64(((uint64_t)1)<<52)) +#define IB_PIR_COMPMASK_CAPMASK2 (CL_HTON64(((uint64_t)1)<<53)) +#define IB_PIR_COMPMASK_LINKSPDEXTACT (CL_HTON64(((uint64_t)1)<<54)) +#define IB_PIR_COMPMASK_LINKSPDEXTSUPP (CL_HTON64(((uint64_t)1)<<55)) +#define IB_PIR_COMPMASK_RESV7 (CL_HTON64(((uint64_t)1)<<56)) +#define IB_PIR_COMPMASK_LINKSPDEXTENAB (CL_HTON64(((uint64_t)1)<<57)) + +/* Multicast Member Record Component Masks */ +#define IB_MCR_COMPMASK_GID (CL_HTON64(((uint64_t)1)<<0)) +#define IB_MCR_COMPMASK_MGID (CL_HTON64(((uint64_t)1)<<0)) +#define IB_MCR_COMPMASK_PORT_GID (CL_HTON64(((uint64_t)1)<<1)) +#define IB_MCR_COMPMASK_QKEY (CL_HTON64(((uint64_t)1)<<2)) +#define IB_MCR_COMPMASK_MLID (CL_HTON64(((uint64_t)1)<<3)) +#define IB_MCR_COMPMASK_MTU_SEL (CL_HTON64(((uint64_t)1)<<4)) +#define IB_MCR_COMPMASK_MTU (CL_HTON64(((uint64_t)1)<<5)) +#define IB_MCR_COMPMASK_TCLASS (CL_HTON64(((uint64_t)1)<<6)) +#define IB_MCR_COMPMASK_PKEY (CL_HTON64(((uint64_t)1)<<7)) +#define IB_MCR_COMPMASK_RATE_SEL (CL_HTON64(((uint64_t)1)<<8)) +#define IB_MCR_COMPMASK_RATE (CL_HTON64(((uint64_t)1)<<9)) +#define IB_MCR_COMPMASK_LIFE_SEL (CL_HTON64(((uint64_t)1)<<10)) 
+#define IB_MCR_COMPMASK_LIFE (CL_HTON64(((uint64_t)1)<<11)) +#define IB_MCR_COMPMASK_SL (CL_HTON64(((uint64_t)1)<<12)) +#define IB_MCR_COMPMASK_FLOW (CL_HTON64(((uint64_t)1)<<13)) +#define IB_MCR_COMPMASK_HOP (CL_HTON64(((uint64_t)1)<<14)) +#define IB_MCR_COMPMASK_SCOPE (CL_HTON64(((uint64_t)1)<<15)) +#define IB_MCR_COMPMASK_JOIN_STATE (CL_HTON64(((uint64_t)1)<<16)) +#define IB_MCR_COMPMASK_PROXY (CL_HTON64(((uint64_t)1)<<17)) + +/* GUID Info Record Component Masks */ +#define IB_GIR_COMPMASK_LID (CL_HTON64(((uint64_t)1)<<0)) +#define IB_GIR_COMPMASK_BLOCKNUM (CL_HTON64(((uint64_t)1)<<1)) +#define IB_GIR_COMPMASK_RESV1 (CL_HTON64(((uint64_t)1)<<2)) +#define IB_GIR_COMPMASK_RESV2 (CL_HTON64(((uint64_t)1)<<3)) +#define IB_GIR_COMPMASK_GID0 (CL_HTON64(((uint64_t)1)<<4)) +#define IB_GIR_COMPMASK_GID1 (CL_HTON64(((uint64_t)1)<<5)) +#define IB_GIR_COMPMASK_GID2 (CL_HTON64(((uint64_t)1)<<6)) +#define IB_GIR_COMPMASK_GID3 (CL_HTON64(((uint64_t)1)<<7)) +#define IB_GIR_COMPMASK_GID4 (CL_HTON64(((uint64_t)1)<<8)) +#define IB_GIR_COMPMASK_GID5 (CL_HTON64(((uint64_t)1)<<9)) +#define IB_GIR_COMPMASK_GID6 (CL_HTON64(((uint64_t)1)<<10)) +#define IB_GIR_COMPMASK_GID7 (CL_HTON64(((uint64_t)1)<<11)) + +/* MultiPath Record Component Masks */ +#define IB_MPR_COMPMASK_RAWTRAFFIC (CL_HTON64(((uint64_t)1)<<0)) +#define IB_MPR_COMPMASK_RESV0 (CL_HTON64(((uint64_t)1)<<1)) +#define IB_MPR_COMPMASK_FLOWLABEL (CL_HTON64(((uint64_t)1)<<2)) +#define IB_MPR_COMPMASK_HOPLIMIT (CL_HTON64(((uint64_t)1)<<3)) +#define IB_MPR_COMPMASK_TCLASS (CL_HTON64(((uint64_t)1)<<4)) +#define IB_MPR_COMPMASK_REVERSIBLE (CL_HTON64(((uint64_t)1)<<5)) +#define IB_MPR_COMPMASK_NUMBPATH (CL_HTON64(((uint64_t)1)<<6)) +#define IB_MPR_COMPMASK_PKEY (CL_HTON64(((uint64_t)1)<<7)) +#define IB_MPR_COMPMASK_QOS_CLASS (CL_HTON64(((uint64_t)1)<<8)) +#define IB_MPR_COMPMASK_SL (CL_HTON64(((uint64_t)1)<<9)) +#define IB_MPR_COMPMASK_MTUSELEC (CL_HTON64(((uint64_t)1)<<10)) +#define IB_MPR_COMPMASK_MTU (CL_HTON64(((uint64_t)1)<<11)) 
+#define IB_MPR_COMPMASK_RATESELEC (CL_HTON64(((uint64_t)1)<<12)) +#define IB_MPR_COMPMASK_RATE (CL_HTON64(((uint64_t)1)<<13)) +#define IB_MPR_COMPMASK_PKTLIFETIMESELEC (CL_HTON64(((uint64_t)1)<<14)) +#define IB_MPR_COMPMASK_PKTLIFETIME (CL_HTON64(((uint64_t)1)<<15)) +#define IB_MPR_COMPMASK_SERVICEID_MSB (CL_HTON64(((uint64_t)1)<<16)) +#define IB_MPR_COMPMASK_INDEPSELEC (CL_HTON64(((uint64_t)1)<<17)) +#define IB_MPR_COMPMASK_RESV3 (CL_HTON64(((uint64_t)1)<<18)) +#define IB_MPR_COMPMASK_SGIDCOUNT (CL_HTON64(((uint64_t)1)<<19)) +#define IB_MPR_COMPMASK_DGIDCOUNT (CL_HTON64(((uint64_t)1)<<20)) +#define IB_MPR_COMPMASK_SERVICEID_LSB (CL_HTON64(((uint64_t)1)<<21)) + +#define IB_MPR_COMPMASK_SERVICEID (IB_MPR_COMPMASK_SERVICEID_MSB | \ + IB_MPR_COMPMASK_SERVICEID_LSB) + +/* SMInfo Record Component Masks */ +#define IB_SMIR_COMPMASK_LID (CL_HTON64(((uint64_t)1)<<0)) +#define IB_SMIR_COMPMASK_RESV0 (CL_HTON64(((uint64_t)1)<<1)) +#define IB_SMIR_COMPMASK_GUID (CL_HTON64(((uint64_t)1)<<2)) +#define IB_SMIR_COMPMASK_SMKEY (CL_HTON64(((uint64_t)1)<<3)) +#define IB_SMIR_COMPMASK_ACTCOUNT (CL_HTON64(((uint64_t)1)<<4)) +#define IB_SMIR_COMPMASK_PRIORITY (CL_HTON64(((uint64_t)1)<<5)) +#define IB_SMIR_COMPMASK_SMSTATE (CL_HTON64(((uint64_t)1)<<6)) + +/* InformInfo Record Component Masks */ +#define IB_IIR_COMPMASK_SUBSCRIBERGID (CL_HTON64(((uint64_t)1)<<0)) +#define IB_IIR_COMPMASK_ENUM (CL_HTON64(((uint64_t)1)<<1)) +#define IB_IIR_COMPMASK_RESV0 (CL_HTON64(((uint64_t)1)<<2)) +#define IB_IIR_COMPMASK_GID (CL_HTON64(((uint64_t)1)<<3)) +#define IB_IIR_COMPMASK_LIDRANGEBEGIN (CL_HTON64(((uint64_t)1)<<4)) +#define IB_IIR_COMPMASK_LIDRANGEEND (CL_HTON64(((uint64_t)1)<<5)) +#define IB_IIR_COMPMASK_RESV1 (CL_HTON64(((uint64_t)1)<<6)) +#define IB_IIR_COMPMASK_ISGENERIC (CL_HTON64(((uint64_t)1)<<7)) +#define IB_IIR_COMPMASK_SUBSCRIBE (CL_HTON64(((uint64_t)1)<<8)) +#define IB_IIR_COMPMASK_TYPE (CL_HTON64(((uint64_t)1)<<9)) +#define IB_IIR_COMPMASK_TRAPNUMB (CL_HTON64(((uint64_t)1)<<10)) 
+#define IB_IIR_COMPMASK_DEVICEID (CL_HTON64(((uint64_t)1)<<10)) +#define IB_IIR_COMPMASK_QPN (CL_HTON64(((uint64_t)1)<<11)) +#define IB_IIR_COMPMASK_RESV2 (CL_HTON64(((uint64_t)1)<<12)) +#define IB_IIR_COMPMASK_RESPTIME (CL_HTON64(((uint64_t)1)<<13)) +#define IB_IIR_COMPMASK_RESV3 (CL_HTON64(((uint64_t)1)<<14)) +#define IB_IIR_COMPMASK_PRODTYPE (CL_HTON64(((uint64_t)1)<<15)) +#define IB_IIR_COMPMASK_VENDID (CL_HTON64(((uint64_t)1)<<15)) +/* NOTE(review): VENDID uses the same bit (15) as PRODTYPE, and DEVICEID uses bit 10; +* presumably each pair names one shared component -- confirm against the +* InformInfo record definition before relying on either name. */ + +/****f* IBA Base: Types/ib_path_rec_init_local +* NAME +* ib_path_rec_init_local +* +* DESCRIPTION +* Initializes a subnet local path record: stores the supplied addressing, +* SL/QoS, MTU, rate and packet lifetime values and zeroes hop_flow_raw, +* tclass, service_id and resv2. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_path_rec_init_local(IN ib_path_rec_t * const p_rec, + IN ib_gid_t * const p_dgid, + IN ib_gid_t * const p_sgid, + IN ib_net16_t dlid, + IN ib_net16_t slid, + IN uint8_t num_path, + IN ib_net16_t pkey, + IN uint8_t sl, + IN uint16_t qos_class, + IN uint8_t mtu_selector, + IN uint8_t mtu, + IN uint8_t rate_selector, + IN uint8_t rate, + IN uint8_t pkt_life_selector, + IN uint8_t pkt_life, IN uint8_t preference) +{ + p_rec->dgid = *p_dgid; + p_rec->sgid = *p_sgid; + p_rec->dlid = dlid; + p_rec->slid = slid; + p_rec->num_path = num_path; + p_rec->pkey = pkey; + /* SL is masked into the low bits, QoS class is shifted above it (<< 4); + the combined 16-bit value is passed through cl_hton16 before storing */ + p_rec->qos_class_sl = cl_hton16((sl & IB_PATH_REC_SL_MASK) | + (qos_class << 4)); + /* For mtu, rate and pkt_life: the selector lands in the top two bits + (<< 6), the value in the bits kept by IB_PATH_REC_BASE_MASK */ + p_rec->mtu = (uint8_t) ((mtu & IB_PATH_REC_BASE_MASK) | + (uint8_t) (mtu_selector << 6)); + p_rec->rate = (uint8_t) ((rate & IB_PATH_REC_BASE_MASK) | + (uint8_t) (rate_selector << 6)); + p_rec->pkt_life = (uint8_t) ((pkt_life & IB_PATH_REC_BASE_MASK) | + (uint8_t) (pkt_life_selector << 6)); + p_rec->preference = preference; + + /* Clear global routing fields for local path records */ + p_rec->hop_flow_raw = 0; + p_rec->tclass = 0; + p_rec->service_id = 0; + + memset(p_rec->resv2, 0, sizeof(p_rec->resv2)); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* p_dgid +* [in] Pointer to the GID of destination port (copied into the record). +* +* p_sgid +* [in] Pointer to the GID of source port (copied into the record). +* +* dlid +* [in] LID of destination port. 
+* +* slid +* [in] LID of source port. +* +* num_path +* [in] Reversible path - 1 bit to say if path is reversible. +* num_path [6:0] In queries, maximum number of paths to return. +* In responses, undefined. +* +* pkey +* [in] Partition key (P_Key) to use on this path. +* +* qos_class +* [in] QoS class to use on this path. Lower 12-bits are valid. +* +* sl +* [in] Service level to use on this path. Lower 4-bits are valid. +* +* mtu_selector +* [in] Encoded MTU selector value to use on this path +* +* mtu +* [in] Encoded MTU to use on this path +* +* rate_selector +* [in] Encoded rate selector value to use on this path. +* +* rate +* [in] Encoded rate to use on this path. +* +* pkt_life_selector +* [in] Encoded Packet selector value lifetime for this path. +* +* pkt_life +* [in] Encoded Packet lifetime for this path. +* +* preference +* [in] Indicates the relative merit of this path versus other path +* records returned from the SA. Lower numbers are better. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +* ib_gid_t +*********/ + +/****f* IBA Base: Types/ib_path_rec_num_path +* NAME +* ib_path_rec_num_path +* +* DESCRIPTION +* Get max number of paths to return. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_path_rec_num_path(IN const ib_path_rec_t * const p_rec) +{ + return (p_rec->num_path & 0x7F); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* RETURN VALUES +* Maximum number of paths to return for each unique SGID_DGID combination. +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + +/****f* IBA Base: Types/ib_path_rec_set_sl +* NAME +* ib_path_rec_set_sl +* +* DESCRIPTION +* Set path service level. 
+* +* SYNOPSIS +*/ +static inline void OSM_API +ib_path_rec_set_sl(IN ib_path_rec_t * const p_rec, IN const uint8_t sl) +{ + p_rec->qos_class_sl = + (p_rec->qos_class_sl & CL_HTON16(IB_PATH_REC_QOS_CLASS_MASK)) | + cl_hton16(sl & IB_PATH_REC_SL_MASK); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* sl +* [in] Service level to set. +* +* RETURN VALUES +* None +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + +/****f* IBA Base: Types/ib_path_rec_sl +* NAME +* ib_path_rec_sl +* +* DESCRIPTION +* Get path service level. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_path_rec_sl(IN const ib_path_rec_t * const p_rec) +{ + return (uint8_t)(cl_ntoh16(p_rec->qos_class_sl) & IB_PATH_REC_SL_MASK); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* RETURN VALUES +* SL. +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + +/****f* IBA Base: Types/ib_path_rec_set_qos_class +* NAME +* ib_path_rec_set_qos_class +* +* DESCRIPTION +* Set path QoS class. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_path_rec_set_qos_class(IN ib_path_rec_t * const p_rec, + IN const uint16_t qos_class) +{ + p_rec->qos_class_sl = + (p_rec->qos_class_sl & CL_HTON16(IB_PATH_REC_SL_MASK)) | + cl_hton16(qos_class << 4); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* qos_class +* [in] QoS class to set. +* +* RETURN VALUES +* None +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + +/****f* IBA Base: Types/ib_path_rec_qos_class +* NAME +* ib_path_rec_qos_class +* +* DESCRIPTION +* Get QoS class. +* +* SYNOPSIS +*/ +static inline uint16_t OSM_API +ib_path_rec_qos_class(IN const ib_path_rec_t * const p_rec) +{ + return (cl_ntoh16(p_rec->qos_class_sl) >> 4); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* RETURN VALUES +* QoS class of the path record. 
+* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + +/****f* IBA Base: Types/ib_path_rec_mtu +* NAME +* ib_path_rec_mtu +* +* DESCRIPTION +* Get encoded path MTU. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_path_rec_mtu(IN const ib_path_rec_t * const p_rec) +{ + return ((uint8_t) (p_rec->mtu & IB_PATH_REC_BASE_MASK)); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* RETURN VALUES +* Encoded path MTU. +* 1: 256 +* 2: 512 +* 3: 1024 +* 4: 2048 +* 5: 4096 +* others: reserved +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + +/****f* IBA Base: Types/ib_path_rec_mtu_sel +* NAME +* ib_path_rec_mtu_sel +* +* DESCRIPTION +* Get encoded path MTU selector. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_path_rec_mtu_sel(IN const ib_path_rec_t * const p_rec) +{ + return ((uint8_t) ((p_rec->mtu & IB_PATH_REC_SELECTOR_MASK) >> 6)); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* RETURN VALUES +* Encoded path MTU selector value (for queries). +* 0: greater than MTU specified +* 1: less than MTU specified +* 2: exactly the MTU specified +* 3: largest MTU available +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + +/****f* IBA Base: Types/ib_path_rec_rate +* NAME +* ib_path_rec_rate +* +* DESCRIPTION +* Get encoded path rate. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_path_rec_rate(IN const ib_path_rec_t * const p_rec) +{ + return ((uint8_t) (p_rec->rate & IB_PATH_REC_BASE_MASK)); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* RETURN VALUES +* Encoded path rate. +* 2: 2.5 Gb/sec. +* 3: 10 Gb/sec. +* 4: 30 Gb/sec. +* 5: 5 Gb/sec. +* 6: 20 Gb/sec. +* 7: 40 Gb/sec. +* 8: 60 Gb/sec. +* 9: 80 Gb/sec. +* 10: 120 Gb/sec. +* 11: 14 Gb/sec. +* 12: 56 Gb/sec. +* 13: 112 Gb/sec. +* 14: 168 Gb/sec. +* 15: 25 Gb/sec. +* 16: 100 Gb/sec. +* 17: 200 Gb/sec. +* 18: 300 Gb/sec. +* 19: 28 Gb/sec. +* 20: 50 Gb/sec. +* 21: 400 Gb/sec. +* 22: 600 Gb/sec. 
+* others: reserved +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + +/****f* IBA Base: Types/ib_path_rec_rate_sel +* NAME +* ib_path_rec_rate_sel +* +* DESCRIPTION +* Get encoded path rate selector. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_path_rec_rate_sel(IN const ib_path_rec_t * const p_rec) +{ + return ((uint8_t) ((p_rec->rate & IB_PATH_REC_SELECTOR_MASK) >> 6)); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* RETURN VALUES +* Encoded path rate selector value (for queries). +* 0: greater than rate specified +* 1: less than rate specified +* 2: exactly the rate specified +* 3: largest rate available +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + +/****f* IBA Base: Types/ib_path_rec_pkt_life +* NAME +* ib_path_rec_pkt_life +* +* DESCRIPTION +* Get encoded path pkt_life. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_path_rec_pkt_life(IN const ib_path_rec_t * const p_rec) +{ + return ((uint8_t) (p_rec->pkt_life & IB_PATH_REC_BASE_MASK)); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* RETURN VALUES +* Encoded path pkt_life = 4.096 usec * 2 ** PacketLifeTime. +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + +/****f* IBA Base: Types/ib_path_rec_pkt_life_sel +* NAME +* ib_path_rec_pkt_life_sel +* +* DESCRIPTION +* Get encoded path pkt_lifetime selector. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_path_rec_pkt_life_sel(IN const ib_path_rec_t * const p_rec) +{ + return ((uint8_t) ((p_rec->pkt_life & IB_PATH_REC_SELECTOR_MASK) >> 6)); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* RETURN VALUES +* Encoded path pkt_lifetime selector value (for queries). 
+* 0: greater than packet lifetime specified +* 1: less than packet lifetime specified +* 2: exactly the packet lifetime specified +* 3: smallest packet lifetime available +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + +/****f* IBA Base: Types/ib_path_rec_flow_lbl +* NAME +* ib_path_rec_flow_lbl +* +* DESCRIPTION +* Get flow label. +* +* SYNOPSIS +*/ +static inline uint32_t OSM_API +ib_path_rec_flow_lbl(IN const ib_path_rec_t * const p_rec) +{ + return (((cl_ntoh32(p_rec->hop_flow_raw) >> 8) & 0x000FFFFF)); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* RETURN VALUES +* Flow label of the path record. +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + +/****f* IBA Base: Types/ib_path_rec_hop_limit +* NAME +* ib_path_rec_hop_limit +* +* DESCRIPTION +* Get hop limit. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_path_rec_hop_limit(IN const ib_path_rec_t * const p_rec) +{ + return ((uint8_t) (cl_ntoh32(p_rec->hop_flow_raw) & 0x000000FF)); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the path record object. +* +* RETURN VALUES +* Hop limit of the path record. +* +* NOTES +* +* SEE ALSO +* ib_path_rec_t +*********/ + +/****s* IBA Base: Constants/IB_CLASS_CAP_TRAP +* NAME +* IB_CLASS_CAP_TRAP +* +* DESCRIPTION +* ClassPortInfo CapabilityMask bits. This bit will be set +* if the class supports Trap() MADs (13.4.8.1). +* +* SEE ALSO +* ib_class_port_info_t, IB_CLASS_CAP_GETSET, IB_CLASS_CAP_CAPMASK2 +* +* SOURCE +*/ +#define IB_CLASS_CAP_TRAP 0x0001 +/*********/ + +/****s* IBA Base: Constants/IB_CLASS_CAP_GETSET +* NAME +* IB_CLASS_CAP_GETSET +* +* DESCRIPTION +* ClassPortInfo CapabilityMask bits. This bit will be set +* if the class supports Get(Notice) and Set(Notice) MADs (13.4.8.1). 
+* +* SEE ALSO +* ib_class_port_info_t, IB_CLASS_CAP_TRAP, IB_CLASS_CAP_CAPMASK2 +* +* SOURCE +*/ +#define IB_CLASS_CAP_GETSET 0x0002 +/*********/ + +/****s* IBA Base: Constants/IB_CLASS_CAP_CAPMASK2 +* NAME +* IB_CLASS_CAP_CAPMASK2 +* +* DESCRIPTION +* ClassPortInfo CapabilityMask bits. +* This bit will be set if the class supports additional class specific +* capabilities (CapabilityMask2) (13.4.8.1). +* +* SEE ALSO +* ib_class_port_info_t, IB_CLASS_CAP_TRAP, IB_CLASS_CAP_GETSET +* +* SOURCE +*/ +#define IB_CLASS_CAP_CAPMASK2 0x0004 +/*********/ + +/****s* IBA Base: Constants/IB_CLASS_ENH_PORT0_CC_MASK +* NAME +* IB_CLASS_ENH_PORT0_CC_MASK +* +* DESCRIPTION +* ClassPortInfo CapabilityMask bits. +* Switch only: This bit will be set if the EnhancedPort0 +* supports CA Congestion Control (A10.4.3.1). +* +* SEE ALSO +* ib_class_port_info_t +* +* SOURCE +*/ +#define IB_CLASS_ENH_PORT0_CC_MASK 0x0100 +/*********/ + +/****s* IBA Base: Constants/IB_CLASS_RESP_TIME_MASK +* NAME +* IB_CLASS_RESP_TIME_MASK +* +* DESCRIPTION +* Mask bits to extract the response time value from the +* cap_mask2_resp_time field of ib_class_port_info_t. +* +* SEE ALSO +* ib_class_port_info_t +* +* SOURCE +*/ +#define IB_CLASS_RESP_TIME_MASK 0x1F +/*********/ + +/****s* IBA Base: Constants/IB_CLASS_CAPMASK2_SHIFT +* NAME +* IB_CLASS_CAPMASK2_SHIFT +* +* DESCRIPTION +* Number of bits to shift to extract the capability mask2 +* from the cap_mask2_resp_time field of ib_class_port_info_t. 
+* +* SEE ALSO +* ib_class_port_info_t +* +* SOURCE +*/ +#define IB_CLASS_CAPMASK2_SHIFT 5 +/*********/ + +/****s* IBA Base: Types/ib_class_port_info_t +* NAME +* ib_class_port_info_t +* +* DESCRIPTION +* IBA defined ClassPortInfo attribute (13.4.8.1) +* +* SYNOPSIS +*/ +typedef struct _ib_class_port_info { + uint8_t base_ver; + uint8_t class_ver; + ib_net16_t cap_mask; + ib_net32_t cap_mask2_resp_time; + ib_gid_t redir_gid; + ib_net32_t redir_tc_sl_fl; + ib_net16_t redir_lid; + ib_net16_t redir_pkey; + ib_net32_t redir_qp; + ib_net32_t redir_qkey; + ib_gid_t trap_gid; + ib_net32_t trap_tc_sl_fl; + ib_net16_t trap_lid; + ib_net16_t trap_pkey; + ib_net32_t trap_hop_qp; + ib_net32_t trap_qkey; +} ib_class_port_info_t; +/* +* FIELDS +* base_ver +* Maximum supported MAD Base Version. +* +* class_ver +* Maximum supported management class version. +* +* cap_mask +* Supported capabilities of this management class. +* +* cap_mask2_resp_time +* Maximum expected response time and additional +* supported capabilities of this management class. +* +* redir_gid +* GID to use for redirection, or zero +* +* redir_tc_sl_fl +* Traffic class, service level and flow label the requester +* should use if the service is redirected. +* +* redir_lid +* LID used for redirection, or zero +* +* redir_pkey +* P_Key used for redirection +* +* redir_qp +* QP number used for redirection +* +* redir_qkey +* Q_Key associated with the redirected QP. This shall be the +* well known Q_Key value. +* +* trap_gid +* GID value used for trap messages from this service. +* +* trap_tc_sl_fl +* Traffic class, service level and flow label used for +* trap messages originated by this service. +* +* trap_lid +* LID used for trap messages, or zero +* +* trap_pkey +* P_Key used for trap messages +* +* trap_hop_qp +* Hop limit (upper 8 bits) and QP number used for trap messages +* +* trap_qkey +* Q_Key associated with the trap messages QP. 
+* +* SEE ALSO +* IB_CLASS_CAP_GETSET, IB_CLASS_CAP_TRAP +* +*********/ + +#define IB_PM_ALL_PORT_SELECT (CL_HTON16(((uint16_t)1)<<8)) +#define IB_PM_EXT_WIDTH_SUPPORTED (CL_HTON16(((uint16_t)1)<<9)) +#define IB_PM_EXT_WIDTH_NOIETF_SUP (CL_HTON16(((uint16_t)1)<<10)) +#define IB_PM_SAMPLES_ONLY_SUP (CL_HTON16(((uint16_t)1)<<11)) +#define IB_PM_PC_XMIT_WAIT_SUP (CL_HTON16(((uint16_t)1)<<12)) +#define IS_PM_INH_LMTD_PKEY_MC_CONSTR_ERR (CL_HTON16(((uint16_t)1)<<13)) +#define IS_PM_RSFEC_COUNTERS_SUP (CL_HTON16(((uint16_t)1)<<14)) +#define IB_PM_IS_QP1_DROP_SUP (CL_HTON16(((uint16_t)1)<<15)) +/* CapabilityMask2 */ +#define IB_PM_IS_PM_KEY_SUPPORTED (CL_HTON32(((uint32_t)1)<<0)) +#define IB_PM_IS_ADDL_PORT_CTRS_EXT_SUP (CL_HTON32(((uint32_t)1)<<1)) + +/****f* IBA Base: Types/ib_class_set_resp_time_val +* NAME +* ib_class_set_resp_time_val +* +* DESCRIPTION +* Set maximum expected response time. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_class_set_resp_time_val(IN ib_class_port_info_t * const p_cpi, + IN const uint8_t val) +{ + p_cpi->cap_mask2_resp_time = + (p_cpi->cap_mask2_resp_time & CL_HTON32(~IB_CLASS_RESP_TIME_MASK)) | + cl_hton32(val & IB_CLASS_RESP_TIME_MASK); +} + +/* +* PARAMETERS +* p_cpi +* [in] Pointer to the class port info object. +* +* val +* [in] Response time value to set. +* +* RETURN VALUES +* None +* +* NOTES +* +* SEE ALSO +* ib_class_port_info_t +*********/ + +/****f* IBA Base: Types/ib_class_resp_time_val +* NAME +* ib_class_resp_time_val +* +* DESCRIPTION +* Get response time value. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_class_resp_time_val(IN ib_class_port_info_t * const p_cpi) +{ + return (uint8_t)(cl_ntoh32(p_cpi->cap_mask2_resp_time) & + IB_CLASS_RESP_TIME_MASK); +} + +/* +* PARAMETERS +* p_cpi +* [in] Pointer to the class port info object. +* +* RETURN VALUES +* Response time value. 
+* +* NOTES +* +* SEE ALSO +* ib_class_port_info_t +*********/ + +/****f* IBA Base: Types/ib_class_set_cap_mask2 +* NAME +* ib_class_set_cap_mask2 +* +* DESCRIPTION +* Set ClassPortInfo:CapabilityMask2. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_class_set_cap_mask2(IN ib_class_port_info_t * const p_cpi, + IN const uint32_t cap_mask2) +{ + /* keep only the response time bits, then OR in the shifted mask */ + p_cpi->cap_mask2_resp_time = (p_cpi->cap_mask2_resp_time & + CL_HTON32(IB_CLASS_RESP_TIME_MASK)) | + cl_hton32(cap_mask2 << IB_CLASS_CAPMASK2_SHIFT); +} + +/* +* PARAMETERS +* p_cpi +* [in] Pointer to the class port info object. +* +* cap_mask2 +* [in] CapabilityMask2 value to set. +* +* RETURN VALUES +* None +* +* NOTES +* The response time bits of cap_mask2_resp_time are preserved. +* +* SEE ALSO +* ib_class_port_info_t +*********/ + +/****f* IBA Base: Types/ib_class_cap_mask2 +* NAME +* ib_class_cap_mask2 +* +* DESCRIPTION +* Get ClassPortInfo:CapabilityMask2. +* +* SYNOPSIS +*/ +static inline uint32_t OSM_API +ib_class_cap_mask2(IN const ib_class_port_info_t * const p_cpi) +{ + return (cl_ntoh32(p_cpi->cap_mask2_resp_time) >> IB_CLASS_CAPMASK2_SHIFT); +} + +/* +* PARAMETERS +* p_cpi +* [in] Pointer to the class port info object. +* +* RETURN VALUES +* CapabilityMask2 of the ClassPortInfo. +* +* NOTES +* +* SEE ALSO +* ib_class_port_info_t +*********/ + +/****s* IBA Base: Types/ib_sm_info_t +* NAME +* ib_sm_info_t +* +* DESCRIPTION +* SMInfo structure (14.2.5.13). +* The structure is declared between the pack-on and pack-off headers +* and carries PACK_SUFFIX. +* +* SYNOPSIS +*/ +#include <complib/cl_packon.h> +typedef struct _ib_sm_info { + ib_net64_t guid; + ib_net64_t sm_key; + ib_net32_t act_count; + uint8_t pri_state; +} PACK_SUFFIX ib_sm_info_t; +#include <complib/cl_packoff.h> +/* +* FIELDS +* guid +* Port GUID for this SM. +* +* sm_key +* SM_Key of this SM. +* +* act_count +* Activity counter used as a heartbeat. +* +* pri_state +* Priority and State information +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_sminfo_get_priority +* NAME +* ib_sminfo_get_priority +* +* DESCRIPTION +* Returns the priority value. 
+* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_sminfo_get_priority(IN const ib_sm_info_t * const p_smi) +{ + return ((uint8_t) ((p_smi->pri_state & 0xF0) >> 4)); +} + +/* +* PARAMETERS +* p_smi +* [in] Pointer to the SMInfo Attribute. +* +* RETURN VALUES +* Returns the priority value. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_sminfo_get_state +* NAME +* ib_sminfo_get_state +* +* DESCRIPTION +* Returns the state value. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_sminfo_get_state(IN const ib_sm_info_t * const p_smi) +{ + return ((uint8_t) (p_smi->pri_state & 0x0F)); +} + +/* +* PARAMETERS +* p_smi +* [in] Pointer to the SMInfo Attribute. +* +* RETURN VALUES +* Returns the state value. +* +* NOTES +* +* SEE ALSO +*********/ + +/****s* IBA Base: Types/ib_mad_t +* NAME +* ib_mad_t +* +* DESCRIPTION +* IBA defined MAD header (13.4.3) +* +* SYNOPSIS +*/ +typedef struct _ib_mad { + uint8_t base_ver; + uint8_t mgmt_class; + uint8_t class_ver; + uint8_t method; + ib_net16_t status; + ib_net16_t class_spec; + ib_net64_t trans_id; + ib_net16_t attr_id; + ib_net16_t resv; + ib_net32_t attr_mod; +} ib_mad_t; +/* +* FIELDS +* base_ver +* MAD base format. +* +* mgmt_class +* Class of operation. +* +* class_ver +* Version of MAD class-specific format. +* +* method +* Method to perform, including 'R' bit. +* +* status +* Status of operation. +* +* class_spec +* Reserved for subnet management. +* +* trans_id +* Transaction ID. +* +* attr_id +* Attribute ID. +* +* resv +* Reserved field. +* +* attr_mod +* Attribute modifier. 
+* +* SEE ALSO +*********/ + +/****s* IBA Base: Types/ib_rmpp_mad_t +* NAME +* ib_rmpp_mad_t +* +* DESCRIPTION +* IBA defined MAD RMPP header (13.6.2.1) +* The structure is declared between the pack-on and pack-off headers +* and carries PACK_SUFFIX. +* +* SYNOPSIS +*/ +#include <complib/cl_packon.h> +typedef struct _ib_rmpp_mad { + ib_mad_t common_hdr; + uint8_t rmpp_version; + uint8_t rmpp_type; + uint8_t rmpp_flags; + uint8_t rmpp_status; + ib_net32_t seg_num; + ib_net32_t paylen_newwin; +} PACK_SUFFIX ib_rmpp_mad_t; +#include <complib/cl_packoff.h> +/* +* SEE ALSO +* ib_mad_t +*********/ + +/****f* IBA Base: Types/ib_mad_init_new +* NAME +* ib_mad_init_new +* +* DESCRIPTION +* Initializes a MAD common header. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_mad_init_new(IN ib_mad_t * const p_mad, + IN const uint8_t mgmt_class, + IN const uint8_t class_ver, + IN const uint8_t method, + IN const ib_net64_t trans_id, + IN const ib_net16_t attr_id, IN const ib_net32_t attr_mod) +{ + CL_ASSERT(p_mad); + p_mad->base_ver = 1; + p_mad->mgmt_class = mgmt_class; + p_mad->class_ver = class_ver; + p_mad->method = method; + p_mad->status = 0; + p_mad->class_spec = 0; + p_mad->trans_id = trans_id; + p_mad->attr_id = attr_id; + p_mad->resv = 0; + p_mad->attr_mod = attr_mod; +} + +/* +* PARAMETERS +* p_mad +* [in] Pointer to the MAD common header. +* +* mgmt_class +* [in] Class of operation. +* +* class_ver +* [in] Version of MAD class-specific format. +* +* method +* [in] Method to perform, including 'R' bit. +* +* trans_id +* [in] Transaction ID. +* +* attr_id +* [in] Attribute ID. +* +* attr_mod +* [in] Attribute modifier. +* +* RETURN VALUES +* None. +* +* NOTES +* base_ver is set to 1; status, class_spec and resv are set to zero. +* +* SEE ALSO +* ib_mad_t +*********/ + +/****f* IBA Base: Types/ib_mad_init_response +* NAME +* ib_mad_init_response +* +* DESCRIPTION +* Initializes a MAD common header as a response. 
+* +* SYNOPSIS +*/ +static inline void OSM_API +ib_mad_init_response(IN const ib_mad_t * const p_req_mad, + IN ib_mad_t * const p_mad, IN const ib_net16_t status) +{ + CL_ASSERT(p_req_mad); + CL_ASSERT(p_mad); + /* start from a copy of the request header, then patch status/method */ + *p_mad = *p_req_mad; + p_mad->status = status; + /* a Set request is answered with the Get method plus the response bit */ + if (p_mad->method == IB_MAD_METHOD_SET) + p_mad->method = IB_MAD_METHOD_GET; + p_mad->method |= IB_MAD_METHOD_RESP_MASK; +} + +/* +* PARAMETERS +* p_req_mad +* [in] Pointer to the MAD common header in the original request MAD. +* +* p_mad +* [in] Pointer to the MAD common header to initialize. +* +* status +* [in] MAD Status value to return. +* +* RETURN VALUES +* None. +* +* NOTES +* p_req_mad and p_mad may point to the same MAD. +* +* SEE ALSO +* ib_mad_t +*********/ + +/****f* IBA Base: Types/ib_mad_is_response +* NAME +* ib_mad_is_response +* +* DESCRIPTION +* Returns TRUE if the MAD is a response ('R' bit set) +* or if the MAD is a TRAP REPRESS, +* FALSE otherwise. +* +* SYNOPSIS +*/ +static inline boolean_t OSM_API +ib_mad_is_response(IN const ib_mad_t * const p_mad) +{ + CL_ASSERT(p_mad); + return (p_mad->method & IB_MAD_METHOD_RESP_MASK || + p_mad->method == IB_MAD_METHOD_TRAP_REPRESS); +} + +/* +* PARAMETERS +* p_mad +* [in] Pointer to the MAD. +* +* RETURN VALUES +* Returns TRUE if the MAD is a response ('R' bit set) +* or its method is TRAP REPRESS, +* FALSE otherwise. +* +* NOTES +* +* SEE ALSO +* ib_mad_t +*********/ + +#define IB_RMPP_TYPE_DATA 1 +#define IB_RMPP_TYPE_ACK 2 +#define IB_RMPP_TYPE_STOP 3 +#define IB_RMPP_TYPE_ABORT 4 + +#define IB_RMPP_NO_RESP_TIME 0x1F +#define IB_RMPP_FLAG_ACTIVE 0x01 +#define IB_RMPP_FLAG_FIRST 0x02 +#define IB_RMPP_FLAG_LAST 0x04 + +#define IB_RMPP_STATUS_SUCCESS 0 +#define IB_RMPP_STATUS_RESX 1 /* resources exhausted */ +#define IB_RMPP_STATUS_T2L 118 /* time too long */ +#define IB_RMPP_STATUS_BAD_LEN 119 /* incon. last and payload len */ +#define IB_RMPP_STATUS_BAD_SEG 120 /* incon. 
first and segment no */ +#define IB_RMPP_STATUS_BADT 121 /* bad rmpp type */ +#define IB_RMPP_STATUS_W2S 122 /* newwindowlast too small */ +#define IB_RMPP_STATUS_S2B 123 /* segment no too big */ +#define IB_RMPP_STATUS_BAD_STATUS 124 /* illegal status */ +#define IB_RMPP_STATUS_UNV 125 /* unsupported version */ +#define IB_RMPP_STATUS_TMR 126 /* too many retries */ +#define IB_RMPP_STATUS_UNSPEC 127 /* unspecified */ + +/****f* IBA Base: Types/ib_rmpp_is_flag_set +* NAME +* ib_rmpp_is_flag_set +* +* DESCRIPTION +* Returns TRUE if the MAD has the given RMPP flag set. +* +* SYNOPSIS +*/ +static inline boolean_t OSM_API +ib_rmpp_is_flag_set(IN const ib_rmpp_mad_t * const p_rmpp_mad, + IN const uint8_t flag) +{ + CL_ASSERT(p_rmpp_mad); + return ((p_rmpp_mad->rmpp_flags & flag) == flag); +} + +/* +* PARAMETERS +* p_rmpp_mad +* [in] Pointer to a MAD with an RMPP header. +* +* flag +* [in] The RMPP flag being examined. +* +* RETURN VALUES +* Returns TRUE if the MAD has the given RMPP flag set. +* +* NOTES +* If flag has several bits set, all of them must be set in the MAD. +* +* SEE ALSO +* ib_mad_t, ib_rmpp_mad_t +*********/ + +/****f* IBA Base: Types/ib_rmpp_set_resp_time +* NAME +* ib_rmpp_set_resp_time +* +* DESCRIPTION +* Stores the response time value in the upper five bits of rmpp_flags, +* replacing any value already stored there and leaving the low three +* flag bits untouched. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_rmpp_set_resp_time(IN ib_rmpp_mad_t * const p_rmpp_mad, + IN const uint8_t resp_time) +{ + CL_ASSERT(p_rmpp_mad); + /* keep the flag bits (0x07) and overwrite the 5-bit response time; + a plain OR could never clear bits left by an earlier value */ + p_rmpp_mad->rmpp_flags = (uint8_t) ((p_rmpp_mad->rmpp_flags & 0x07) | + ((resp_time & 0x1F) << 3)); +} + +/* +* PARAMETERS +* p_rmpp_mad +* [in] Pointer to a MAD with an RMPP header. +* +* resp_time +* [in] Response time value; only the low 5 bits are used. +* +* RETURN VALUES +* None. +* +* SEE ALSO +* ib_rmpp_mad_t, ib_rmpp_get_resp_time +*********/ + +/****f* IBA Base: Types/ib_rmpp_get_resp_time +* NAME +* ib_rmpp_get_resp_time +* +* DESCRIPTION +* Returns the response time value held in the upper five bits +* of rmpp_flags. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_rmpp_get_resp_time(IN const ib_rmpp_mad_t * const p_rmpp_mad) +{ + CL_ASSERT(p_rmpp_mad); + return ((uint8_t) (p_rmpp_mad->rmpp_flags >> 3)); +} + +/* +* PARAMETERS +* p_rmpp_mad +* [in] Pointer to a MAD with an RMPP header. +* +* RETURN VALUES +* Response time value (rmpp_flags shifted right by three). +* +* SEE ALSO +* ib_rmpp_mad_t, ib_rmpp_set_resp_time +*********/ + +/****d* IBA Base: Constants/IB_SMP_DIRECTION +* NAME +* IB_SMP_DIRECTION +* +* DESCRIPTION +* The Direction bit for directed route SMPs. +* +* SOURCE +*/ +#define IB_SMP_DIRECTION_HO 0x8000 +#define IB_SMP_DIRECTION (CL_HTON16(IB_SMP_DIRECTION_HO)) +/**********/ + +/****d* IBA Base: Constants/IB_SMP_STATUS_MASK +* NAME +* IB_SMP_STATUS_MASK +* +* DESCRIPTION +* Mask value for extracting status from a directed route SMP. 
+* +* SOURCE +*/ +#define IB_SMP_STATUS_MASK_HO 0x7FFF +#define IB_SMP_STATUS_MASK (CL_HTON16(IB_SMP_STATUS_MASK_HO)) +/**********/ + +/****s* IBA Base: Types/ib_smp_t +* NAME +* ib_smp_t +* +* DESCRIPTION +* IBA defined SMP. (14.2.1.2) +* +* SYNOPSIS +*/ +#define IB_SMP_DATA_SIZE 64 +typedef struct _ib_smp { + uint8_t base_ver; + uint8_t mgmt_class; + uint8_t class_ver; + uint8_t method; + ib_net16_t status; + uint8_t hop_ptr; + uint8_t hop_count; + ib_net64_t trans_id; + ib_net16_t attr_id; + ib_net16_t resv; + ib_net32_t attr_mod; + ib_net64_t m_key; + ib_net16_t dr_slid; + ib_net16_t dr_dlid; + uint32_t resv1[7]; + uint8_t data[IB_SMP_DATA_SIZE]; + uint8_t initial_path[IB_SUBNET_PATH_HOPS_MAX]; + uint8_t return_path[IB_SUBNET_PATH_HOPS_MAX]; +} ib_smp_t; +/* +* FIELDS +* base_ver +* MAD base format. +* +* mgmt_class +* Class of operation. +* +* class_ver +* Version of MAD class-specific format. +* +* method +* Method to perform, including 'R' bit. +* +* status +* Status of operation. +* +* hop_ptr +* Hop pointer for directed route MADs. +* +* hop_count +* Hop count for directed route MADs. +* +* trans_id +* Transaction ID. +* +* attr_id +* Attribute ID. +* +* resv +* Reserved field. +* +* attr_mod +* Attribute modifier. +* +* m_key +* Management key value. +* +* dr_slid +* Directed route source LID. +* +* dr_dlid +* Directed route destination LID. +* +* resv1 +* Reserved for 64 byte alignment. +* +* data +* MAD data payload. +* +* initial_path +* Outbound port list. +* +* return_path +* Inbound port list. +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_smp_get_status +* NAME +* ib_smp_get_status +* +* DESCRIPTION +* Returns the SMP status value in network order. +* +* SYNOPSIS +*/ +static inline ib_net16_t OSM_API +ib_smp_get_status(IN const ib_smp_t * const p_smp) +{ + return ((ib_net16_t) (p_smp->status & IB_SMP_STATUS_MASK)); +} + +/* +* PARAMETERS +* p_smp +* [in] Pointer to the SMP packet. 
+* +* RETURN VALUES +* Returns the SMP status value in network order. +* +* NOTES +* +* SEE ALSO +* ib_smp_t +*********/ + +/****f* IBA Base: Types/ib_smp_is_response +* NAME +* ib_smp_is_response +* +* DESCRIPTION +* Returns TRUE if the SMP is a response MAD, FALSE otherwise. +* +* SYNOPSIS +*/ +static inline boolean_t OSM_API +ib_smp_is_response(IN const ib_smp_t * const p_smp) +{ + return (ib_mad_is_response((const ib_mad_t *)p_smp)); +} + +/* +* PARAMETERS +* p_smp +* [in] Pointer to the SMP packet. +* +* RETURN VALUES +* Returns TRUE if the SMP is a response MAD, FALSE otherwise. +* +* NOTES +* +* SEE ALSO +* ib_smp_t +*********/ + +/****f* IBA Base: Types/ib_smp_is_d +* NAME +* ib_smp_is_d +* +* DESCRIPTION +* Returns TRUE if the SMP 'D' (direction) bit is set. +* +* SYNOPSIS +*/ +static inline boolean_t OSM_API ib_smp_is_d(IN const ib_smp_t * const p_smp) +{ + return ((p_smp->status & IB_SMP_DIRECTION) == IB_SMP_DIRECTION); +} + +/* +* PARAMETERS +* p_smp +* [in] Pointer to the SMP packet. +* +* RETURN VALUES +* Returns TRUE if the SMP 'D' (direction) bit is set. +* +* NOTES +* +* SEE ALSO +* ib_smp_t +*********/ + +/****f* IBA Base: Types/ib_smp_init_new +* NAME +* ib_smp_init_new +* +* DESCRIPTION +* Initializes a MAD common header. +* +* TODO +* This is too big for inlining, but leave it here for now +* since there is not yet another convenient spot. 
+* +* SYNOPSIS +*/ +static inline void OSM_API +ib_smp_init_new(IN ib_smp_t * const p_smp, + IN const uint8_t method, + IN const ib_net64_t trans_id, + IN const ib_net16_t attr_id, + IN const ib_net32_t attr_mod, + IN const uint8_t hop_count, + IN const ib_net64_t m_key, + IN const uint8_t * path_out, + IN const ib_net16_t dr_slid, IN const ib_net16_t dr_dlid) +{ + CL_ASSERT(p_smp); + CL_ASSERT(hop_count < IB_SUBNET_PATH_HOPS_MAX); + p_smp->base_ver = 1; + p_smp->mgmt_class = IB_MCLASS_SUBN_DIR; + p_smp->class_ver = 1; + p_smp->method = method; + p_smp->status = 0; + p_smp->hop_ptr = 0; + p_smp->hop_count = hop_count; + p_smp->trans_id = trans_id; + p_smp->attr_id = attr_id; + p_smp->resv = 0; + p_smp->attr_mod = attr_mod; + p_smp->m_key = m_key; + p_smp->dr_slid = dr_slid; + p_smp->dr_dlid = dr_dlid; + + memset(p_smp->resv1, 0, + sizeof(p_smp->resv1) + + sizeof(p_smp->data) + + sizeof(p_smp->initial_path) + sizeof(p_smp->return_path)); + + /* copy the path */ + memcpy(&p_smp->initial_path, path_out, sizeof(p_smp->initial_path)); +} + +/* +* PARAMETERS +* p_smp +* [in] Pointer to the SMP packet. +* +* method +* [in] Method to perform, including 'R' bit. +* +* trans_Id +* [in] Transaction ID. +* +* attr_id +* [in] Attribute ID. +* +* attr_mod +* [in] Attribute modifier. +* +* hop_count +* [in] Number of hops in the path. +* +* m_key +* [in] Management key for this SMP. +* +* path_out +* [in] Port array for outbound path. +* +* +* RETURN VALUES +* None. +* +* NOTES +* Payload area is initialized to zero. +* +* +* SEE ALSO +* ib_mad_t +*********/ + +/****f* IBA Base: Types/ib_smp_get_payload_ptr +* NAME +* ib_smp_get_payload_ptr +* +* DESCRIPTION +* Gets a pointer to the SMP payload area. +* +* SYNOPSIS +*/ +static inline void *OSM_API +ib_smp_get_payload_ptr(IN const ib_smp_t * const p_smp) +{ + return ((void *)p_smp->data); +} + +/* +* PARAMETERS +* p_smp +* [in] Pointer to the SMP packet. +* +* RETURN VALUES +* Pointer to SMP payload area. 
+* +* NOTES +* +* SEE ALSO +* ib_mad_t +*********/ + +/****s* IBA Base: Types/ib_node_info_t +* NAME +* ib_node_info_t +* +* DESCRIPTION +* IBA defined NodeInfo. (14.2.5.3) +* +* SYNOPSIS +*/ +#include <complib/cl_packon.h> +typedef struct _ib_node_info { + uint8_t base_version; + uint8_t class_version; + uint8_t node_type; + uint8_t num_ports; + ib_net64_t sys_guid; + ib_net64_t node_guid; + ib_net64_t port_guid; + ib_net16_t partition_cap; + ib_net16_t device_id; + ib_net32_t revision; + ib_net32_t port_num_vendor_id; +} PACK_SUFFIX ib_node_info_t; +#include <complib/cl_packoff.h> +/************/ + +/****s* IBA Base: Types/ib_sa_mad_t +* NAME +* ib_sa_mad_t +* +* DESCRIPTION +* IBA defined SA MAD format. (15.2.1) +* +* SYNOPSIS +*/ +#define IB_SA_DATA_SIZE 200 + +#include <complib/cl_packon.h> +typedef struct _ib_sa_mad { + uint8_t base_ver; + uint8_t mgmt_class; + uint8_t class_ver; + uint8_t method; + ib_net16_t status; + ib_net16_t resv; + ib_net64_t trans_id; + ib_net16_t attr_id; + ib_net16_t resv1; + ib_net32_t attr_mod; + uint8_t rmpp_version; + uint8_t rmpp_type; + uint8_t rmpp_flags; + uint8_t rmpp_status; + ib_net32_t seg_num; + ib_net32_t paylen_newwin; + ib_net64_t sm_key; + ib_net16_t attr_offset; + ib_net16_t resv3; + ib_net64_t comp_mask; + uint8_t data[IB_SA_DATA_SIZE]; +} PACK_SUFFIX ib_sa_mad_t; +#include <complib/cl_packoff.h> +/**********/ +#define IB_SA_MAD_HDR_SIZE (sizeof(ib_sa_mad_t) - IB_SA_DATA_SIZE) + +static inline uint32_t OSM_API ib_get_attr_size(IN const ib_net16_t attr_offset) +{ + return (((uint32_t) cl_ntoh16(attr_offset)) << 3); +} + +static inline ib_net16_t OSM_API ib_get_attr_offset(IN const uint32_t attr_size) +{ + return (cl_hton16((uint16_t) (attr_size >> 3))); +} + +/****f* IBA Base: Types/ib_sa_mad_get_payload_ptr +* NAME +* ib_sa_mad_get_payload_ptr +* +* DESCRIPTION +* Gets a pointer to the SA MAD's payload area.
+* +* SYNOPSIS +*/ +static inline void *OSM_API +ib_sa_mad_get_payload_ptr(IN const ib_sa_mad_t * const p_sa_mad) +{ + return ((void *)p_sa_mad->data); +} + +/* +* PARAMETERS +* p_sa_mad +* [in] Pointer to the SA MAD packet. +* +* RETURN VALUES +* Pointer to SA MAD payload area. +* +* NOTES +* +* SEE ALSO +* ib_mad_t +*********/ + +#define IB_NODE_INFO_PORT_NUM_MASK (CL_HTON32(0xFF000000)) +#define IB_NODE_INFO_VEND_ID_MASK (CL_HTON32(0x00FFFFFF)) +#if CPU_LE +#define IB_NODE_INFO_PORT_NUM_SHIFT 0 +#else +#define IB_NODE_INFO_PORT_NUM_SHIFT 24 +#endif + +/****f* IBA Base: Types/ib_node_info_get_local_port_num +* NAME +* ib_node_info_get_local_port_num +* +* DESCRIPTION +* Gets the local port number from the NodeInfo attribute. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_node_info_get_local_port_num(IN const ib_node_info_t * const p_ni) +{ + return ((uint8_t) ((p_ni->port_num_vendor_id & + IB_NODE_INFO_PORT_NUM_MASK) + >> IB_NODE_INFO_PORT_NUM_SHIFT)); +} + +/* +* PARAMETERS +* p_ni +* [in] Pointer to a NodeInfo attribute. +* +* RETURN VALUES +* Local port number that returned the attribute. +* +* NOTES +* +* SEE ALSO +* ib_node_info_t +*********/ + +/****f* IBA Base: Types/ib_node_info_get_vendor_id +* NAME +* ib_node_info_get_vendor_id +* +* DESCRIPTION +* Gets the VendorID from the NodeInfo attribute. +* +* SYNOPSIS +*/ +static inline ib_net32_t OSM_API +ib_node_info_get_vendor_id(IN const ib_node_info_t * const p_ni) +{ + return ((ib_net32_t) (p_ni->port_num_vendor_id & + IB_NODE_INFO_VEND_ID_MASK)); +} + +/* +* PARAMETERS +* p_ni +* [in] Pointer to a NodeInfo attribute. +* +* RETURN VALUES +* VendorID that returned the attribute. +* +* NOTES +* +* SEE ALSO +* ib_node_info_t +*********/ + +#define IB_NODE_DESCRIPTION_SIZE 64 + +typedef struct _ib_node_desc { + // Node String is an array of UTF-8 characters + // that describe the node in text format + // Note that this string is NOT NULL TERMINATED! 
+ uint8_t description[IB_NODE_DESCRIPTION_SIZE]; +} ib_node_desc_t; + +#include <complib/cl_packon.h> +typedef struct _ib_node_record_t { + ib_net16_t lid; + ib_net16_t resv; + ib_node_info_t node_info; + ib_node_desc_t node_desc; + uint8_t pad[4]; +} PACK_SUFFIX ib_node_record_t; +#include <complib/cl_packoff.h> + +/****s* IBA Base: Types/ib_port_info_t +* NAME +* ib_port_info_t +* +* DESCRIPTION +* IBA defined PortInfo. (14.2.5.6) +* +* SYNOPSIS +*/ +typedef struct _ib_port_info { + ib_net64_t m_key; + ib_net64_t subnet_prefix; + ib_net16_t base_lid; + ib_net16_t master_sm_base_lid; + ib_net32_t capability_mask; + ib_net16_t diag_code; + ib_net16_t m_key_lease_period; + uint8_t local_port_num; + uint8_t link_width_enabled; + uint8_t link_width_supported; + uint8_t link_width_active; + uint8_t state_info1; /* LinkSpeedSupported and PortState */ + uint8_t state_info2; /* PortPhysState and LinkDownDefaultState */ + uint8_t mkey_lmc; /* M_KeyProtectBits and LMC */ + uint8_t link_speed; /* LinkSpeedActive (high nibble) and LinkSpeedEnabled (low nibble) */ + uint8_t mtu_smsl; + uint8_t vl_cap; /* VLCap and InitType */ + uint8_t vl_high_limit; + uint8_t vl_arb_high_cap; + uint8_t vl_arb_low_cap; + uint8_t mtu_cap; + uint8_t vl_stall_life; + uint8_t vl_enforce; + ib_net16_t m_key_violations; + ib_net16_t p_key_violations; + ib_net16_t q_key_violations; + uint8_t guid_cap; + uint8_t subnet_timeout; /* cli_rereg(1b), mcast_pkey_trap_suppr(2b), timeout(5b) */ + uint8_t resp_time_value; /* reserv(3b), rtv(5b) */ + uint8_t error_threshold; /* local phy errors(4b), overrun errors(4b) */ + ib_net16_t max_credit_hint; + ib_net32_t link_rt_latency; /* reserv(8b), link round trip lat(24b) */ + ib_net16_t capability_mask2; + uint8_t link_speed_ext; /* LinkSpeedExtActive and LinkSpeedExtSupported */ + uint8_t link_speed_ext_enabled; /* reserv(3b), LinkSpeedExtEnabled(5b) */ +} ib_port_info_t; +/************/ + +#define IB_PORT_STATE_MASK 0x0F +#define IB_PORT_LMC_MASK 0x07 +#define IB_PORT_LMC_MAX 0x07 +#define IB_PORT_MPB_MASK 0xC0 +#define
IB_PORT_MPB_SHIFT 6 +#define IB_PORT_LINK_SPEED_SHIFT 4 +#define IB_PORT_LINK_SPEED_SUPPORTED_MASK 0xF0 +#define IB_PORT_LINK_SPEED_ACTIVE_MASK 0xF0 +#define IB_PORT_LINK_SPEED_ENABLED_MASK 0x0F +#define IB_PORT_PHYS_STATE_MASK 0xF0 +#define IB_PORT_PHYS_STATE_SHIFT 4 +#define IB_PORT_PHYS_STATE_NO_CHANGE 0 +#define IB_PORT_PHYS_STATE_SLEEP 1 +#define IB_PORT_PHYS_STATE_POLLING 2 +#define IB_PORT_PHYS_STATE_DISABLED 3 +#define IB_PORT_PHYS_STATE_PORTCONFTRAIN 4 +#define IB_PORT_PHYS_STATE_LINKUP 5 +#define IB_PORT_PHYS_STATE_LINKERRRECOVER 6 +#define IB_PORT_PHYS_STATE_PHYTEST 7 +#define IB_PORT_LNKDWNDFTSTATE_MASK 0x0F + +#define IB_PORT_CAP_RESV0 (CL_HTON32(0x00000001)) +#define IB_PORT_CAP_IS_SM (CL_HTON32(0x00000002)) +#define IB_PORT_CAP_HAS_NOTICE (CL_HTON32(0x00000004)) +#define IB_PORT_CAP_HAS_TRAP (CL_HTON32(0x00000008)) +#define IB_PORT_CAP_HAS_IPD (CL_HTON32(0x00000010)) +#define IB_PORT_CAP_HAS_AUTO_MIG (CL_HTON32(0x00000020)) +#define IB_PORT_CAP_HAS_SL_MAP (CL_HTON32(0x00000040)) +#define IB_PORT_CAP_HAS_NV_MKEY (CL_HTON32(0x00000080)) +#define IB_PORT_CAP_HAS_NV_PKEY (CL_HTON32(0x00000100)) +#define IB_PORT_CAP_HAS_LED_INFO (CL_HTON32(0x00000200)) +#define IB_PORT_CAP_SM_DISAB (CL_HTON32(0x00000400)) +#define IB_PORT_CAP_HAS_SYS_IMG_GUID (CL_HTON32(0x00000800)) +#define IB_PORT_CAP_HAS_PKEY_SW_EXT_PORT_TRAP (CL_HTON32(0x00001000)) +#define IB_PORT_CAP_HAS_CABLE_INFO (CL_HTON32(0x00002000)) +#define IB_PORT_CAP_HAS_EXT_SPEEDS (CL_HTON32(0x00004000)) +#define IB_PORT_CAP_HAS_CAP_MASK2 (CL_HTON32(0x00008000)) +#define IB_PORT_CAP_HAS_COM_MGT (CL_HTON32(0x00010000)) +#define IB_PORT_CAP_HAS_SNMP (CL_HTON32(0x00020000)) +#define IB_PORT_CAP_REINIT (CL_HTON32(0x00040000)) +#define IB_PORT_CAP_HAS_DEV_MGT (CL_HTON32(0x00080000)) +#define IB_PORT_CAP_HAS_VEND_CLS (CL_HTON32(0x00100000)) +#define IB_PORT_CAP_HAS_DR_NTC (CL_HTON32(0x00200000)) +#define IB_PORT_CAP_HAS_CAP_NTC (CL_HTON32(0x00400000)) +#define IB_PORT_CAP_HAS_BM (CL_HTON32(0x00800000)) +#define 
IB_PORT_CAP_HAS_LINK_RT_LATENCY (CL_HTON32(0x01000000)) +#define IB_PORT_CAP_HAS_CLIENT_REREG (CL_HTON32(0x02000000)) +#define IB_PORT_CAP_HAS_OTHER_LOCAL_CHANGES_NTC (CL_HTON32(0x04000000)) +#define IB_PORT_CAP_HAS_LINK_SPEED_WIDTH_PAIRS_TBL (CL_HTON32(0x08000000)) +#define IB_PORT_CAP_HAS_VEND_MADS (CL_HTON32(0x10000000)) +#define IB_PORT_CAP_HAS_MCAST_PKEY_TRAP_SUPPRESS (CL_HTON32(0x20000000)) +#define IB_PORT_CAP_HAS_MCAST_FDB_TOP (CL_HTON32(0x40000000)) +#define IB_PORT_CAP_HAS_HIER_INFO (CL_HTON32(0x80000000)) + +#define IB_PORT_CAP2_IS_SET_NODE_DESC_SUPPORTED (CL_HTON16(0x0001)) +#define IB_PORT_CAP2_IS_PORT_INFO_EXT_SUPPORTED (CL_HTON16(0x0002)) +#define IB_PORT_CAP2_IS_VIRT_SUPPORTED (CL_HTON16(0x0004)) +#define IB_PORT_CAP2_IS_SWITCH_PORT_STATE_TBL_SUPP (CL_HTON16(0x0008)) +#define IB_PORT_CAP2_IS_LINK_WIDTH_2X_SUPPORTED (CL_HTON16(0x0010)) +#define IB_PORT_CAP2_IS_LINK_SPEED_HDR_SUPPORTED (CL_HTON16(0x0020)) + +/****s* IBA Base: Types/ib_port_info_ext_t +* NAME +* ib_port_info_ext_t +* +* DESCRIPTION +* IBA defined PortInfoExtended. (14.2.5.19) +* +* SYNOPSIS +*/ +typedef struct _ib_port_info_ext { + ib_net32_t cap_mask; + ib_net16_t fec_mode_active; + ib_net16_t fdr_fec_mode_sup; + ib_net16_t fdr_fec_mode_enable; + ib_net16_t edr_fec_mode_sup; + ib_net16_t edr_fec_mode_enable; + ib_net16_t hdr_fec_mode_sup; + ib_net16_t hdr_fec_mode_enable; + uint8_t reserved[46]; +} ib_port_info_ext_t; +/************/ + +#define IB_PORT_EXT_NO_FEC_MODE_ACTIVE 0 +#define IB_PORT_EXT_FIRE_CODE_FEC_MODE_ACTIVE (CL_HTON16(0x0001)) +#define IB_PORT_EXT_RS_FEC_MODE_ACTIVE (CL_HTON16(0x0002)) +#define IB_PORT_EXT_LOW_LATENCY_RS_FEC_MODE_ACTIVE (CL_HTON16(0x0003)) + +#define IB_PORT_EXT_CAP_IS_FEC_MODE_SUPPORTED (CL_HTON32(0x00000001)) +/****f* IBA Base: Types/ib_port_info_get_port_state +* NAME +* ib_port_info_get_port_state +* +* DESCRIPTION +* Returns the port state. 
+* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_port_state(IN const ib_port_info_t * const p_pi) +{ + return ((uint8_t) (p_pi->state_info1 & IB_PORT_STATE_MASK)); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* Port state. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_set_port_state +* NAME +* ib_port_info_set_port_state +* +* DESCRIPTION +* Sets the port state. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_port_info_set_port_state(IN ib_port_info_t * const p_pi, + IN const uint8_t port_state) +{ + p_pi->state_info1 = (uint8_t) ((p_pi->state_info1 & 0xF0) | port_state); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* port_state +* [in] Port state value to set. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_vl_cap +* NAME +* ib_port_info_get_vl_cap +* +* DESCRIPTION +* Gets the VL Capability of a port. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_vl_cap(IN const ib_port_info_t * const p_pi) +{ + return ((p_pi->vl_cap >> 4) & 0x0F); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* VL_CAP field +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_init_type +* NAME +* ib_port_info_get_init_type +* +* DESCRIPTION +* Gets the init type of a port. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_init_type(IN const ib_port_info_t * const p_pi) +{ + return (uint8_t) (p_pi->vl_cap & 0x0F); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* InitType field +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_op_vls +* NAME +* ib_port_info_get_op_vls +* +* DESCRIPTION +* Gets the operational VLs on a port. 
+* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_op_vls(IN const ib_port_info_t * const p_pi) +{ + return ((p_pi->vl_enforce >> 4) & 0x0F); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* OP_VLS field +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_set_op_vls +* NAME +* ib_port_info_set_op_vls +* +* DESCRIPTION +* Sets the operational VLs on a port. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_port_info_set_op_vls(IN ib_port_info_t * const p_pi, IN const uint8_t op_vls) +{ + p_pi->vl_enforce = + (uint8_t) ((p_pi->vl_enforce & 0x0F) | (op_vls << 4)); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* op_vls +* [in] Encoded operation VLs value. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_set_state_no_change +* NAME +* ib_port_info_set_state_no_change +* +* DESCRIPTION +* Sets the port state fields to the value for "no change". +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_port_info_set_state_no_change(IN ib_port_info_t * const p_pi) +{ + ib_port_info_set_port_state(p_pi, IB_LINK_NO_CHANGE); + p_pi->state_info2 = 0; +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_link_speed_sup +* NAME +* ib_port_info_get_link_speed_sup +* +* DESCRIPTION +* Returns the encoded value for the link speed supported. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_link_speed_sup(IN const ib_port_info_t * const p_pi) +{ + return ((uint8_t) ((p_pi->state_info1 & + IB_PORT_LINK_SPEED_SUPPORTED_MASK) >> + IB_PORT_LINK_SPEED_SHIFT)); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* Returns the encoded value for the link speed supported. 
+* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_set_link_speed_sup +* NAME +* ib_port_info_set_link_speed_sup +* +* DESCRIPTION +* Given the encoded value of the supported link speeds, +* sets the appropriate bits in state_info1 +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_port_info_set_link_speed_sup(IN uint8_t const speed, + IN ib_port_info_t * p_pi) +{ + p_pi->state_info1 = + (~IB_PORT_LINK_SPEED_SUPPORTED_MASK & p_pi->state_info1) | + (IB_PORT_LINK_SPEED_SUPPORTED_MASK & + (speed << IB_PORT_LINK_SPEED_SHIFT)); +} + +/* +* PARAMETERS +* speed +* [in] Supported Speeds Code. +* +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* This function does not return a value. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_port_phys_state +* NAME +* ib_port_info_get_port_phys_state +* +* DESCRIPTION +* Returns the encoded value for the port physical state. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_port_phys_state(IN const ib_port_info_t * const p_pi) +{ + return ((uint8_t) ((p_pi->state_info2 & + IB_PORT_PHYS_STATE_MASK) >> + IB_PORT_PHYS_STATE_SHIFT)); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* Returns the encoded value for the port physical state. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_set_port_phys_state +* NAME +* ib_port_info_set_port_phys_state +* +* DESCRIPTION +* Given an integer of the port physical state, +* Set the appropriate bits in state_info2 +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_port_info_set_port_phys_state(IN uint8_t const phys_state, + IN ib_port_info_t * p_pi) +{ + p_pi->state_info2 = + (~IB_PORT_PHYS_STATE_MASK & p_pi->state_info2) | + (IB_PORT_PHYS_STATE_MASK & + (phys_state << IB_PORT_PHYS_STATE_SHIFT)); +} + +/* +* PARAMETERS +* phys_state +* [in] port physical state. +* +* p_pi +* [in] Pointer to a PortInfo attribute.
+* +* RETURN VALUES +* This function does not return a value. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_link_down_def_state +* NAME +* ib_port_info_get_link_down_def_state +* +* DESCRIPTION +* Returns the link down default state. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_link_down_def_state(IN const ib_port_info_t * const p_pi) +{ + return ((uint8_t) (p_pi->state_info2 & IB_PORT_LNKDWNDFTSTATE_MASK)); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* link down default state of the port. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_set_link_down_def_state +* NAME +* ib_port_info_set_link_down_def_state +* +* DESCRIPTION +* Sets the link down default state of the port. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_port_info_set_link_down_def_state(IN ib_port_info_t * const p_pi, + IN const uint8_t link_dwn_state) +{ + p_pi->state_info2 = + (uint8_t) ((p_pi->state_info2 & 0xF0) | link_dwn_state); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* link_dwn_state +* [in] Link down default state of the port. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_link_speed_active +* NAME +* ib_port_info_get_link_speed_active +* +* DESCRIPTION +* Returns the Link Speed Active value assigned to this port. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_link_speed_active(IN const ib_port_info_t * const p_pi) +{ + return ((uint8_t) ((p_pi->link_speed & + IB_PORT_LINK_SPEED_ACTIVE_MASK) >> + IB_PORT_LINK_SPEED_SHIFT)); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* Returns the link speed active value assigned to this port. 
+* +* NOTES +* +* SEE ALSO +*********/ + +#define IB_LINK_WIDTH_ACTIVE_1X 1 +#define IB_LINK_WIDTH_ACTIVE_4X 2 +#define IB_LINK_WIDTH_ACTIVE_8X 4 +#define IB_LINK_WIDTH_ACTIVE_12X 8 +#define IB_LINK_WIDTH_ACTIVE_2X 16 +#define IB_LINK_WIDTH_1X_2X_4X_8X_OR_12X (IB_LINK_WIDTH_ACTIVE_1X | \ + IB_LINK_WIDTH_ACTIVE_2X | \ + IB_LINK_WIDTH_ACTIVE_4X | \ + IB_LINK_WIDTH_ACTIVE_8X | \ + IB_LINK_WIDTH_ACTIVE_12X) +#define IB_LINK_WIDTH_SET_LWS 255 +#define IB_LINK_SPEED_ACTIVE_EXTENDED 0 +#define IB_LINK_SPEED_ACTIVE_2_5 1 +#define IB_LINK_SPEED_ACTIVE_5 2 +#define IB_LINK_SPEED_ACTIVE_10 4 +#define IB_LINK_SPEED_2_5_5_OR_10 (IB_LINK_SPEED_ACTIVE_2_5 | \ + IB_LINK_SPEED_ACTIVE_5 | \ + IB_LINK_SPEED_ACTIVE_10) +#define IB_LINK_SPEED_SET_LSS 15 +#define IB_LINK_SPEED_EXT_ACTIVE_NONE 0 +#define IB_LINK_SPEED_EXT_ACTIVE_14 1 +#define IB_LINK_SPEED_EXT_ACTIVE_25 2 +#define IB_LINK_SPEED_EXT_ACTIVE_50 4 +#define IB_LINK_SPEED_EXT_14_25_OR_50 (IB_LINK_SPEED_EXT_ACTIVE_14 | \ + IB_LINK_SPEED_EXT_ACTIVE_25 | \ + IB_LINK_SPEED_EXT_ACTIVE_50) +#define IB_LINK_SPEED_EXT_DISABLE 30 +#define IB_LINK_SPEED_EXT_SET_LSES 31 + +/* following v1 ver1.3 p984 */ +#define IB_PATH_RECORD_RATE_2_5_GBS 2 +#define IB_PATH_RECORD_RATE_10_GBS 3 +#define IB_PATH_RECORD_RATE_30_GBS 4 +#define IB_PATH_RECORD_RATE_5_GBS 5 +#define IB_PATH_RECORD_RATE_20_GBS 6 +#define IB_PATH_RECORD_RATE_40_GBS 7 +#define IB_PATH_RECORD_RATE_60_GBS 8 +#define IB_PATH_RECORD_RATE_80_GBS 9 +#define IB_PATH_RECORD_RATE_120_GBS 10 +#define IB_PATH_RECORD_RATE_14_GBS 11 +#define IB_PATH_RECORD_RATE_56_GBS 12 +#define IB_PATH_RECORD_RATE_112_GBS 13 +#define IB_PATH_RECORD_RATE_168_GBS 14 +#define IB_PATH_RECORD_RATE_25_GBS 15 +#define IB_PATH_RECORD_RATE_100_GBS 16 +#define IB_PATH_RECORD_RATE_200_GBS 17 +#define IB_PATH_RECORD_RATE_300_GBS 18 +#define IB_PATH_RECORD_RATE_28_GBS 19 +#define IB_PATH_RECORD_RATE_50_GBS 20 +#define IB_PATH_RECORD_RATE_400_GBS 21 +#define IB_PATH_RECORD_RATE_600_GBS 22 + +#define IB_MIN_RATE 
IB_PATH_RECORD_RATE_2_5_GBS +#define IB_MAX_RATE IB_PATH_RECORD_RATE_600_GBS +#define IB_RATE_MAX IB_PATH_RECORD_RATE_600_GBS + +static inline uint8_t OSM_API +ib_port_info_get_link_speed_ext_active(IN const ib_port_info_t * const p_pi); + +/****f* IBA Base: Types/ib_port_info_compute_rate +* NAME +* ib_port_info_compute_rate +* +* DESCRIPTION +* Returns the encoded value for the path rate. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_compute_rate(IN const ib_port_info_t * const p_pi, + IN const int extended) +{ + uint8_t rate = 0; + + if (extended) { + switch (ib_port_info_get_link_speed_ext_active(p_pi)) { + case IB_LINK_SPEED_EXT_ACTIVE_14: + switch (p_pi->link_width_active) { + case IB_LINK_WIDTH_ACTIVE_1X: + rate = IB_PATH_RECORD_RATE_14_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_4X: + rate = IB_PATH_RECORD_RATE_56_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_8X: + rate = IB_PATH_RECORD_RATE_112_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_12X: + rate = IB_PATH_RECORD_RATE_168_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_2X: + rate = IB_PATH_RECORD_RATE_28_GBS; + break; + + default: + rate = IB_PATH_RECORD_RATE_14_GBS; + break; + } + break; + case IB_LINK_SPEED_EXT_ACTIVE_25: + switch (p_pi->link_width_active) { + case IB_LINK_WIDTH_ACTIVE_1X: + rate = IB_PATH_RECORD_RATE_25_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_4X: + rate = IB_PATH_RECORD_RATE_100_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_8X: + rate = IB_PATH_RECORD_RATE_200_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_12X: + rate = IB_PATH_RECORD_RATE_300_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_2X: + rate = IB_PATH_RECORD_RATE_50_GBS; + break; + + default: + rate = IB_PATH_RECORD_RATE_25_GBS; + break; + } + break; + case IB_LINK_SPEED_EXT_ACTIVE_50: + switch (p_pi->link_width_active) { + case IB_LINK_WIDTH_ACTIVE_1X: + rate = IB_PATH_RECORD_RATE_50_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_4X: + rate = IB_PATH_RECORD_RATE_200_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_8X: + 
rate = IB_PATH_RECORD_RATE_400_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_12X: + rate = IB_PATH_RECORD_RATE_600_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_2X: + rate = IB_PATH_RECORD_RATE_100_GBS; + break; + + default: + rate = IB_PATH_RECORD_RATE_50_GBS; + break; + } + break; + /* IB_LINK_SPEED_EXT_ACTIVE_NONE and any others */ + default: + break; + } + if (rate) + return rate; + } + + switch (ib_port_info_get_link_speed_active(p_pi)) { + case IB_LINK_SPEED_ACTIVE_2_5: + switch (p_pi->link_width_active) { + case IB_LINK_WIDTH_ACTIVE_1X: + rate = IB_PATH_RECORD_RATE_2_5_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_4X: + rate = IB_PATH_RECORD_RATE_10_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_8X: + rate = IB_PATH_RECORD_RATE_20_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_12X: + rate = IB_PATH_RECORD_RATE_30_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_2X: + rate = IB_PATH_RECORD_RATE_5_GBS; + break; + + default: + rate = IB_PATH_RECORD_RATE_2_5_GBS; + break; + } + break; + case IB_LINK_SPEED_ACTIVE_5: + switch (p_pi->link_width_active) { + case IB_LINK_WIDTH_ACTIVE_1X: + rate = IB_PATH_RECORD_RATE_5_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_4X: + rate = IB_PATH_RECORD_RATE_20_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_8X: + rate = IB_PATH_RECORD_RATE_40_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_12X: + rate = IB_PATH_RECORD_RATE_60_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_2X: + rate = IB_PATH_RECORD_RATE_10_GBS; + break; + + default: + rate = IB_PATH_RECORD_RATE_5_GBS; + break; + } + break; + case IB_LINK_SPEED_ACTIVE_10: + switch (p_pi->link_width_active) { + case IB_LINK_WIDTH_ACTIVE_1X: + rate = IB_PATH_RECORD_RATE_10_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_4X: + rate = IB_PATH_RECORD_RATE_40_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_8X: + rate = IB_PATH_RECORD_RATE_80_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_12X: + rate = IB_PATH_RECORD_RATE_120_GBS; + break; + + case IB_LINK_WIDTH_ACTIVE_2X: + rate = IB_PATH_RECORD_RATE_20_GBS; + break; + + 
default: + rate = IB_PATH_RECORD_RATE_10_GBS; + break; + } + break; + default: + rate = IB_PATH_RECORD_RATE_2_5_GBS; + break; + } + + return rate; +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* extended +* [in] Indicates whether or not to use extended link speeds. +* +* RETURN VALUES +* Returns the encoded value for the path rate. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_path_get_ipd +* NAME +* ib_path_get_ipd +* +* DESCRIPTION +* Returns the encoded value for the inter packet delay. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_path_get_ipd(IN uint8_t local_link_width_supported, IN uint8_t path_rec_rate) +{ + uint8_t ipd = 0; + + switch (local_link_width_supported) { + /* link_width_supported = 1: 1x */ + case 1: + break; + + /* link_width_supported = 3: 1x or 4x */ + case 3: + switch (path_rec_rate & 0x3F) { + case IB_PATH_RECORD_RATE_2_5_GBS: + ipd = 3; + break; + default: + break; + } + break; + + /* link_width_supported = 11: 1x or 4x or 12x */ + case 11: + switch (path_rec_rate & 0x3F) { + case IB_PATH_RECORD_RATE_2_5_GBS: + ipd = 11; + break; + case IB_PATH_RECORD_RATE_10_GBS: + ipd = 2; + break; + default: + break; + } + break; + + default: + break; + } + + return ipd; +} + +/* +* PARAMETERS +* local_link_width_supported +* [in] link width supported for this port +* +* path_rec_rate +* [in] rate field of the path record +* +* RETURN VALUES +* Returns the ipd +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_mtu_cap +* NAME +* ib_port_info_get_mtu_cap +* +* DESCRIPTION +* Returns the encoded value for the maximum MTU supported by this port. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_mtu_cap(IN const ib_port_info_t * const p_pi) +{ + return ((uint8_t) (p_pi->mtu_cap & 0x0F)); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute.
+* +* RETURN VALUES +* Returns the encoded value for the maximum MTU supported by this port. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_neighbor_mtu +* NAME +* ib_port_info_get_neighbor_mtu +* +* DESCRIPTION +* Returns the encoded value for the neighbor MTU supported by this port. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_neighbor_mtu(IN const ib_port_info_t * const p_pi) +{ + return ((uint8_t) ((p_pi->mtu_smsl & 0xF0) >> 4)); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* Returns the encoded value for the neighbor MTU at this port. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_set_neighbor_mtu +* NAME +* ib_port_info_set_neighbor_mtu +* +* DESCRIPTION +* Sets the Neighbor MTU value in the PortInfo attribute. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_port_info_set_neighbor_mtu(IN ib_port_info_t * const p_pi, + IN const uint8_t mtu) +{ + CL_ASSERT(mtu <= 5); + CL_ASSERT(mtu != 0); + p_pi->mtu_smsl = (uint8_t) ((p_pi->mtu_smsl & 0x0F) | (mtu << 4)); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* mtu +* [in] Encoded MTU value to set +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_master_smsl +* NAME +* ib_port_info_get_master_smsl +* +* DESCRIPTION +* Returns the encoded value for the Master SMSL at this port. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_master_smsl(IN const ib_port_info_t * const p_pi) +{ + return (uint8_t) (p_pi->mtu_smsl & 0x0F); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* Returns the encoded value for the Master SMSL at this port.
+* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_set_master_smsl +* NAME +* ib_port_info_set_master_smsl +* +* DESCRIPTION +* Sets the Master SMSL value in the PortInfo attribute. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_port_info_set_master_smsl(IN ib_port_info_t * const p_pi, + IN const uint8_t smsl) +{ + p_pi->mtu_smsl = (uint8_t) ((p_pi->mtu_smsl & 0xF0) | smsl); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* smsl +* [in] Encoded Master SMSL value to set +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_set_timeout +* NAME +* ib_port_info_set_timeout +* +* DESCRIPTION +* Sets the encoded subnet timeout value in the PortInfo attribute. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_port_info_set_timeout(IN ib_port_info_t * const p_pi, + IN const uint8_t timeout) +{ + CL_ASSERT(timeout <= 0x1F); + p_pi->subnet_timeout = + (uint8_t) ((p_pi->subnet_timeout & 0xE0) | (timeout & 0x1F)); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* timeout +* [in] Encoded timeout value to set +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_set_client_rereg +* NAME +* ib_port_info_set_client_rereg +* +* DESCRIPTION +* Sets the encoded client reregistration bit value in the PortInfo attribute. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_port_info_set_client_rereg(IN ib_port_info_t * const p_pi, + IN const uint8_t client_rereg) +{ + CL_ASSERT(client_rereg <= 0x1); + p_pi->subnet_timeout = + (uint8_t) ((p_pi->subnet_timeout & 0x7F) | (client_rereg << 7)); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* client_rereg +* [in] Client reregistration value to set (either 1 or 0). +* +* RETURN VALUES +* None.
+* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_set_mcast_pkey_trap_suppress +* NAME +* ib_port_info_set_mcast_pkey_trap_suppress +* +* DESCRIPTION +* Sets the encoded multicast pkey trap suppression enabled bits value +* in the PortInfo attribute. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_port_info_set_mcast_pkey_trap_suppress(IN ib_port_info_t * const p_pi, + IN const uint8_t trap_suppress) +{ + CL_ASSERT(trap_suppress <= 0x1); + p_pi->subnet_timeout = + (uint8_t) ((p_pi->subnet_timeout & 0x9F) | (trap_suppress << 5)); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* trap_suppress +* [in] Multicast pkey trap suppression enabled value to set +* (either 1 or 0). +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_timeout +* NAME +* ib_port_info_get_timeout +* +* DESCRIPTION +* Gets the encoded subnet timeout value in the PortInfo attribute. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_timeout(IN ib_port_info_t const *p_pi) +{ + return (p_pi->subnet_timeout & 0x1F); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* The encoded timeout value +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_link_speed_ext_active +* NAME +* ib_port_info_get_link_speed_ext_active +* +* DESCRIPTION +* Gets the encoded LinkSpeedExtActive value in the PortInfo attribute. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_link_speed_ext_active(IN const ib_port_info_t * const p_pi) +{ + return ((p_pi->link_speed_ext & 0xF0) >> 4); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. 
+* +* RETURN VALUES +* The encoded LinkSpeedExtActive value +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_link_speed_ext_sup +* NAME +* ib_port_info_get_link_speed_ext_sup +* +* DESCRIPTION +* Returns the encoded value for the link speed extended supported. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_link_speed_ext_sup(IN const ib_port_info_t * const p_pi) +{ + return (p_pi->link_speed_ext & 0x0F); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* The encoded LinkSpeedExtSupported value +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_link_speed_ext_enabled +* NAME +* ib_port_info_get_link_speed_ext_enabled +* +* DESCRIPTION +* Gets the encoded LinkSpeedExtEnabled value in the PortInfo attribute. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_link_speed_ext_enabled(IN const ib_port_info_t * const p_pi) +{ + return (p_pi->link_speed_ext_enabled & 0x1F); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* The encoded LinkSpeedExtEnabled value +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_set_link_speed_ext_enabled +* NAME +* ib_port_info_set_link_speed_ext_enabled +* +* DESCRIPTION +* Sets the link speed extended enabled value in the PortInfo attribute. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_port_info_set_link_speed_ext_enabled(IN ib_port_info_t * const p_pi, + IN const uint8_t link_speed_ext_enabled) +{ + CL_ASSERT(link_speed_ext_enabled <= 0x1F); + p_pi->link_speed_ext_enabled = link_speed_ext_enabled & 0x1F; +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* link_speed_ext_enabled +* [in] link speed extehded enabled value to set. 
+* +* RETURN VALUES +* The encoded LinkSpeedExtEnabled value +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_resp_time_value +* NAME +* ib_port_info_get_resp_time_value +* +* DESCRIPTION +* Gets the encoded resp time value in the PortInfo attribute. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_resp_time_value(IN const ib_port_info_t * const p_pi) +{ + return (p_pi->resp_time_value & 0x1F); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* The encoded resp time value +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_client_rereg +* NAME +* ib_port_info_get_client_rereg +* +* DESCRIPTION +* Gets the encoded client reregistration bit value in the PortInfo attribute. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_client_rereg(IN ib_port_info_t const *p_pi) +{ + return ((p_pi->subnet_timeout & 0x80) >> 7); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* Client reregistration value (either 1 or 0). +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_mcast_pkey_trap_suppress +* NAME +* ib_port_info_get_mcast_pkey_trap_suppress +* +* DESCRIPTION +* Gets the encoded multicast pkey trap suppression enabled bits value +* in the PortInfo attribute. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_mcast_pkey_trap_suppress(IN ib_port_info_t const *p_pi) +{ + return ((p_pi->subnet_timeout & 0x60) >> 5); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* Multicast PKey trap suppression enabled value (either 1 or 0). 
+* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_set_hoq_lifetime +* NAME +* ib_port_info_set_hoq_lifetime +* +* DESCRIPTION +* Sets the Head of Queue Lifetime for which a packet can live in the head +* of VL queue +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_port_info_set_hoq_lifetime(IN ib_port_info_t * const p_pi, + IN const uint8_t hoq_life) +{ + p_pi->vl_stall_life = (uint8_t) ((hoq_life & 0x1f) | + (p_pi->vl_stall_life & 0xe0)); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* hoq_life +* [in] Encoded lifetime value to set +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_hoq_lifetime +* NAME +* ib_port_info_get_hoq_lifetime +* +* DESCRIPTION +* Gets the Head of Queue Lifetime for which a packet can live in the head +* of VL queue +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_hoq_lifetime(IN const ib_port_info_t * const p_pi) +{ + return ((uint8_t) (p_pi->vl_stall_life & 0x1f)); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* Encoded lifetime value +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_set_vl_stall_count +* NAME +* ib_port_info_set_vl_stall_count +* +* DESCRIPTION +* Sets the VL Stall Count which define the number of contiguous +* HLL (hoq) drops that will put the VL into stalled mode. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_port_info_set_vl_stall_count(IN ib_port_info_t * const p_pi, + IN const uint8_t vl_stall_count) +{ + p_pi->vl_stall_life = (uint8_t) ((p_pi->vl_stall_life & 0x1f) | + ((vl_stall_count << 5) & 0xe0)); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* vl_stall_count +* [in] value to set +* +* RETURN VALUES +* None. 
+* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_vl_stall_count +* NAME +* ib_port_info_get_vl_stall_count +* +* DESCRIPTION +* Gets the VL Stall Count which define the number of contiguous +* HLL (hoq) drops that will put the VL into stalled mode +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_vl_stall_count(IN const ib_port_info_t * const p_pi) +{ + return ((uint8_t) (p_pi->vl_stall_life & 0xe0) >> 5); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* vl stall count +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_lmc +* NAME +* ib_port_info_get_lmc +* +* DESCRIPTION +* Returns the LMC value assigned to this port. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_lmc(IN const ib_port_info_t * const p_pi) +{ + return ((uint8_t) (p_pi->mkey_lmc & IB_PORT_LMC_MASK)); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* Returns the LMC value assigned to this port. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_set_lmc +* NAME +* ib_port_info_set_lmc +* +* DESCRIPTION +* Sets the LMC value in the PortInfo attribute. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_port_info_set_lmc(IN ib_port_info_t * const p_pi, IN const uint8_t lmc) +{ + CL_ASSERT(lmc <= IB_PORT_LMC_MAX); + p_pi->mkey_lmc = (uint8_t) ((p_pi->mkey_lmc & 0xF8) | lmc); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* lmc +* [in] LMC value to set, must be less than 7. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_link_speed_enabled +* NAME +* ib_port_info_get_link_speed_enabled +* +* DESCRIPTION +* Returns the link speed enabled value assigned to this port. 
+* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_link_speed_enabled(IN const ib_port_info_t * const p_pi) +{ + return ((uint8_t) (p_pi->link_speed & IB_PORT_LINK_SPEED_ENABLED_MASK)); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* Port state. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_set_link_speed_enabled +* NAME +* ib_port_info_set_link_speed_enabled +* +* DESCRIPTION +* Sets the link speed enabled value in the PortInfo attribute. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_port_info_set_link_speed_enabled(IN ib_port_info_t * const p_pi, + IN const uint8_t link_speed_enabled) +{ + p_pi->link_speed = + (uint8_t) ((p_pi->link_speed & 0xF0) | link_speed_enabled); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* link_speed_enabled +* [in] link speed enabled value to set. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_mpb +* NAME +* ib_port_info_get_mpb +* +* DESCRIPTION +* Returns the M_Key protect bits assigned to this port. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_mpb(IN const ib_port_info_t * const p_pi) +{ + return ((uint8_t) ((p_pi->mkey_lmc & IB_PORT_MPB_MASK) >> + IB_PORT_MPB_SHIFT)); +} + +/* +* PARAMETERS +* p_ni +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* Returns the M_Key protect bits assigned to this port. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_set_mpb +* NAME +* ib_port_info_set_mpb +* +* DESCRIPTION +* Set the M_Key protect bits of this port. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_port_info_set_mpb(IN ib_port_info_t * p_pi, IN uint8_t mpb) +{ + p_pi->mkey_lmc = + (~IB_PORT_MPB_MASK & p_pi->mkey_lmc) | + (IB_PORT_MPB_MASK & (mpb << IB_PORT_MPB_SHIFT)); +} + +/* +* PARAMETERS +* mpb +* [in] M_Key protect bits +* p_ni +* [in] Pointer to a PortInfo attribute. 
+* +* RETURN VALUES +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_local_phy_err_thd +* NAME +* ib_port_info_get_local_phy_err_thd +* +* DESCRIPTION +* Returns the Phy Link Threshold +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_local_phy_err_thd(IN const ib_port_info_t * const p_pi) +{ + return (uint8_t) ((p_pi->error_threshold & 0xF0) >> 4); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* Returns the Phy Link error threshold assigned to this port. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_get_overrun_err_thd +* NAME +* ib_port_info_get_local_overrun_err_thd +* +* DESCRIPTION +* Returns the Credits Overrun Errors Threshold +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_port_info_get_overrun_err_thd(IN const ib_port_info_t * const p_pi) +{ + return (uint8_t) (p_pi->error_threshold & 0x0F); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* RETURN VALUES +* Returns the Credits Overrun errors threshold assigned to this port. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_port_info_set_phy_and_overrun_err_thd +* NAME +* ib_port_info_set_phy_and_overrun_err_thd +* +* DESCRIPTION +* Sets the Phy Link and Credits Overrun Errors Threshold +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_port_info_set_phy_and_overrun_err_thd(IN ib_port_info_t * const p_pi, + IN uint8_t phy_threshold, + IN uint8_t overrun_threshold) +{ + p_pi->error_threshold = + (uint8_t) (((phy_threshold & 0x0F) << 4) | + (overrun_threshold & 0x0F)); +} + +/* +* PARAMETERS +* p_pi +* [in] Pointer to a PortInfo attribute. +* +* phy_threshold +* [in] Physical Link Errors Threshold above which Trap 129 is generated +* +* overrun_threshold +* [in] Credits overrun Errors Threshold above which Trap 129 is generated +* +* RETURN VALUES +* None. 
+*
+* NOTES
+*
+* SEE ALSO
+*********/
+
+/****f* IBA Base: Types/ib_port_info_get_m_key
+* NAME
+*	ib_port_info_get_m_key
+*
+* DESCRIPTION
+*	Reads the m_key member of the PortInfo attribute.
+*
+* SYNOPSIS
+*/
+static inline ib_net64_t OSM_API
+ib_port_info_get_m_key(IN const ib_port_info_t * const p_pi)
+{
+	const ib_net64_t current_m_key = p_pi->m_key;
+
+	return current_m_key;
+}
+
+/*
+* PARAMETERS
+*	p_pi
+*		[in] Pointer to a PortInfo attribute.
+*
+* RETURN VALUES
+*	M_Key, returned as stored (ib_net64_t).
+*
+* NOTES
+*
+* SEE ALSO
+*********/
+
+/****f* IBA Base: Types/ib_port_info_set_m_key
+* NAME
+*	ib_port_info_set_m_key
+*
+* DESCRIPTION
+*	Overwrites the m_key member of the PortInfo attribute.
+*
+* SYNOPSIS
+*/
+static inline void OSM_API
+ib_port_info_set_m_key(IN ib_port_info_t * const p_pi, IN ib_net64_t m_key)
+{
+	/* Stored as given; no byte-order conversion is performed here. */
+	(*p_pi).m_key = m_key;
+}
+
+/*
+* PARAMETERS
+*	p_pi
+*		[in] Pointer to a PortInfo attribute.
+*	m_key
+*		[in] M_Key value.
+*
+* RETURN VALUES
+*	None.
+*
+* NOTES
+*
+* SEE ALSO
+*********/
+
+#define FDR10 0x01
+
+/****f* IBA Base: Types/ib_get_highest_link_speed
+* NAME
+*	ib_get_highest_link_speed
+*
+* DESCRIPTION
+*	Returns the highest link speed encoded in the given bit field.
+*
+* SYNOPSIS
+*/
+static inline uint8_t OSM_API ib_get_highest_link_speed(IN const uint8_t speeds)
+{
+	/* The upper nibble carries the extended speed bits. */
+	const uint8_t ext = (speeds >> 4);
+
+	/* Probe from the fastest speed downwards; first hit wins. */
+	if (ext & IB_LINK_SPEED_EXT_ACTIVE_50)
+		return IB_LINK_SPEED_EXT_ACTIVE_50 << 4;
+	if (ext & IB_LINK_SPEED_EXT_ACTIVE_25)
+		return IB_LINK_SPEED_EXT_ACTIVE_25 << 4;
+	if (ext & IB_LINK_SPEED_EXT_ACTIVE_14)
+		return IB_LINK_SPEED_EXT_ACTIVE_14 << 4;
+	if (speeds & (FDR10 << 3))
+		return FDR10 << 3;
+	if (speeds & IB_LINK_SPEED_ACTIVE_10)
+		return IB_LINK_SPEED_ACTIVE_10;
+	if (speeds & IB_LINK_SPEED_ACTIVE_5)
+		return IB_LINK_SPEED_ACTIVE_5;
+	if (speeds & IB_LINK_SPEED_ACTIVE_2_5)
+		return IB_LINK_SPEED_ACTIVE_2_5;
+
+	/* No known speed bit is set. */
+	return 0;
+}
+
+/*
+* PARAMETERS
+*	speeds
+*		[in] The bit field for the supported or enabled link speeds,
+*		where the 3 LSBs of `speeds' encode the last 3 bits of
+*		LinkSpeedSupported or LinkSpeedEnabled, the 4th bit encodes
+*		supported/enabled FDR10 and the first 4 bits of `speeds'
+*		encode either all bits of LinkSpeedExtSupported or the last
+*		4 bits of the 5-bit long LinkSpeedExtEnabled component.
+*
+* RETURN VALUES
+*	Returns the highest link speed encoded in the given bit field, e.g.
+*	a return value of 2 indicates DDR, 8 indicates FDR10, and a return value
+*	of 32 (or 00100000 in bit) would indicate EDR speed.
+*	Returns 0 when none of the tested speed bits is set.
+*
+* NOTES
+*	This fn most likely will only support up to NDR speeds, because the
+*	LinkSpeedExtActive component is defined as 4 bits wide (and HDR already
+*	occupies the 3rd bit).
+*
+* SEE ALSO
+*********/
+
+/****f* IBA Base: Types/ib_get_highest_link_width
+* NAME
+*	ib_get_highest_link_width
+*
+* DESCRIPTION
+*	Returns the highest link width encoded in the given bit field.
+* +* SYNOPSIS +*/ +static inline uint8_t OSM_API ib_get_highest_link_width(IN const uint8_t widths) +{ + uint8_t ret = 0; + + if (widths & IB_LINK_WIDTH_ACTIVE_12X) + ret = IB_LINK_WIDTH_ACTIVE_12X; + else if (widths & IB_LINK_WIDTH_ACTIVE_8X) + ret = IB_LINK_WIDTH_ACTIVE_8X; + else if (widths & IB_LINK_WIDTH_ACTIVE_4X) + ret = IB_LINK_WIDTH_ACTIVE_4X; + else if (widths & IB_LINK_WIDTH_ACTIVE_2X) + ret = IB_LINK_WIDTH_ACTIVE_2X; + else if (widths & IB_LINK_WIDTH_ACTIVE_1X) + ret = IB_LINK_WIDTH_ACTIVE_1X; + + return ret; +} + +/* +* PARAMETERS +* widths +* [in] The bit field for the supported or enabled link widths. +* +* RETURN VALUES +* Returns the highest link width encoded in the given bit field. +* +* NOTES +* +* SEE ALSO +*********/ + + +/****s* IBA Base: Types/ib_mlnx_ext_port_info_t +* NAME +* ib_mlnx_ext_port_info_t +* +* DESCRIPTION +* Mellanox ExtendedPortInfo (Vendor specific SM class attribute). +* +* SYNOPSIS +*/ +typedef struct _ib_mlnx_ext_port_info { + uint8_t resvd1[3]; + uint8_t state_change_enable; + uint8_t resvd2[3]; + uint8_t link_speed_supported; + uint8_t resvd3[3]; + uint8_t link_speed_enabled; + uint8_t resvd4[3]; + uint8_t link_speed_active; + uint8_t resvd5[48]; +} ib_mlnx_ext_port_info_t; +/************/ + +typedef uint8_t ib_svc_name_t[64]; + +typedef struct _ib_service_record { + ib_net64_t service_id; + ib_gid_t service_gid; + ib_net16_t service_pkey; + ib_net16_t resv; + ib_net32_t service_lease; + uint8_t service_key[16]; + ib_svc_name_t service_name; + uint8_t service_data8[16]; + ib_net16_t service_data16[8]; + ib_net32_t service_data32[4]; + ib_net64_t service_data64[2]; +} ib_service_record_t; + +#include +typedef struct _ib_portinfo_record { + ib_net16_t lid; + uint8_t port_num; + uint8_t options; + ib_port_info_t port_info; + uint8_t pad[4]; +} PACK_SUFFIX ib_portinfo_record_t; +#include + +typedef struct _ib_portinfoext_record { + ib_net16_t lid; + uint8_t port_num; + uint8_t options; + ib_port_info_ext_t port_info_ext; +} 
ib_portinfoext_record_t; + +typedef struct _ib_link_record { + ib_net16_t from_lid; + uint8_t from_port_num; + uint8_t to_port_num; + ib_net16_t to_lid; + uint8_t pad[2]; +} ib_link_record_t; + +#include +typedef struct _ib_sminfo_record { + ib_net16_t lid; + uint16_t resv0; + ib_sm_info_t sm_info; + uint8_t pad[7]; +} PACK_SUFFIX ib_sminfo_record_t; +#include + +/****s* IBA Base: Types/ib_lft_record_t +* NAME +* ib_lft_record_t +* +* DESCRIPTION +* IBA defined LinearForwardingTableRecord (15.2.5.6) +* +* SYNOPSIS +*/ +typedef struct _ib_lft_record { + ib_net16_t lid; + ib_net16_t block_num; + uint32_t resv0; + uint8_t lft[64]; +} ib_lft_record_t; +/************/ + +/****s* IBA Base: Types/ib_mft_record_t +* NAME +* ib_mft_record_t +* +* DESCRIPTION +* IBA defined MulticastForwardingTableRecord (15.2.5.8) +* +* SYNOPSIS +*/ +typedef struct _ib_mft_record { + ib_net16_t lid; + ib_net16_t position_block_num; + uint32_t resv0; + ib_net16_t mft[IB_MCAST_BLOCK_SIZE]; +} ib_mft_record_t; +/************/ + +/****s* IBA Base: Types/ib_switch_info_t +* NAME +* ib_switch_info_t +* +* DESCRIPTION +* IBA defined SwitchInfo. (14.2.5.4) +* +* SYNOPSIS +*/ +typedef struct _ib_switch_info { + ib_net16_t lin_cap; + ib_net16_t rand_cap; + ib_net16_t mcast_cap; + ib_net16_t lin_top; + uint8_t def_port; + uint8_t def_mcast_pri_port; + uint8_t def_mcast_not_port; + uint8_t life_state; + ib_net16_t lids_per_port; + ib_net16_t enforce_cap; + uint8_t flags; + uint8_t resvd; + ib_net16_t mcast_top; +} ib_switch_info_t; +/************/ + +typedef struct _ib_switch_info_record { + ib_net16_t lid; + uint16_t resv0; + ib_switch_info_t switch_info; +} ib_switch_info_record_t; + +#define IB_SWITCH_PSC 0x04 + +/****f* IBA Base: Types/ib_switch_info_get_state_change +* NAME +* ib_switch_info_get_state_change +* +* DESCRIPTION +* Returns the value of the state change flag. 
+* +* SYNOPSIS +*/ +static inline boolean_t OSM_API +ib_switch_info_get_state_change(IN const ib_switch_info_t * const p_si) +{ + return ((p_si->life_state & IB_SWITCH_PSC) == IB_SWITCH_PSC); +} + +/* +* PARAMETERS +* p_si +* [in] Pointer to a SwitchInfo attribute. +* +* RETURN VALUES +* Returns the value of the state change flag. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_switch_info_clear_state_change +* NAME +* ib_switch_info_clear_state_change +* +* DESCRIPTION +* Clears the switch's state change bit. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_switch_info_clear_state_change(IN ib_switch_info_t * const p_si) +{ + p_si->life_state = (uint8_t) (p_si->life_state & 0xFB); +} + +/* +* PARAMETERS +* p_si +* [in] Pointer to a SwitchInfo attribute. +* +* RETURN VALUES +* None +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_switch_info_state_change_set +* NAME +* ib_switch_info_state_change_set +* +* DESCRIPTION +* Clears the switch's state change bit. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_switch_info_state_change_set(IN ib_switch_info_t * const p_si) +{ + p_si->life_state = (uint8_t) ((p_si->life_state & ~IB_SWITCH_PSC) | IB_SWITCH_PSC); +} + +/* +* PARAMETERS +* p_si +* [in] Pointer to a SwitchInfo attribute. +* +* RETURN VALUES +* None +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_switch_info_get_opt_sl2vlmapping +* NAME +* ib_switch_info_get_state_opt_sl2vlmapping +* +* DESCRIPTION +* Returns the value of the optimized SLtoVLMapping programming flag. +* +* SYNOPSIS +*/ +static inline boolean_t OSM_API +ib_switch_info_get_opt_sl2vlmapping(IN const ib_switch_info_t * const p_si) +{ + return ((p_si->life_state & 0x01) == 0x01); +} + +/* +* PARAMETERS +* p_si +* [in] Pointer to a SwitchInfo attribute. +* +* RETURN VALUES +* Returns the value of the optimized SLtoVLMapping programming flag. 
+*
+* NOTES
+*
+* SEE ALSO
+*********/
+
+/****f* IBA Base: Types/ib_switch_info_set_life_time
+* NAME
+*	ib_switch_info_set_life_time
+*
+* DESCRIPTION
+*	Sets the value of LifeTimeValue.
+*	LifeTimeValue is stored in bits 3-7 of life_state; the three low
+*	bits (state change and SLtoVLMapping flags) are preserved.
+*
+* SYNOPSIS
+*/
+static inline void OSM_API
+ib_switch_info_set_life_time(IN ib_switch_info_t * const p_si,
+			     IN const uint8_t life_time_val)
+{
+	/*
+	 * Preserve only bits 0-2. Keeping 0x1f here would leave bits 3-4
+	 * of the previous lifetime in place and OR the new value on top
+	 * of them, so a smaller lifetime could never be written.
+	 */
+	p_si->life_state = (uint8_t) ((p_si->life_state & 0x07) |
+				      (life_time_val << 3));
+}
+
+/*
+* PARAMETERS
+*	p_si
+*		[in] Pointer to a SwitchInfo attribute.
+*	life_time_val
+*		[in] LifeTimeValue (five-bit field).
+*
+* RETURN VALUES
+*	None.
+*
+* NOTES
+*
+* SEE ALSO
+*********/
+
+/****f* IBA Base: Types/ib_switch_info_is_enhanced_port0
+* NAME
+*	ib_switch_info_is_enhanced_port0
+*
+* DESCRIPTION
+*	Returns TRUE if the enhancedPort0 bit is on (meaning the switch
+*	port zero supports enhanced functions).
+*	Returns FALSE otherwise.
+*
+* SYNOPSIS
+*/
+static inline boolean_t OSM_API
+ib_switch_info_is_enhanced_port0(IN const ib_switch_info_t * const p_si)
+{
+	/* The enhancedPort0 bit is 0x08 in the flags byte. */
+	return ((p_si->flags & 0x08) == 0x08);
+}
+
+/*
+* PARAMETERS
+*	p_si
+*		[in] Pointer to a SwitchInfo attribute.
+*
+* RETURN VALUES
+*	Returns TRUE if the switch supports enhanced port 0. FALSE otherwise.
+*
+* NOTES
+*
+* SEE ALSO
+*********/
+
+/****s* IBA Base: Types/ib_guid_info_t
+* NAME
+*	ib_guid_info_t
+*
+* DESCRIPTION
+*	IBA defined GuidInfo.
(14.2.5.5)
+*
+* SYNOPSIS
+*/
+#define GUID_TABLE_MAX_ENTRIES 8
+
+typedef struct _ib_guid_info {
+	ib_net64_t guid[GUID_TABLE_MAX_ENTRIES];
+} ib_guid_info_t;
+/************/
+
+typedef struct _ib_guidinfo_record {
+	ib_net16_t lid;
+	uint8_t block_num;
+	uint8_t resv;
+	uint32_t reserved;
+	ib_guid_info_t guid_info;
+} ib_guidinfo_record_t;
+
+#define IB_MULTIPATH_MAX_GIDS 11	/* Support max that can fit into first MAD (for now) */
+
+typedef struct _ib_multipath_rec_t {
+	ib_net32_t hop_flow_raw;
+	uint8_t tclass;
+	uint8_t num_path;
+	ib_net16_t pkey;
+	ib_net16_t qos_class_sl;
+	uint8_t mtu;
+	uint8_t rate;
+	uint8_t pkt_life;
+	uint8_t service_id_8msb;
+	uint8_t independence;	/* formerly resv2 */
+	uint8_t sgid_count;
+	uint8_t dgid_count;
+	uint8_t service_id_56lsb[7];
+	ib_gid_t gids[IB_MULTIPATH_MAX_GIDS];
+} ib_multipath_rec_t;
+/*
+* FIELDS
+*	hop_flow_raw
+*		Global routing parameters: hop count, flow label and raw bit.
+*
+*	tclass
+*		Another global routing parameter.
+*
+*	num_path
+*		Reversible path - 1 bit to say if path is reversible.
+*		num_path [6:0] In queries, maximum number of paths to return.
+*		In responses, undefined.
+*
+*	pkey
+*		Partition key (P_Key) to use on this path.
+*
+*	qos_class_sl
+*		QoS class and service level to use on this path.
+*
+*	mtu
+*		MTU and MTU selector fields to use on this path
+*
+*	rate
+*		Rate and rate selector fields to use on this path.
+*
+*	pkt_life
+*		Packet lifetime
+*
+*	service_id_8msb
+*		8 most significant bits of Service ID
+*
+*	independence
+*		Byte formerly reserved (resv2).
+*		NOTE(review): presumably the independence requirement for
+*		the returned paths - confirm against the IBA specification.
+*
+*	sgid_count
+*		NOTE(review): presumably the number of source GIDs held in
+*		gids - confirm.
+*
+*	dgid_count
+*		NOTE(review): presumably the number of destination GIDs held
+*		in gids - confirm.
+*
+*	service_id_56lsb
+*		56 least significant bits of Service ID
+*
+*	gids
+*		Array of up to IB_MULTIPATH_MAX_GIDS GIDs.
+*
+*	preference
+*		Indicates the relative merit of this path versus other path
+*		records returned from the SA. Lower numbers are better.
+*		NOTE(review): this structure has no member named preference;
+*		entry kept from the original text - confirm whether it
+*		should be dropped.
+*
+* SEE ALSO
+*********/
+
+/****f* IBA Base: Types/ib_multipath_rec_num_path
+* NAME
+*	ib_multipath_rec_num_path
+*
+* DESCRIPTION
+*	Get max number of paths to return.
+* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_multipath_rec_num_path(IN const ib_multipath_rec_t * const p_rec) +{ + return (p_rec->num_path & 0x7F); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the multipath record object. +* +* RETURN VALUES +* Maximum number of paths to return for each unique SGID_DGID combination. +* +* NOTES +* +* SEE ALSO +* ib_multipath_rec_t +*********/ + +/****f* IBA Base: Types/ib_multipath_rec_set_sl +* NAME +* ib_multipath_rec_set_sl +* +* DESCRIPTION +* Set path service level. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_multipath_rec_set_sl( + IN ib_multipath_rec_t* const p_rec, + IN const uint8_t sl ) +{ + p_rec->qos_class_sl = + (p_rec->qos_class_sl & CL_HTON16(IB_MULTIPATH_REC_QOS_CLASS_MASK)) | + cl_hton16(sl & IB_MULTIPATH_REC_SL_MASK); +} +/* +* PARAMETERS +* p_rec +* [in] Pointer to the MultiPath record object. +* +* sl +* [in] Service level to set. +* +* RETURN VALUES +* None +* +* NOTES +* +* SEE ALSO +* ib_multipath_rec_t +*********/ + +/****f* IBA Base: Types/ib_multipath_rec_sl +* NAME +* ib_multipath_rec_sl +* +* DESCRIPTION +* Get multipath service level. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_multipath_rec_sl(IN const ib_multipath_rec_t * const p_rec) +{ + return ((uint8_t) ((cl_ntoh16(p_rec->qos_class_sl)) & IB_MULTIPATH_REC_SL_MASK)); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the multipath record object. +* +* RETURN VALUES +* SL. +* +* NOTES +* +* SEE ALSO +* ib_multipath_rec_t +*********/ + +/****f* IBA Base: Types/ib_multipath_rec_set_qos_class +* NAME +* ib_multipath_rec_set_qos_class +* +* DESCRIPTION +* Set path QoS class. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_multipath_rec_set_qos_class( + IN ib_multipath_rec_t* const p_rec, + IN const uint16_t qos_class ) +{ + p_rec->qos_class_sl = + (p_rec->qos_class_sl & CL_HTON16(IB_MULTIPATH_REC_SL_MASK)) | + cl_hton16(qos_class << 4); +} +/* +* PARAMETERS +* p_rec +* [in] Pointer to the MultiPath record object. 
+* +* qos_class +* [in] QoS class to set. +* +* RETURN VALUES +* None +* +* NOTES +* +* SEE ALSO +* ib_multipath_rec_t +*********/ + +/****f* IBA Base: Types/ib_multipath_rec_qos_class +* NAME +* ib_multipath_rec_qos_class +* +* DESCRIPTION +* Get QoS class. +* +* SYNOPSIS +*/ +static inline uint16_t OSM_API +ib_multipath_rec_qos_class( + IN const ib_multipath_rec_t* const p_rec ) +{ + return (cl_ntoh16( p_rec->qos_class_sl ) >> 4); +} +/* +* PARAMETERS +* p_rec +* [in] Pointer to the MultiPath record object. +* +* RETURN VALUES +* QoS class of the MultiPath record. +* +* NOTES +* +* SEE ALSO +* ib_multipath_rec_t +*********/ + +/****f* IBA Base: Types/ib_multipath_rec_mtu +* NAME +* ib_multipath_rec_mtu +* +* DESCRIPTION +* Get encoded path MTU. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_multipath_rec_mtu(IN const ib_multipath_rec_t * const p_rec) +{ + return ((uint8_t) (p_rec->mtu & IB_MULTIPATH_REC_BASE_MASK)); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the multipath record object. +* +* RETURN VALUES +* Encoded path MTU. +* 1: 256 +* 2: 512 +* 3: 1024 +* 4: 2048 +* 5: 4096 +* others: reserved +* +* NOTES +* +* SEE ALSO +* ib_multipath_rec_t +*********/ + +/****f* IBA Base: Types/ib_multipath_rec_mtu_sel +* NAME +* ib_multipath_rec_mtu_sel +* +* DESCRIPTION +* Get encoded multipath MTU selector. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_multipath_rec_mtu_sel(IN const ib_multipath_rec_t * const p_rec) +{ + return ((uint8_t) ((p_rec->mtu & IB_MULTIPATH_REC_SELECTOR_MASK) >> 6)); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the multipath record object. +* +* RETURN VALUES +* Encoded path MTU selector value (for queries). 
+* 0: greater than MTU specified +* 1: less than MTU specified +* 2: exactly the MTU specified +* 3: largest MTU available +* +* NOTES +* +* SEE ALSO +* ib_multipath_rec_t +*********/ + +/****f* IBA Base: Types/ib_multipath_rec_rate +* NAME +* ib_multipath_rec_rate +* +* DESCRIPTION +* Get encoded multipath rate. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_multipath_rec_rate(IN const ib_multipath_rec_t * const p_rec) +{ + return ((uint8_t) (p_rec->rate & IB_MULTIPATH_REC_BASE_MASK)); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the multipath record object. +* +* RETURN VALUES +* Encoded multipath rate. +* 2: 2.5 Gb/sec. +* 3: 10 Gb/sec. +* 4: 30 Gb/sec. +* 5: 5 Gb/sec. +* 6: 20 Gb/sec. +* 7: 40 Gb/sec. +* 8: 60 Gb/sec. +* 9: 80 Gb/sec. +* 10: 120 Gb/sec. +* 11: 14 Gb/sec. +* 12: 56 Gb/sec. +* 13: 112 Gb/sec. +* 14: 168 Gb/sec. +* 15: 25 Gb/sec. +* 16: 100 Gb/sec. +* 17: 200 Gb/sec. +* 18: 300 Gb/sec. +* 19: 28 Gb/sec. +* 20: 50 Gb/sec. +* 21: 400 Gb/sec. +* 22: 600 Gb/sec. +* others: reserved +* +* NOTES +* +* SEE ALSO +* ib_multipath_rec_t +*********/ + +/****f* IBA Base: Types/ib_multipath_rec_rate_sel +* NAME +* ib_multipath_rec_rate_sel +* +* DESCRIPTION +* Get encoded multipath rate selector. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_multipath_rec_rate_sel(IN const ib_multipath_rec_t * const p_rec) +{ + return ((uint8_t) + ((p_rec->rate & IB_MULTIPATH_REC_SELECTOR_MASK) >> 6)); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the multipath record object. +* +* RETURN VALUES +* Encoded path rate selector value (for queries). +* 0: greater than rate specified +* 1: less than rate specified +* 2: exactly the rate specified +* 3: largest rate available +* +* NOTES +* +* SEE ALSO +* ib_multipath_rec_t +*********/ + +/****f* IBA Base: Types/ib_multipath_rec_pkt_life +* NAME +* ib_multipath_rec_pkt_life +* +* DESCRIPTION +* Get encoded multipath pkt_life. 
+* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_multipath_rec_pkt_life(IN const ib_multipath_rec_t * const p_rec) +{ + return ((uint8_t) (p_rec->pkt_life & IB_MULTIPATH_REC_BASE_MASK)); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the multipath record object. +* +* RETURN VALUES +* Encoded multipath pkt_life = 4.096 usec * 2 ** PacketLifeTime. +* +* NOTES +* +* SEE ALSO +* ib_multipath_rec_t +*********/ + +/****f* IBA Base: Types/ib_multipath_rec_pkt_life_sel +* NAME +* ib_multipath_rec_pkt_life_sel +* +* DESCRIPTION +* Get encoded multipath pkt_lifetime selector. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_multipath_rec_pkt_life_sel(IN const ib_multipath_rec_t * const p_rec) +{ + return ((uint8_t) + ((p_rec->pkt_life & IB_MULTIPATH_REC_SELECTOR_MASK) >> 6)); +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the multipath record object. +* +* RETURN VALUES +* Encoded path pkt_lifetime selector value (for queries). +* 0: greater than rate specified +* 1: less than rate specified +* 2: exactly the rate specified +* 3: smallest packet lifetime available +* +* NOTES +* +* SEE ALSO +* ib_multipath_rec_t +*********/ + +/****f* IBA Base: Types/ib_multipath_rec_service_id +* NAME +* ib_multipath_rec_service_id +* +* DESCRIPTION +* Get multipath service id. +* +* SYNOPSIS +*/ +static inline ib_net64_t OSM_API +ib_multipath_rec_service_id(IN const ib_multipath_rec_t * const p_rec) +{ + union { + ib_net64_t sid; + uint8_t sid_arr[8]; + } sid_union; + sid_union.sid_arr[0] = p_rec->service_id_8msb; + memcpy(&sid_union.sid_arr[1], p_rec->service_id_56lsb, 7); + return sid_union.sid; +} + +/* +* PARAMETERS +* p_rec +* [in] Pointer to the multipath record object. +* +* RETURN VALUES +* Service ID +* +* NOTES +* +* SEE ALSO +* ib_multipath_rec_t +*********/ + +#define IB_NUM_PKEY_ELEMENTS_IN_BLOCK 32 +/****s* IBA Base: Types/ib_pkey_table_t +* NAME +* ib_pkey_table_t +* +* DESCRIPTION +* IBA defined PKey table. 
(14.2.5.7) +* +* SYNOPSIS +*/ + +typedef struct _ib_pkey_table { + ib_net16_t pkey_entry[IB_NUM_PKEY_ELEMENTS_IN_BLOCK]; +} ib_pkey_table_t; +/************/ + +/****s* IBA Base: Types/ib_pkey_table_record_t +* NAME +* ib_pkey_table_record_t +* +* DESCRIPTION +* IBA defined P_Key Table Record for SA Query. (15.2.5.11) +* +* SYNOPSIS +*/ +typedef struct _ib_pkey_table_record { + ib_net16_t lid; // for CA: lid of port, for switch lid of port 0 + ib_net16_t block_num; + uint8_t port_num; // for switch: port number, for CA: reserved + uint8_t reserved1; + uint16_t reserved2; + ib_pkey_table_t pkey_tbl; +} ib_pkey_table_record_t; +/************/ + +#define IB_DROP_VL 15 +#define IB_MAX_NUM_VLS 16 +/****s* IBA Base: Types/ib_slvl_table_t +* NAME +* ib_slvl_table_t +* +* DESCRIPTION +* IBA defined SL2VL Mapping Table Attribute. (14.2.5.8) +* Two 4-bit VL entries are packed into each byte of raw_vl_by_sl. +* +* SYNOPSIS +*/ +typedef struct _ib_slvl_table { + uint8_t raw_vl_by_sl[IB_MAX_NUM_VLS / 2]; +} ib_slvl_table_t; +/************/ + +/****s* IBA Base: Types/ib_slvl_table_record_t +* NAME +* ib_slvl_table_record_t +* +* DESCRIPTION +* IBA defined SL to VL Mapping Table Record for SA Query. (15.2.5.4) +* +* SYNOPSIS +*/ +typedef struct _ib_slvl_table_record { + ib_net16_t lid; // for CA: lid of port, for switch lid of port 0 + uint8_t in_port_num; // reserved for CAs + uint8_t out_port_num; // reserved for CAs + uint32_t resv; + ib_slvl_table_t slvl_tbl; +} ib_slvl_table_record_t; +/************/ + +/****f* IBA Base: Types/ib_slvl_table_set +* NAME +* ib_slvl_table_set +* +* DESCRIPTION +* Set slvl table entry. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_slvl_table_set(IN ib_slvl_table_t * p_slvl_tbl, + IN uint8_t sl_index, IN uint8_t vl) +{ + uint8_t idx = sl_index / 2; + CL_ASSERT(vl <= 15); + CL_ASSERT(sl_index <= 15); + + if (sl_index % 2) + /* this is an odd sl. Need to update the ls bits; vl is masked so that an out-of-range value cannot spill into the even sl's nibble */ + p_slvl_tbl->raw_vl_by_sl[idx] = + (p_slvl_tbl->raw_vl_by_sl[idx] & 0xF0) | (vl & 0x0F); + else + /* this is an even sl. 
Need to update the ms bits */ + p_slvl_tbl->raw_vl_by_sl[idx] = + (vl << 4) | (p_slvl_tbl->raw_vl_by_sl[idx] & 0x0F); +} + +/* +* PARAMETERS +* p_slvl_tbl +* [in] pointer to ib_slvl_table_t object. +* +* sl_index +* [in] the sl index in the table to be updated. +* +* vl +* [in] the vl value to update for that sl. +* +* RETURN VALUES +* None +* +* NOTES +* Only the low 4 bits of vl are stored. sl_index is range-checked +* by CL_ASSERT only. +* +* SEE ALSO +* ib_slvl_table_t +*********/ + +/****f* IBA Base: Types/ib_slvl_table_get +* NAME +* ib_slvl_table_get +* +* DESCRIPTION +* Get slvl table entry. +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_slvl_table_get(IN const ib_slvl_table_t * p_slvl_tbl, IN uint8_t sl_index) +{ + uint8_t idx = sl_index / 2; + CL_ASSERT(sl_index <= 15); + + if (sl_index % 2) + /* this is an odd sl. Need to return the ls bits. */ + return (p_slvl_tbl->raw_vl_by_sl[idx] & 0x0F); + else + /* this is an even sl. Need to return the ms bits. */ + return ((p_slvl_tbl->raw_vl_by_sl[idx] & 0xF0) >> 4); +} + +/* +* PARAMETERS +* p_slvl_tbl +* [in] pointer to ib_slvl_table_t object. +* +* sl_index +* [in] the sl index in the table whose value should be returned. +* +* RETURN VALUES +* vl for the requested sl_index. +* +* NOTES +* sl_index is range-checked by CL_ASSERT only. +* +* SEE ALSO +* ib_slvl_table_t +*********/ + +/****s* IBA Base: Types/ib_vl_arb_element_t +* NAME +* ib_vl_arb_element_t +* +* DESCRIPTION +* IBA defined VL Arbitration Table Element. (14.2.5.9) +* +* SYNOPSIS +*/ +typedef struct _ib_vl_arb_element { + uint8_t vl; + uint8_t weight; +} ib_vl_arb_element_t; +/************/ + +#define IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK 32 + +/****s* IBA Base: Types/ib_vl_arb_table_t +* NAME +* ib_vl_arb_table_t +* +* DESCRIPTION +* IBA defined VL Arbitration Table. 
(14.2.5.9) +* +* SYNOPSIS +*/ +typedef struct _ib_vl_arb_table { + ib_vl_arb_element_t vl_entry[IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK]; +} ib_vl_arb_table_t; +/************/ + +/****s* IBA Base: Types/ib_vl_arb_table_record_t +* NAME +* ib_vl_arb_table_record_t +* +* DESCRIPTION +* IBA defined VL Arbitration Table Record for SA Query. (15.2.5.9) +* +* SYNOPSIS +*/ +typedef struct _ib_vl_arb_table_record { + ib_net16_t lid; // for CA: lid of port, for switch lid of port 0 + uint8_t port_num; + uint8_t block_num; + uint32_t reserved; + ib_vl_arb_table_t vl_arb_tbl; +} ib_vl_arb_table_record_t; +/************/ + +/* + * Global route header information received with unreliable datagram messages. + * ver_class_flow packs the version, traffic class and flow label; see + * ib_grh_get_ver_class_flow and ib_grh_set_ver_class_flow. + */ +typedef struct _ib_grh { + ib_net32_t ver_class_flow; + ib_net16_t resv1; + uint8_t resv2; + uint8_t hop_limit; + ib_gid_t src_gid; + ib_gid_t dest_gid; +} ib_grh_t; + +/****f* IBA Base: Types/ib_grh_get_ver_class_flow +* NAME +* ib_grh_get_ver_class_flow +* +* DESCRIPTION +* Get encoded version, traffic class and flow label in grh. +* Each output pointer may be NULL, in which case that field is skipped. +* NOTE(review): the value is shifted and masked as given, with no +* cl_ntoh32() conversion in this function - confirm the byte order +* callers are expected to pass. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_grh_get_ver_class_flow(IN const ib_net32_t ver_class_flow, + OUT uint8_t * const p_ver, + OUT uint8_t * const p_tclass, + OUT uint32_t * const p_flow_lbl) +{ + ib_net32_t tmp_ver_class_flow; + + if (p_ver) + *p_ver = (uint8_t) (ver_class_flow & 0x0f); /* version: lowest 4 bits */ + + tmp_ver_class_flow = ver_class_flow >> 4; + + if (p_tclass) + *p_tclass = (uint8_t) (tmp_ver_class_flow & 0xff); /* traffic class: next 8 bits */ + + tmp_ver_class_flow = tmp_ver_class_flow >> 8; + + if (p_flow_lbl) + *p_flow_lbl = tmp_ver_class_flow & 0xfffff; /* flow label: next 20 bits */ +} + +/* +* PARAMETERS +* ver_class_flow +* [in] the version, traffic class and flow label info. +* +* RETURN VALUES +* p_ver +* [out] pointer to the version info. +* +* p_tclass +* [out] pointer to the traffic class info. 
+* +* p_flow_lbl +* [out] pointer to the flow label info +* +* NOTES +* +* SEE ALSO +* ib_grh_t +*********/ + +/****f* IBA Base: Types/ib_grh_set_ver_class_flow +* NAME +* ib_grh_set_ver_class_flow +* +* DESCRIPTION +* Set encoded version, traffic class and flow label in grh +* +* SYNOPSIS +*/ +static inline ib_net32_t OSM_API +ib_grh_set_ver_class_flow(IN const uint8_t ver, + IN const uint8_t tclass, IN const uint32_t flow_lbl) +{ + ib_net32_t ver_class_flow; + + ver_class_flow = flow_lbl; + ver_class_flow = ver_class_flow << 8; /* make room for the 8-bit traffic class */ + ver_class_flow = ver_class_flow | tclass; + ver_class_flow = ver_class_flow << 4; /* make room for the 4-bit version */ + ver_class_flow = ver_class_flow | ver; + return (ver_class_flow); +} + +/* +* PARAMETERS +* ver +* [in] the version info. +* +* tclass +* [in] the traffic class info. +* +* flow_lbl +* [in] the flow label info +* +* RETURN VALUES +* ver_class_flow +* [out] the version, traffic class and flow label info. +* +* NOTES +* ver is OR-ed in unmasked, so values above 15 overlap the traffic +* class bits. flow_lbl is shifted left by 12 in total, so only its low +* 20 bits survive. +* NOTE(review): no cl_hton32() conversion is applied here - confirm the +* byte order callers expect in the returned ib_net32_t. +* +* SEE ALSO +* ib_grh_t +*********/ + +/****s* IBA Base: Types/ib_member_rec_t +* NAME +* ib_member_rec_t +* +* DESCRIPTION +* Multicast member record, used to create, join, and leave multicast +* groups. +* +* SYNOPSIS +*/ +typedef struct _ib_member_rec { + ib_gid_t mgid; + ib_gid_t port_gid; + ib_net32_t qkey; + ib_net16_t mlid; + uint8_t mtu; + uint8_t tclass; + ib_net16_t pkey; + uint8_t rate; + uint8_t pkt_life; + ib_net32_t sl_flow_hop; + uint8_t scope_state; + uint8_t proxy_join:1; + uint8_t reserved[2]; + uint8_t pad[4]; +} ib_member_rec_t; +/* +* FIELDS +* mgid +* Multicast GID address for this multicast group. +* +* port_gid +* Valid GID of the endpoint joining this multicast group. +* +* qkey +* Q_Key to be used by this multicast group. +* +* mlid +* Multicast LID for this multicast group. +* +* mtu +* MTU and MTU selector fields to use on this path +* +* tclass +* Another global routing parameter. +* +* pkey +* Partition key (P_Key) to use for this member. +* +* rate +* Rate and rate selector fields to use on this path. 
+* +* pkt_life +* Packet lifetime +* +* sl_flow_hop +* Global routing parameters: service level, hop count, and flow label. +* +* scope_state +* MGID scope and JoinState of multicast request. +* +* proxy_join +* Enables others in the Partition to proxy add/remove from the group +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_member_get_sl_flow_hop +* NAME +* ib_member_get_sl_flow_hop +* +* DESCRIPTION +* Get encoded sl, flow label, and hop limit. +* Each output pointer may be NULL, in which case that field is skipped. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_member_get_sl_flow_hop(IN const ib_net32_t sl_flow_hop, + OUT uint8_t * const p_sl, + OUT uint32_t * const p_flow_lbl, + OUT uint8_t * const p_hop) +{ + uint32_t tmp; + + /* host-order layout: 4b sl | 20b flow label | 8b hop limit */ + tmp = cl_ntoh32(sl_flow_hop); + if (p_hop) + *p_hop = (uint8_t) tmp; + tmp >>= 8; + + if (p_flow_lbl) + *p_flow_lbl = (uint32_t) (tmp & 0xfffff); + tmp >>= 20; + + if (p_sl) + *p_sl = (uint8_t) tmp; +} + +/* +* PARAMETERS +* sl_flow_hop +* [in] the sl, flow label, and hop limit of MC Group +* +* RETURN VALUES +* p_sl +* [out] pointer to the service level +* +* p_flow_lbl +* [out] pointer to the flow label info +* +* p_hop +* [out] pointer to the hop count limit. +* +* NOTES +* +* SEE ALSO +* ib_member_rec_t +*********/ + +/****f* IBA Base: Types/ib_member_set_sl_flow_hop +* NAME +* ib_member_set_sl_flow_hop +* +* DESCRIPTION +* Set encoded sl, flow label, and hop limit +* +* SYNOPSIS +*/ +static inline ib_net32_t OSM_API +ib_member_set_sl_flow_hop(IN const uint8_t sl, + IN const uint32_t flow_label, + IN const uint8_t hop_limit) +{ + uint32_t tmp; + + /* widen sl before shifting: as a promoted int, sl << 28 would overflow a signed value for sl >= 8 */ + tmp = ((uint32_t) sl << 28) | ((flow_label & 0xfffff) << 8) | hop_limit; + return cl_hton32(tmp); +} + +/* +* PARAMETERS +* sl +* [in] the service level. +* +* flow_label +* [in] the flow label info +* +* hop_limit +* [in] the hop limit. 
+* +* RETURN VALUES +* sl_flow_hop +* [out] the encoded sl, flow label, and hop limit +* +* NOTES +* +* SEE ALSO +* ib_member_rec_t +*********/ + +/****f* IBA Base: Types/ib_member_get_scope_state +* NAME +* ib_member_get_scope_state +* +* DESCRIPTION +* Split the scope_state byte into MGID scope (high nibble) and +* JoinState (low nibble). Either output pointer may be NULL. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_member_get_scope_state(IN const uint8_t scope_state, + OUT uint8_t * const p_scope, + OUT uint8_t * const p_state) +{ + const uint8_t join_bits = (uint8_t) (scope_state & 0x0f); + const uint8_t scope_bits = (uint8_t) ((scope_state >> 4) & 0x0f); + + if (p_state) + *p_state = join_bits; + + if (p_scope) + *p_scope = scope_bits; +} + +/* +* PARAMETERS +* scope_state +* [in] the scope and state +* +* RETURN VALUES +* p_scope +* [out] pointer to the MGID scope +* +* p_state +* [out] pointer to the join state +* +* NOTES +* +* SEE ALSO +* ib_member_rec_t +*********/ + +/****f* IBA Base: Types/ib_member_set_scope_state +* NAME +* ib_member_set_scope_state +* +* DESCRIPTION +* Encode MGID scope and JoinState into a single byte +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_member_set_scope_state(IN const uint8_t scope, IN const uint8_t state) +{ + /* scope goes to the high nibble; state is OR-ed in unmasked */ + return (uint8_t) ((scope << 4) | state); +} + +/* +* PARAMETERS +* scope +* [in] the MGID scope +* +* state +* [in] the JoinState +* +* RETURN VALUES +* scope_state +* [out] the encoded one +* +* NOTES +* +* SEE ALSO +* ib_member_rec_t +*********/ + +/****f* IBA Base: Types/ib_member_set_join_state +* NAME +* ib_member_set_join_state +* +* DESCRIPTION +* Replace the JoinState nibble of a member record, leaving its scope +* untouched +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_member_set_join_state(IN OUT ib_member_rec_t * p_mc_rec, + IN const uint8_t state) +{ + const uint8_t scope_bits = p_mc_rec->scope_state & 0xF0; + const uint8_t join_bits = 0x0f & state; + + /* high nibble preserved, low nibble replaced */ + p_mc_rec->scope_state = scope_bits | join_bits; +} + +/* +* PARAMETERS +* p_mc_rec +* [in] pointer to the member record +* +* state +* [in] the 
JoinState +* +* RETURN VALUES +* NONE +* +* NOTES +* +* SEE ALSO +* ib_member_rec_t +*********/ + +/* + * Join State Codes: + */ +#define IB_MC_REC_STATE_FULL_MEMBER 0x01 +#define IB_MC_REC_STATE_NON_MEMBER 0x02 +#define IB_MC_REC_STATE_SEND_ONLY_NON_MEMBER 0x04 +#define IB_MC_REC_STATE_SEND_ONLY_FULL_MEMBER 0x08 + +/* + * Generic MAD notice types + */ +#define IB_NOTICE_TYPE_FATAL 0x00 +#define IB_NOTICE_TYPE_URGENT 0x01 +#define IB_NOTICE_TYPE_SECURITY 0x02 +#define IB_NOTICE_TYPE_SUBN_MGMT 0x03 +#define IB_NOTICE_TYPE_INFO 0x04 +#define IB_NOTICE_TYPE_EMPTY 0x7F + +#define SM_GID_IN_SERVICE_TRAP 64 +#define SM_GID_OUT_OF_SERVICE_TRAP 65 +#define SM_MGID_CREATED_TRAP 66 +#define SM_MGID_DESTROYED_TRAP 67 +#define SM_UNPATH_TRAP 68 +#define SM_REPATH_TRAP 69 +#define SM_LINK_STATE_CHANGED_TRAP 128 +#define SM_LINK_INTEGRITY_THRESHOLD_TRAP 129 +#define SM_BUFFER_OVERRUN_THRESHOLD_TRAP 130 +#define SM_WATCHDOG_TIMER_EXPIRED_TRAP 131 +#define SM_LOCAL_CHANGES_TRAP 144 +#define SM_SYS_IMG_GUID_CHANGED_TRAP 145 +#define SM_BAD_MKEY_TRAP 256 +#define SM_BAD_PKEY_TRAP 257 +#define SM_BAD_QKEY_TRAP 258 +#define SM_BAD_SWITCH_PKEY_TRAP 259 + +/* + * Notice attribute (packed). generic_type carries the IsGeneric flag in + * bit 7 and the notice type in the low 7 bits; see ib_notice_is_generic + * and ib_notice_get_type. + */ +#include <complib/cl_packon.h> +typedef struct _ib_mad_notice_attr // Total Size calc Accumulated +{ + uint8_t generic_type; // 1 1 + union _notice_g_or_v { + struct _notice_generic // 5 6 + { + uint8_t prod_type_msb; + ib_net16_t prod_type_lsb; + ib_net16_t trap_num; + } PACK_SUFFIX generic; + struct _notice_vend { + uint8_t vend_id_msb; + ib_net16_t vend_id_lsb; + ib_net16_t dev_id; + } PACK_SUFFIX vend; + } g_or_v; + ib_net16_t issuer_lid; // 2 8 + ib_net16_t toggle_count; // 2 10 + union _data_details // 54 64 + { + struct _raw_data { + uint8_t details[54]; + } PACK_SUFFIX raw_data; + struct _ntc_64_67 { + uint8_t res[6]; + ib_gid_t gid; // the Node or Multicast Group that came in/out + } PACK_SUFFIX ntc_64_67; + struct _ntc_128 { + ib_net16_t sw_lid; // the sw lid of which link state changed + } PACK_SUFFIX ntc_128; + struct _ntc_129_131 { + 
ib_net16_t pad; + ib_net16_t lid; // lid and port number of the violation + uint8_t port_num; + } PACK_SUFFIX ntc_129_131; + struct _ntc_144 { + ib_net16_t pad1; + ib_net16_t lid; // lid where change occurred + uint8_t pad2; // reserved + uint8_t local_changes; // 7b reserved 1b local changes + ib_net32_t new_cap_mask; // new capability mask + ib_net16_t change_flgs; // 10b reserved 6b change flags + ib_net16_t cap_mask2; + } PACK_SUFFIX ntc_144; + struct _ntc_145 { + ib_net16_t pad1; + ib_net16_t lid; // lid where sys guid changed + ib_net16_t pad2; + ib_net64_t new_sys_guid; // new system image guid + } PACK_SUFFIX ntc_145; + struct _ntc_256 { // total: 54 + ib_net16_t pad1; // 2 + ib_net16_t lid; // 2 + ib_net16_t dr_slid; // 2 + uint8_t method; // 1 + uint8_t pad2; // 1 + ib_net16_t attr_id; // 2 + ib_net32_t attr_mod; // 4 + ib_net64_t mkey; // 8 + uint8_t pad3; // 1 + uint8_t dr_trunc_hop; // 1 + uint8_t dr_rtn_path[30]; // 30 + } PACK_SUFFIX ntc_256; + struct _ntc_257_258 // violation of p/q_key // 49 + { + ib_net16_t pad1; // 2 + ib_net16_t lid1; // 2 + ib_net16_t lid2; // 2 + ib_net32_t key; // 4 + ib_net32_t qp1; // 4b sl, 4b pad, 24b qp1 + ib_net32_t qp2; // 8b pad, 24b qp2 + ib_gid_t gid1; // 16 + ib_gid_t gid2; // 16 + } PACK_SUFFIX ntc_257_258; + struct _ntc_259 // pkey violation from switch 51 + { + ib_net16_t data_valid; // 2 + ib_net16_t lid1; // 2 + ib_net16_t lid2; // 2 + ib_net16_t pkey; // 2 + ib_net32_t sl_qp1; // 4b sl, 4b pad, 24b qp1 + ib_net32_t qp2; // 8b pad, 24b qp2 + ib_gid_t gid1; // 16 + ib_gid_t gid2; // 16 + ib_net16_t sw_lid; // 2 + uint8_t port_no; // 1 + } PACK_SUFFIX ntc_259; + struct _ntc_bkey_259 // bkey violation + { + ib_net16_t lidaddr; + uint8_t method; + uint8_t reserved; + ib_net16_t attribute_id; + ib_net32_t attribute_modifier; + ib_net32_t qp; // qp is low 24 bits + ib_net64_t bkey; + ib_gid_t gid; + } PACK_SUFFIX ntc_bkey_259; + struct _ntc_cckey_0 // CC key violation + { + ib_net16_t slid; // source LID from 
offending packet LRH + uint8_t method; // method, from common MAD header + uint8_t resv0; + ib_net16_t attribute_id; // Attribute ID, from common MAD header + ib_net16_t resv1; + ib_net32_t attribute_modifier; // Attribute Modif, from common MAD header + ib_net32_t qp; // 8b pad, 24b dest QP from BTH + ib_net64_t cc_key; // CC key of the offending packet + ib_gid_t source_gid; // GID from GRH of the offending packet + uint8_t padding[14]; // Padding - ignored on read + } PACK_SUFFIX ntc_cckey_0; + } data_details; + ib_gid_t issuer_gid; // 16 80 +} PACK_SUFFIX ib_mad_notice_attr_t; +#include <complib/cl_packoff.h> + +/** + * Trap 259 masks + */ +#define TRAP_259_MASK_SL (CL_HTON32(0xF0000000)) +#define TRAP_259_MASK_QP (CL_HTON32(0x00FFFFFF)) + +/** + * Trap 144 masks + */ +#define TRAP_144_MASK_OTHER_LOCAL_CHANGES 0x01 +#define TRAP_144_MASK_CAPABILITY_MASK2_CHANGE (CL_HTON16(0x0020)) +#define TRAP_144_MASK_HIERARCHY_INFO_CHANGE (CL_HTON16(0x0010)) +#define TRAP_144_MASK_SM_PRIORITY_CHANGE (CL_HTON16(0x0008)) +#define TRAP_144_MASK_LINK_SPEED_ENABLE_CHANGE (CL_HTON16(0x0004)) +#define TRAP_144_MASK_LINK_WIDTH_ENABLE_CHANGE (CL_HTON16(0x0002)) +#define TRAP_144_MASK_NODE_DESCRIPTION_CHANGE (CL_HTON16(0x0001)) + +/****f* IBA Base: Types/ib_notice_is_generic +* NAME +* ib_notice_is_generic +* +* DESCRIPTION +* Check if the notice is generic +* +* SYNOPSIS +*/ +static inline boolean_t OSM_API +ib_notice_is_generic(IN const ib_mad_notice_attr_t * p_ntc) +{ + /* normalize bit 7 to 0/1 so the result is a proper boolean rather than 0x80 */ + return ((p_ntc->generic_type & 0x80) != 0); +} + +/* +* PARAMETERS +* p_ntc +* [in] Pointer to the notice MAD attribute +* +* RETURN VALUES +* TRUE if notice MAD is generic, FALSE otherwise +* +* SEE ALSO +* ib_mad_notice_attr_t +*********/ + +/****f* IBA Base: Types/ib_notice_get_type +* NAME +* ib_notice_get_type +* +* DESCRIPTION +* Get the notice type +* +* SYNOPSIS +*/ +static inline uint8_t OSM_API +ib_notice_get_type(IN const ib_mad_notice_attr_t * p_ntc) +{ + /* drop the IsGeneric flag (bit 7), keep the 7-bit type */ + return p_ntc->generic_type & 0x7f; +} + +/* +* PARAMETERS +* p_ntc +* [in] Pointer to the notice 
MAD attribute +* +* RETURN VALUES +* The notice type: the low 7 bits of generic_type +* +* SEE ALSO +* ib_mad_notice_attr_t +*********/ + +/****f* IBA Base: Types/ib_notice_get_prod_type +* NAME +* ib_notice_get_prod_type +* +* DESCRIPTION +* Read the Producer Type of a Generic Notice, which is stored as one +* MSB byte plus a 16-bit LSB word +* +* SYNOPSIS +*/ +static inline ib_net32_t OSM_API +ib_notice_get_prod_type(IN const ib_mad_notice_attr_t * p_ntc) +{ + /* host-order pieces: MSB byte and 16-bit LSB word */ + const uint32_t high8 = p_ntc->g_or_v.generic.prod_type_msb; + const uint32_t low16 = cl_ntoh16(p_ntc->g_or_v.generic.prod_type_lsb); + + return cl_hton32((high8 << 16) | low16); +} + +/* +* PARAMETERS +* p_ntc +* [in] Pointer to the notice MAD attribute +* +* RETURN VALUES +* The producer type, in network byte order +* +* SEE ALSO +* ib_mad_notice_attr_t +*********/ + +/****f* IBA Base: Types/ib_notice_set_prod_type +* NAME +* ib_notice_set_prod_type +* +* DESCRIPTION +* Store the Producer Type of a Generic Notice from a network order value +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_notice_set_prod_type(IN ib_mad_notice_attr_t * p_ntc, + IN ib_net32_t prod_type_val) +{ + const uint32_t host_val = cl_ntoh32(prod_type_val); + + /* bits 23:16 go to the MSB byte, bits 15:0 to the LSB word */ + p_ntc->g_or_v.generic.prod_type_msb = + (uint8_t) ((host_val >> 16) & 0xff); + p_ntc->g_or_v.generic.prod_type_lsb = + cl_hton16((uint16_t) (host_val & 0x0000ffff)); +} + +/* +* PARAMETERS +* p_ntc +* [in] Pointer to the notice MAD attribute +* +* prod_type_val +* [in] The producer Type code, in network byte order +* +* RETURN VALUES +* None +* +* SEE ALSO +* ib_mad_notice_attr_t +*********/ + +/****f* IBA Base: Types/ib_notice_set_prod_type_ho +* NAME +* ib_notice_set_prod_type_ho +* +* DESCRIPTION +* Store the Producer Type of a Generic Notice from a host order value +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_notice_set_prod_type_ho(IN ib_mad_notice_attr_t * p_ntc, + IN uint32_t prod_type_val_ho) +{ + /* bits 23:16 go to the MSB byte, bits 15:0 to the LSB word */ + p_ntc->g_or_v.generic.prod_type_msb = + (uint8_t) ((prod_type_val_ho >> 16) & 0xff); + p_ntc->g_or_v.generic.prod_type_lsb = + cl_hton16((uint16_t) (prod_type_val_ho & 0x0000ffff)); +} + +/* +* PARAMETERS +* p_ntc +* [in] Pointer to the notice MAD attribute +* +* 
prod_type +* [in] The producer Type code in host order +* +* RETURN VALUES +* None +* +* SEE ALSO +* ib_mad_notice_attr_t +*********/ + +/****f* IBA Base: Types/ib_notice_get_vend_id +* NAME +* ib_notice_get_vend_id +* +* DESCRIPTION +* Read the Vendor Id of a Vendor type Notice, which is stored as one +* MSB byte plus a 16-bit LSB word +* +* SYNOPSIS +*/ +static inline ib_net32_t OSM_API +ib_notice_get_vend_id(IN const ib_mad_notice_attr_t * p_ntc) +{ + /* host-order pieces: MSB byte and 16-bit LSB word */ + const uint32_t high8 = p_ntc->g_or_v.vend.vend_id_msb; + const uint32_t low16 = cl_ntoh16(p_ntc->g_or_v.vend.vend_id_lsb); + + return cl_hton32((high8 << 16) | low16); +} + +/* +* PARAMETERS +* p_ntc +* [in] Pointer to the notice MAD attribute +* +* RETURN VALUES +* The Vendor Id of Vendor type Notice, in network byte order +* +* SEE ALSO +* ib_mad_notice_attr_t +*********/ + +/****f* IBA Base: Types/ib_notice_set_vend_id +* NAME +* ib_notice_set_vend_id +* +* DESCRIPTION +* Store the Vendor Id of a Vendor type Notice from a network order value +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_notice_set_vend_id(IN ib_mad_notice_attr_t * p_ntc, IN ib_net32_t vend_id) +{ + const uint32_t host_val = cl_ntoh32(vend_id); + + /* bits 23:16 go to the MSB byte, bits 15:0 to the LSB word */ + p_ntc->g_or_v.vend.vend_id_msb = (uint8_t) ((host_val >> 16) & 0xff); + p_ntc->g_or_v.vend.vend_id_lsb = + cl_hton16((uint16_t) (host_val & 0x0000ffff)); +} + +/* +* PARAMETERS +* p_ntc +* [in] Pointer to the notice MAD attribute +* +* vend_id +* [in] The Vendor Id, in network byte order +* +* RETURN VALUES +* None +* +* SEE ALSO +* ib_mad_notice_attr_t +*********/ + +/****f* IBA Base: Types/ib_notice_set_vend_id_ho +* NAME +* ib_notice_set_vend_id_ho +* +* DESCRIPTION +* Store the Vendor Id of a Vendor type Notice from a host order value +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_notice_set_vend_id_ho(IN ib_mad_notice_attr_t * p_ntc, + IN uint32_t vend_id_ho) +{ + /* bits 23:16 go to the MSB byte, bits 15:0 to the LSB word */ + p_ntc->g_or_v.vend.vend_id_msb = + (uint8_t) ((vend_id_ho >> 16) & 0xff); + p_ntc->g_or_v.vend.vend_id_lsb = + cl_hton16((uint16_t) (vend_id_ho & 0x0000ffff)); +} + +/* +* PARAMETERS +* p_ntc +* [in] Pointer to the notice MAD attribute +* +* vend_id_ho +* [in] The Vendor Id in host 
order +* +* RETURN VALUES +* None +* +* SEE ALSO +* ib_mad_notice_attr_t +*********/ + +#include <complib/cl_packon.h> +typedef struct _ib_inform_info { + ib_gid_t gid; + ib_net16_t lid_range_begin; + ib_net16_t lid_range_end; + ib_net16_t reserved1; + uint8_t is_generic; + uint8_t subscribe; + ib_net16_t trap_type; + union _inform_g_or_v { + struct _inform_generic { + ib_net16_t trap_num; + ib_net32_t qpn_resp_time_val; + uint8_t reserved2; + uint8_t node_type_msb; + ib_net16_t node_type_lsb; + } PACK_SUFFIX generic; + struct _inform_vend { + ib_net16_t dev_id; + ib_net32_t qpn_resp_time_val; + uint8_t reserved2; + uint8_t vendor_id_msb; + ib_net16_t vendor_id_lsb; + } PACK_SUFFIX vend; + } PACK_SUFFIX g_or_v; +} PACK_SUFFIX ib_inform_info_t; +#include <complib/cl_packoff.h> + +/****f* IBA Base: Types/ib_inform_info_get_qpn_resp_time +* NAME +* ib_inform_info_get_qpn_resp_time +* +* DESCRIPTION +* Get QPN and response time value of the inform info. +* Either output pointer may be NULL, in which case that field is skipped. +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_inform_info_get_qpn_resp_time(IN const ib_net32_t qpn_resp_time_val, + OUT ib_net32_t * const p_qpn, + OUT uint8_t * const p_resp_time_val) +{ + uint32_t tmp = cl_ntoh32(qpn_resp_time_val); + + /* upper 24 bits: QPN (handed back in network order) */ + if (p_qpn) + *p_qpn = cl_hton32((tmp & 0xffffff00) >> 8); + /* low 5 bits: response time value */ + if (p_resp_time_val) + *p_resp_time_val = (uint8_t) (tmp & 0x0000001f); +} + +/* +* PARAMETERS +* qpn_resp_time_val +* [in] the qpn and resp time val from the mad +* +* RETURN VALUES +* p_qpn +* [out] pointer to the qpn +* +* p_resp_time_val +* [out] pointer to the resp time val +* +* NOTES +* +* SEE ALSO +* ib_inform_info_t +*********/ + +/****f* IBA Base: Types/ib_inform_info_set_qpn +* NAME +* ib_inform_info_set_qpn +* +* DESCRIPTION +* Set the QPN of the inform info +* +* SYNOPSIS +*/ +static inline void OSM_API +ib_inform_info_set_qpn(IN ib_inform_info_t * p_ii, IN ib_net32_t const qpn) +{ + uint32_t tmp = cl_ntoh32(p_ii->g_or_v.generic.qpn_resp_time_val); + uint32_t qpn_h = cl_ntoh32(qpn); + + /* keep the low byte, replace the upper 24 bits with the QPN */ + p_ii->g_or_v.generic.qpn_resp_time_val = + cl_hton32((tmp & 0x000000ff) | ((qpn_h << 8) & 
0xffffff00) + ); +} + +/* +* PARAMETERS +* p_ii +* [in] pointer to the inform info to update +* +* qpn +* [in] the QPN, in network byte order +* +* RETURN VALUES +* None +* +* NOTES +* +* SEE ALSO +* ib_inform_info_t +*********/ + +/****f* IBA Base: Types/ib_inform_info_get_prod_type +* NAME +* ib_inform_info_get_prod_type +* +* DESCRIPTION +* Get Producer Type of the Inform Info +* 13.4.8.3 InformInfo +* +* SYNOPSIS +*/ +static inline ib_net32_t OSM_API +ib_inform_info_get_prod_type(IN const ib_inform_info_t * p_inf) +{ + uint32_t nt; + + /* combine the MSB byte and the 16-bit LSB word in host order */ + nt = cl_ntoh16(p_inf->g_or_v.generic.node_type_lsb) | + (p_inf->g_or_v.generic.node_type_msb << 16); + return cl_hton32(nt); +} + +/* +* PARAMETERS +* p_inf +* [in] pointer to an inform info +* +* RETURN VALUES +* The producer type, in network byte order +* +* NOTES +* +* SEE ALSO +* ib_inform_info_t +*********/ + +/****f* IBA Base: Types/ib_inform_info_get_vend_id +* NAME +* ib_inform_info_get_vend_id +* +* DESCRIPTION +* Get Vendor Id of the Inform Info +* +* SYNOPSIS +*/ +static inline ib_net32_t OSM_API +ib_inform_info_get_vend_id(IN const ib_inform_info_t * p_inf) +{ + uint32_t vi; + + /* combine the MSB byte and the 16-bit LSB word in host order */ + vi = cl_ntoh16(p_inf->g_or_v.vend.vendor_id_lsb) | + (p_inf->g_or_v.vend.vendor_id_msb << 16); + return cl_hton32(vi); +} + +/* +* PARAMETERS +* p_inf +* [in] pointer to an inform info +* +* RETURN VALUES +* The vendor id, in network byte order +* +* NOTES +* +* SEE ALSO +* ib_inform_info_t +*********/ + +/****s* IBA Base: Types/ib_inform_info_record_t +* NAME +* ib_inform_info_record_t +* +* DESCRIPTION +* IBA defined InformInfo Record. 
(15.2.5.12) +* +* SYNOPSIS +*/ +#include <complib/cl_packon.h> +typedef struct _ib_inform_info_record { + ib_gid_t subscriber_gid; + ib_net16_t subscriber_enum; + uint8_t reserved[6]; + ib_inform_info_t inform_info; + uint8_t pad[4]; +} PACK_SUFFIX ib_inform_info_record_t; +#include <complib/cl_packoff.h> + +/****s* IBA Base: Types/ib_perfmgt_mad_t +* NAME +* ib_perfmgt_mad_t +* +* DESCRIPTION +* IBA defined Perf Management MAD (16.3.1) +* +* SYNOPSIS +*/ +typedef struct _ib_perfmgt_mad { + ib_mad_t header; + uint8_t resv[40]; +#define IB_PM_DATA_SIZE 192 + uint8_t data[IB_PM_DATA_SIZE]; +} ib_perfmgt_mad_t; +/* +* FIELDS +* header +* Common MAD header. +* +* resv +* Reserved. +* +* data +* Performance Management payload. The structure and content of this field +* depend upon the method, attr_id, and attr_mod fields in the header. +* +* SEE ALSO +* ib_mad_t +*********/ + +/****s* IBA Base: Types/ib_port_counters +* NAME +* ib_port_counters_t +* +* DESCRIPTION +* IBA defined PortCounters Attribute. (16.1.3.5) +* +* SYNOPSIS +*/ +typedef struct _ib_port_counters { + uint8_t reserved; + uint8_t port_select; + ib_net16_t counter_select; + ib_net16_t symbol_err_cnt; + uint8_t link_err_recover; + uint8_t link_downed; + ib_net16_t rcv_err; + ib_net16_t rcv_rem_phys_err; + ib_net16_t rcv_switch_relay_err; + ib_net16_t xmit_discards; + uint8_t xmit_constraint_err; + uint8_t rcv_constraint_err; + uint8_t counter_select2; + uint8_t link_int_buffer_overrun; + ib_net16_t qp1_dropped; + ib_net16_t vl15_dropped; + ib_net32_t xmit_data; + ib_net32_t rcv_data; + ib_net32_t xmit_pkts; + ib_net32_t rcv_pkts; + ib_net32_t xmit_wait; +} ib_port_counters_t; + +/* + * Split a byte into its high nibble (PC_LINK_INT) and low nibble + * (PC_BUF_OVERRUN); presumably meant for link_int_buffer_overrun - confirm + * against callers. The argument is parenthesized so that compound + * expressions expand correctly. + */ +#define PC_LINK_INT(integ_buf_over) (((integ_buf_over) & 0xF0) >> 4) +#define PC_BUF_OVERRUN(integ_buf_over) ((integ_buf_over) & 0x0F) + +/****s* IBA Base: Types/ib_port_counters_ext +* NAME +* ib_port_counters_ext_t +* +* DESCRIPTION +* IBA defined PortCounters Extended Attribute. 
(16.1.4.11) +* +* SYNOPSIS +*/ +typedef struct _ib_port_counters_ext { + uint8_t reserved; + uint8_t port_select; + ib_net16_t counter_select; + ib_net32_t counter_select2; + ib_net64_t xmit_data; + ib_net64_t rcv_data; + ib_net64_t xmit_pkts; + ib_net64_t rcv_pkts; + ib_net64_t unicast_xmit_pkts; + ib_net64_t unicast_rcv_pkts; + ib_net64_t multicast_xmit_pkts; + ib_net64_t multicast_rcv_pkts; + ib_net64_t symbol_err_cnt; + ib_net64_t link_err_recover; + ib_net64_t link_downed; + ib_net64_t rcv_err; + ib_net64_t rcv_rem_phys_err; + ib_net64_t rcv_switch_relay_err; + ib_net64_t xmit_discards; + ib_net64_t xmit_constraint_err; + ib_net64_t rcv_constraint_err; + ib_net64_t link_integrity_err; + ib_net64_t buffer_overrun; + ib_net64_t vl15_dropped; + ib_net64_t xmit_wait; + ib_net64_t qp1_dropped; +} ib_port_counters_ext_t; + +/****s* IBA Base: Types/ib_port_samples_control +* NAME +* ib_port_samples_control_t +* +* DESCRIPTION +* IBA defined PortSamplesControl Attribute. (16.1.3.2) +* +* SYNOPSIS +*/ +#include <complib/cl_packon.h> +typedef struct _ib_port_samples_control { + uint8_t op_code; + uint8_t port_select; + uint8_t tick; + uint8_t counter_width; /* 5 bits res : 3 bits counter_width */ + ib_net32_t counter_mask; /* 2 bits res : 3 bits counter_mask : 27 bits counter_masks_1to9 */ + ib_net16_t counter_mask_10to14; /* 1 bit res : 15 bits counter_masks_10to14 */ + uint8_t sample_mech; + uint8_t sample_status; /* 6 bits res : 2 bits sample_status */ + ib_net64_t option_mask; + ib_net64_t vendor_mask; + ib_net32_t sample_start; + ib_net32_t sample_interval; + ib_net16_t tag; + ib_net16_t counter_select0; + ib_net16_t counter_select1; + ib_net16_t counter_select2; + ib_net16_t counter_select3; + ib_net16_t counter_select4; + ib_net16_t counter_select5; + ib_net16_t counter_select6; + ib_net16_t counter_select7; + ib_net16_t counter_select8; + ib_net16_t counter_select9; + ib_net16_t counter_select10; + ib_net16_t counter_select11; + ib_net16_t counter_select12; + ib_net16_t 
counter_select13; + ib_net16_t counter_select14; +} PACK_SUFFIX ib_port_samples_control_t; +#include <complib/cl_packoff.h> + +/****d* IBA Base: Types/CounterSelect values +* NAME +* Counter select values +* +* DESCRIPTION +* Mandatory counter select values (16.1.3.3) +* +* SYNOPSIS +*/ +#define IB_CS_PORT_XMIT_DATA (CL_HTON16(0x0001)) +#define IB_CS_PORT_RCV_DATA (CL_HTON16(0x0002)) +#define IB_CS_PORT_XMIT_PKTS (CL_HTON16(0x0003)) +#define IB_CS_PORT_RCV_PKTS (CL_HTON16(0x0004)) +#define IB_CS_PORT_XMIT_WAIT (CL_HTON16(0x0005)) + +/****s* IBA Base: Types/ib_port_samples_result +* NAME +* ib_port_samples_result_t +* +* DESCRIPTION +* IBA defined PortSamplesResult Attribute. (16.1.3.4) +* +* SYNOPSIS +*/ +typedef struct _ib_port_samples_result { + ib_net16_t tag; + ib_net16_t sample_status; /* 14 bits res : 2 bits sample_status */ + ib_net32_t counter0; + ib_net32_t counter1; + ib_net32_t counter2; + ib_net32_t counter3; + ib_net32_t counter4; + ib_net32_t counter5; + ib_net32_t counter6; + ib_net32_t counter7; + ib_net32_t counter8; + ib_net32_t counter9; + ib_net32_t counter10; + ib_net32_t counter11; + ib_net32_t counter12; + ib_net32_t counter13; + ib_net32_t counter14; +} ib_port_samples_result_t; + +/****s* IBA Base: Types/ib_port_xmit_data_sl +* NAME +* ib_port_xmit_data_sl_t +* +* DESCRIPTION +* IBA defined PortXmitDataSL Attribute. (A13.6.4) +* +* SYNOPSIS +*/ +typedef struct _ib_port_xmit_data_sl { + uint8_t reserved; + uint8_t port_select; + ib_net16_t counter_select; + ib_net32_t port_xmit_data_sl[16]; + uint8_t resv[124]; +} ib_port_xmit_data_sl_t; + +/****s* IBA Base: Types/ib_port_rcv_data_sl +* NAME +* ib_port_rcv_data_sl_t +* +* DESCRIPTION +* IBA defined PortRcvDataSL Attribute. 
(A13.6.4) +* +* SYNOPSIS +*/ +typedef struct _ib_port_rcv_data_sl { + uint8_t reserved; + uint8_t port_select; + ib_net16_t counter_select; + ib_net32_t port_rcv_data_sl[16]; + uint8_t resv[124]; +} ib_port_rcv_data_sl_t; + +/****d* IBA Base: Types/DM_SVC_NAME +* NAME +* DM_SVC_NAME +* +* DESCRIPTION +* IBA defined Device Management service name (16.3) +* +* SYNOPSIS +*/ +#define DM_SVC_NAME "DeviceManager.IBTA" +/* +* SEE ALSO +*********/ + +/****s* IBA Base: Types/ib_dm_mad_t +* NAME +* ib_dm_mad_t +* +* DESCRIPTION +* IBA defined Device Management MAD (16.3.1) +* +* SYNOPSIS +*/ +typedef struct _ib_dm_mad { + ib_mad_t header; + uint8_t resv[40]; +#define IB_DM_DATA_SIZE 192 + uint8_t data[IB_DM_DATA_SIZE]; +} ib_dm_mad_t; +/* +* FIELDS +* header +* Common MAD header. +* +* resv +* Reserved (40 bytes). +* +* data +* Device Management payload, IB_DM_DATA_SIZE (192) bytes. The structure +* and content of this field depend upon the method, attr_id, and +* attr_mod fields in the header. +* +* SEE ALSO +* ib_mad_t +*********/ + +/****s* IBA Base: Types/ib_iou_info_t +* NAME +* ib_iou_info_t +* +* DESCRIPTION +* IBA defined IO Unit information structure (16.3.3.3) +* +* SYNOPSIS +*/ +typedef struct _ib_iou_info { + ib_net16_t change_id; + uint8_t max_controllers; + uint8_t diag_rom; +#define IB_DM_CTRL_LIST_SIZE 128 + uint8_t controller_list[IB_DM_CTRL_LIST_SIZE]; +#define IOC_NOT_INSTALLED 0x0 +#define IOC_INSTALLED 0x1 +// Reserved values 0x02-0xE +#define SLOT_DOES_NOT_EXIST 0xF +} ib_iou_info_t; +/* +* FIELDS +* change_id +* Value incremented, with rollover, by any change to the controller_list. +* +* max_controllers +* Number of slots in controller_list. +* +* diag_rom +* A byte containing two fields: DiagDeviceID and OptionROM. +* These fields may be read using the ib_iou_info_diag_dev_id +* and ib_iou_info_option_rom functions. +* +* controller_list +* A series of 4-bit nibbles, with each nibble representing a slot +* in the IO Unit, stored two per byte in IB_DM_CTRL_LIST_SIZE (128) +* bytes. Defined nibble values are IOC_NOT_INSTALLED (0x0), +* IOC_INSTALLED (0x1) and SLOT_DOES_NOT_EXIST (0xF). +* Individual nibbles may be read using the +* ioc_at_slot function. 
+*
+* SEE ALSO
+*	ib_dm_mad_t, ib_iou_info_diag_dev_id, ib_iou_info_option_rom, ioc_at_slot
+*********/
+
+/****f* IBA Base: Types/ib_iou_info_diag_dev_id
+* NAME
+*	ib_iou_info_diag_dev_id
+*
+* DESCRIPTION
+*	Returns the DiagDeviceID, taken from bit 6 of the diag_rom byte.
+*
+* SYNOPSIS
+*/
+static inline uint8_t OSM_API
+ib_iou_info_diag_dev_id(IN const ib_iou_info_t * const p_iou_info)
+{
+	return ((uint8_t) ((p_iou_info->diag_rom >> 6) & 1));
+}
+
+/*
+* PARAMETERS
+*	p_iou_info
+*		[in] Pointer to the IO Unit information structure.
+*
+* RETURN VALUES
+*	DiagDeviceID field of the IO Unit information.
+*
+* NOTES
+*
+* SEE ALSO
+*	ib_iou_info_t
+*********/
+
+/****f* IBA Base: Types/ib_iou_info_option_rom
+* NAME
+*	ib_iou_info_option_rom
+*
+* DESCRIPTION
+*	Returns the OptionROM, taken from bit 7 of the diag_rom byte.
+*
+* SYNOPSIS
+*/
+static inline uint8_t OSM_API
+ib_iou_info_option_rom(IN const ib_iou_info_t * const p_iou_info)
+{
+	return ((uint8_t) (p_iou_info->diag_rom >> 7));
+}
+
+/*
+* PARAMETERS
+*	p_iou_info
+*		[in] Pointer to the IO Unit information structure.
+*
+* RETURN VALUES
+*	OptionROM field of the IO Unit information.
+*
+* NOTES
+*
+* SEE ALSO
+*	ib_iou_info_t
+*********/
+
+/****f* IBA Base: Types/ioc_at_slot
+* NAME
+*	ioc_at_slot
+*
+* DESCRIPTION
+*	Returns the IOC value at the specified slot.
+*
+* SYNOPSIS
+*/
+static inline uint8_t OSM_API
+ioc_at_slot(IN const ib_iou_info_t * const p_iou_info, IN uint8_t slot)
+{
+	if (slot >= IB_DM_CTRL_LIST_SIZE)
+		return SLOT_DOES_NOT_EXIST;
+	else
+		/* Two slots per byte: even slot = low nibble, odd = high nibble. */
+		return (uint8_t)
+		    ((slot % 2) ?
+		     ((p_iou_info->controller_list[slot / 2] & 0xf0) >> 4) :
+		     (p_iou_info->controller_list[slot / 2] & 0x0f));
+}
+
+/*
+* PARAMETERS
+*	p_iou_info
+*		[in] Pointer to the IO Unit information structure.
+*
+*	slot
+*		[in] Zero-based index of the slot (nibble) to read from
+*		controller_list.
+*
+* RETURN VALUES
+*	The 4-bit IOC value stored for the slot: the low nibble of
+*	controller_list[slot / 2] for even slots, the high nibble for odd
+*	slots. SLOT_DOES_NOT_EXIST is returned when slot is not less than
+*	IB_DM_CTRL_LIST_SIZE.
+*
+* NOTES
+*
+* SEE ALSO
+*	ib_iou_info_t
+*********/
+
+/****s* IBA Base: Types/ib_ioc_profile_t
+* NAME
+*	ib_ioc_profile_t
+*
+* DESCRIPTION
+*	IBA defined IO Controller profile structure (16.3.3.4)
+*
+* SYNOPSIS
+*/
+typedef struct _ib_ioc_profile {
+	ib_net64_t ioc_guid;
+	ib_net32_t vend_id;
+	ib_net32_t dev_id;
+	ib_net16_t dev_ver;
+	ib_net16_t resv2;
+	ib_net32_t subsys_vend_id;
+	ib_net32_t subsys_id;
+	ib_net16_t io_class;
+	ib_net16_t io_subclass;
+	ib_net16_t protocol;
+	ib_net16_t protocol_ver;
+	ib_net32_t resv3;
+	ib_net16_t send_msg_depth;
+	uint8_t resv4;
+	uint8_t rdma_read_depth;
+	ib_net32_t send_msg_size;
+	ib_net32_t rdma_size;
+	uint8_t ctrl_ops_cap;
+#define CTRL_OPS_CAP_ST	0x01
+#define CTRL_OPS_CAP_SF	0x02
+#define CTRL_OPS_CAP_RT	0x04
+#define CTRL_OPS_CAP_RF	0x08
+#define CTRL_OPS_CAP_WT	0x10
+#define CTRL_OPS_CAP_WF	0x20
+#define CTRL_OPS_CAP_AT	0x40
+#define CTRL_OPS_CAP_AF	0x80
+	uint8_t resv5;
+	uint8_t num_svc_entries;
+#define MAX_NUM_SVC_ENTRIES	0xff
+	uint8_t resv6[9];
+#define CTRL_ID_STRING_LEN	64
+	char id_string[CTRL_ID_STRING_LEN];
+} ib_ioc_profile_t;
+/*
+* FIELDS
+*	ioc_guid
+*		An EUI-64 GUID used to uniquely identify the IO controller.
+*
+*	vend_id
+*		IO controller vendor ID, IEEE format.
+*
+*	dev_id
+*		A number assigned by the vendor to identify the type of controller.
+*
+*	dev_ver
+*		A number assigned by the vendor to identify the device version.
+*
+*	subsys_vend_id
+*		ID of the vendor of the enclosure, if any, in which the IO controller
+*		resides in IEEE format; otherwise zero.
+*
+*	subsys_id
+*		A number identifying the subsystem where the controller resides.
+*
+*	io_class
+*		0x0000 - 0xfffe = reserved for IO classes encompassed by InfiniBand
+*		Architecture. 0xffff = Vendor specific.
+*
+*	io_subclass
+*		0x0000 - 0xfffe = reserved for IO subclasses encompassed by InfiniBand
+*		Architecture. 0xffff = Vendor specific. This shall be set to 0xffff
+*		if the io_class component is 0xffff.
+*
+*	protocol
+*		0x0000 - 0xfffe = reserved for IO protocols encompassed by InfiniBand
+*		Architecture. 0xffff = Vendor specific. This shall be set to 0xffff
+*		if the io_class component is 0xffff.
+*
+*	protocol_ver
+*		Protocol specific.
+*
+*	send_msg_depth
+*		Maximum depth of the send message queue.
+*
+*	rdma_read_depth
+*		Maximum depth of the per-channel RDMA read queue.
+*
+*	send_msg_size
+*		Maximum size of send messages.
+*
+*	ctrl_ops_cap
+*		Supported operation types of this IO controller. A bit set to one
+*		for affirmation of supported capability.
+*
+*	num_svc_entries
+*		Number of entries in the service entries table.
+*
+*	id_string
+*		UTF-8 encoded string for identifying the controller to an operator.
+*
+* SEE ALSO
+*	ib_dm_mad_t
+*********/
+
+/****f* IBA Base: Types/ib_ioc_profile_get_vend_id
+* NAME
+*	ib_ioc_profile_get_vend_id
+*
+* DESCRIPTION
+*	Returns the vendor ID of an IO controller profile in host order.
+*	The ID is read from the upper 24 bits of the network-order vend_id
+*	field; the low 8 bits of that field are discarded.
+*
+* SYNOPSIS
+*/
+static inline uint32_t OSM_API
+ib_ioc_profile_get_vend_id(IN const ib_ioc_profile_t * const p_ioc_profile)
+{
+	return (cl_ntoh32(p_ioc_profile->vend_id) >> 8);
+}
+
+/*
+* PARAMETERS
+*	p_ioc_profile
+*		[in] Pointer to the IO controller profile.
+*
+* RETURN VALUES
+*	Vendor ID in host order.
+*
+* SEE ALSO
+*	ib_ioc_profile_t, ib_ioc_profile_set_vend_id
+*********/
+
+/****f* IBA Base: Types/ib_ioc_profile_set_vend_id
+* NAME
+*	ib_ioc_profile_set_vend_id
+*
+* DESCRIPTION
+*	Stores a host-order vendor ID into the upper 24 bits of the
+*	network-order vend_id field of an IO controller profile. The low
+*	8 bits of the field are written as zero.
+*
+* SYNOPSIS
+*/
+static inline void OSM_API
+ib_ioc_profile_set_vend_id(IN ib_ioc_profile_t * const p_ioc_profile,
+			   IN const uint32_t vend_id)
+{
+	/*
+	 * Shift in host order first, then convert, so that this is the
+	 * exact inverse of ib_ioc_profile_get_vend_id. Shifting after the
+	 * byte swap would move the wrong bytes on little-endian hosts.
+	 */
+	p_ioc_profile->vend_id = cl_hton32(vend_id << 8);
+}
+
+/*
+* PARAMETERS
+*	p_ioc_profile
+*		[in] Pointer to the IO controller profile to update.
+*
+*	vend_id
+*		[in] Vendor ID in host order; only the low 24 bits are stored.
+*
+* RETURN VALUES
+*	None.
+*
+* SEE ALSO
+*	ib_ioc_profile_t, ib_ioc_profile_get_vend_id
+*********/
+
+/****s* IBA Base: Types/ib_svc_entry_t
+* NAME
+*	ib_svc_entry_t
+*
+* DESCRIPTION
+*	IBA defined IO Controller service entry structure (16.3.3.5)
+*
+* SYNOPSIS
+*/
+typedef struct _ib_svc_entry {
+#define MAX_SVC_ENTRY_NAME_LEN	40
+	char name[MAX_SVC_ENTRY_NAME_LEN];
+	ib_net64_t id;
+} ib_svc_entry_t;
+/*
+* FIELDS
+*	name
+*		UTF-8 encoded, null-terminated name of the service.
+*
+*	id
+*		An identifier of the associated Service.
+*
+* SEE ALSO
+*	ib_svc_entries_t
+*********/
+
+/****s* IBA Base: Types/ib_svc_entries_t
+* NAME
+*	ib_svc_entries_t
+*
+* DESCRIPTION
+*	IBA defined IO Controller service entry array (16.3.3.5)
+*
+* SYNOPSIS
+*/
+typedef struct _ib_svc_entries {
+#define SVC_ENTRY_COUNT	4
+	ib_svc_entry_t service_entry[SVC_ENTRY_COUNT];
+} ib_svc_entries_t;
+/*
+* FIELDS
+*	service_entry
+*		An array of IO controller service entries.
+*
+* SEE ALSO
+*	ib_dm_mad_t, ib_svc_entry_t
+*********/
+
+/****f* IBA Base: Types/ib_dm_get_slot_lo_hi
+* NAME
+*	ib_dm_get_slot_lo_hi
+*
+* DESCRIPTION
+*	Splits a network-order 32-bit value into its slot, hi and lo parts.
+*	After conversion to host order, slot is bits 16-19, hi is bits 8-15
+*	and lo is bits 0-7. Any output pointer may be NULL, in which case
+*	that part is not returned.
+*
+* SYNOPSIS
+*/
+static inline void OSM_API
+ib_dm_get_slot_lo_hi(IN const ib_net32_t slot_lo_hi,
+		     OUT uint8_t * const p_slot,
+		     OUT uint8_t * const p_lo, OUT uint8_t * const p_hi)
+{
+	/* Host-order copy, so it is a plain uint32_t rather than ib_net32_t. */
+	const uint32_t host_slot_lo_hi = cl_ntoh32(slot_lo_hi);
+
+	if (p_slot)
+		*p_slot = (uint8_t) ((host_slot_lo_hi >> 16) & 0x0f);
+	if (p_hi)
+		*p_hi = (uint8_t) ((host_slot_lo_hi >> 8) & 0xff);
+	if (p_lo)
+		*p_lo = (uint8_t) ((host_slot_lo_hi >> 0) & 0xff);
+}
+
+/*
+* PARAMETERS
+*	slot_lo_hi
+*		[in] Packed value in network order.
+*
+*	p_slot
+*		[out] Optional; receives bits 16-19 of the host-order value.
+*
+*	p_lo
+*		[out] Optional; receives bits 0-7 of the host-order value.
+*
+*	p_hi
+*		[out] Optional; receives bits 8-15 of the host-order value.
+*
+* RETURN VALUES
+*	None.
+*********/
+
+/*
+ * IBA defined information describing an I/O controller
+ */
+typedef struct _ib_ioc_info {
+	ib_net64_t module_guid;
+	ib_net64_t iou_guid;
+	ib_ioc_profile_t ioc_profile;
+	ib_net64_t access_key;
+	uint16_t initiators_conf;
+	uint8_t resv[38];
+} ib_ioc_info_t;
+
+/*
+ * The following definitions are shared between the Access Layer and VPD
+ */
+typedef struct _ib_ca *__ptr64 ib_ca_handle_t;
+typedef struct _ib_pd *__ptr64 ib_pd_handle_t;
+typedef struct _ib_rdd *__ptr64 ib_rdd_handle_t;
+typedef struct _ib_mr *__ptr64 ib_mr_handle_t;
+typedef struct _ib_mw *__ptr64 ib_mw_handle_t;
+typedef struct _ib_qp *__ptr64 ib_qp_handle_t;
+typedef struct _ib_eec *__ptr64 ib_eec_handle_t;
+typedef struct _ib_cq *__ptr64 ib_cq_handle_t;
+typedef struct _ib_av *__ptr64 ib_av_handle_t;
+typedef struct _ib_mcast *__ptr64 ib_mcast_handle_t;
+
+/* Currently for windows branch, use the extended version of ib special verbs struct
+   in order to be compliant with Infinicon ib_types; later we'll change it to support
+   OpenSM ib_types.h */
+
+#ifndef __WIN__
+/****d* Access Layer/ib_api_status_t
+* NAME
+*	ib_api_status_t
+*
+* DESCRIPTION
+*	Function return codes indicating the success or failure of an API call.
+*	Note that success is indicated by the return value IB_SUCCESS, which
+*	is always zero.
+*
+* NOTES
+*	IB_VERBS_PROCESSING_DONE is used by UVP library to terminate a verbs call
+*	in the pre-ioctl step itself.
+*
+* SYNOPSIS
+*/
+typedef enum _ib_api_status_t {
+	IB_SUCCESS,
+	IB_INSUFFICIENT_RESOURCES,
+	IB_INSUFFICIENT_MEMORY,
+	IB_INVALID_PARAMETER,
+	IB_INVALID_SETTING,
+	IB_NOT_FOUND,
+	IB_TIMEOUT,
+	IB_CANCELED,
+	IB_INTERRUPTED,
+	IB_INVALID_PERMISSION,
+	IB_UNSUPPORTED,
+	IB_OVERFLOW,
+	IB_MAX_MCAST_QPS_REACHED,
+	IB_INVALID_QP_STATE,
+	IB_INVALID_EEC_STATE,
+	IB_INVALID_APM_STATE,
+	IB_INVALID_PORT_STATE,
+	IB_INVALID_STATE,
+	IB_RESOURCE_BUSY,
+	IB_INVALID_PKEY,
+	IB_INVALID_LKEY,
+	IB_INVALID_RKEY,
+	IB_INVALID_MAX_WRS,
+	IB_INVALID_MAX_SGE,
+	IB_INVALID_CQ_SIZE,
+	IB_INVALID_SERVICE_TYPE,
+	IB_INVALID_GID,
+	IB_INVALID_LID,
+	IB_INVALID_GUID,
+	IB_INVALID_CA_HANDLE,
+	IB_INVALID_AV_HANDLE,
+	IB_INVALID_CQ_HANDLE,
+	IB_INVALID_EEC_HANDLE,
+	IB_INVALID_QP_HANDLE,
+	IB_INVALID_PD_HANDLE,
+	IB_INVALID_MR_HANDLE,
+	IB_INVALID_MW_HANDLE,
+	IB_INVALID_RDD_HANDLE,
+	IB_INVALID_MCAST_HANDLE,
+	IB_INVALID_CALLBACK,
+	IB_INVALID_AL_HANDLE,	/* InfiniBand Access Layer */
+	IB_INVALID_HANDLE,	/* InfiniBand Access Layer */
+	IB_ERROR,		/* InfiniBand Access Layer */
+	IB_REMOTE_ERROR,	/* InfiniBand Access Layer */
+	IB_VERBS_PROCESSING_DONE,	/* See Notes above */
+	IB_INVALID_WR_TYPE,
+	IB_QP_IN_TIMEWAIT,
+	IB_EE_IN_TIMEWAIT,
+	IB_INVALID_PORT,
+	IB_NOT_DONE,
+	IB_UNKNOWN_ERROR	/* ALWAYS LAST ENUM VALUE! */
+} ib_api_status_t;
+/*****/
+
+/* Status description strings, indexed by ib_api_status_t value. */
+OSM_EXPORT const char *ib_error_str[];
+
+/****f* IBA Base: Types/ib_get_err_str
+* NAME
+*	ib_get_err_str
+*
+* DESCRIPTION
+*	Returns a string for the specified status value.
+*	The string is looked up in ib_error_str. A status greater than
+*	IB_UNKNOWN_ERROR is treated as IB_UNKNOWN_ERROR.
+*
+* SYNOPSIS
+*/
+static inline const char *OSM_API ib_get_err_str(IN ib_api_status_t status)
+{
+	/* Clamp so the lookup never goes past the last table entry. */
+	if (status > IB_UNKNOWN_ERROR)
+		status = IB_UNKNOWN_ERROR;
+	return (ib_error_str[status]);
+}
+
+/*
+* PARAMETERS
+*	status
+*		[in] status value
+*
+* RETURN VALUES
+*	Pointer to the status description string.
+* +* NOTES +* +* SEE ALSO +*********/ + +/****d* Verbs/ib_async_event_t +* NAME +* ib_async_event_t -- Async event types +* +* DESCRIPTION +* This type indicates the reason the async callback was called. +* The context in the ib_event_rec_t indicates the resource context +* that associated with the callback. For example, for IB_AE_CQ_ERROR +* the context provided during the ib_create_cq is returned in the event. +* +* SYNOPSIS +*/ +typedef enum _ib_async_event_t { + IB_AE_SQ_ERROR = 1, + IB_AE_SQ_DRAINED, + IB_AE_RQ_ERROR, + IB_AE_CQ_ERROR, + IB_AE_QP_FATAL, + IB_AE_QP_COMM, + IB_AE_QP_APM, + IB_AE_EEC_FATAL, + IB_AE_EEC_COMM, + IB_AE_EEC_APM, + IB_AE_LOCAL_FATAL, + IB_AE_PKEY_TRAP, + IB_AE_QKEY_TRAP, + IB_AE_MKEY_TRAP, + IB_AE_PORT_TRAP, + IB_AE_SYSIMG_GUID_TRAP, + IB_AE_BUF_OVERRUN, + IB_AE_LINK_INTEGRITY, + IB_AE_FLOW_CTRL_ERROR, + IB_AE_BKEY_TRAP, + IB_AE_QP_APM_ERROR, + IB_AE_EEC_APM_ERROR, + IB_AE_WQ_REQ_ERROR, + IB_AE_WQ_ACCESS_ERROR, + IB_AE_PORT_ACTIVE, + IB_AE_PORT_DOWN, + IB_AE_UNKNOWN /* ALWAYS LAST ENUM VALUE */ +} ib_async_event_t; +/* +* VALUES +* IB_AE_SQ_ERROR +* An error occurred when accessing the send queue of the QP or EEC. +* This event is optional. +* +* IB_AE_SQ_DRAINED +* The send queue of the specified QP has completed the outstanding +* messages in progress when the state change was requested and, if +* applicable, has received all acknowledgements for those messages. +* +* IB_AE_RQ_ERROR +* An error occurred when accessing the receive queue of the QP or EEC. +* This event is optional. +* +* IB_AE_CQ_ERROR +* An error occurred when writing an entry to the CQ. +* +* IB_AE_QP_FATAL +* A catastrophic error occurred while accessing or processing the +* work queue that prevents reporting of completions. +* +* IB_AE_QP_COMM +* The first packet has arrived for the receive work queue where the +* QP is still in the RTR state. 
+* +* IB_AE_QP_APM +* If alternate path migration is supported, this event indicates that +* the QP connection has migrated to the alternate path. +* +* IB_AE_EEC_FATAL +* If reliable datagram service is supported, this event indicates that +* a catastrophic error occurred while accessing or processing the EEC +* that prevents reporting of completions. +* +* IB_AE_EEC_COMM +* If reliable datagram service is supported, this event indicates that +* the first packet has arrived for the receive work queue where the +* EEC is still in the RTR state. +* +* IB_AE_EEC_APM +* If reliable datagram service and alternate path migration is supported, +* this event indicates that the EEC connection has migrated to the +* alternate path. +* +* IB_AE_LOCAL_FATAL +* A catastrophic HCA error occurred which cannot be attributed to +* any resource; behavior is indeterminate. +* +* IB_AE_PKEY_TRAP +* A PKEY violation was detected. This event is optional. +* +* IB_AE_QKEY_TRAP +* A QKEY violation was detected. This event is optional. +* +* IB_AE_MKEY_TRAP +* A MKEY violation was detected. This event is optional. +* +* IB_AE_PORT_TRAP +* A port capability change was detected. This event is optional. +* +* IB_AE_SYSIMG_GUID_TRAP +* If the system image GUID is supported, this event indicates that +* the system image GUID of this HCA has been changed. This event +* is optional. +* +* IB_AE_BUF_OVERRUN +* The number of consecutive flow control update periods with at least +* one overrun error in each period has exceeded the threshold specified +* in the port info attributes. This event is optional. +* +* IB_AE_LINK_INTEGRITY +* The detection of excessively frequent local physical errors has +* exceeded the threshold specified in the port info attributes. This +* event is optional. +* +* IB_AE_FLOW_CTRL_ERROR +* An HCA watchdog timer monitoring the arrival of flow control updates +* has expired without receiving an update. This event is optional. 
+*
+*	IB_AE_BKEY_TRAP
+*		A BKEY violation was detected. This event is optional.
+*
+*	IB_AE_QP_APM_ERROR
+*		If alternate path migration is supported, this event indicates that
+*		an incoming path migration request to this QP was not accepted.
+*
+*	IB_AE_EEC_APM_ERROR
+*		If reliable datagram service and alternate path migration are supported,
+*		this event indicates that an incoming path migration request to this
+*		EEC was not accepted.
+*
+*	IB_AE_WQ_REQ_ERROR
+*		An OpCode violation was detected at the responder.
+*
+*	IB_AE_WQ_ACCESS_ERROR
+*		An access violation was detected at the responder.
+*
+*	IB_AE_PORT_ACTIVE
+*		If the port active event is supported, this event is generated
+*		when the link becomes active: IB_LINK_ACTIVE.
+*
+*	IB_AE_PORT_DOWN
+*		The link is declared unavailable: IB_LINK_INIT, IB_LINK_ARMED,
+*		IB_LINK_DOWN.
+*
+*	IB_AE_UNKNOWN
+*		An unknown error occurred which cannot be attributed to any
+*		resource; behavior is indeterminate.
+*
+*****/
+
+/* Event description strings, indexed by ib_async_event_t value. */
+OSM_EXPORT const char *ib_async_event_str[];
+
+/****f* IBA Base: Types/ib_get_async_event_str
+* NAME
+*	ib_get_async_event_str
+*
+* DESCRIPTION
+*	Returns a string for the specified asynchronous event.
+*	The string is looked up in ib_async_event_str. An event greater
+*	than IB_AE_UNKNOWN is treated as IB_AE_UNKNOWN.
+*
+* SYNOPSIS
+*/
+static inline const char *OSM_API
+ib_get_async_event_str(IN ib_async_event_t event)
+{
+	/* Clamp so the lookup never goes past the last table entry. */
+	if (event > IB_AE_UNKNOWN)
+		event = IB_AE_UNKNOWN;
+	return (ib_async_event_str[event]);
+}
+
+/*
+* PARAMETERS
+*	event
+*		[in] event value
+*
+* RETURN VALUES
+*	Pointer to the asynchronous event description string.
+*
+* NOTES
+*
+* SEE ALSO
+*********/
+
+/****s* Verbs/ib_event_rec_t
+* NAME
+*	ib_event_rec_t -- Async event notification record
+*
+* DESCRIPTION
+*	When an async event callback is made, this structure is passed to indicate
+*	the type of event, the source of event that caused it, and the context
+*	associated with this event.
+*
+*	context -- Context of the resource that caused the event.
+*		-- ca_context if this is a port/adapter event.
+* -- qp_context if the source is a QP event +* -- cq_context if the source is a CQ event. +* -- ee_context if the source is an EE event. +* +* SYNOPSIS +*/ +typedef struct _ib_event_rec { + void *context; + ib_async_event_t type; + /* HCA vendor specific event information. */ + uint64_t vendor_specific; + /* The following structures are valid only for trap types. */ + union _trap { + struct { + uint16_t lid; + ib_net64_t port_guid; + uint8_t port_num; + /* + * The following structure is valid only for + * P_KEY, Q_KEY, and M_KEY violation traps. + */ + struct { + uint8_t sl; + uint16_t src_lid; + uint16_t dest_lid; + union _key { + uint16_t pkey; + uint32_t qkey; + uint64_t mkey; + } key; + uint32_t src_qp; + uint32_t dest_qp; + ib_gid_t src_gid; + ib_gid_t dest_gid; + } violation; + } info; + ib_net64_t sysimg_guid; + } trap; +} ib_event_rec_t; +/*******/ + +/****d* Access Layer/ib_atomic_t +* NAME +* ib_atomic_t +* +* DESCRIPTION +* Indicates atomicity levels supported by an adapter. +* +* SYNOPSIS +*/ +typedef enum _ib_atomic_t { + IB_ATOMIC_NONE, + IB_ATOMIC_LOCAL, + IB_ATOMIC_GLOBAL +} ib_atomic_t; +/* +* VALUES +* IB_ATOMIC_NONE +* Atomic operations not supported. +* +* IB_ATOMIC_LOCAL +* Atomic operations guaranteed between QPs of a single CA. +* +* IB_ATOMIC_GLOBAL +* Atomic operations are guaranteed between CA and any other entity +* in the system. +*****/ + +/****s* Access Layer/ib_port_cap_t +* NAME +* ib_port_cap_t +* +* DESCRIPTION +* Indicates which management agents are currently available on the specified +* port. 
+* +* SYNOPSIS +*/ +typedef struct _ib_port_cap { + boolean_t cm; + boolean_t snmp; + boolean_t dev_mgmt; + boolean_t vend; + boolean_t sm; + boolean_t sm_disable; + boolean_t qkey_ctr; + boolean_t pkey_ctr; + boolean_t notice; + boolean_t trap; + boolean_t apm; + boolean_t slmap; + boolean_t pkey_nvram; + boolean_t mkey_nvram; + boolean_t sysguid; + boolean_t dr_notice; + boolean_t boot_mgmt; + boolean_t capm_notice; + boolean_t reinit; + boolean_t ledinfo; + boolean_t port_active; +} ib_port_cap_t; +/*****/ + +/****d* Access Layer/ib_init_type_t +* NAME +* ib_init_type_t +* +* DESCRIPTION +* If supported by the HCA, the type of initialization requested by +* this port before SM moves it to the active or armed state. If the +* SM implements reinitialization, it shall set these bits to indicate +* the type of initialization performed prior to activating the port. +* Otherwise, these bits shall be set to 0. +* +* SYNOPSIS +*/ +typedef uint8_t ib_init_type_t; +#define IB_INIT_TYPE_NO_LOAD 0x01 +#define IB_INIT_TYPE_PRESERVE_CONTENT 0x02 +#define IB_INIT_TYPE_PRESERVE_PRESENCE 0x04 +#define IB_INIT_TYPE_DO_NOT_RESUSCITATE 0x08 +/*****/ + +/****s* Access Layer/ib_port_attr_mod_t +* NAME +* ib_port_attr_mod_t +* +* DESCRIPTION +* Port attributes that may be modified. +* +* SYNOPSIS +*/ +typedef struct _ib_port_attr_mod { + ib_port_cap_t cap; + uint16_t pkey_ctr; + uint16_t qkey_ctr; + ib_init_type_t init_type; + ib_net64_t system_image_guid; +} ib_port_attr_mod_t; +/* +* SEE ALSO +* ib_port_cap_t +*****/ + +/****s* Access Layer/ib_port_attr_t +* NAME +* ib_port_attr_t +* +* DESCRIPTION +* Information about a port on a given channel adapter. +* +* SYNOPSIS +*/ +typedef struct _ib_port_attr { + ib_net64_t port_guid; + uint8_t port_num; + uint8_t mtu; + uint64_t max_msg_size; + ib_net16_t lid; + uint8_t lmc; + /* + * LinkWidthSupported as defined in PortInfo. Required to calculate + * inter-packet delay (a.k.a. static rate). 
+ */ + uint8_t link_width_supported; + uint16_t max_vls; + ib_net16_t sm_lid; + uint8_t sm_sl; + uint8_t link_state; + ib_init_type_t init_type_reply; /* Optional */ + /* + * subnet_timeout: + * The maximum expected subnet propagation delay to reach any port on + * the subnet. This value also determines the rate at which traps can + * be generated from this node. + * + * timeout = 4.096 microseconds * 2^subnet_timeout + */ + uint8_t subnet_timeout; + ib_port_cap_t cap; + uint16_t pkey_ctr; + uint16_t qkey_ctr; + uint16_t num_gids; + uint16_t num_pkeys; + /* + * Pointers at the end of the structure to allow doing a simple + * memory comparison of contents up to the first pointer. + */ + ib_gid_t *p_gid_table; + ib_net16_t *p_pkey_table; +} ib_port_attr_t; +/* +* SEE ALSO +* uint8_t, ib_port_cap_t, ib_link_states_t +*****/ + +/****s* Access Layer/ib_ca_attr_t +* NAME +* ib_ca_attr_t +* +* DESCRIPTION +* Information about a channel adapter. +* +* SYNOPSIS +*/ +typedef struct _ib_ca_attr { + ib_net64_t ca_guid; + uint32_t vend_id; + uint16_t dev_id; + uint16_t revision; + uint64_t fw_ver; + /* + * Total size of the ca attributes in bytes + */ + uint32_t size; + uint32_t max_qps; + uint32_t max_wrs; + uint32_t max_sges; + uint32_t max_rd_sges; + uint32_t max_cqs; + uint32_t max_cqes; + uint32_t max_pds; + uint32_t init_regions; + uint64_t init_region_size; + uint32_t init_windows; + uint32_t max_addr_handles; + uint32_t max_partitions; + ib_atomic_t atomicity; + uint8_t max_qp_resp_res; + uint8_t max_eec_resp_res; + uint8_t max_resp_res; + uint8_t max_qp_init_depth; + uint8_t max_eec_init_depth; + uint32_t max_eecs; + uint32_t max_rdds; + uint32_t max_ipv6_qps; + uint32_t max_ether_qps; + uint32_t max_mcast_grps; + uint32_t max_mcast_qps; + uint32_t max_qps_per_mcast_grp; + uint32_t max_fmr; + uint32_t max_map_per_fmr; + /* + * local_ack_delay: + * Specifies the maximum time interval between the local CA receiving + * a message and the transmission of the associated ACK 
or NAK. + * + * timeout = 4.096 microseconds * 2^local_ack_delay + */ + uint8_t local_ack_delay; + boolean_t bad_pkey_ctr_support; + boolean_t bad_qkey_ctr_support; + boolean_t raw_mcast_support; + boolean_t apm_support; + boolean_t av_port_check; + boolean_t change_primary_port; + boolean_t modify_wr_depth; + boolean_t current_qp_state_support; + boolean_t shutdown_port_capability; + boolean_t init_type_support; + boolean_t port_active_event_support; + boolean_t system_image_guid_support; + boolean_t hw_agents; + ib_net64_t system_image_guid; + uint32_t num_page_sizes; + uint8_t num_ports; + uint32_t *p_page_size; + ib_port_attr_t *p_port_attr; +} ib_ca_attr_t; +/* +* FIELDS +* ca_guid +* GUID for this adapter. +* +* vend_id +* IEEE vendor ID for this adapter +* +* dev_id +* Device ID of this adapter. (typically from PCI device ID) +* +* revision +* Revision ID of this adapter +* +* fw_ver +* Device Firmware version. +* +* size +* Total size in bytes for the HCA attributes. This size includes total +* size required for all the variable members of the structure. If a +* vendor requires to pass vendor specific fields beyond this structure, +* the HCA vendor can choose to report a larger size. If a vendor is +* reporting extended vendor specific features, they should also provide +* appropriate access functions to aid with the required interpretation. +* +* max_qps +* Maximum number of QP's supported by this HCA. +* +* max_wrs +* Maximum number of work requests supported by this HCA. +* +* max_sges +* Maximum number of scatter gather elements supported per work request. +* +* max_rd_sges +* Maximum number of scatter gather elements supported for READ work +* requests for a Reliable Datagram QP. This value must be zero if RD +* service is not supported. +* +* max_cqs +* Maximum number of Completion Queues supported. +* +* max_cqes +* Maximum number of CQ elements supported per CQ. +* +* max_pds +* Maximum number of protection domains supported. 
+*
+*	init_regions
+*		Initial number of memory regions supported. These are only informative
+*		values. HCA vendors can extend and grow these limits on demand.
+*
+*	init_region_size
+*		Initial limit on the size of the registered memory region.
+*
+*	init_windows
+*		Initial number of window entries supported.
+*
+*	max_addr_handles
+*		Maximum number of address handles supported.
+*
+*	max_partitions
+*		Maximum number of partitions supported.
+*
+*	atomicity
+*		Indicates level of atomic operations supported by this HCA.
+*
+*	max_qp_resp_res
+*	max_eec_resp_res
+*		Maximum limit on number of responder resources for incoming RDMA
+*		operations, on QPs and EEC's respectively.
+*
+*	max_resp_res
+*		Maximum number of responder resources per HCA, with this HCA used as
+*		the target.
+*
+*	max_qp_init_depth
+*	max_eec_init_depth
+*		Maximum initiator depth per QP or EEC for initiating RDMA reads and
+*		atomic operations.
+*
+*	max_eecs
+*		Maximum number of EEC's supported by the HCA.
+*
+*	max_rdds
+*		Maximum number of Reliable datagram domains supported.
+*
+*	max_ipv6_qps
+*	max_ether_qps
+*		Maximum number of IPV6 and raw ether QP's supported by this HCA.
+*
+*	max_mcast_grps
+*		Maximum number of multicast groups supported.
+*
+*	max_mcast_qps
+*		Maximum number of QP's that can support multicast operations.
+*
+*	max_qps_per_mcast_grp
+*		Maximum number of multicast QP's per multicast group.
+*
+*	local_ack_delay
+*		Specifies the maximum time interval between the local CA receiving
+*		a message and the transmission of the associated ACK or NAK.
+*		timeout = 4.096 microseconds * 2^local_ack_delay
+*
+*	bad_pkey_ctr_support
+*	bad_qkey_ctr_support
+*		Indicates support for the bad pkey and qkey counters.
+*
+*	raw_mcast_support
+*		Indicates support for raw packet multicast.
+*
+*	apm_support
+*		Indicates support for Automatic Path Migration.
+*
+*	av_port_check
+*		Indicates ability to check port number in address handles.
+* +* change_primary_port +* Indicates ability to change primary port for a QP or EEC during a +* SQD->RTS transition. +* +* modify_wr_depth +* Indicates ability to modify QP depth during a modify QP operation. +* Check the verb specification for permitted states. +* +* current_qp_state_support +* Indicates ability of the HCA to support the current QP state modifier +* during a modify QP operation. +* +* shutdown_port_capability +* Shutdown port capability support indicator. +* +* init_type_support +* Indicates init_type_reply and ability to set init_type is supported. +* +* port_active_event_support +* Port active event support indicator. +* +* system_image_guid_support +* System image GUID support indicator. +* +* hw_agents +* Indicates SMA is implemented in HW. +* +* system_image_guid +* Optional system image GUID. This field is valid only if the +* system_image_guid_support flag is set. +* +* num_page_sizes +* Indicates support for different page sizes supported by the HCA. +* The variable size array can be obtained from p_page_size. +* +* num_ports +* Number of physical ports supported on this HCA. +* +* p_page_size +* Array holding different page size supported. +* +* p_port_attr +* Array holding port attributes. +* +* NOTES +* This structure contains the attributes of a channel adapter. Users must +* call ib_copy_ca_attr to copy the contents of this structure to a new +* memory region. +* +* SEE ALSO +* ib_port_attr_t, ib_atomic_t, ib_copy_ca_attr +*****/ + +/****f* Access layer/ib_copy_ca_attr +* NAME +* ib_copy_ca_attr +* +* DESCRIPTION +* Copies CA attributes. +* +* SYNOPSIS +*/ +ib_ca_attr_t *ib_copy_ca_attr(IN ib_ca_attr_t * const p_dest, + IN const ib_ca_attr_t * const p_src); +/* +* PARAMETERS +* p_dest +* Pointer to the buffer that is the destination of the copy. +* +* p_src +* Pointer to the CA attributes to copy. +* +* RETURN VALUE +* Pointer to the copied CA attributes. 
+* +* NOTES +* The buffer pointed to by the p_dest parameter must be at least the size +* specified in the size field of the buffer pointed to by p_src. +* +* SEE ALSO +* ib_ca_attr_t, ib_dup_ca_attr, ib_free_ca_attr +*****/ + +/****s* Access Layer/ib_av_attr_t +* NAME +* ib_av_attr_t +* +* DESCRIPTION +* IBA address vector. +* +* SYNOPSIS +*/ +typedef struct _ib_av_attr { + uint8_t port_num; + uint8_t sl; + ib_net16_t dlid; + boolean_t grh_valid; + ib_grh_t grh; + uint8_t static_rate; + uint8_t path_bits; + struct _av_conn { + uint8_t path_mtu; + uint8_t local_ack_timeout; + uint8_t seq_err_retry_cnt; + uint8_t rnr_retry_cnt; + } conn; +} ib_av_attr_t; +/* +* SEE ALSO +* ib_gid_t +*****/ + +/****d* Access Layer/ib_qp_type_t +* NAME +* ib_qp_type_t +* +* DESCRIPTION +* Indicates the type of queue pair being created. +* +* SYNOPSIS +*/ +typedef enum _ib_qp_type { + IB_QPT_RELIABLE_CONN = 0, /* Matches CM REQ transport type */ + IB_QPT_UNRELIABLE_CONN = 1, /* Matches CM REQ transport type */ + IB_QPT_RELIABLE_DGRM = 2, /* Matches CM REQ transport type */ + IB_QPT_UNRELIABLE_DGRM, + IB_QPT_QP0, + IB_QPT_QP1, + IB_QPT_RAW_IPV6, + IB_QPT_RAW_ETHER, + IB_QPT_MAD, /* InfiniBand Access Layer */ + IB_QPT_QP0_ALIAS, /* InfiniBand Access Layer */ + IB_QPT_QP1_ALIAS /* InfiniBand Access Layer */ +} ib_qp_type_t; +/* +* VALUES +* IB_QPT_RELIABLE_CONN +* Reliable, connected queue pair. +* +* IB_QPT_UNRELIABLE_CONN +* Unreliable, connected queue pair. +* +* IB_QPT_RELIABLE_DGRM +* Reliable, datagram queue pair. +* +* IB_QPT_UNRELIABLE_DGRM +* Unreliable, datagram queue pair. +* +* IB_QPT_QP0 +* Queue pair 0. +* +* IB_QPT_QP1 +* Queue pair 1. +* +* IB_QPT_RAW_DGRM +* Raw datagram queue pair. +* +* IB_QPT_RAW_IPV6 +* Raw IP version 6 queue pair. +* +* IB_QPT_RAW_ETHER +* Raw Ethernet queue pair. +* +* IB_QPT_MAD +* Unreliable, datagram queue pair that will send and receive management +* datagrams with assistance from the access layer. 
+* +* IB_QPT_QP0_ALIAS +* Alias to queue pair 0. Aliased QPs can only be created on an aliased +* protection domain. +* +* IB_QPT_QP1_ALIAS +* Alias to queue pair 1. Aliased QPs can only be created on an aliased +* protection domain. +*****/ + +/****d* Access Layer/ib_access_t +* NAME +* ib_access_t +* +* DESCRIPTION +* Indicates the type of access is permitted on resources such as QPs, +* memory regions and memory windows. +* +* SYNOPSIS +*/ +typedef uint32_t ib_access_t; +#define IB_AC_RDMA_READ 0x00000001 +#define IB_AC_RDMA_WRITE 0x00000002 +#define IB_AC_ATOMIC 0x00000004 +#define IB_AC_LOCAL_WRITE 0x00000008 +#define IB_AC_MW_BIND 0x00000010 +/* +* NOTES +* Users may combine access rights using a bit-wise or operation to specify +* additional access. For example: IB_AC_RDMA_READ | IB_AC_RDMA_WRITE grants +* RDMA read and write access. +*****/ + +/****d* Access Layer/ib_qp_state_t +* NAME +* ib_qp_state_t +* +* DESCRIPTION +* Indicates or sets the state of a queue pair. The current state of a queue +* pair is returned through the ib_qp_query call and set via the +* ib_qp_modify call. +* +* SYNOPSIS +*/ +typedef uint32_t ib_qp_state_t; +#define IB_QPS_RESET 0x00000001 +#define IB_QPS_INIT 0x00000002 +#define IB_QPS_RTR 0x00000004 +#define IB_QPS_RTS 0x00000008 +#define IB_QPS_SQD 0x00000010 +#define IB_QPS_SQD_DRAINING 0x00000030 +#define IB_QPS_SQD_DRAINED 0x00000050 +#define IB_QPS_SQERR 0x00000080 +#define IB_QPS_ERROR 0x00000100 +#define IB_QPS_TIME_WAIT 0xDEAD0000 /* InfiniBand Access Layer */ +/*****/ + +/****d* Access Layer/ib_apm_state_t +* NAME +* ib_apm_state_t +* +* DESCRIPTION +* The current automatic path migration state of a queue pair +* +* SYNOPSIS +*/ +typedef enum _ib_apm_state { + IB_APM_MIGRATED = 1, + IB_APM_REARM, + IB_APM_ARMED +} ib_apm_state_t; +/*****/ + +/****s* Access Layer/ib_qp_create_t +* NAME +* ib_qp_create_t +* +* DESCRIPTION +* Attributes used to initialize a queue pair at creation time. 
+* +* SYNOPSIS +*/ +typedef struct _ib_qp_create { + ib_qp_type_t qp_type; + ib_rdd_handle_t h_rdd; + uint32_t sq_depth; + uint32_t rq_depth; + uint32_t sq_sge; + uint32_t rq_sge; + ib_cq_handle_t h_sq_cq; + ib_cq_handle_t h_rq_cq; + boolean_t sq_signaled; +} ib_qp_create_t; +/* +* FIELDS +* type +* Specifies the type of queue pair to create. +* +* h_rdd +* A handle to a reliable datagram domain to associate with the queue +* pair. This field is ignored if the queue pair is not a reliable +* datagram type queue pair. +* +* sq_depth +* Indicates the requested maximum number of work requests that may be +* outstanding on the queue pair's send queue. This value must be less +* than or equal to the maximum reported by the channel adapter associated +* with the queue pair. +* +* rq_depth +* Indicates the requested maximum number of work requests that may be +* outstanding on the queue pair's receive queue. This value must be less +* than or equal to the maximum reported by the channel adapter associated +* with the queue pair. +* +* sq_sge +* Indicates the maximum number scatter-gather elements that may be +* given in a send work request. This value must be less +* than or equal to the maximum reported by the channel adapter associated +* with the queue pair. +* +* rq_sge +* Indicates the maximum number scatter-gather elements that may be +* given in a receive work request. This value must be less +* than or equal to the maximum reported by the channel adapter associated +* with the queue pair. +* +* h_sq_cq +* A handle to the completion queue that will be used to report send work +* request completions. This handle must be NULL if the type is +* IB_QPT_MAD, IB_QPT_QP0_ALIAS, or IB_QPT_QP1_ALIAS. +* +* h_rq_cq +* A handle to the completion queue that will be used to report receive +* work request completions. This handle must be NULL if the type is +* IB_QPT_MAD, IB_QPT_QP0_ALIAS, or IB_QPT_QP1_ALIAS. 
+* +* sq_signaled +* A flag that is used to indicate whether the queue pair will signal +* an event upon completion of a send work request. If set to +* TRUE, send work requests will always generate a completion +* event. If set to FALSE, a completion event will only be +* generated if the send_opt field of the send work request has the +* IB_SEND_OPT_SIGNALED flag set. +* +* SEE ALSO +* ib_qp_type_t, ib_qp_attr_t +*****/ + +/****s* Access Layer/ib_qp_attr_t +* NAME +* ib_qp_attr_t +* +* DESCRIPTION +* Queue pair attributes returned through ib_query_qp. +* +* SYNOPSIS +*/ +typedef struct _ib_qp_attr { + ib_pd_handle_t h_pd; + ib_qp_type_t qp_type; + ib_access_t access_ctrl; + uint16_t pkey_index; + uint32_t sq_depth; + uint32_t rq_depth; + uint32_t sq_sge; + uint32_t rq_sge; + uint8_t init_depth; + uint8_t resp_res; + ib_cq_handle_t h_sq_cq; + ib_cq_handle_t h_rq_cq; + ib_rdd_handle_t h_rdd; + boolean_t sq_signaled; + ib_qp_state_t state; + ib_net32_t num; + ib_net32_t dest_num; + ib_net32_t qkey; + ib_net32_t sq_psn; + ib_net32_t rq_psn; + uint8_t primary_port; + uint8_t alternate_port; + ib_av_attr_t primary_av; + ib_av_attr_t alternate_av; + ib_apm_state_t apm_state; +} ib_qp_attr_t; +/* +* FIELDS +* h_pd +* This is a handle to a protection domain associated with the queue +* pair, or NULL if the queue pair is type IB_QPT_RELIABLE_DGRM. +* +* NOTES +* Other fields are defined by the InfiniBand specification. +* +* SEE ALSO +* ib_qp_type_t, ib_access_t, ib_qp_state_t, ib_av_attr_t, ib_apm_state_t +*****/ + +/****d* Access Layer/ib_qp_opts_t +* NAME +* ib_qp_opts_t +* +* DESCRIPTION +* Optional fields supplied in the modify QP operation.
+* +* SYNOPSIS +*/ +typedef uint32_t ib_qp_opts_t; +#define IB_MOD_QP_ALTERNATE_AV 0x00000001 +#define IB_MOD_QP_PKEY 0x00000002 +#define IB_MOD_QP_APM_STATE 0x00000004 +#define IB_MOD_QP_PRIMARY_AV 0x00000008 +#define IB_MOD_QP_RNR_NAK_TIMEOUT 0x00000010 +#define IB_MOD_QP_RESP_RES 0x00000020 +#define IB_MOD_QP_INIT_DEPTH 0x00000040 +#define IB_MOD_QP_PRIMARY_PORT 0x00000080 +#define IB_MOD_QP_ACCESS_CTRL 0x00000100 +#define IB_MOD_QP_QKEY 0x00000200 +#define IB_MOD_QP_SQ_DEPTH 0x00000400 +#define IB_MOD_QP_RQ_DEPTH 0x00000800 +#define IB_MOD_QP_CURRENT_STATE 0x00001000 +#define IB_MOD_QP_RETRY_CNT 0x00002000 +#define IB_MOD_QP_LOCAL_ACK_TIMEOUT 0x00004000 +#define IB_MOD_QP_RNR_RETRY_CNT 0x00008000 +/* +* SEE ALSO +* ib_qp_mod_t +*****/ + +/****s* Access Layer/ib_qp_mod_t +* NAME +* ib_qp_mod_t +* +* DESCRIPTION +* Information needed to change the state of a queue pair through the +* ib_modify_qp call. +* +* SYNOPSIS +*/ +typedef struct _ib_qp_mod { + ib_qp_state_t req_state; + union _qp_state { + struct _qp_reset { + /* + * Time, in milliseconds, that the QP needs to spend in + * the time wait state before being reused.
+ */ + uint32_t timewait; + } reset; + struct _qp_init { + ib_qp_opts_t opts; + uint8_t primary_port; + ib_net32_t qkey; + uint16_t pkey_index; + ib_access_t access_ctrl; + } init; + struct _qp_rtr { + ib_net32_t rq_psn; + ib_net32_t dest_qp; + ib_av_attr_t primary_av; + uint8_t resp_res; + ib_qp_opts_t opts; + ib_av_attr_t alternate_av; + ib_net32_t qkey; + uint16_t pkey_index; + ib_access_t access_ctrl; + uint32_t sq_depth; + uint32_t rq_depth; + uint8_t rnr_nak_timeout; + } rtr; + struct _qp_rts { + ib_net32_t sq_psn; + uint8_t retry_cnt; + uint8_t rnr_retry_cnt; + uint8_t rnr_nak_timeout; + uint8_t local_ack_timeout; + uint8_t init_depth; + ib_qp_opts_t opts; + ib_qp_state_t current_state; + ib_net32_t qkey; + ib_access_t access_ctrl; + uint8_t resp_res; + ib_av_attr_t primary_av; + ib_av_attr_t alternate_av; + uint32_t sq_depth; + uint32_t rq_depth; + ib_apm_state_t apm_state; + uint8_t primary_port; + uint16_t pkey_index; + } rts; + struct _qp_sqd { + boolean_t sqd_event; + } sqd; + } state; +} ib_qp_mod_t; +/* +* SEE ALSO +* ib_qp_state_t, ib_qp_opts_t, ib_access_t, ib_av_attr_t, ib_apm_state_t +*****/ + +/****s* Access Layer/ib_eec_attr_t +* NAME +* ib_eec_attr_t +* +* DESCRIPTION +* Information about an end-to-end context. +* +* SYNOPSIS +*/ +typedef struct _ib_eec_attr { + ib_qp_state_t state; + ib_rdd_handle_t h_rdd; + ib_net32_t local_eecn; + ib_net32_t sq_psn; + ib_net32_t rq_psn; + uint8_t primary_port; + uint16_t pkey_index; + uint32_t resp_res; + ib_net32_t remote_eecn; + uint32_t init_depth; + uint32_t dest_num; // ??? What is this? + ib_av_attr_t primary_av; + ib_av_attr_t alternate_av; + ib_apm_state_t apm_state; +} ib_eec_attr_t; +/* +* SEE ALSO +* ib_qp_state_t, ib_av_attr_t, ib_apm_state_t +*****/ + +/****d* Access Layer/ib_eec_opts_t +* NAME +* ib_eec_opts_t +* +* DESCRIPTION +* Optional fields supplied in the modify EEC operation.
+* +* SYNOPSIS +*/ +typedef uint32_t ib_eec_opts_t; +#define IB_MOD_EEC_ALTERNATE_AV 0x00000001 +#define IB_MOD_EEC_PKEY 0x00000002 +#define IB_MOD_EEC_APM_STATE 0x00000004 +#define IB_MOD_EEC_PRIMARY_AV 0x00000008 +#define IB_MOD_EEC_RNR 0x00000010 +#define IB_MOD_EEC_RESP_RES 0x00000020 +#define IB_MOD_EEC_OUTSTANDING 0x00000040 +#define IB_MOD_EEC_PRIMARY_PORT 0x00000080 +/* +* NOTES +* +* +*****/ + +/****s* Access Layer/ib_eec_mod_t +* NAME +* ib_eec_mod_t +* +* DESCRIPTION +* Information needed to change the state of an end-to-end context through +* the ib_modify_eec function. +* +* SYNOPSIS +*/ +typedef struct _ib_eec_mod { + ib_qp_state_t req_state; + union _eec_state { + struct _eec_init { + uint8_t primary_port; + uint16_t pkey_index; + } init; + struct _eec_rtr { + ib_net32_t rq_psn; + ib_net32_t remote_eecn; + ib_av_attr_t primary_av; + uint8_t resp_res; + ib_eec_opts_t opts; + ib_av_attr_t alternate_av; + uint16_t pkey_index; + } rtr; + struct _eec_rts { + ib_net32_t sq_psn; + uint8_t retry_cnt; + uint8_t rnr_retry_cnt; + uint8_t local_ack_timeout; + uint8_t init_depth; + ib_eec_opts_t opts; + ib_av_attr_t alternate_av; + ib_apm_state_t apm_state; + ib_av_attr_t primary_av; + uint16_t pkey_index; + uint8_t primary_port; + } rts; + struct _eec_sqd { + boolean_t sqd_event; + } sqd; + } state; +} ib_eec_mod_t; +/* +* SEE ALSO +* ib_qp_state_t, ib_eec_opts_t, ib_av_attr_t, ib_apm_state_t +*****/ + +/****d* Access Layer/ib_wr_type_t +* NAME +* ib_wr_type_t +* +* DESCRIPTION +* Identifies the type of work request posted to a queue pair. +* +* SYNOPSIS +*/ +typedef enum _ib_wr_type_t { + WR_SEND = 1, + WR_RDMA_WRITE, + WR_RDMA_READ, + WR_COMPARE_SWAP, + WR_FETCH_ADD +} ib_wr_type_t; +/*****/ + +/****s* Access Layer/ib_local_ds_t +* NAME +* ib_local_ds_t +* +* DESCRIPTION +* Local data segment information referenced by send and receive work +* requests. This is used to specify local data buffers used as part of a +* work request.
+* +* SYNOPSIS +*/ +typedef struct _ib_local_ds { + void *vaddr; + uint32_t length; + uint32_t lkey; +} ib_local_ds_t; +/*****/ + +/****d* Access Layer/ib_send_opt_t +* NAME +* ib_send_opt_t +* +* DESCRIPTION +* Optional flags used when posting send work requests. These flags +* indicate specific processing for the send operation. +* +* SYNOPSIS +*/ +typedef uint32_t ib_send_opt_t; +#define IB_SEND_OPT_IMMEDIATE 0x00000001 +#define IB_SEND_OPT_FENCE 0x00000002 +#define IB_SEND_OPT_SIGNALED 0x00000004 +#define IB_SEND_OPT_SOLICITED 0x00000008 +#define IB_SEND_OPT_INLINE 0x00000010 +#define IB_SEND_OPT_LOCAL 0x00000020 +#define IB_SEND_OPT_VEND_MASK 0xFFFF0000 +/* +* VALUES +* The following flags determine the behavior of a work request when +* posted to the send side. +* +* IB_SEND_OPT_IMMEDIATE +* Send immediate data with the given request. +* +* IB_SEND_OPT_FENCE +* The operation is fenced. Complete all pending send operations +* before processing this request. +* +* IB_SEND_OPT_SIGNALED +* If the queue pair is configured for signaled completion, then +* generate a completion queue entry when this request completes. +* +* IB_SEND_OPT_SOLICITED +* Set the solicited bit on the last packet of this request. +* +* IB_SEND_OPT_INLINE +* Indicates that the requested send data should be copied into a VPD +* owned data buffer. This flag permits the user to issue send operations +* without first needing to register the buffer(s) associated with the +* send operation. Verb providers that support this operation may place +* vendor specific restrictions on the size of a send operation that may +* be performed as inline. +* +* +* IB_SEND_OPT_LOCAL +* Indicates that a sent MAD request should be given to the local VPD for +* processing. MADs sent using this option are not placed on the wire. +* This send option is only valid for MAD send operations.
+* +* +* IB_SEND_OPT_VEND_MASK +* This mask indicates bits reserved in the send options that may be used +* by the verbs provider to indicate vendor specific options. Bits set +* in this area of the send options are ignored by the Access Layer, but +* may have specific meaning to the underlying VPD. +* +*****/ + +/****s* Access Layer/ib_send_wr_t +* NAME +* ib_send_wr_t +* +* DESCRIPTION +* Information used to submit a work request to the send queue of a queue +* pair. +* +* SYNOPSIS +*/ +typedef struct _ib_send_wr { + struct _ib_send_wr *p_next; + uint64_t wr_id; + ib_wr_type_t wr_type; + ib_send_opt_t send_opt; + uint32_t num_ds; + ib_local_ds_t *ds_array; + ib_net32_t immediate_data; + union _send_dgrm { + struct _send_ud { + ib_net32_t remote_qp; + ib_net32_t remote_qkey; + ib_av_handle_t h_av; + } ud; + struct _send_rd { + ib_net32_t remote_qp; + ib_net32_t remote_qkey; + ib_net32_t eecn; + } rd; + struct _send_raw_ether { + ib_net16_t dest_lid; + uint8_t path_bits; + uint8_t sl; + uint8_t max_static_rate; + ib_net16_t ether_type; + } raw_ether; + struct _send_raw_ipv6 { + ib_net16_t dest_lid; + uint8_t path_bits; + uint8_t sl; + uint8_t max_static_rate; + } raw_ipv6; + } dgrm; + struct _send_remote_ops { + uint64_t vaddr; + uint32_t rkey; + ib_net64_t atomic1; + ib_net64_t atomic2; + } remote_ops; +} ib_send_wr_t; +/* +* FIELDS +* p_next +* A pointer used to chain work requests together. This permits multiple +* work requests to be posted to a queue pair through a single function +* call. This value is set to NULL to mark the end of the chain. +* +* wr_id +* A 64-bit work request identifier that is returned to the consumer +* as part of the work completion. +* +* wr_type +* The type of work request being submitted to the send queue. +* +* send_opt +* Optional send control parameters. +* +* num_ds +* Number of local data segments specified by this work request. +* +* ds_array +* A reference to an array of local data segments used by the send +* operation.
+* +* immediate_data +* 32-bit field sent as part of a message send or RDMA write operation. +* This field is only valid if the send_opt flag IB_SEND_OPT_IMMEDIATE +* has been set. +* +* dgrm.ud.remote_qp +* Identifies the destination queue pair of an unreliable datagram send +* operation. +* +* dgrm.ud.remote_qkey +* The qkey for the destination queue pair. +* +* dgrm.ud.h_av +* An address vector that specifies the path information used to route +* the outbound datagram to the destination queue pair. +* +* dgrm.rd.remote_qp +* Identifies the destination queue pair of a reliable datagram send +* operation. +* +* dgrm.rd.remote_qkey +* The qkey for the destination queue pair. +* +* dgrm.rd.eecn +* The local end-to-end context number to use with the reliable datagram +* send operation. +* +* dgrm.raw_ether.dest_lid +* The destination LID that will receive this raw ether send. +* +* dgrm.raw_ether.path_bits +* path bits... +* +* dgrm.raw_ether.sl +* service level... +* +* dgrm.raw_ether.max_static_rate +* static rate... +* +* dgrm.raw_ether.ether_type +* ether type... +* +* dgrm.raw_ipv6.dest_lid +* The destination LID that will receive this raw IPv6 send. +* +* dgrm.raw_ipv6.path_bits +* path bits... +* +* dgrm.raw_ipv6.sl +* service level... +* +* dgrm.raw_ipv6.max_static_rate +* static rate... +* +* remote_ops.vaddr +* The registered virtual memory address of the remote memory to access +* with an RDMA or atomic operation. +* +* remote_ops.rkey +* The rkey associated with the specified remote vaddr. This data must +* be presented exactly as obtained from the remote node. No swapping +* of data must be performed. +* +* remote_ops.atomic1 +* The first operand for an atomic operation. +* +* remote_ops.atomic2 +* The second operand for an atomic operation. +* +* NOTES +* The format of data sent over the fabric is user-defined and is considered +* opaque to the access layer. The sole exceptions to this are MADs posted +* to a MAD QP service.
MADs are expected to match the format defined by +* the InfiniBand specification and must be in network-byte order when posted +* to the MAD QP service. +* +* SEE ALSO +* ib_wr_type_t, ib_local_ds_t, ib_send_opt_t +*****/ + +/****s* Access Layer/ib_recv_wr_t +* NAME +* ib_recv_wr_t +* +* DESCRIPTION +* Information used to submit a work request to the receive queue of a queue +* pair. +* +* SYNOPSIS +*/ +typedef struct _ib_recv_wr { + struct _ib_recv_wr *p_next; + uint64_t wr_id; + uint32_t num_ds; + ib_local_ds_t *ds_array; +} ib_recv_wr_t; +/* +* FIELDS +* p_next +* A pointer used to chain work requests together. This permits multiple +* work requests to be posted to a queue pair through a single function +* call. This value is set to NULL to mark the end of the chain. +* +* wr_id +* A 64-bit work request identifier that is returned to the consumer +* as part of the work completion. +* +* num_ds +* Number of local data segments specified by this work request. +* +* ds_array +* A reference to an array of local data segments used by the receive +* operation. +* +* SEE ALSO +* ib_local_ds_t +*****/ + +/****s* Access Layer/ib_bind_wr_t +* NAME +* ib_bind_wr_t +* +* DESCRIPTION +* Information used to submit a memory window bind work request to the send +* queue of a queue pair. +* +* SYNOPSIS +*/ +typedef struct _ib_bind_wr { + uint64_t wr_id; + ib_send_opt_t send_opt; + ib_mr_handle_t h_mr; + ib_access_t access_ctrl; + uint32_t current_rkey; + ib_local_ds_t local_ds; +} ib_bind_wr_t; +/* +* FIELDS +* wr_id +* A 64-bit work request identifier that is returned to the consumer +* as part of the work completion. +* +* send_opt +* Optional send control parameters. +* +* h_mr +* Handle to the memory region to which this window is being bound. +* +* access_ctrl +* Access rights for this memory window. +* +* current_rkey +* The current rkey assigned to this window for remote access. +* +* local_ds +* A reference to a local data segment used by the bind operation.
+* +* SEE ALSO +* ib_send_opt_t, ib_access_t, ib_local_ds_t +*****/ + +/****d* Access Layer/ib_wc_status_t +* NAME +* ib_wc_status_t +* +* DESCRIPTION +* Indicates the status of a completed work request. These values are +* returned to the user when retrieving completions. Note that success is +* identified as IB_WCS_SUCCESS, which is always zero. +* +* SYNOPSIS +*/ +typedef enum _ib_wc_status_t { + IB_WCS_SUCCESS, + IB_WCS_LOCAL_LEN_ERR, + IB_WCS_LOCAL_OP_ERR, + IB_WCS_LOCAL_EEC_OP_ERR, + IB_WCS_LOCAL_PROTECTION_ERR, + IB_WCS_WR_FLUSHED_ERR, + IB_WCS_MEM_WINDOW_BIND_ERR, + IB_WCS_REM_ACCESS_ERR, + IB_WCS_REM_OP_ERR, + IB_WCS_RNR_RETRY_ERR, + IB_WCS_TIMEOUT_RETRY_ERR, + IB_WCS_REM_INVALID_REQ_ERR, + IB_WCS_REM_INVALID_RD_REQ_ERR, + IB_WCS_INVALID_EECN, + IB_WCS_INVALID_EEC_STATE, + IB_WCS_UNMATCHED_RESPONSE, /* InfiniBand Access Layer */ + IB_WCS_CANCELED, /* InfiniBand Access Layer */ + IB_WCS_UNKNOWN /* Must be last. */ +} ib_wc_status_t; +/* +* VALUES +* IB_WCS_SUCCESS +* Work request completed successfully. +* +* IB_WCS_MAD (NOTE: not declared in the enumeration above) +* The completed work request was associated with a management datagram +* that requires post processing. The MAD will be returned to the user +* through a callback once all post processing has completed. +* +* IB_WCS_LOCAL_LEN_ERR +* Generated for a work request posted to the send queue when the +* total of the data segment lengths exceeds the message length of the +* channel. Generated for a work request posted to the receive queue when +* the total of the data segment lengths is too small for a +* valid incoming message. +* +* IB_WCS_LOCAL_OP_ERR +* An internal QP consistency error was generated while processing this +* work request. This may indicate that the QP was in an incorrect state +* for the requested operation. +* +* IB_WCS_LOCAL_EEC_OP_ERR +* An internal EEC consistency error was generated while processing +* this work request. This may indicate that the EEC was in an incorrect +* state for the requested operation.
+* +* IB_WCS_LOCAL_PROTECTION_ERR +* The data segments of the locally posted work request did not refer to +* a valid memory region. The memory may not have been properly +* registered for the requested operation. +* +* IB_WCS_WR_FLUSHED_ERR +* The work request was flushed from the QP before being completed. +* +* IB_WCS_MEM_WINDOW_BIND_ERR +* A memory window bind operation failed due to insufficient access +* rights. +* +* IB_WCS_REM_ACCESS_ERR +* A protection error was detected at the remote node for an RDMA or atomic +* operation. +* +* IB_WCS_REM_OP_ERR +* The operation could not be successfully completed at the remote node. +* This may indicate that the remote QP was in an invalid state or +* contained an invalid work request. +* +* IB_WCS_RNR_RETRY_ERR +* The RNR retry count was exceeded while trying to send this message. +* +* IB_WCS_TIMEOUT_RETRY_ERR +* The local transport timeout counter expired while trying to send this +* message. +* +* IB_WCS_REM_INVALID_REQ_ERR +* The remote node detected an invalid message on the channel. This error +* is usually a result of one of the following: +* - The operation was not supported on receive queue. +* - There were insufficient buffers to receive a new RDMA request. +* - There were insufficient buffers to receive a new atomic operation. +* - An RDMA request was larger than 2^31 bytes. +* +* IB_WCS_REM_INVALID_RD_REQ_ERR +* Responder detected an invalid RD message. This may be the result of an +* invalid qkey or an RDD mismatch. +* +* IB_WCS_INVALID_EECN +* An invalid EE context number was detected. +* +* IB_WCS_INVALID_EEC_STATE +* The EEC was in an invalid state for the specified request. +* +* IB_WCS_UNMATCHED_RESPONSE +* A response MAD was received for which there was no matching send. The +* send operation may have been canceled by the user or may have timed +* out. +* +* IB_WCS_CANCELED +* The completed work request was canceled by the user.
+*****/ + +OSM_EXPORT const char *ib_wc_status_str[]; + +/****f* IBA Base: Types/ib_get_wc_status_str +* NAME +* ib_get_wc_status_str +* +* DESCRIPTION +* Returns a string for the specified work completion status. +* +* SYNOPSIS +*/ +static inline const char *OSM_API +ib_get_wc_status_str(IN ib_wc_status_t wc_status) +{ + if ((unsigned int)wc_status > (unsigned int)IB_WCS_UNKNOWN) /* unsigned compare also clamps negative values */ + wc_status = IB_WCS_UNKNOWN; + return (ib_wc_status_str[wc_status]); +} + +/* +* PARAMETERS +* wc_status +* [in] work completion status value +* +* RETURN VALUES +* Pointer to the work completion status description string. Out-of-range values yield the IB_WCS_UNKNOWN string. +* +* NOTES +* +* SEE ALSO +*********/ + +/****d* Access Layer/ib_wc_type_t +* NAME +* ib_wc_type_t +* +* DESCRIPTION +* Indicates the type of work completion. +* +* SYNOPSIS +*/ +typedef enum _ib_wc_type_t { + IB_WC_SEND, + IB_WC_RDMA_WRITE, + IB_WC_RECV, + IB_WC_RDMA_READ, + IB_WC_MW_BIND, + IB_WC_FETCH_ADD, + IB_WC_COMPARE_SWAP, + IB_WC_RECV_RDMA_WRITE +} ib_wc_type_t; +/*****/ + +/****d* Access Layer/ib_recv_opt_t +* NAME +* ib_recv_opt_t +* +* DESCRIPTION +* Indicates optional fields valid in a receive work completion. +* +* SYNOPSIS +*/ +typedef uint32_t ib_recv_opt_t; +#define IB_RECV_OPT_IMMEDIATE 0x00000001 +#define IB_RECV_OPT_FORWARD 0x00000002 +#define IB_RECV_OPT_GRH_VALID 0x00000004 +#define IB_RECV_OPT_VEND_MASK 0xFFFF0000 +/* +* VALUES +* IB_RECV_OPT_IMMEDIATE +* Indicates that immediate data is valid for this work completion. +* +* IB_RECV_OPT_FORWARD +* Indicates that the received trap should be forwarded to the SM. +* +* IB_RECV_OPT_GRH_VALID +* Indicates presence of the global route header. When set, the +* first 40 bytes received are the GRH. +* +* IB_RECV_OPT_VEND_MASK +* This mask indicates bits reserved in the receive options that may be +* used by the verbs provider to indicate vendor specific options. Bits +* set in this area of the receive options are ignored by the Access Layer, +* but may have specific meaning to the underlying VPD.
+*****/ + +/****s* Access Layer/ib_wc_t +* NAME +* ib_wc_t +* +* DESCRIPTION +* Work completion information. +* +* SYNOPSIS +*/ +typedef struct _ib_wc { + struct _ib_wc *p_next; + uint64_t wr_id; + ib_wc_type_t wc_type; + uint32_t length; + ib_wc_status_t status; + uint64_t vendor_specific; + union _wc_recv { + struct _wc_conn { + ib_recv_opt_t recv_opt; + ib_net32_t immediate_data; + } conn; + struct _wc_ud { + ib_recv_opt_t recv_opt; + ib_net32_t immediate_data; + ib_net32_t remote_qp; + uint16_t pkey_index; + ib_net16_t remote_lid; + uint8_t remote_sl; + uint8_t path_bits; + } ud; + struct _wc_rd { + ib_net32_t remote_eecn; + ib_net32_t remote_qp; + ib_net16_t remote_lid; + uint8_t remote_sl; + uint32_t free_cnt; + + } rd; + struct _wc_raw_ipv6 { + ib_net16_t remote_lid; + uint8_t remote_sl; + uint8_t path_bits; + } raw_ipv6; + struct _wc_raw_ether { + ib_net16_t remote_lid; + uint8_t remote_sl; + uint8_t path_bits; + ib_net16_t ether_type; + } raw_ether; + } recv; +} ib_wc_t; +/* +* FIELDS +* p_next +* A pointer used to chain work completions. This permits multiple +* work completions to be retrieved from a completion queue through a +* single function call. This value is set to NULL to mark the end of +* the chain. +* +* wr_id +* The 64-bit work request identifier that was specified when posting the +* work request. +* +* wc_type +* Indicates the type of work completion. +* +* +* length +* The total length of the data sent or received with the work request. +* +* status +* The result of the work request. +* +* vendor_specific +* HCA vendor specific information returned as part of the completion. +* +* recv.conn.recv_opt +* Indicates optional fields valid as part of a work request that +* completed on a connected (reliable or unreliable) queue pair. +* +* recv.conn.immediate_data +* 32-bit field received as part of an inbound message on a connected +* queue pair. This field is only valid if the recv_opt flag +* IB_RECV_OPT_IMMEDIATE has been set.
+* +* recv.ud.recv_opt +* Indicates optional fields valid as part of a work request that +* completed on an unreliable datagram queue pair. +* +* recv.ud.immediate_data +* 32-bit field received as part of an inbound message on an unreliable +* datagram queue pair. This field is only valid if the recv_opt flag +* IB_RECV_OPT_IMMEDIATE has been set. +* +* recv.ud.remote_qp +* Identifies the source queue pair of a received datagram. +* +* recv.ud.pkey_index +* The pkey index for the source queue pair. This is valid only for +* GSI type QP's. +* +* recv.ud.remote_lid +* The source LID of the received datagram. +* +* recv.ud.remote_sl +* The service level used by the source of the received datagram. +* +* recv.ud.path_bits +* path bits... +* +* recv.rd.remote_eecn +* The remote end-to-end context number that sent the received message. +* +* recv.rd.remote_qp +* Identifies the source queue pair of a received message. +* +* recv.rd.remote_lid +* The source LID of the received message. +* +* recv.rd.remote_sl +* The service level used by the source of the received message. +* +* recv.rd.free_cnt +* The number of available entries in the completion queue. Reliable +* datagrams may complete out of order, so this field may be used to +* determine the number of additional completions that may occur. +* +* recv.raw_ipv6.remote_lid +* The source LID of the received message. +* +* recv.raw_ipv6.remote_sl +* The service level used by the source of the received message. +* +* recv.raw_ipv6.path_bits +* path bits... +* +* recv.raw_ether.remote_lid +* The source LID of the received message. +* +* recv.raw_ether.remote_sl +* The service level used by the source of the received message. +* +* recv.raw_ether.path_bits +* path bits... +* +* recv.raw_ether.ether_type +* ether type... +* NOTES +* When the work request completes with error, the only values that the +* consumer can depend on are the wr_id field, and the status of the +* operation.
+* +* If the consumer is using the same CQ for completions from more than +* one type of QP (e.g. Reliable Connected, Datagram, etc.), then the consumer +* must have additional information to decide what fields of the union are +* valid. +* SEE ALSO +* ib_wc_type_t, ib_qp_type_t, ib_wc_status_t, ib_recv_opt_t +*****/ + +/****s* Access Layer/ib_mr_create_t +* NAME +* ib_mr_create_t +* +* DESCRIPTION +* Information required to create a registered memory region. +* +* SYNOPSIS +*/ +typedef struct _ib_mr_create { + void *vaddr; + uint64_t length; + ib_access_t access_ctrl; +} ib_mr_create_t; +/* +* FIELDS +* vaddr +* Starting virtual address of the region being registered. +* +* length +* Length of the buffer to register. +* +* access_ctrl +* Access rights of the registered region. +* +* SEE ALSO +* ib_access_t +*****/ + +/****s* Access Layer/ib_phys_create_t +* NAME +* ib_phys_create_t +* +* DESCRIPTION +* Information required to create a physical memory region. +* +* SYNOPSIS +*/ +typedef struct _ib_phys_create { + uint64_t length; + uint32_t num_bufs; + uint64_t *buf_array; + uint32_t buf_offset; + uint32_t page_size; + ib_access_t access_ctrl; +} ib_phys_create_t; +/* +* length +* The length of the memory region in bytes. +* +* num_bufs +* Number of buffers listed in the specified buffer array. +* +* buf_array +* An array of physical buffers to be registered as a single memory +* region. +* +* buf_offset +* The offset into the first physical page of the specified memory +* region to start the virtual address. +* +* page_size +* The physical page size of the memory being registered. +* +* access_ctrl +* Access rights of the registered region. +* +* SEE ALSO +* ib_access_t +*****/ + +/****s* Access Layer/ib_mr_attr_t +* NAME +* ib_mr_attr_t +* +* DESCRIPTION +* Attributes of a registered memory region.
+* +* SYNOPSIS +*/ +typedef struct _ib_mr_attr { + ib_pd_handle_t h_pd; + void *local_lb; + void *local_ub; + void *remote_lb; + void *remote_ub; + ib_access_t access_ctrl; + uint32_t lkey; + uint32_t rkey; +} ib_mr_attr_t; +/* +* FIELDS +* h_pd +* Handle to the protection domain for this memory region. +* +* local_lb +* The virtual address of the lower bound of protection for local +* memory access. +* +* local_ub +* The virtual address of the upper bound of protection for local +* memory access. +* +* remote_lb +* The virtual address of the lower bound of protection for remote +* memory access. +* +* remote_ub +* The virtual address of the upper bound of protection for remote +* memory access. +* +* access_ctrl +* Access rights for the specified memory region. +* +* lkey +* The lkey associated with this memory region. +* +* rkey +* The rkey associated with this memory region. +* +* NOTES +* The remote_lb, remote_ub, and rkey are only valid if remote memory access +* is enabled for this memory region. +* +* SEE ALSO +* ib_access_t +*****/ + +/****d* Access Layer/ib_ca_mod_t +* NAME +* ib_ca_mod_t -- Modify port attributes and error counters +* +* DESCRIPTION +* Specifies modifications to the port attributes of a channel adapter.
+* +* SYNOPSIS +*/ +typedef uint32_t ib_ca_mod_t; +#define IB_CA_MOD_IS_CM_SUPPORTED 0x00000001 +#define IB_CA_MOD_IS_SNMP_SUPPORTED 0x00000002 +#define IB_CA_MOD_IS_DEV_MGMT_SUPPORTED 0x00000004 +#define IB_CA_MOD_IS_VEND_SUPPORTED 0x00000008 +#define IB_CA_MOD_IS_SM 0x00000010 +#define IB_CA_MOD_IS_SM_DISABLED 0x00000020 +#define IB_CA_MOD_QKEY_CTR 0x00000040 +#define IB_CA_MOD_PKEY_CTR 0x00000080 +#define IB_CA_MOD_IS_NOTICE_SUPPORTED 0x00000100 +#define IB_CA_MOD_IS_TRAP_SUPPORTED 0x00000200 +#define IB_CA_MOD_IS_APM_SUPPORTED 0x00000400 +#define IB_CA_MOD_IS_SLMAP_SUPPORTED 0x00000800 +#define IB_CA_MOD_IS_PKEY_NVRAM_SUPPORTED 0x00001000 +#define IB_CA_MOD_IS_MKEY_NVRAM_SUPPORTED 0x00002000 +#define IB_CA_MOD_IS_SYSGUID_SUPPORTED 0x00004000 +#define IB_CA_MOD_IS_DR_NOTICE_SUPPORTED 0x00008000 +#define IB_CA_MOD_IS_BOOT_MGMT_SUPPORTED 0x00010000 +#define IB_CA_MOD_IS_CAPM_NOTICE_SUPPORTED 0x00020000 +#define IB_CA_MOD_IS_REINIT_SUPORTED 0x00040000 +#define IB_CA_MOD_IS_LEDINFO_SUPPORTED 0x00080000 +#define IB_CA_MOD_SHUTDOWN_PORT 0x00100000 +#define IB_CA_MOD_INIT_TYPE_VALUE 0x00200000 +#define IB_CA_MOD_SYSTEM_IMAGE_GUID 0x00400000 +/* +* VALUES +* IB_CA_MOD_IS_CM_SUPPORTED +* Indicates if there is a communication manager accessible through +* the port. +* +* IB_CA_MOD_IS_SNMP_SUPPORTED +* Indicates if there is an SNMP agent accessible through the port. +* +* IB_CA_MOD_IS_DEV_MGMT_SUPPORTED +* Indicates if there is a device management agent accessible +* through the port. +* +* IB_CA_MOD_IS_VEND_SUPPORTED +* Indicates if there is a vendor supported agent accessible +* through the port. +* +* IB_CA_MOD_IS_SM +* Indicates if there is a subnet manager accessible through +* the port. +* +* IB_CA_MOD_IS_SM_DISABLED +* Indicates if the port has been disabled for configuration by the +* subnet manager. +* +* IB_CA_MOD_QKEY_CTR +* Used to reset the qkey violation counter associated with the +* port.
+* +* IB_CA_MOD_PKEY_CTR +* Used to reset the pkey violation counter associated with the +* port. +* +* IB_CA_MOD_IS_NOTICE_SUPPORTED +* Indicates that this CA supports ability to generate Notices for +* Port State changes. (only applicable to switches) +* +* IB_CA_MOD_IS_TRAP_SUPPORTED +* Indicates that this management port supports ability to generate +* trap messages. (only applicable to switches) +* +* IB_CA_MOD_IS_APM_SUPPORTED +* Indicates that this port is capable of performing Automatic +* Path Migration. +* +* IB_CA_MOD_IS_SLMAP_SUPPORTED +* Indicates this port supports SLMAP capability. +* +* IB_CA_MOD_IS_PKEY_NVRAM_SUPPORTED +* Indicates that PKEY is supported in NVRAM +* +* IB_CA_MOD_IS_MKEY_NVRAM_SUPPORTED +* Indicates that MKEY is supported in NVRAM +* +* IB_CA_MOD_IS_SYSGUID_SUPPORTED +* Indicates System Image GUID support. +* +* IB_CA_MOD_IS_DR_NOTICE_SUPPORTED +* Indicates support for generating Direct Routed Notices +* +* IB_CA_MOD_IS_BOOT_MGMT_SUPPORTED +* Indicates support for Boot Management +* +* IB_CA_MOD_IS_CAPM_NOTICE_SUPPORTED +* Indicates capability to generate notices for changes to CAPMASK +* +* IB_CA_MOD_IS_REINIT_SUPORTED +* Indicates type of node init supported. Refer to Chapter 14 for +* Initialization actions. +* +* IB_CA_MOD_IS_LEDINFO_SUPPORTED +* Indicates support for LED info. +* +* IB_CA_MOD_SHUTDOWN_PORT +* Used to modify the port active indicator. +* +* IB_CA_MOD_INIT_TYPE_VALUE +* Used to modify the init_type value for the port. +* +* IB_CA_MOD_SYSTEM_IMAGE_GUID +* Used to modify the system image GUID for the port. +*****/ + +/****d* Access Layer/ib_mr_mod_t +* NAME +* ib_mr_mod_t +* +* DESCRIPTION +* Mask used to specify which attributes of a registered memory region are +* being modified.
+* +* SYNOPSIS +*/ +typedef uint32_t ib_mr_mod_t; +#define IB_MR_MOD_ADDR 0x00000001 +#define IB_MR_MOD_PD 0x00000002 +#define IB_MR_MOD_ACCESS 0x00000004 +/* +* VALUES +* IB_MR_MOD_ADDR +* The address of the memory region is being modified. +* +* IB_MR_MOD_PD +* The protection domain associated with the memory region is being +* modified. +* +* IB_MR_MOD_ACCESS +* The access rights of the memory region are being modified. +*****/ + +/****d* IBA Base: Constants/IB_SMINFO_ATTR_MOD_HANDOVER +* NAME +* IB_SMINFO_ATTR_MOD_HANDOVER +* +* DESCRIPTION +* Encoded attribute modifier value used on SubnSet(SMInfo) SMPs. +* +* SOURCE +*/ +#define IB_SMINFO_ATTR_MOD_HANDOVER (CL_HTON32(0x000001)) +/**********/ + +/****d* IBA Base: Constants/IB_SMINFO_ATTR_MOD_ACKNOWLEDGE +* NAME +* IB_SMINFO_ATTR_MOD_ACKNOWLEDGE +* +* DESCRIPTION +* Encoded attribute modifier value used on SubnSet(SMInfo) SMPs. +* +* SOURCE +*/ +#define IB_SMINFO_ATTR_MOD_ACKNOWLEDGE (CL_HTON32(0x000002)) +/**********/ + +/****d* IBA Base: Constants/IB_SMINFO_ATTR_MOD_DISABLE +* NAME +* IB_SMINFO_ATTR_MOD_DISABLE +* +* DESCRIPTION +* Encoded attribute modifier value used on SubnSet(SMInfo) SMPs. +* +* SOURCE +*/ +#define IB_SMINFO_ATTR_MOD_DISABLE (CL_HTON32(0x000003)) +/**********/ + +/****d* IBA Base: Constants/IB_SMINFO_ATTR_MOD_STANDBY +* NAME +* IB_SMINFO_ATTR_MOD_STANDBY +* +* DESCRIPTION +* Encoded attribute modifier value used on SubnSet(SMInfo) SMPs. +* +* SOURCE +*/ +#define IB_SMINFO_ATTR_MOD_STANDBY (CL_HTON32(0x000004)) +/**********/ + +/****d* IBA Base: Constants/IB_SMINFO_ATTR_MOD_DISCOVER +* NAME +* IB_SMINFO_ATTR_MOD_DISCOVER +* +* DESCRIPTION +* Encoded attribute modifier value used on SubnSet(SMInfo) SMPs. +* +* SOURCE +*/ +#define IB_SMINFO_ATTR_MOD_DISCOVER (CL_HTON32(0x000005)) +/**********/ + +/****s* Access Layer/ib_ci_op_t +* NAME +* ib_ci_op_t +* +* DESCRIPTION +* A structure used for vendor specific CA interface communication.
+* +* SYNOPSIS +*/ +typedef struct _ib_ci_op { + IN uint32_t command; + IN OUT void *p_buf OPTIONAL; + IN uint32_t buf_size; + IN OUT uint32_t num_bytes_ret; + IN OUT int32_t status; +} ib_ci_op_t; +/* +* FIELDS +* command +* A command code that is understood by the verbs provider. +* +* p_buf +* A reference to a buffer containing vendor specific data. The verbs +* provider must not access pointers in the p_buf between user-mode and +* kernel-mode. Any pointers embedded in the p_buf are invalidated by +* the user-mode/kernel-mode transition. +* +* buf_size +* The size of the buffer in bytes. +* +* num_bytes_ret +* The size in bytes of the vendor specific data returned in the buffer. +* This field is set by the verbs provider. The verbs provider should +* verify that the buffer size is sufficient to hold the data being +* returned. +* +* status +* The completion status from the verbs provider. This field should be +* initialized to indicate an error to allow detection and cleanup in +* case a communication error occurs between user-mode and kernel-mode. +* +* NOTES +* This structure is provided to allow the exchange of vendor specific +* data between the originator and the verbs provider. Users of this +* structure are expected to know the format of data in the p_buf based +* on the structure command field or the usage context. +*****/ + +/****s* IBA Base: Types/ib_cc_mad_t +* NAME +* ib_cc_mad_t +* +* DESCRIPTION +* IBA defined Congestion Control MAD format. (A10.4.1) +* +* SYNOPSIS +*/ +#define IB_CC_LOG_DATA_SIZE 32 +#define IB_CC_MGT_DATA_SIZE 192 +#define IB_CC_MAD_HDR_SIZE (sizeof(ib_sa_mad_t) - IB_CC_LOG_DATA_SIZE \ + - IB_CC_MGT_DATA_SIZE) + +typedef struct _ib_cc_mad { + ib_mad_t header; + ib_net64_t cc_key; + uint8_t log_data[IB_CC_LOG_DATA_SIZE]; + uint8_t mgt_data[IB_CC_MGT_DATA_SIZE]; +} ib_cc_mad_t; +/* +* FIELDS +* header +* Common MAD header. +* +* cc_key +* CC_Key of the Congestion Control MAD. 
+* +* log_data +* Congestion Control log data of the CC MAD. +* +* mgt_data +* Congestion Control management data of the CC MAD. +* +* SEE ALSO +* ib_mad_t +*********/ + +/****f* IBA Base: Types/ib_cc_mad_get_cc_key +* NAME +* ib_cc_mad_get_cc_key +* +* DESCRIPTION +* Gets a CC_Key of the CC MAD. +* +* SYNOPSIS +*/ +static inline ib_net64_t OSM_API +ib_cc_mad_get_cc_key(IN const ib_cc_mad_t * const p_cc_mad) +{ + return p_cc_mad->cc_key; +} +/* +* PARAMETERS +* p_cc_mad +* [in] Pointer to the CC MAD packet. +* +* RETURN VALUES +* CC_Key of the provided CC MAD packet. +* +* NOTES +* +* SEE ALSO +* ib_cc_mad_t +*********/ + +/****f* IBA Base: Types/ib_cc_mad_get_log_data_ptr +* NAME +* ib_cc_mad_get_log_data_ptr +* +* DESCRIPTION +* Gets a pointer to the CC MAD's log data area. +* +* SYNOPSIS +*/ +static inline void * OSM_API +ib_cc_mad_get_log_data_ptr(IN const ib_cc_mad_t * const p_cc_mad) +{ + return ((void *)p_cc_mad->log_data); +} +/* +* PARAMETERS +* p_cc_mad +* [in] Pointer to the CC MAD packet. +* +* RETURN VALUES +* Pointer to CC MAD log data area. +* +* NOTES +* +* SEE ALSO +* ib_cc_mad_t +*********/ + +/****f* IBA Base: Types/ib_cc_mad_get_mgt_data_ptr +* NAME +* ib_cc_mad_get_mgt_data_ptr +* +* DESCRIPTION +* Gets a pointer to the CC MAD's management data area. +* +* SYNOPSIS +*/ +static inline void * OSM_API +ib_cc_mad_get_mgt_data_ptr(IN const ib_cc_mad_t * const p_cc_mad) +{ + return ((void *)p_cc_mad->mgt_data); +} +/* +* PARAMETERS +* p_cc_mad +* [in] Pointer to the CC MAD packet. +* +* RETURN VALUES +* Pointer to CC MAD management data area. +* +* NOTES +* +* SEE ALSO +* ib_cc_mad_t +*********/ + +/****s* IBA Base: Types/ib_cong_info_t +* NAME +* ib_cong_info_t +* +* DESCRIPTION +* IBA defined CongestionInfo attribute (A10.4.3.3) +* +* SYNOPSIS +*/ +typedef struct _ib_cong_info { + uint8_t cong_info; + uint8_t resv; + uint8_t ctrl_table_cap; +} ib_cong_info_t; +/* +* FIELDS +* cong_info +* Congestion control capabilities of the node. 
+* +* ctrl_table_cap +* Number of 64 entry blocks in the CongestionControlTable. +* +* SEE ALSO +* ib_cc_mad_t +*********/ + +/****s* IBA Base: Types/ib_cong_key_info_t +* NAME +* ib_cong_key_info_t +* +* DESCRIPTION +* IBA defined CongestionKeyInfo attribute (A10.4.3.4) +* +* SYNOPSIS +*/ +#include +typedef struct _ib_cong_key_info { + ib_net64_t cc_key; + ib_net16_t protect_bit; + ib_net16_t lease_period; + ib_net16_t violations; +} PACK_SUFFIX ib_cong_key_info_t; +#include +/* +* FIELDS +* cc_key +* 8-byte CC Key. +* +* protect_bit +* Bit 0 is a CC Key Protect Bit, other 15 bits are reserved. +* +* lease_period +* How long the CC Key protect bit is to remain non-zero. +* +* violations +* Number of received MADs that violated CC Key. +* +* SEE ALSO +* ib_cc_mad_t +*********/ + +/****s* IBA Base: Types/ib_cong_log_event_sw_t +* NAME +* ib_cong_log_event_sw_t +* +* DESCRIPTION +* IBA defined CongestionLogEvent (SW) entry (A10.4.3.5) +* +* SYNOPSIS +*/ +typedef struct _ib_cong_log_event_sw { + ib_net16_t slid; + ib_net16_t dlid; + ib_net32_t sl; + ib_net32_t time_stamp; +} ib_cong_log_event_sw_t; +/* +* FIELDS +* slid +* Source LID of congestion event. +* +* dlid +* Destination LID of congestion event. +* +* sl +* 4 bits - SL of congestion event. +* rest of the bits are reserved. +* +* time_stamp +* Timestamp of congestion event. +* +* SEE ALSO +* ib_cc_mad_t, ib_cong_log_t +*********/ + +/****s* IBA Base: Types/ib_cong_log_event_ca_t +* NAME +* ib_cong_log_event_ca_t +* +* DESCRIPTION +* IBA defined CongestionLogEvent (CA) entry (A10.4.3.5) +* +* SYNOPSIS +*/ +typedef struct _ib_cong_log_event_ca { + ib_net32_t local_qp_resv0; + ib_net32_t remote_qp_sl_service_type; + ib_net16_t remote_lid; + ib_net16_t resv1; + ib_net32_t time_stamp; +} ib_cong_log_event_ca_t; +/* +* FIELDS +* local_qp_resv0 +* bits [31:8] local QP that reached CN threshold. +* bits [7:0] reserved. +* +* remote_qp_sl_service_type +* bits [31:8] remote QP that is connected to local QP. 
+* bits [7:4] SL of the local QP. +* bits [3:0] Service Type of the local QP. +* +* remote_lid +* LID of the remote port that is connected to local QP. +* +* time_stamp +* Timestamp when threshold reached. +* +* SEE ALSO +* ib_cc_mad_t, ib_cong_log_t +*********/ + +/****s* IBA Base: Types/ib_cong_log_t +* NAME +* ib_cong_log_t +* +* DESCRIPTION +* IBA defined CongestionLog attribute (A10.4.3.5) +* +* SYNOPSIS +*/ +#include +typedef struct _ib_cong_log { + uint8_t log_type; + union _log_details + { + struct _log_sw { + uint8_t cong_flags; + ib_net16_t event_counter; + ib_net32_t time_stamp; + uint8_t port_map[32]; + ib_cong_log_event_sw_t entry_list[15]; + } PACK_SUFFIX log_sw; + + struct _log_ca { + uint8_t cong_flags; + ib_net16_t event_counter; + ib_net16_t event_map; + ib_net16_t resv; + ib_net32_t time_stamp; + ib_cong_log_event_ca_t log_event[13]; + } PACK_SUFFIX log_ca; + + } log_details; +} PACK_SUFFIX ib_cong_log_t; +#include +/* +* FIELDS +* +* log_type +* Log type: 0x1 is for Switch, 0x2 is for CA +* +* log_{sw,ca}.cong_flags +* Congestion Flags. +* +* log_{sw,ca}.event_counter +* Number of events since log last sent. +* +* log_{sw,ca}.time_stamp +* Timestamp when log sent. +* +* log_sw.port_map +* If a bit set to 1, then the corresponding port +* has marked packets with a FECN. +* bits 0 and 255 - reserved +* bits [254..1] - ports [254..1]. +* +* log_sw.entry_list +* Array of 15 most recent congestion log events. +* +* log_ca.event_map +* Array of 16 bits, one for each SL. +* +* log_ca.log_event +* Array of 13 most recent congestion log events. 
+* +* SEE ALSO +* ib_cc_mad_t, ib_cong_log_event_sw_t, ib_cong_log_event_ca_t +*********/ + +/****s* IBA Base: Types/ib_sw_cong_setting_t +* NAME +* ib_sw_cong_setting_t +* +* DESCRIPTION +* IBA defined SwitchCongestionSetting attribute (A10.4.3.6) +* +* SYNOPSIS +*/ +#define IB_CC_PORT_MASK_DATA_SIZE 32 +typedef struct _ib_sw_cong_setting { + ib_net32_t control_map; + uint8_t victim_mask[IB_CC_PORT_MASK_DATA_SIZE]; + uint8_t credit_mask[IB_CC_PORT_MASK_DATA_SIZE]; + uint8_t threshold_resv; + uint8_t packet_size; + ib_net16_t cs_threshold_resv; + ib_net16_t cs_return_delay; + ib_net16_t marking_rate; +} ib_sw_cong_setting_t; +/* +* FIELDS +* +* control_map +* Indicates which components of this attribute are valid +* +* victim_mask +* If the bit set to 1, then the port corresponding to +* that bit shall mark packets that encounter congestion +* with a FECN, whether they are the source or victim +* of congestion. (See A10.2.1.1.1) +* bit 0: port 0 (enhanced port 0 only) +* bits [254..1]: ports [254..1] +* bit 255: reserved +* +* credit_mask +* If the bit set to 1, then the port corresponding +* to that bit shall apply Credit Starvation. +* bit 0: port 0 (enhanced port 0 only) +* bits [254..1]: ports [254..1] +* bit 255: reserved +* +* threshold_resv +* bits [7..4] Indicates how aggressive cong. marking should be +* bits [3..0] Reserved +* +* packet_size +* Any packet less than this size won't be marked with FECN +* +* cs_threshold_resv +* bits [15..12] How aggressive Credit Starvation should be +* bits [11..0] Reserved +* +* cs_return_delay +* Value that controls credit return rate. +* +* marking_rate +* The value that provides the mean number of packets +* between marking eligible packets with FECN. 
+* +* SEE ALSO +* ib_cc_mad_t +*********/ + +/****s* IBA Base: Types/ib_sw_port_cong_setting_element_t +* NAME +* ib_sw_port_cong_setting_element_t +* +* DESCRIPTION +* IBA defined SwitchPortCongestionSettingElement (A10.4.3.7) +* +* SYNOPSIS +*/ +typedef struct _ib_sw_port_cong_setting_element { + uint8_t valid_ctrl_type_res_threshold; + uint8_t packet_size; + ib_net16_t cong_param; +} ib_sw_port_cong_setting_element_t; +/* +* FIELDS +* +* valid_ctrl_type_res_threshold +* bit 7: "Valid" +* when set to 1, indicates this switch +* port congestion setting element is valid. +* bit 6: "Control Type" +* Indicates which type of attribute is being set: +* 0b = Congestion Control parameters are being set. +* 1b = Credit Starvation parameters are being set. +* bits [5..4]: reserved +* bits [3..0]: "Threshold" +* When Control Type is 0, contains the congestion +* threshold value (Threshold) for this port. +* When Control Type is 1, contains the credit +* starvation threshold (CS_Threshold) value for +* this port. +* +* packet_size +* When Control Type is 0, this field contains the minimum +* size of packets that may be marked with a FECN. +* When Control Type is 1, this field is reserved. +* +* cong_param +* When Control Type is 0, this field contains the port +* marking_rate. +* When Control Type is 1, this field is reserved. +* +* SEE ALSO +* ib_cc_mad_t, ib_sw_port_cong_setting_t +*********/ + +/****d* IBA Base: Types/ib_sw_port_cong_setting_block_t +* NAME +* ib_sw_port_cong_setting_block_t +* +* DESCRIPTION +* Defines the SwitchPortCongestionSetting Block (A10.4.3.7). 
+* +* SOURCE +*/ +#define IB_CC_SW_PORT_SETTING_ELEMENTS 32 +typedef ib_sw_port_cong_setting_element_t ib_sw_port_cong_setting_block_t[IB_CC_SW_PORT_SETTING_ELEMENTS]; +/**********/ + +/****s* IBA Base: Types/ib_sw_port_cong_setting_t +* NAME +* ib_sw_port_cong_setting_t +* +* DESCRIPTION +* IBA defined SwitchPortCongestionSetting attribute (A10.4.3.7) +* +* SYNOPSIS +*/ + +typedef struct _ib_sw_port_cong_setting { + ib_sw_port_cong_setting_block_t block; +} ib_sw_port_cong_setting_t; +/* +* FIELDS +* +* block +* SwitchPortCongestionSetting block. +* +* SEE ALSO +* ib_cc_mad_t, ib_sw_port_cong_setting_element_t +*********/ + +/****s* IBA Base: Types/ib_ca_cong_entry_t +* NAME +* ib_ca_cong_entry_t +* +* DESCRIPTION +* IBA defined CACongestionEntry (A10.4.3.8) +* +* SYNOPSIS +*/ +typedef struct _ib_ca_cong_entry { + ib_net16_t ccti_timer; + uint8_t ccti_increase; + uint8_t trigger_threshold; + uint8_t ccti_min; + uint8_t resv0; + ib_net16_t resv1; +} ib_ca_cong_entry_t; +/* +* FIELDS +* +* ccti_timer +* When the timer expires it will be reset to its specified +* value, and 1 will be decremented from the CCTI. +* +* ccti_increase +* The number to be added to the table Index (CCTI) +* on the receipt of a BECN. +* +* trigger_threshold +* When the CCTI is equal to this value, an event +* is logged in the CAs cyclic event log. +* +* ccti_min +* The minimum value permitted for the CCTI. 
+* +* SEE ALSO +* ib_cc_mad_t +*********/ + +/****s* IBA Base: Types/ib_ca_cong_setting_t +* NAME +* ib_ca_cong_setting_t +* +* DESCRIPTION +* IBA defined CACongestionSetting attribute (A10.4.3.8) +* +* SYNOPSIS +*/ +#define IB_CA_CONG_ENTRY_DATA_SIZE 16 +typedef struct _ib_ca_cong_setting { + ib_net16_t port_control; + ib_net16_t control_map; + ib_ca_cong_entry_t entry_list[IB_CA_CONG_ENTRY_DATA_SIZE]; +} ib_ca_cong_setting_t; +/* +* FIELDS +* +* port_control +* Congestion attributes for this port: +* bit0 = 0: QP based CC +* bit0 = 1: SL/Port based CC +* All other bits are reserved +* +* control_map +* An array of sixteen bits, one for each SL. Each bit indicates +* whether or not the corresponding entry is to be modified. +* +* entry_list +* List of 16 CACongestionEntries, one per SL. +* +* SEE ALSO +* ib_cc_mad_t +*********/ + +/****s* IBA Base: Types/ib_cc_tbl_entry_t +* NAME +* ib_cc_tbl_entry_t +* +* DESCRIPTION +* IBA defined CongestionControlTableEntry (A10.4.3.9) +* +* SYNOPSIS +*/ +typedef struct _ib_cc_tbl_entry { + ib_net16_t shift_multiplier; +} ib_cc_tbl_entry_t; +/* +* FIELDS +* +* shift_multiplier +* bits [15..14] - CCT Shift +* used when calculating the injection rate delay +* bits [13..0] - CCT Multiplier +* used when calculating the injection rate delay +* +* SEE ALSO +* ib_cc_mad_t +*********/ + +/****s* IBA Base: Types/ib_cc_tbl_t +* NAME +* ib_cc_tbl_t +* +* DESCRIPTION +* IBA defined CongestionControlTable attribute (A10.4.3.9) +* +* SYNOPSIS +*/ +#define IB_CC_TBL_ENTRY_LIST_MAX 64 +typedef struct _ib_cc_tbl { + ib_net16_t ccti_limit; + ib_net16_t resv; + ib_cc_tbl_entry_t entry_list[IB_CC_TBL_ENTRY_LIST_MAX]; +} ib_cc_tbl_t; +/* +* FIELDS +* +* ccti_limit +* Maximum valid CCTI for this table. +* +* entry_list +* List of up to 64 CongestionControlTableEntries. 
+* +* SEE ALSO +* ib_cc_mad_t +*********/ + +/****s* IBA Base: Types/ib_time_stamp_t +* NAME +* ib_time_stamp_t +* +* DESCRIPTION +* IBA defined TimeStamp attribute (A10.4.3.10) +* +* SOURCE +*/ +typedef struct _ib_time_stamp { + ib_net32_t value; +} ib_time_stamp_t; +/* +* FIELDS +* +* value +* Free running clock that provides relative time info +* for a device. Time is kept in 1.024 usec units. +* +* SEE ALSO +* ib_cc_mad_t +*********/ + +END_C_DECLS +#else /* ndef __WIN__ */ +#include +#endif +#endif /* __IB_TYPES_H__ */ diff --git a/include/opensm/osm_base.h b/include/opensm/osm_base.h new file mode 100644 index 0000000..7d0addd --- /dev/null +++ b/include/opensm/osm_base.h @@ -0,0 +1,1053 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Basic OpenSM definitions. + */ + +#ifndef _OSM_BASE_H_ +#define _OSM_BASE_H_ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#ifdef __WIN__ +#include +#endif + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/Constants +* NAME +* Constants +* +* DESCRIPTION +* The following constants are used throughout the OpenSM. +* +* AUTHOR +* Steve King, Intel +* +*********/ +/****d* OpenSM: OSM_DEFAULT_M_KEY +* NAME +* OSM_DEFAULT_M_KEY +* +* DESCRIPTION +* Management key value used by the OpenSM. +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_M_KEY 0 +/********/ +/****d* OpenSM: OSM_DEFAULT_SM_KEY +* NAME +* OSM_DEFAULT_SM_KEY +* +* DESCRIPTION +* Subnet Manager key value used by the OpenSM. +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_SM_KEY CL_HTON64(1) +/********/ +/****d* OpenSM: OSM_DEFAULT_SA_KEY +* NAME +* OSM_DEFAULT_SA_KEY +* +* DESCRIPTION +* Subnet Administration key value. +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_SA_KEY OSM_DEFAULT_SM_KEY +/********/ +/****d* OpenSM: OSM_DEFAULT_LMC +* NAME +* OSM_DEFAULT_LMC +* +* DESCRIPTION +* Default LMC value used by the OpenSM. 
+* +* SYNOPSIS +*/ +#define OSM_DEFAULT_LMC 0 +/********/ +/****d* OpenSM: OSM_DEFAULT_MAX_OP_VLS +* NAME +* OSM_DEFAULT_MAX_OP_VLS +* +* DESCRIPTION +* Default Maximal Operational VLs to be initialized on +* the link ports PortInfo by the OpenSM. +* Default value provides backward compatibility. +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_MAX_OP_VLS 5 +/********/ +/****d* OpenSM: OSM_DEFAULT_SL +* NAME +* OSM_DEFAULT_SL +* +* DESCRIPTION +* Default SL value used by the OpenSM. +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_SL 0 +/********/ +/****d* OpenSM: OSM_DEFAULT_SCATTER_PORTS +* NAME +* OSM_DEFAULT_SCATTER_PORTS +* +* DESCRIPTION +* Default Scatter Ports value used by OpenSM. +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_SCATTER_PORTS 0 +/********/ +/****d* OpenSM: OSM_DEFAULT_SM_PRIORITY +* NAME +* OSM_DEFAULT_SM_PRIORITY +* +* DESCRIPTION +* Default SM priority value used by the OpenSM, +* as defined in the SMInfo attribute. 0 is the lowest priority. +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_SM_PRIORITY 0 +/********/ +/****d* OpenSM: OSM_DEFAULT_TMP_DIR +* NAME +* OSM_DEFAULT_TMP_DIR +* +* DESCRIPTION +* Specifies the default temporary directory for the log file, +* osm-subnet.lst, and other log files. +* +* SYNOPSIS +*/ +#ifdef __WIN__ +#define OSM_DEFAULT_TMP_DIR "%TEMP%\\" +#else +#define OSM_DEFAULT_TMP_DIR "/var/log/" +#endif +/***********/ +/****d* OpenSM: OSM_DEFAULT_CACHE_DIR +* NAME +* OSM_DEFAULT_CACHE_DIR +* +* DESCRIPTION +* Specifies the default cache directory for the db files. 
+* +* SYNOPSIS +*/ +#ifdef __WIN__ +#define OSM_DEFAULT_CACHE_DIR "%TEMP%" +#else +#define OSM_DEFAULT_CACHE_DIR "/var/cache/opensm" +#endif +/***********/ +/****d* OpenSM: OSM_DEFAULT_LOG_FILE +* NAME +* OSM_DEFAULT_LOG_FILE +* +* DESCRIPTION +* Specifies the default log file name +* +* SYNOPSIS +*/ +#ifdef __WIN__ +#define OSM_DEFAULT_LOG_FILE OSM_DEFAULT_TMP_DIR "osm.log" +#else +#define OSM_DEFAULT_LOG_FILE "/var/log/opensm.log" +#endif +/***********/ + +/****d* OpenSM: OSM_DEFAULT_CONFIG_FILE +* NAME +* OSM_DEFAULT_CONFIG_FILE +* +* DESCRIPTION +* Specifies the default OpenSM config file name +* +* SYNOPSIS +*/ +#if defined(HAVE_DEFAULT_OPENSM_CONFIG_FILE) +#define OSM_DEFAULT_CONFIG_FILE HAVE_DEFAULT_OPENSM_CONFIG_FILE +#elif defined (OPENSM_CONFIG_DIR) +#define OSM_DEFAULT_CONFIG_FILE OPENSM_CONFIG_DIR "/opensm.conf" +#else +#define OSM_DEFAULT_CONFIG_FILE "/etc/opensm/opensm.conf" +#endif +/***********/ + +/****d* OpenSM: OSM_DEFAULT_PARTITION_CONFIG_FILE +* NAME +* OSM_DEFAULT_PARTITION_CONFIG_FILE +* +* DESCRIPTION +* Specifies the default partition config file name +* +* SYNOPSIS +*/ +#if defined(HAVE_DEFAULT_PARTITION_CONFIG_FILE) +#define OSM_DEFAULT_PARTITION_CONFIG_FILE HAVE_DEFAULT_PARTITION_CONFIG_FILE +#elif defined(OPENSM_CONFIG_DIR) +#define OSM_DEFAULT_PARTITION_CONFIG_FILE OPENSM_CONFIG_DIR "/partitions.conf" +#else +#define OSM_DEFAULT_PARTITION_CONFIG_FILE "/etc/opensm/partitions.conf" +#endif +/***********/ + +/****d* OpenSM: OSM_DEFAULT_QOS_POLICY_FILE +* NAME +* OSM_DEFAULT_QOS_POLICY_FILE +* +* DESCRIPTION +* Specifies the default QoS policy file name +* +* SYNOPSIS +*/ +#if defined(HAVE_DEFAULT_QOS_POLICY_FILE) +#define OSM_DEFAULT_QOS_POLICY_FILE HAVE_DEFAULT_QOS_POLICY_FILE +#elif defined(OPENSM_CONFIG_DIR) +#define OSM_DEFAULT_QOS_POLICY_FILE OPENSM_CONFIG_DIR "/qos-policy.conf" +#else +#define OSM_DEFAULT_QOS_POLICY_FILE "/etc/opensm/qos-policy.conf" +#endif +/***********/ + +/****d* OpenSM: OSM_DEFAULT_TORUS_CONF_FILE +* NAME +* 
OSM_DEFAULT_TORUS_CONF_FILE +* +* DESCRIPTION +* Specifies the default file name for extra torus-2QoS configuration +* +* SYNOPSIS +*/ +#ifdef __WIN__ +#define OSM_DEFAULT_TORUS_CONF_FILE strcat(GetOsmCachePath(), "osm-torus-2QoS.conf") +#elif defined(OPENSM_CONFIG_DIR) +#define OSM_DEFAULT_TORUS_CONF_FILE OPENSM_CONFIG_DIR "/torus-2QoS.conf" +#else +#define OSM_DEFAULT_TORUS_CONF_FILE "/etc/opensm/torus-2QoS.conf" +#endif /* __WIN__ */ +/***********/ + +/****d* OpenSM: OSM_DEFAULT_PREFIX_ROUTES_FILE +* NAME +* OSM_DEFAULT_PREFIX_ROUTES_FILE +* +* DESCRIPTION +* Specifies the default prefix routes file name +* +* SYNOPSIS +*/ +#if defined(HAVE_DEFAULT_PREFIX_ROUTES_FILE) +#define OSM_DEFAULT_PREFIX_ROUTES_FILE HAVE_DEFAULT_PREFIX_ROUTES_FILE +#elif defined(OPENSM_CONFIG_DIR) +#define OSM_DEFAULT_PREFIX_ROUTES_FILE OPENSM_CONFIG_DIR "/prefix-routes.conf" +#else +#define OSM_DEFAULT_PREFIX_ROUTES_FILE "/etc/opensm/prefix-routes.conf" +#endif +/***********/ + +/****d* OpenSM: OSM_DEFAULT_PER_MOD_LOGGING_CONF_FILE +* NAME +* OSM_DEFAULT_PER_MOD_LOGGING_CONF_FILE +* +* DESCRIPTION +* Specifies the default file name for per module logging configuration +* +* SYNOPSIS +*/ +#ifdef __WIN__ +#define OSM_DEFAULT_PER_MOD_LOGGING_CONF_FILE strcat(GetOsmCachePath(), "per-module-logging.conf") +#elif defined(OPENSM_CONFIG_DIR) +#define OSM_DEFAULT_PER_MOD_LOGGING_CONF_FILE OPENSM_CONFIG_DIR "/per-module-logging.conf" +#else +#define OSM_DEFAULT_PER_MOD_LOGGING_CONF_FILE "/etc/opensm/per-module-logging.conf" +#endif /* __WIN__ */ +/***********/ + +/****d* OpenSM: OSM_DEFAULT_SWEEP_INTERVAL_SECS +* NAME +* OSM_DEFAULT_SWEEP_INTERVAL_SECS +* +* DESCRIPTION +* Specifies the default number of seconds between subnet sweeps. +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_SWEEP_INTERVAL_SECS 10 +/***********/ +/****d* OpenSM: OSM_DEFAULT_TRANS_TIMEOUT_MILLISEC +* NAME +* OSM_DEFAULT_TRANS_TIMEOUT_MILLISEC +* +* DESCRIPTION +* Specifies the default transaction timeout in milliseconds. 
+* +* SYNOPSIS +*/ +#define OSM_DEFAULT_TRANS_TIMEOUT_MILLISEC 200 +/***********/ +/****d* OpenSM: OSM_DEFAULT_LONG_TRANS_TIMEOUT_MILLISEC +* NAME +* OSM_DEFAULT_LONG_TRANS_TIMEOUT_MILLISEC +* +* DESCRIPTION +* Specifies the default "long" transaction timeout in milliseconds. +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_LONG_TRANS_TIMEOUT_MILLISEC 500 +/***********/ +/****d* OpenSM: OSM_DEFAULT_SUBNET_TIMEOUT +* NAME +* OSM_DEFAULT_SUBNET_TIMEOUT +* +* DESCRIPTION +* Specifies the default subnet timeout. +* timeout time = 4us * 2^timeout. +* We use here ~1sec. +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_SUBNET_TIMEOUT 0x12 +/***********/ +/****d* OpenSM: OSM_DEFAULT_SWITCH_PACKET_LIFE +* NAME +* OSM_DEFAULT_SWITCH_PACKET_LIFE +* +* DESCRIPTION +* Specifies the default max life time for a packet on the switch. +* timeout time = 4us * 2^timeout. +* We use here the value of ~1sec +* A Value > 19dec disables this mechanism. +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_SWITCH_PACKET_LIFE 0x12 +/***********/ +/****d* OpenSM: OSM_DEFAULT_HEAD_OF_QUEUE_LIFE +* NAME +* OSM_DEFAULT_HEAD_OF_QUEUE_LIFE +* +* DESCRIPTION +* Sets the time a packet can live in the head of the VL Queue +* We use here the value of ~1sec +* A Value > 19dec disables this mechanism. +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_HEAD_OF_QUEUE_LIFE 0x12 +/***********/ +/****d* OpenSM: OSM_DEFAULT_LEAF_HEAD_OF_QUEUE_LIFE +* NAME +* OSM_DEFAULT_LEAF_HEAD_OF_QUEUE_LIFE +* +* DESCRIPTION +* Sets the time a packet can live in the head of the VL Queue +* of a port that drives a CA port. +* We use here the value of ~256msec +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_LEAF_HEAD_OF_QUEUE_LIFE 0x10 +/***********/ +/****d* OpenSM: OSM_DEFAULT_VL_STALL_COUNT +* NAME +* OSM_DEFAULT_VL_STALL_COUNT +* +* DESCRIPTION +* Sets the number of consecutive head of queue life time drops that +* puts the VL into stalled state. 
In stalled state, the port is supposed +* to drop everything for 8*(head of queue lifetime) +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_VL_STALL_COUNT 0x7 +/***********/ +/****d* OpenSM: OSM_DEFAULT_LEAF_VL_STALL_COUNT +* NAME +* OSM_DEFAULT_LEAF_VL_STALL_COUNT +* +* DESCRIPTION +* Sets the number of consecutive head of queue life time drops that +* puts the VL into stalled state. In stalled state, the port is supposed +* to drop everything for 8*(head of queue lifetime). This value is for +* switch ports driving a CA port. +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_LEAF_VL_STALL_COUNT 0x7 +/***********/ +/****d* OpenSM: OSM_DEFAULT_TRAP_SUPPRESSION_TIMEOUT +* NAME +* OSM_DEFAULT_TRAP_SUPPRESSION_TIMEOUT +* +* DESCRIPTION +* Specifies the default timeout for ignoring same trap. +* timeout time = 5000000us +* We use here ~5sec. +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_TRAP_SUPPRESSION_TIMEOUT 5000000 +#define OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT OSM_DEFAULT_TRAP_SUPPRESSION_TIMEOUT +/***********/ +/****d* OpenSM: OSM_DEFAULT_UNHEALTHY_TIMEOUT +* NAME +* OSM_DEFAULT_UNHEALTHY_TIMEOUT +* +* DESCRIPTION +* Specifies the default timeout for setting port as unhealthy. +* timeout time = 60000000us +* We use here ~60sec. +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_UNHEALTHY_TIMEOUT 60000000 +/***********/ +/****d* OpenSM: OSM_DEFAULT_ERROR_THRESHOLD +* NAME +* OSM_DEFAULT_ERROR_THRESHOLD +* +* DESCRIPTION +* Specifies default link error threshold to be set by SubnSet(PortInfo). +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_ERROR_THRESHOLD 0x08 +/***********/ +/****d* OpenSM: OSM_DEFAULT_SMP_MAX_ON_WIRE +* NAME +* OSM_DEFAULT_SMP_MAX_ON_WIRE +* +* DESCRIPTION +* Specifies the default number of VL15 SMP MADs allowed on +* the wire at any one time. 
+* +* SYNOPSIS +*/ +#define OSM_DEFAULT_SMP_MAX_ON_WIRE 4 +/***********/ +/****d* OpenSM: OSM_SM_DEFAULT_QP0_RCV_SIZE +* NAME +* OSM_SM_DEFAULT_QP0_RCV_SIZE +* +* DESCRIPTION +* Specifies the default size (in MADs) of the QP0 receive queue +* +* SYNOPSIS +*/ +#define OSM_SM_DEFAULT_QP0_RCV_SIZE 256 +/***********/ +/****d* OpenSM: OSM_SM_DEFAULT_QP0_SEND_SIZE +* NAME +* OSM_SM_DEFAULT_QP0_SEND_SIZE +* +* DESCRIPTION +* Specifies the default size (in MADs) of the QP0 send queue +* +* SYNOPSIS +*/ +#define OSM_SM_DEFAULT_QP0_SEND_SIZE 256 +/***********/ +/****d* OpenSM: OSM_SM_DEFAULT_QP1_RCV_SIZE +* NAME +* OSM_SM_DEFAULT_QP1_RCV_SIZE +* +* DESCRIPTION +* Specifies the default size (in MADs) of the QP1 receive queue +* +* SYNOPSIS +*/ +#define OSM_SM_DEFAULT_QP1_RCV_SIZE 256 +/***********/ +/****d* OpenSM: OSM_SM_DEFAULT_QP1_SEND_SIZE +* NAME +* OSM_SM_DEFAULT_QP1_SEND_SIZE +* +* DESCRIPTION +* Specifies the default size (in MADs) of the QP1 send queue +* +* SYNOPSIS +*/ +#define OSM_SM_DEFAULT_QP1_SEND_SIZE 256 +/****d* OpenSM: OSM_PM_DEFAULT_QP1_RCV_SIZE +* NAME +* OSM_PM_DEFAULT_QP1_RCV_SIZE +* +* DESCRIPTION +* Specifies the default size (in MADs) of the QP1 receive queue +* +* SYNOPSIS +*/ +#define OSM_PM_DEFAULT_QP1_RCV_SIZE 256 +/***********/ +/****d* OpenSM: OSM_PM_DEFAULT_QP1_SEND_SIZE +* NAME +* OSM_PM_DEFAULT_QP1_SEND_SIZE +* +* DESCRIPTION +* Specifies the default size (in MADs) of the QP1 send queue +* +* SYNOPSIS +*/ +#define OSM_PM_DEFAULT_QP1_SEND_SIZE 256 +/****d* OpenSM: OSM_SM_DEFAULT_POLLING_TIMEOUT_MILLISECS +* NAME +* OSM_SM_DEFAULT_POLLING_TIMEOUT_MILLISECS +* +* DESCRIPTION +* Specifies the polling timeout (in miliseconds) - the timeout +* between one poll to another. 
+* +* SYNOPSIS +*/ +#define OSM_SM_DEFAULT_POLLING_TIMEOUT_MILLISECS 10000 +/**********/ +/****d* OpenSM: OSM_SM_DEFAULT_POLLING_RETRY_NUMBER +* NAME +* OSM_SM_DEFAULT_POLLING_RETRY_NUMBER +* +* DESCRIPTION +* Specifies the number of polling retries before the SM goes back +* to DISCOVERY stage. So the default total time for handoff is 40 sec. +* +* SYNOPSIS +*/ +#define OSM_SM_DEFAULT_POLLING_RETRY_NUMBER 4 +/**********/ +/****d* OpenSM: MC Member Record Receiver/OSM_DEFAULT_MGRP_MTU +* Name +* OSM_DEFAULT_MGRP_MTU +* +* DESCRIPTION +* Default MTU used for new MGRP creation (2048 bytes) +* Note it includes the MTUSelector which is set to "Greater Than" +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_MGRP_MTU 0x04 +/***********/ +/****d* OpenSM: MC Member Record Receiver/OSM_DEFAULT_MGRP_RATE +* Name +* OSM_DEFAULT_MGRP_RATE +* +* DESCRIPTION +* Default RATE used for new MGRP creation (10Gb/sec) +* Note it includes the RateSelector which is set to "Greater Than" +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_MGRP_RATE 0x03 +/***********/ +/****d* OpenSM: MC Member Record Receiver/OSM_DEFAULT_MGRP_SCOPE +* Name +* OSM_DEFAULT_MGRP_SCOPE +* +* DESCRIPTION +* Default SCOPE used for new MGRP creation (link local) +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_MGRP_SCOPE IB_MC_SCOPE_LINK_LOCAL +/***********/ +/****d* OpenSM: OSM_DEFAULT_QOS_MAX_VLS + * Name + * OSM_DEFAULT_QOS_MAX_VLS + * + * DESCRIPTION + * Default Maximum VLs used by the OpenSM. + * + * SYNOPSIS + */ +#define OSM_DEFAULT_QOS_MAX_VLS 15 +/***********/ +/****d* OpenSM: OSM_DEFAULT_QOS_HIGH_LIMIT + * Name + * OSM_DEFAULT_QOS_HIGH_LIMIT + * + * DESCRIPTION + * Default Limit of High Priority in VL Arbitration used by OpenSM. + * + * SYNOPSIS + */ +#define OSM_DEFAULT_QOS_HIGH_LIMIT 0 +/***********/ +/****d* OpenSM: OSM_DEFAULT_QOS_VLARB_HIGH + * Name + * OSM_DEFAULT_QOS_VLARB_HIGH + * + * DESCRIPTION + * Default High Priority VL Arbitration table used by the OpenSM. 
+ * + * SYNOPSIS + */ +#define OSM_DEFAULT_QOS_VLARB_HIGH "0:4,1:0,2:0,3:0,4:0,5:0,6:0,7:0,8:0,9:0,10:0,11:0,12:0,13:0,14:0" +/***********/ +/****d* OpenSM: OSM_DEFAULT_QOS_VLARB_LOW + * Name + * OSM_DEFAULT_QOS_VLARB_LOW + * + * DESCRIPTION + * Default Low Priority VL Arbitration table used by the OpenSM. + * + * SYNOPSIS + */ +#define OSM_DEFAULT_QOS_VLARB_LOW "0:0,1:4,2:4,3:4,4:4,5:4,6:4,7:4,8:4,9:4,10:4,11:4,12:4,13:4,14:4" +/***********/ +/****d* OpenSM: OSM_DEFAULT_QOS_SL2VL + * Name + * OSM_DEFAULT_QOS_SL2VL + * + * DESCRIPTION + * Default QoS SL2VL Mapping Table used by the OpenSM. + * + * SYNOPSIS + */ +#define OSM_DEFAULT_QOS_SL2VL "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7" +/***********/ +/****d* OpenSM: OSM_NO_PATH +* NAME +* OSM_NO_PATH +* +* DESCRIPTION +* Value indicating there is no path to the given LID. +* +* SYNOPSIS +*/ +#define OSM_NO_PATH 0xFF +/**********/ +/****d* OpenSM: OSM_NODE_DESC_UNKNOWN +* NAME +* OSM_NODE_DESC_UNKNOWN +* +* DESCRIPTION +* Value indicating the Node Description is not set and is "unknown" +* +* SYNOPSIS +*/ +#define OSM_NODE_DESC_UNKNOWN "" +/**********/ +/****d* OpenSM: osm_thread_state_t +* NAME +* osm_thread_state_t +* +* DESCRIPTION +* Enumerates the possible states of worker threads, such +* as the subnet sweeper. 
+* +* SYNOPSIS +*/ +typedef enum _osm_thread_state { + OSM_THREAD_STATE_NONE = 0, + OSM_THREAD_STATE_INIT, + OSM_THREAD_STATE_RUN, + OSM_THREAD_STATE_EXIT +} osm_thread_state_t; +/***********/ + +/* + * OSM_CAP are from IBA 1.2.1 Table 117 and Table 188 + */ + +/****d* OpenSM: OSM_CAP_IS_SUBN_TRAP_SUP +* Name +* OSM_CAP_IS_SUBN_TRAP_SUP +* +* DESCRIPTION +* Management class generates Trap() MADs +* +* SYNOPSIS +*/ +#define OSM_CAP_IS_SUBN_TRAP_SUP (1 << 0) +/***********/ + +/****d* OpenSM: OSM_CAP_IS_SUBN_GET_SET_NOTICE_SUP +* Name +* OSM_CAP_IS_SUBN_GET_SET_NOTICE_SUP +* +* DESCRIPTION +* Management class supports Get/Set(Notice) +* +* SYNOPSIS +*/ +#define OSM_CAP_IS_SUBN_GET_SET_NOTICE_SUP (1 << 1) +/***********/ + +/****d* OpenSM: OSM_CAP_IS_SUBN_OPT_RECS_SUP +* Name +* OSM_CAP_IS_SUBN_OPT_RECS_SUP +* +* DESCRIPTION +* Support all optional attributes except: +* MCMemberRecord, TraceRecord, MultiPathRecord +* +* SYNOPSIS +*/ +#define OSM_CAP_IS_SUBN_OPT_RECS_SUP (1 << 8) +/***********/ + +/****d* OpenSM: OSM_CAP_IS_UD_MCAST_SUP +* Name +* OSM_CAP_IS_UD_MCAST_SUP +* +* DESCRIPTION +* Multicast is supported +* +* SYNOPSIS +*/ +#define OSM_CAP_IS_UD_MCAST_SUP (1 << 9) +/***********/ + +/****d* OpenSM: OSM_CAP_IS_MULTIPATH_SUP +* Name +* OSM_CAP_IS_MULTIPATH_SUP +* +* DESCRIPTION +* MultiPathRecord and TraceRecord are supported +* +* SYNOPSIS +*/ +#define OSM_CAP_IS_MULTIPATH_SUP (1 << 10) +/***********/ + +/****d* OpenSM: OSM_CAP_IS_REINIT_SUP +* Name +* OSM_CAP_IS_REINIT_SUP +* +* DESCRIPTION +* SM/SA supports re-initialization +* +* SYNOPSIS +*/ +#define OSM_CAP_IS_REINIT_SUP (1 << 11) +/***********/ + +/****d* OpenSM: OSM_CAP_IS_PORT_INFO_CAPMASK_MATCH_SUPPORTED +* Name +* OSM_CAP_IS_PORT_INFO_CAPMASK_MATCH_SUPPORTED +* +* DESCRIPTION +* SM/SA supports enhanced SA PortInfoRecord searches per 1.2 Errata: +* ClassPortInfo:CapabilityMask.IsPortInfoCapMaskMatchSupported is 1, +* then the AttributeModifier of the SubnAdmGet() and SubnAdmGetTable() +* methods affects the 
matching behavior on the PortInfo:CapabilityMask +* component. If the high-order bit (bit 31) of the AttributeModifier +* is set to 1, matching on the CapabilityMask component will not be an +* exact bitwise match as described in . Instead, +* matching will only be performed on those bits which are set to 1 in +* the PortInfo:CapabilityMask embedded in the query. +* +* SYNOPSIS +*/ +#define OSM_CAP_IS_PORT_INFO_CAPMASK_MATCH_SUPPORTED (1 << 13) +/***********/ + +/****d* OpenSM: OSM_CAP2_IS_QOS_SUPPORTED +* Name +* OSM_CAP2_IS_QOS_SUPPORTED +* +* DESCRIPTION +* QoS is supported +* +* SYNOPSIS +*/ +#define OSM_CAP2_IS_QOS_SUPPORTED (1 << 1) +/***********/ + +/****d* OpenSM: OSM_CAP2_IS_REVERSE_PATH_PKEY_SUPPPORTED +* Name +* OSM_CAP2_IS_REVERSE_PATH_PKEY_SUPPPORTED +* +* DESCRIPTION +* Reverse path PKeys are indicated in PathRecord responses +* +* SYNOPSIS +*/ +#define OSM_CAP2_IS_REVERSE_PATH_PKEY_SUPPPORTED (1 << 2) +/***********/ + +/****d* OpenSM: OSM_CAP2_IS_MCAST_TOP_SUPPORTED +* Name +* OSM_CAP2_IS_MCAST_TOP_SUPPORTED +* +* DESCRIPTION +* SwitchInfo.MulticastFDBTop is supported +* +* SYNOPSIS +*/ +#define OSM_CAP2_IS_MCAST_TOP_SUPPORTED (1 << 3) +/***********/ + +/****d* OpenSM: OSM_CAP2_IS_HIERARCHY_SUPPORTED +* Name +* OSM_CAP2_IS_HIERARCHY_SUPPORTED +* DESCRIPTION +* Hierarchy info supported +* +* SYNOPSIS +*/ +#define OSM_CAP2_IS_HIERARCHY_SUPPORTED (1 << 4) +/***********/ + +/****d* OpenSM: OSM_CAP2_IS_ALIAS_GUIDS_SUPPORTED +* Name +* OSM_CAP2_IS_ALIAS_GUIDS_SUPPORTED +* DESCRIPTION +* Alias GUIDs supported +* +* SYNOPSIS +*/ +#define OSM_CAP2_IS_ALIAS_GUIDS_SUPPORTED (1 << 5) +/***********/ + +/****d* OpenSM: OSM_CAP2_IS_FULL_PORTINFO_REC_SUPPORTED +* Name +* OSM_CAP2_IS_FULL_PORTINFO_REC_SUPPORTED +* +* DESCRIPTION +* Full PortInfoRecords supported +* +* SYNOPSIS +*/ +#define OSM_CAP2_IS_FULL_PORTINFO_REC_SUPPORTED (1 << 6) +/***********/ + +/****d* OpenSM: OSM_CAP2_IS_EXTENDED_SPEEDS_SUPPORTED +* Name +* OSM_CAP2_IS_EXTENDED_SPEEDS_SUPPORTED +* +* DESCRIPTION +* Extended Link Speeds supported +* +* SYNOPSIS +*/ +#define
OSM_CAP2_IS_EXTENDED_SPEEDS_SUPPORTED (1 << 7) +/***********/ + +/****d* OpenSM: OSM_CAP2_IS_MULTICAST_SERVICE_RECS_SUPPORTED + * Name + * OSM_CAP2_IS_MULTICAST_SERVICE_RECS_SUPPORTED + * + * DESCRIPTION + * Multicast Service Records supported + * + * SYNOPSIS + */ +#define OSM_CAP2_IS_MULTICAST_SERVICE_RECS_SUPPORTED (1 << 8) + +/****d* OpenSM: OSM_CAP2_IS_PORT_INFO_CAPMASK2_MATCH_SUPPORTED + * Name + * OSM_CAP2_IS_PORT_INFO_CAPMASK2_MATCH_SUPPORTED + * + * DESCRIPTION + * CapMask2 matching for PortInfoRecord supported + * + * SYNOPSIS + */ +#define OSM_CAP2_IS_PORT_INFO_CAPMASK2_MATCH_SUPPORTED (1 << 10) + +/****d* OpenSM: OSM_CAP2_IS_SEND_ONLY_FULL_MEMBER_SUPPORTED + * Name + * OSM_CAP2_IS_SEND_ONLY_FULL_MEMBER_SUPPORTED + * + * DESCRIPTION + * Send only full member multicast join supported + * + * SYNOPSIS + */ +#define OSM_CAP2_IS_SEND_ONLY_FULL_MEMBER_SUPPORTED (1 << 12) + +/****d* OpenSM: OSM_CAP2_IS_LINK_WIDTH_2X_SUPPORTED + * Name + * OSM_CAP2_IS_LINK_WIDTH_2X_SUPPORTED + * + * DESCRIPTION + * 2x link widths supported + * + * SYNOPSIS + */ +#define OSM_CAP2_IS_LINK_WIDTH_2X_SUPPORTED (1 << 13) + +/****d* OpenSM: OSM_CAP2_IS_LINK_SPEED_HDR_SUPPORTED + * Name + * OSM_CAP2_IS_LINK_SPEED_HDR_SUPPORTED + * + * DESCRIPTION + * HDR link speed supported + * + * SYNOPSIS + */ +#define OSM_CAP2_IS_LINK_SPEED_HDR_SUPPORTED (1 << 15) + +/****d* OpenSM: osm_signal_t +* NAME +* osm_signal_t +* +* DESCRIPTION +* Enumerates the possible signal codes used by the OSM managers +* This cannot be an enum type, since conversion to and from +* integral types is necessary when passing signals through +* the dispatcher. 
+* +* SYNOPSIS +*/ +#define OSM_SIGNAL_NONE 0 +#define OSM_SIGNAL_SWEEP 1 +#define OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST 2 +#define OSM_SIGNAL_PERFMGR_SWEEP 3 +#define OSM_SIGNAL_GUID_PROCESS_REQUEST 4 +#define OSM_SIGNAL_MAX 5 + +typedef unsigned int osm_signal_t; +/***********/ + +/****d* OpenSM: osm_sm_signal_t +* NAME +* osm_sm_signal_t +* +* DESCRIPTION +* Enumerates the possible signals used by the OSM_SM_MGR +* +* SYNOPSIS +*/ +typedef enum _osm_sm_signal { + OSM_SM_SIGNAL_NONE = 0, + OSM_SM_SIGNAL_DISCOVERY_COMPLETED, + OSM_SM_SIGNAL_POLLING_TIMEOUT, + OSM_SM_SIGNAL_DISCOVER, + OSM_SM_SIGNAL_DISABLE, + OSM_SM_SIGNAL_HANDOVER, + OSM_SM_SIGNAL_HANDOVER_SENT, + OSM_SM_SIGNAL_ACKNOWLEDGE, + OSM_SM_SIGNAL_STANDBY, + OSM_SM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED, + OSM_SM_SIGNAL_WAIT_FOR_HANDOVER, + OSM_SM_SIGNAL_MAX +} osm_sm_signal_t; +/***********/ + +/****d* OpenSM: MAX_GUID_FILE_LINE_LENGTH +* NAME +* MAX_GUID_FILE_LINE_LENGTH +* +* DESCRIPTION +* The maximum length of a line when reading a guid file +* +* SYNOPSIS +*/ +#define MAX_GUID_FILE_LINE_LENGTH 120 +/**********/ + +/****d* OpenSM: VendorOUIs +* NAME +* VendorOUIs +* +* DESCRIPTION +* Known device vendor ID and GUID OUIs +* +* SYNOPSIS +*/ +#define OSM_VENDOR_ID_INTEL 0x00D0B7 +#define OSM_VENDOR_ID_MELLANOX 0x0002C9 +#define OSM_VENDOR_ID_REDSWITCH 0x000617 +#define OSM_VENDOR_ID_SILVERSTORM 0x00066A +#define OSM_VENDOR_ID_TOPSPIN 0x0005AD +#define OSM_VENDOR_ID_FUJITSU 0x00E000 +#define OSM_VENDOR_ID_FUJITSU2 0x000B5D +#define OSM_VENDOR_ID_VOLTAIRE 0x0008F1 +#define OSM_VENDOR_ID_YOTTAYOTTA 0x000453 +#define OSM_VENDOR_ID_PATHSCALE 0x001175 +#define OSM_VENDOR_ID_IBM 0x000255 +#define OSM_VENDOR_ID_DIVERGENET 0x00084E +#define OSM_VENDOR_ID_FLEXTRONICS 0x000B8C +#define OSM_VENDOR_ID_AGILENT 0x0030D3 +#define OSM_VENDOR_ID_OBSIDIAN 0x001777 +#define OSM_VENDOR_ID_BAYMICRO 0x000BC1 +#define OSM_VENDOR_ID_LSILOGIC 0x00A0B8 +#define OSM_VENDOR_ID_DDN 0x0001FF +#define OSM_VENDOR_ID_PANTA 0x001393 +#define
OSM_VENDOR_ID_HP 0x001708 +#define OSM_VENDOR_ID_RIOWORKS 0x005045 +#define OSM_VENDOR_ID_SUN 0x0003BA +#define OSM_VENDOR_ID_SUN2 0x002128 +#define OSM_VENDOR_ID_3LEAFNTWKS 0x0016A1 +#define OSM_VENDOR_ID_XSIGO 0x001397 +#define OSM_VENDOR_ID_HP2 0x0018FE +#define OSM_VENDOR_ID_DELL 0x00188B +#define OSM_VENDOR_ID_SUPERMICRO 0x003048 +#define OSM_VENDOR_ID_HP3 0x0019BB +#define OSM_VENDOR_ID_HP4 0x00237D +#define OSM_VENDOR_ID_OPENIB 0x001405 +#define OSM_VENDOR_ID_IBM2 0x5CF3FC +#define OSM_VENDOR_ID_MELLANOX2 0xF45214 +#define OSM_VENDOR_ID_MELLANOX3 0x00258B +#define OSM_VENDOR_ID_MELLANOX4 0xE41D2D +#define OSM_VENDOR_ID_MELLANOX5 0x7CFE90 +#define OSM_VENDOR_ID_MELLANOX6 0xEC0D9A +#define OSM_VENDOR_ID_MELLANOX7 0x248A07 +#define OSM_VENDOR_ID_MELLANOX8 0x506B4B +#define OSM_VENDOR_ID_MELLANOX9 0x98039B +#define OSM_VENDOR_ID_BULL 0x080038 + +/* IPoIB Broadcast Defaults */ +#define OSM_IPOIB_BROADCAST_MGRP_QKEY 0x0b1b +extern const ib_gid_t osm_ipoib_broadcast_mgid; + +/**********/ + +END_C_DECLS +#endif /* _OSM_BASE_H_ */ diff --git a/include/opensm/osm_config.h.in b/include/opensm/osm_config.h.in new file mode 100644 index 0000000..e499554 --- /dev/null +++ b/include/opensm/osm_config.h.in @@ -0,0 +1,68 @@ +/* include/osm_config.h.in + * + * Defines various OpenSM configuration parameters to be used by various + * plugins and third party tools. + * + * NOTE: Defines used in header files MUST be included here to ensure plugin + * compatibility. 
+ */ + +#ifndef _OSM_CONFIG_H_ +#define _OSM_CONFIG_H_ + +/* define 1 if OpenSM build is in a debug mode */ +#undef OSM_DEBUG +#undef _DEBUG_ + +/* Define as 1 if you want Dual Sided RMPP Support */ +#undef DUAL_SIDED_RMPP + +/* Define as 1 if you want to enable a console on a socket connection */ +#undef ENABLE_OSM_CONSOLE_SOCKET + +/* Define as 1 if you want to enable the event plugin */ +#undef ENABLE_OSM_DEFAULT_EVENT_PLUGIN + +/* Define as 1 if you want to enable the performance manager */ +#undef ENABLE_OSM_PERF_MGR + +/* Define as 1 if you want to enable the performance manager profiling code */ +#undef ENABLE_OSM_PERF_MGR_PROFILE + +/* Define a default node name map file */ +#undef HAVE_DEFAULT_NODENAME_MAP + +/* Define a default OpenSM config file */ +#undef HAVE_DEFAULT_OPENSM_CONFIG_FILE + +/* Define a Partition config file */ +#undef HAVE_DEFAULT_PARTITION_CONFIG_FILE + +/* Define a Prefix Routes config file */ +#undef HAVE_DEFAULT_PREFIX_ROUTES_FILE + +/* Define a QOS policy config file */ +#undef HAVE_DEFAULT_QOS_POLICY_FILE + +/* Define OpenSM config directory */ +#undef OPENSM_CONFIG_DIR + +/* Define as 1 for vapi vendor */ +#undef OSM_VENDOR_INTF_MTL + +/* Define as 1 for OpenIB vendor */ +#undef OSM_VENDOR_INTF_OPENIB + +/* Define as 1 for sim vendor */ +#undef OSM_VENDOR_INTF_SIM + +/* Define as 1 for ts vendor */ +#undef OSM_VENDOR_INTF_TS + +/* Define as 1 if you want Vendor RMPP Support */ +#undef VENDOR_RMPP_SUPPORT + +/* Define to 1 if you have the `pthread' library (-lpthread). */ +#undef HAVE_LIBPTHREAD + +#endif /* _OSM_CONFIG_H_ */ diff --git a/include/opensm/osm_congestion_control.h b/include/opensm/osm_congestion_control.h new file mode 100644 index 0000000..c59bf88 --- /dev/null +++ b/include/opensm/osm_congestion_control.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. 
All rights reserved. + * Copyright (c) 2012 Lawrence Livermore National Lab. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * OSM Congestion Control types and prototypes + * + * Author: + * Albert Chu, LLNL + */ + +#ifndef OSM_CONGESTION_CONTROL_H +#define OSM_CONGESTION_CONTROL_H + +#include +#include +#include +#include +#include +#include +#include + +/****s* OpenSM: Base/OSM_DEFAULT_CC_KEY + * NAME + * OSM_DEFAULT_CC_KEY + * + * DESCRIPTION + * Congestion Control Key used by OpenSM. 
+ * + * SYNOPSIS + */ +#define OSM_DEFAULT_CC_KEY 0 + +#define OSM_CC_DEFAULT_MAX_OUTSTANDING_QUERIES 500 + +#define OSM_CC_TIMEOUT_COUNT_THRESHOLD 3 + +/****s* OpenSM: CongestionControl/osm_congestion_control_t +* This object should be treated as opaque and should +* be manipulated only through the provided functions. +*/ +typedef struct osm_congestion_control { + struct osm_opensm *osm; + osm_subn_t *subn; + osm_sm_t *sm; + osm_log_t *log; + osm_mad_pool_t *mad_pool; + atomic32_t trans_id; + osm_vendor_t *vendor; + osm_bind_handle_t bind_handle; + cl_disp_reg_handle_t cc_disp_h; + ib_net64_t port_guid; + atomic32_t outstanding_mads; + atomic32_t outstanding_mads_on_wire; + cl_qlist_t mad_queue; + cl_spinlock_t mad_queue_lock; + cl_event_t cc_poller_wakeup; + cl_event_t outstanding_mads_done_event; + cl_event_t sig_mads_on_wire_continue; + cl_thread_t cc_poller; + osm_thread_state_t thread_state; + ib_sw_cong_setting_t sw_cong_setting; + ib_ca_cong_setting_t ca_cong_setting; + ib_cc_tbl_t cc_tbl[OSM_CCT_ENTRY_MAD_BLOCKS]; + unsigned int cc_tbl_mads; +} osm_congestion_control_t; +/* +* FIELDS +* subn +* Subnet object for this subnet. +* +* log +* Pointer to the log object. +* +* mad_pool +* Pointer to the MAD pool. 
+* +* mad_ctrl +* Mad Controller +*********/ + +struct osm_opensm; + +int osm_congestion_control_setup(struct osm_opensm *osm); + +int osm_congestion_control_wait_pending_transactions(struct osm_opensm *osm); + +ib_api_status_t osm_congestion_control_init(osm_congestion_control_t * p_cc, + struct osm_opensm *osm, + const osm_subn_opt_t * p_opt); + +ib_api_status_t osm_congestion_control_bind(osm_congestion_control_t * p_cc, + ib_net64_t port_guid); + +void osm_congestion_control_shutdown(osm_congestion_control_t * p_cc); + +void osm_congestion_control_destroy(osm_congestion_control_t * p_cc); + + +#endif /* ifndef OSM_CONGESTION_CONTROL_H */ diff --git a/include/opensm/osm_console.h b/include/opensm/osm_console.h new file mode 100644 index 0000000..9144ae1 --- /dev/null +++ b/include/opensm/osm_console.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2005-2007 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _OSM_CONSOLE_H_ +#define _OSM_CONSOLE_H_ + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +int osm_console(osm_opensm_t * p_osm); +END_C_DECLS +#endif /* _OSM_CONSOLE_H_ */ diff --git a/include/opensm/osm_console_io.h b/include/opensm/osm_console_io.h new file mode 100644 index 0000000..7bf1313 --- /dev/null +++ b/include/opensm/osm_console_io.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2005-2007 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +/* + * Abstract: + * Declaration of osm_console_t. + * This object represents the OpenSM Console object. + * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_CONSOLE_IO_H_ +#define _OSM_CONSOLE_IO_H_ + +#include +#include + +#define OSM_DISABLE_CONSOLE "off" +#define OSM_LOCAL_CONSOLE "local" +#ifdef ENABLE_OSM_CONSOLE_SOCKET +#define OSM_REMOTE_CONSOLE "socket" +#endif +#ifdef ENABLE_OSM_CONSOLE_LOOPBACK +#define OSM_LOOPBACK_CONSOLE "loopback" +#endif +#define OSM_CONSOLE_NAME "OSM Console" + +#define OSM_DEFAULT_CONSOLE OSM_DISABLE_CONSOLE +#define OSM_DEFAULT_CONSOLE_PORT 10000 +#define OSM_DAEMON_NAME "opensm" + +#define OSM_COMMAND_PROMPT "$ " + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +typedef struct osm_console { + int socket; + int in_fd; + int out_fd; + int authorized; + FILE *in; + FILE *out; + char client_type[32]; + char client_ip[64]; + char client_hn[128]; +} osm_console_t; + +void osm_console_prompt(FILE * out); +int osm_console_init(osm_subn_opt_t * opt, osm_console_t * p_oct, osm_log_t * p_log); +void osm_console_exit(osm_console_t * p_oct, osm_log_t * p_log); +int is_console_enabled(osm_subn_opt_t *p_opt); + +#ifdef ENABLE_OSM_CONSOLE_LOOPBACK +int cio_open(osm_console_t * p_oct, int new_fd, osm_log_t * p_log); +int cio_close(osm_console_t * p_oct, osm_log_t * p_log); +int is_authorized(osm_console_t * p_oct); +#else +#define cio_close(c, log) +#endif + +END_C_DECLS +#endif /* _OSM_CONSOLE_IO_H_ */ diff --git a/include/opensm/osm_db.h b/include/opensm/osm_db.h new file mode 100644 index 
0000000..e8860f3 --- /dev/null +++ b/include/opensm/osm_db.h @@ -0,0 +1,430 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _OSM_DB_H_ +#define _OSM_DB_H_ + +/* + * Abstract: + * Declaration of the DB interface. 
+ */ + +#include +#include + +struct osm_log; + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/Database +* NAME +* Database +* +* DESCRIPTION +* The OpenSM database interface provides the means to restore persistent +* data, query, modify, delete and eventually commit it back to the +* persistent media. +* +* The interface is defined such that it is not "data dependent": +* All keys and data items are text strings. +* +* The DB implementation should be thread safe, thus callers do not need to +* provide serialization. +* +* This object should be treated as opaque and should be +* manipulated only through the provided functions. +* +* AUTHOR +* Eitan Zahavi, Mellanox Technologies LTD +* +*********/ +/****s* OpenSM: Database/osm_db_domain_t +* NAME +* osm_db_domain_t +* +* DESCRIPTION +* A domain of the database. Can be viewed as a database table. +* +* The osm_db_domain_t object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct osm_db_domain { + struct osm_db *p_db; + void *p_domain_imp; +} osm_db_domain_t; +/* +* FIELDS +* p_db +* Pointer to the parent database object. +* +* p_domain_imp +* Pointer to the db implementation object +* +* SEE ALSO +* osm_db_t +*********/ + +/****s* OpenSM: Database/osm_db_t +* NAME +* osm_db_t +* +* DESCRIPTION +* The main database object. +* +* The osm_db_t object should be treated as opaque and should +* be manipulated only through the provided functions.
+* +* SYNOPSIS +*/ +typedef struct osm_db { + void *p_db_imp; + struct osm_log *p_log; + cl_list_t domains; +} osm_db_t; +/* +* FIELDS +* p_db_imp +* Pointer to the database implementation object +* +* p_log +* Pointer to the OSM logging facility +* +* domains +* List of initialize domains +* +* SEE ALSO +*********/ + +/****f* OpenSM: Database/osm_db_construct +* NAME +* osm_db_construct +* +* DESCRIPTION +* Construct a database. +* +* SYNOPSIS +*/ +void osm_db_construct(IN osm_db_t * p_db); +/* +* PARAMETERS +* p_db +* [in] Pointer to the database object to construct +* +* RETURN VALUES +* NONE +* +* SEE ALSO +* Database, osm_db_init, osm_db_destroy +*********/ + +/****f* OpenSM: Database/osm_db_destroy +* NAME +* osm_db_destroy +* +* DESCRIPTION +* Destroys the osm_db_t structure. +* +* SYNOPSIS +*/ +void osm_db_destroy(IN osm_db_t * p_db); +/* +* PARAMETERS +* p_db +* [in] Pointer to osm_db_t structure to destroy +* +* SEE ALSO +* Database, osm_db_construct, osm_db_init +*********/ + +/****f* OpenSM: Database/osm_db_init +* NAME +* osm_db_init +* +* DESCRIPTION +* Initializes the osm_db_t structure. +* +* SYNOPSIS +*/ +int osm_db_init(IN osm_db_t * p_db, IN struct osm_log * p_log); +/* +* PARAMETERS +* +* p_db +* [in] Pointer to the database object to initialize +* +* p_log +* [in] Pointer to the OSM logging facility +* +* RETURN VALUES +* 0 on success 1 otherwise +* +* SEE ALSO +* Database, osm_db_construct, osm_db_destroy +*********/ + +/****f* OpenSM: Database/osm_db_domain_init +* NAME +* osm_db_domain_init +* +* DESCRIPTION +* Initializes the osm_db_domain_t structure. +* +* SYNOPSIS +*/ +osm_db_domain_t *osm_db_domain_init(IN osm_db_t * p_db, IN const char *domain_name); +/* +* PARAMETERS +* +* p_db +* [in] Pointer to the database object to initialize +* +* domain_name +* [in] a char array with the domain name. +* +* RETURN VALUES +* pointer to the new domain object or NULL if failed. 
+* +* SEE ALSO +* Database, osm_db_construct, osm_db_destroy +*********/ + +/****f* OpenSM: Database/osm_db_restore +* NAME +* osm_db_restore +* +* DESCRIPTION +* Reads the entire domain from persistent storage - overrides all +* existing cached data (if any). +* +* SYNOPSIS +*/ +int osm_db_restore(IN osm_db_domain_t * p_domain); +/* +* PARAMETERS +* +* p_domain +* [in] Pointer to the database domain object to restore +* from persistent db +* +* RETURN VALUES +* 0 if successful, 1 otherwise +* +* SEE ALSO +* Database, osm_db_domain_init, osm_db_clear, osm_db_store, +* osm_db_keys, osm_db_lookup, osm_db_update, osm_db_delete +*********/ + +/****f* OpenSM: Database/osm_db_clear +* NAME +* osm_db_clear +* +* DESCRIPTION +* Clears the entire domain values from/in the cache +* +* SYNOPSIS +*/ +int osm_db_clear(IN osm_db_domain_t * p_domain); +/* +* PARAMETERS +* +* p_domain +* [in] Pointer to the database domain object to clear +* +* RETURN VALUES +* 0 if successful, 1 otherwise +* +* SEE ALSO +* Database, osm_db_domain_init, osm_db_restore, osm_db_store, +* osm_db_keys, osm_db_lookup, osm_db_update, osm_db_delete +*********/ + +/****f* OpenSM: Database/osm_db_store +* NAME +* osm_db_store +* +* DESCRIPTION +* Store the domain cache back to the database (commit) +* +* SYNOPSIS +*/ +int osm_db_store(IN osm_db_domain_t * p_domain, + IN boolean_t fsync_high_avail_files); +/* +* PARAMETERS +* +* p_domain +* [in] Pointer to the database domain object to store to the +* persistent db +* +* fsync_high_avail_files +* [in] Boolean that indicates whether or not to synchronize +* in-memory high availability files with storage +* +* RETURN VALUES +* 0 if successful, 1 otherwise +* +* SEE ALSO +* Database, osm_db_domain_init, osm_db_restore, osm_db_clear, +* osm_db_keys, osm_db_lookup, osm_db_update, osm_db_delete +*********/ + +/****f* OpenSM: Database/osm_db_keys +* NAME +* osm_db_keys +* +* DESCRIPTION +* Retrieve all keys of the domain +* +* SYNOPSIS +*/ +int osm_db_keys(IN
osm_db_domain_t * p_domain, OUT cl_list_t * p_key_list); +/* +* PARAMETERS +* +* p_domain +* [in] Pointer to the database domain object +* +* p_key_list +* [out] List of key values. It should be PRE constructed and initialized. +* +* RETURN VALUES +* 0 if successful, 1 otherwise +* +* NOTE: the caller needs to free and destruct the list, +* the keys returned are internal to the hash and should NOT be freed +* +* SEE ALSO +* Database, osm_db_domain_init, osm_db_restore, osm_db_clear, osm_db_store, +* osm_db_lookup, osm_db_update, osm_db_delete +*********/ + +/****f* OpenSM: Database/osm_db_lookup +* NAME +* osm_db_lookup +* +* DESCRIPTION +* Lookup an entry in the domain by the given key +* +* SYNOPSIS +*/ +/* lookup value by key */ +char *osm_db_lookup(IN osm_db_domain_t * p_domain, IN char *p_key); +/* +* PARAMETERS +* +* p_domain +* [in] Pointer to the database domain object +* +* p_key +* [in] The key to look for +* +* RETURN VALUES +* the value as char * or NULL if not found +* +* SEE ALSO +* Database, osm_db_domain_init, osm_db_restore, osm_db_clear, osm_db_store, +* osm_db_keys, osm_db_update, osm_db_delete +*********/ + +/****f* OpenSM: Database/osm_db_update +* NAME +* osm_db_update +* +* DESCRIPTION +* Set the value of the given key +* +* SYNOPSIS +*/ +int osm_db_update(IN osm_db_domain_t * p_domain, IN char *p_key, IN char *p_val); +/* +* PARAMETERS +* +* p_domain +* [in] Pointer to the database domain object +* +* p_key +* [in] The key to update +* +* p_val +* [in] The value to update +* +* RETURN VALUES +* 0 on success +* +* NOTE: the value will be duplicated so the caller's copy can be freed +* +* SEE ALSO +* Database, osm_db_domain_init, osm_db_restore, osm_db_clear, osm_db_store, +* osm_db_keys, osm_db_lookup, osm_db_delete +*********/ + +/****f* OpenSM: Database/osm_db_delete +* NAME +* osm_db_delete +* +* DESCRIPTION +* Delete an entry by the given key +* +* SYNOPSIS +*/ +int osm_db_delete(IN osm_db_domain_t * p_domain, IN char *p_key); +/* +* PARAMETERS +* +*
p_domain +* [in] Pointer to the database domain object +* +* p_key +* [in] The key to look for +* +* RETURN VALUES +* 0 on success +* +* SEE ALSO +* Database, osm_db_domain_init, osm_db_restore, osm_db_clear, osm_db_store, +* osm_db_keys, osm_db_lookup, osm_db_update +*********/ + +END_C_DECLS +#endif /* _OSM_DB_H_ */ diff --git a/include/opensm/osm_db_pack.h b/include/opensm/osm_db_pack.h new file mode 100644 index 0000000..f2d7af2 --- /dev/null +++ b/include/opensm/osm_db_pack.h @@ -0,0 +1,568 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/****h* OpenSM/DB-Pack +* NAME +* Database Types +* +* DESCRIPTION +* This module provides packing and unpacking of the database +* storage into specific types. +* +* The following domains/conversions are supported: +* guid2lid - key is a guid and data is a lid. +* +* AUTHOR +* Eitan Zahavi, Mellanox Technologies LTD +* +*********/ + +#ifndef _OSM_DB_PACK_H_ +#define _OSM_DB_PACK_H_ + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****f* OpenSM: DB-Pack/osm_db_guid2lid_init +* NAME +* osm_db_guid2lid_init +* +* DESCRIPTION +* Initialize a domain for the guid2lid table +* +* SYNOPSIS +*/ +static inline osm_db_domain_t *osm_db_guid2lid_init(IN osm_db_t * p_db) +{ + return (osm_db_domain_init(p_db, "guid2lid")); +} + +/* +* PARAMETERS +* p_db +* [in] Pointer to the database object in which to create the domain +* +* RETURN VALUES +* The pointer to the new allocated domain object or NULL. +* +* NOTE: DB domains are destroyed by the osm_db_destroy +* +* SEE ALSO +* Database, osm_db_init, osm_db_destroy +*********/ + +/****s* OpenSM: DB-Pack/osm_db_guid_elem_t +* NAME +* osm_db_guid_elem_t +* +* DESCRIPTION +* List element that carries a single guid value +* +* SYNOPSIS +*/ +typedef struct osm_db_guid_elem { + cl_list_item_t item; + uint64_t guid; +} osm_db_guid_elem_t; +/* +* FIELDS +* item +* required for list manipulations +* +* guid +* the guid value carried by this element +************/ + +/****f* OpenSM: DB-Pack/osm_db_guid2lid_guids +* NAME +* osm_db_guid2lid_guids +* +* DESCRIPTION +* Provides back a list of guid elements.
+* +* SYNOPSIS +*/ +int osm_db_guid2lid_guids(IN osm_db_domain_t * p_g2l, + OUT cl_qlist_t * p_guid_list); +/* +* PARAMETERS +* p_g2l +* [in] Pointer to the guid2lid domain +* +* p_guid_list +* [out] A quick list of guid elements of type osm_db_guid_elem_t +* +* RETURN VALUES +* 0 if successful +* +* NOTE: the output qlist should be initialized and each item freed +* by the caller, then destroyed. +* +* SEE ALSO +* osm_db_guid2lid_init, osm_db_guid2lid_guids, osm_db_guid2lid_get +* osm_db_guid2lid_set, osm_db_guid2lid_delete +*********/ + +/****f* OpenSM: DB-Pack/osm_db_guid2lid_get +* NAME +* osm_db_guid2lid_get +* +* DESCRIPTION +* Get a lid range by given guid. +* +* SYNOPSIS +*/ +int osm_db_guid2lid_get(IN osm_db_domain_t * p_g2l, IN uint64_t guid, + OUT uint16_t * p_min_lid, OUT uint16_t * p_max_lid); +/* +* PARAMETERS +* p_g2l +* [in] Pointer to the guid2lid domain +* +* guid +* [in] The guid to look for +* +* p_min_lid +* [out] Pointer to the resulting min lid in host order. +* +* p_max_lid +* [out] Pointer to the resulting max lid in host order. +* +* RETURN VALUES +* 0 if successful. The lid will be set to 0 if not found. +* +* SEE ALSO +* osm_db_guid2lid_init, osm_db_guid2lid_guids +* osm_db_guid2lid_set, osm_db_guid2lid_delete +*********/ + +/****f* OpenSM: DB-Pack/osm_db_guid2lid_set +* NAME +* osm_db_guid2lid_set +* +* DESCRIPTION +* Set a lid range for the given guid. 
+* +* SYNOPSIS +*/ +int osm_db_guid2lid_set(IN osm_db_domain_t * p_g2l, IN uint64_t guid, + IN uint16_t min_lid, IN uint16_t max_lid); +/* +* PARAMETERS +* p_g2l +* [in] Pointer to the guid2lid domain +* +* guid +* [in] The guid to look for +* +* min_lid +* [in] The min lid value to set +* +* max_lid +* [in] The max lid value to set +* +* RETURN VALUES +* 0 if successful +* +* SEE ALSO +* osm_db_guid2lid_init, osm_db_guid2lid_guids +* osm_db_guid2lid_get, osm_db_guid2lid_delete +*********/ + +/****f* OpenSM: DB-Pack/osm_db_guid2lid_delete +* NAME +* osm_db_guid2lid_delete +* +* DESCRIPTION +* Delete the entry by the given guid +* +* SYNOPSIS +*/ +int osm_db_guid2lid_delete(IN osm_db_domain_t * p_g2l, IN uint64_t guid); +/* +* PARAMETERS +* p_g2l +* [in] Pointer to the guid2lid domain +* +* guid +* [in] The guid to look for +* +* RETURN VALUES +* 0 if successful otherwise 1 +* +* SEE ALSO +* osm_db_guid2lid_init, osm_db_guid2lid_guids +* osm_db_guid2lid_get, osm_db_guid2lid_set +*********/ + +/****f* OpenSM: DB-Pack/osm_db_guid2mkey_init +* NAME +* osm_db_guid2mkey_init +* +* DESCRIPTION +* Initialize a domain for the guid2mkey table +* +* SYNOPSIS +*/ +static inline osm_db_domain_t *osm_db_guid2mkey_init(IN osm_db_t * p_db) +{ + return osm_db_domain_init(p_db, "guid2mkey"); +} + +/* +* PARAMETERS +* p_db +* [in] Pointer to the database object to construct +* +* RETURN VALUES +* The pointer to the new allocated domain object or NULL. +* +* NOTE: DB domains are destroyed by the osm_db_destroy +* +* SEE ALSO +* Database, osm_db_init, osm_db_destroy +*********/ + +/****f* OpenSM: DB-Pack/osm_db_guid2mkey_guids +* NAME +* osm_db_guid2mkey_guids +* +* DESCRIPTION +* Provides back a list of guid elements. 
+* +* SYNOPSIS +*/ +int osm_db_guid2mkey_guids(IN osm_db_domain_t * p_g2m, + OUT cl_qlist_t * p_guid_list); +/* +* PARAMETERS +* p_g2m +* [in] Pointer to the guid2mkey domain +* +* p_guid_list +* [out] A quick list of guid elements of type osm_db_guid_elem_t +* +* RETURN VALUES +* 0 if successful +* +* NOTE: the output qlist should be initialized and each item freed +* by the caller, then destroyed. +* +* SEE ALSO +* osm_db_guid2mkey_init, osm_db_guid2mkey_guids, osm_db_guid2mkey_get +* osm_db_guid2mkey_set, osm_db_guid2mkey_delete +*********/ + +/****f* OpenSM: DB-Pack/osm_db_guid2mkey_get +* NAME +* osm_db_guid2mkey_get +* +* DESCRIPTION +* Get the mkey for the given guid. +* +* SYNOPSIS +*/ +int osm_db_guid2mkey_get(IN osm_db_domain_t * p_g2m, IN uint64_t guid, + OUT uint64_t * p_mkey); +/* +* PARAMETERS +* p_g2m +* [in] Pointer to the guid2mkey domain +* +* guid +* [in] The guid to look for +* +* p_mkey +* [out] Pointer to the resulting mkey in host order. +* +* RETURN VALUES +* 0 if successful. The lid will be set to 0 if not found. +* +* SEE ALSO +* osm_db_guid2mkey_init, osm_db_guid2mkey_guids +* osm_db_guid2mkey_set, osm_db_guid2mkey_delete +*********/ + +/****f* OpenSM: DB-Pack/osm_db_guid2mkey_set +* NAME +* osm_db_guid2mkey_set +* +* DESCRIPTION +* Set the mkey for the given guid.
+* +* SYNOPSIS +*/ +int osm_db_guid2mkey_set(IN osm_db_domain_t * p_g2m, IN uint64_t guid, + IN uint64_t mkey); +/* +* PARAMETERS +* p_g2m +* [in] Pointer to the guid2mkey domain +* +* guid +* [in] The guid to look for +* +* mkey +* [in] The mkey value to set, in host order +* +* RETURN VALUES +* 0 if successful +* +* SEE ALSO +* osm_db_guid2mkey_init, osm_db_guid2mkey_guids +* osm_db_guid2mkey_get, osm_db_guid2mkey_delete +*********/ + +/****f* OpenSM: DB-Pack/osm_db_guid2mkey_delete +* NAME +* osm_db_guid2mkey_delete +* +* DESCRIPTION +* Delete the entry by the given guid +* +* SYNOPSIS +*/ +int osm_db_guid2mkey_delete(IN osm_db_domain_t * p_g2m, IN uint64_t guid); +/* +* PARAMETERS +* p_g2m +* [in] Pointer to the guid2mkey domain +* +* guid +* [in] The guid to look for +* +* RETURN VALUES +* 0 if successful otherwise 1 +* +* SEE ALSO +* osm_db_guid2mkey_init, osm_db_guid2mkey_guids +* osm_db_guid2mkey_get, osm_db_guid2mkey_set +*********/ + +/****f* OpenSM: DB-Pack/osm_db_neighbor_init +* NAME +* osm_db_neighbor_init +* +* DESCRIPTION +* Initialize a domain for the neighbors table +* +* SYNOPSIS +*/ +static inline osm_db_domain_t *osm_db_neighbor_init(IN osm_db_t * p_db) +{ + return osm_db_domain_init(p_db, "neighbors"); +} + +/* +* PARAMETERS +* p_db +* [in] Pointer to the database object to construct +* +* RETURN VALUES +* The pointer to the new allocated domain object or NULL. 
+* +* NOTE: DB domains are destroyed by the osm_db_destroy +* +* SEE ALSO +* Database, osm_db_init, osm_db_destroy +*********/ + +/****s* OpenSM: DB-Pack/osm_db_neighbor_elem +* NAME +* osm_db_neighbor_elem +* +* DESCRIPTION +* Element of the neighbor list returned by osm_db_neighbor_guids +* +* SYNOPSIS +*/ +typedef struct osm_db_neighbor_elem { + cl_list_item_t item; + uint64_t guid; + uint8_t portnum; +} osm_db_neighbor_elem_t; +/* +* FIELDS +* item +* required for list manipulations +* +* guid +* portnum +* +************/ + +/****f* OpenSM: DB-Pack/osm_db_neighbor_guids +* NAME +* osm_db_neighbor_guids +* +* DESCRIPTION +* Provides back a list of neighbor elements. +* +* SYNOPSIS +*/ +int osm_db_neighbor_guids(IN osm_db_domain_t * p_neighbor, + OUT cl_qlist_t * p_guid_list); +/* +* PARAMETERS +* p_neighbor +* [in] Pointer to the neighbor domain +* +* p_guid_list +* [out] A quick list of neighbor elements of type osm_db_neighbor_elem_t +* +* RETURN VALUES +* 0 if successful +* +* NOTE: the output qlist should be initialized and each item freed +* by the caller, then destroyed. +* +* SEE ALSO +* osm_db_neighbor_init, osm_db_neighbor_guids, osm_db_neighbor_get +* osm_db_neighbor_set, osm_db_neighbor_delete +*********/ + +/****f* OpenSM: DB-Pack/osm_db_neighbor_get +* NAME +* osm_db_neighbor_get +* +* DESCRIPTION +* Get a neighbor's guid by given guid/port. +* +* SYNOPSIS +*/ +int osm_db_neighbor_get(IN osm_db_domain_t * p_neighbor, IN uint64_t guid1, + IN uint8_t port1, OUT uint64_t * p_guid2, + OUT uint8_t * p_port2); +/* +* PARAMETERS +* p_neighbor +* [in] Pointer to the neighbor domain +* +* guid1 +* [in] The guid to look for +* +* port1 +* [in] The port to look for +* +* p_guid2 +* [out] Pointer to the resulting guid of the neighboring port. +* +* p_port2 +* [out] Pointer to the resulting port of the neighboring port. +* +* RETURN VALUES +* 0 if successful. The lid will be set to 0 if not found.
+* +* SEE ALSO +* osm_db_neighbor_init, osm_db_neighbor_guids +* osm_db_neighbor_set, osm_db_neighbor_delete +*********/ + +/****f* OpenSM: DB-Pack/osm_db_neighbor_set +* NAME +* osm_db_neighbor_set +* +* DESCRIPTION +* Set up a relationship between two ports +* +* SYNOPSIS +*/ +int osm_db_neighbor_set(IN osm_db_domain_t * p_neighbor, IN uint64_t guid1, + IN uint8_t port1, IN uint64_t guid2, IN uint8_t port2); +/* +* PARAMETERS +* p_neighbor +* [in] Pointer to the neighbor domain +* +* guid1 +* [in] The first guid in the relationship +* +* port1 +* [in] The first port in the relationship +* +* guid2 +* [in] The second guid in the relationship +* +* port2 +* [in] The second port in the relationship +* +* RETURN VALUES +* 0 if successful +* +* SEE ALSO +* osm_db_neighbor_init, osm_db_neighbor_guids +* osm_db_neighbor_get, osm_db_neighbor_delete +*********/ + +/****f* OpenSM: DB-Pack/osm_db_neighbor_delete +* NAME +* osm_db_neighbor_delete +* +* DESCRIPTION +* Delete the relationship between two ports +* +* SYNOPSIS +*/ +int osm_db_neighbor_delete(IN osm_db_domain_t * p_neighbor, + IN uint64_t guid, IN uint8_t port); +/* +* PARAMETERS +* p_neighbor +* [in] Pointer to the neighbor domain +* +* guid +* [in] The guid to look for +* +* port +* [in] The port to look for +* +* RETURN VALUES +* 0 if successful otherwise 1 +* +* SEE ALSO +* osm_db_neighbor_init, osm_db_neighbor_guids +* osm_db_neighbor_get, osm_db_neighbor_set +*********/ + +END_C_DECLS +#endif /* _OSM_DB_PACK_H_ */ diff --git a/include/opensm/osm_errors.h b/include/opensm/osm_errors.h new file mode 100644 index 0000000..aff4300 --- /dev/null +++ b/include/opensm/osm_errors.h @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of error code ranges for the various OpenSM modules. 
+ */ + +#ifndef _OSM_ERRORS_H_ +#define _OSM_ERRORS_H_ + +/* + Generic Request Controller + 0100 - 01FF + + Node Info Receive Controller + 0200 - 02FF + + Generic Requester + 0300 - 03FF + + Node Info Receiver + 0400 - 04FF + + Node Description Receiver + 0500 - 05FF + + Node Description Receive Controller + 0600 - 06FF + + Port Info Receiver + 0700 - 07FF + + Port Info Receive Controller + 0800 - 08FF + + Mad Pool + 0900 - 09FF + + SM + 1000 - 10FF + + SM MAD Controller + 1100 - 11FF + + VL15 Interface + 1200 - 12FF + + Switch Info Receive Controller + 1300 - 13FF + + Switch Info Receiver + 1400 - 14FF + + State Manager + 1500 - 15FF + + State Manager Controller + 1600 - 16FF + + LID Manager + 1700 - 17FF + + Link Manager + 1800 - 18FF + + Drop Manager + 1900 - 19FF + + Linear Forwarding Receive Controller + 2000 - 20FF + + Linear Forwarding Receiver + 2100 - 21FF + + Vendor Specific + 2200 - 22FF + + SMInfo Receive Controller + 2300 - 23FF + + SMInfo Info Receiver + 2400 - 24FF + + Generic Responder + 2500 - 25FF + + Linear Forwarding Receive Controller + 2600 - 26FF + + Linear Forwarding Receiver + 2700 - 27FF + + SA MAD controller + 2800 - 28FF + + Node Record Controller + 2900 - 29FF + + PortInfo Record Controller + 3000 - 30FF + + Link Record Controller + 3100 - 31FF + + Path Record Controller + 3200 - 32FF + + SMInfo Record Controller + 3300 - 33FF + + Multicast Record Controller + 3400 - 34FF + + Unicast Manager + 3500 - 35FF + + Multicast Manager + 3600 - 36FF + + SA Response + 3700 - 37FF + + Link Record Receiver + 3800 - 38FF + + Multicast Forwarding Receive Controller + 3900 - 39FF + + Multicast Forwarding Receiver + 4000 - 40FF + + SMInfo Record Receiver + 4100 - 41FF + + PortInfo Record Receiver + 4200 - 42FF + + Service Record Receiver + 4300 - 43FF + +*/ + +#endif /* _OSM_ERRORS_H_ */ diff --git a/include/opensm/osm_event_plugin.h b/include/opensm/osm_event_plugin.h new file mode 100644 index 0000000..db5ebca --- /dev/null +++ 
b/include/opensm/osm_event_plugin.h @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2013 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2007 The Regents of the University of California. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef _OSM_EVENT_PLUGIN_H_ +#define _OSM_EVENT_PLUGIN_H_ + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM Event plugin interface +* DESCRIPTION +* Database interface to record subnet events +* +* Implementations of this object _MUST_ be thread safe. +* +* AUTHOR +* Ira Weiny, LLNL +* +*********/ + +#define OSM_EPI_NODE_NAME_LEN (65) + +struct osm_opensm; +/** ========================================================================= + * Event types + */ +typedef enum { + OSM_EVENT_ID_PORT_ERRORS = 0, + OSM_EVENT_ID_PORT_DATA_COUNTERS, + OSM_EVENT_ID_PORT_SELECT, + OSM_EVENT_ID_TRAP, + OSM_EVENT_ID_SUBNET_UP, + OSM_EVENT_ID_HEAVY_SWEEP_START, + OSM_EVENT_ID_HEAVY_SWEEP_DONE, + OSM_EVENT_ID_UCAST_ROUTING_DONE, + OSM_EVENT_ID_STATE_CHANGE, + OSM_EVENT_ID_SA_DB_DUMPED, + OSM_EVENT_ID_LFT_CHANGE, + OSM_EVENT_ID_MAX +} osm_epi_event_id_t; + +typedef struct osm_epi_port_id { + uint64_t node_guid; + uint8_t port_num; + char node_name[OSM_EPI_NODE_NAME_LEN]; +} osm_epi_port_id_t; + +typedef enum { + LFT_CHANGED_LFT_TOP = (1 << 0), + LFT_CHANGED_BLOCK = (1 << 1) +} osm_epi_lft_change_flags_t; + +typedef enum { + UCAST_ROUTING_NONE, + UCAST_ROUTING_HEAVY_SWEEP, + UCAST_ROUTING_REROUTE +} osm_epi_ucast_routing_flags_t; + +typedef struct osm_epi_lft_change_event { + osm_switch_t *p_sw; + osm_epi_lft_change_flags_t flags; + uint16_t lft_top; + uint32_t block_num; +} osm_epi_lft_change_event_t; + +/** ========================================================================= + * Port error event + * OSM_EVENT_ID_PORT_ERRORS + * This is a difference from the last reading. NOT an absolute reading.
+ */ +typedef struct osm_epi_pe_event { + osm_epi_port_id_t port_id; + uint64_t symbol_err_cnt; + uint64_t link_err_recover; + uint64_t link_downed; + uint64_t rcv_err; + uint64_t rcv_rem_phys_err; + uint64_t rcv_switch_relay_err; + uint64_t xmit_discards; + uint64_t xmit_constraint_err; + uint64_t rcv_constraint_err; + uint64_t link_integrity; + uint64_t buffer_overrun; + uint64_t vl15_dropped; + uint64_t xmit_wait; + time_t time_diff_s; +} osm_epi_pe_event_t; + +/** ========================================================================= + * Port data counter event + * This is a difference from the last reading. NOT an absolute reading. + */ +typedef struct osm_epi_dc_event { + osm_epi_port_id_t port_id; + uint64_t xmit_data; + uint64_t rcv_data; + uint64_t xmit_pkts; + uint64_t rcv_pkts; + uint64_t unicast_xmit_pkts; + uint64_t unicast_rcv_pkts; + uint64_t multicast_xmit_pkts; + uint64_t multicast_rcv_pkts; + time_t time_diff_s; +} osm_epi_dc_event_t; + +/** ========================================================================= + * Port select event + * This is a difference from the last reading. NOT an absolute reading. 
+ */ +typedef struct osm_api_ps_event { + osm_epi_port_id_t port_id; + uint64_t xmit_wait; + time_t time_diff_s; +} osm_epi_ps_event_t; + +/** ========================================================================= + * Plugin creators should allocate an object of this type + * (named OSM_EVENT_PLUGIN_IMPL_NAME) + * The version should be set to OSM_EVENT_PLUGIN_INTERFACE_VER + */ +#define OSM_EVENT_PLUGIN_IMPL_NAME "osm_event_plugin" +#define OSM_ORIG_EVENT_PLUGIN_INTERFACE_VER 1 +#define OSM_EVENT_PLUGIN_INTERFACE_VER 2 +typedef struct osm_event_plugin { + const char *osm_version; + void *(*create) (struct osm_opensm *osm); + void (*delete) (void *plugin_data); + void (*report) (void *plugin_data, osm_epi_event_id_t event_id, + void *event_data); +} osm_event_plugin_t; + +/** ========================================================================= + * The plugin structure should be considered opaque + */ +typedef struct osm_epi_plugin { + cl_list_item_t list; + void *handle; + osm_event_plugin_t *impl; + void *plugin_data; + char *plugin_name; +} osm_epi_plugin_t; + +/** + * functions + */ +osm_epi_plugin_t *osm_epi_construct(struct osm_opensm *osm, char *plugin_name); +void osm_epi_destroy(osm_epi_plugin_t * plugin); + +/** ========================================================================= + * Helper functions + */ +static inline void +osm_epi_create_port_id(osm_epi_port_id_t * port_id, uint64_t node_guid, + uint8_t port_num, char *node_name) +{ + port_id->node_guid = node_guid; + port_id->port_num = port_num; + strncpy(port_id->node_name, node_name, OSM_EPI_NODE_NAME_LEN); + port_id->node_name[OSM_EPI_NODE_NAME_LEN - 1] = '\0'; +} + +END_C_DECLS +#endif /* _OSM_EVENT_PLUGIN_H_ */ diff --git a/include/opensm/osm_file_ids.h b/include/opensm/osm_file_ids.h new file mode 100644 index 0000000..20058c1 --- /dev/null +++ b/include/opensm/osm_file_ids.h @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2012 Mellanox Technologies LTD. All rights reserved. 
+ * Copyright (C) 2012-2017 Tokyo Institute of Technology. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_file_ids_enum. + * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_FILE_ID_H_ +#define _OSM_FILE_ID_H_ + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS + +/****d* OpenSM: osm_file_ids_enum +* NAME +* osm_file_ids_enum +* +* DESCRIPTION +* Enumerates all FILE_IDs used for logging support. 
+* +* SYNOPSIS +*/ +typedef enum _osm_file_ids_enum { + OSM_FILE_MAIN_C = 0, + OSM_FILE_CONSOLE_C, + OSM_FILE_CONSOLE_IO_C, + OSM_FILE_DB_FILES_C, + OSM_FILE_DB_PACK_C, + OSM_FILE_DROP_MGR_C, + OSM_FILE_DUMP_C, + OSM_FILE_EVENT_PLUGIN_C, + OSM_FILE_GUID_INFO_RCV_C, + OSM_FILE_GUID_MGR_C, + OSM_FILE_HELPER_C, + OSM_FILE_INFORM_C, + OSM_FILE_LID_MGR_C, + OSM_FILE_LIN_FWD_RCV_C, + OSM_FILE_LINK_MGR_C, + OSM_FILE_LOG_C, + OSM_FILE_MAD_POOL_C, + OSM_FILE_MCAST_FWD_RCV_C, + OSM_FILE_MCAST_MGR_C, + OSM_FILE_MCAST_TBL_C, + OSM_FILE_MCM_PORT_C, + OSM_FILE_MESH_C, + OSM_FILE_MLNX_EXT_PORT_INFO_RCV_C, + OSM_FILE_MTREE_C, + OSM_FILE_MULTICAST_C, + OSM_FILE_NODE_C, + OSM_FILE_NODE_DESC_RCV_C, + OSM_FILE_NODE_INFO_RCV_C, + OSM_FILE_OPENSM_C, + OSM_FILE_PERFMGR_C, + OSM_FILE_PERFMGR_DB_C, + OSM_FILE_PKEY_C, + OSM_FILE_PKEY_MGR_C, + OSM_FILE_PKEY_RCV_C, + OSM_FILE_PORT_C, + OSM_FILE_PORT_INFO_RCV_C, + OSM_FILE_PRTN_C, + OSM_FILE_PRTN_CONFIG_C, + OSM_FILE_QOS_C, + OSM_FILE_QOS_PARSER_L_L, + OSM_FILE_QOS_PARSER_Y_Y, + OSM_FILE_QOS_POLICY_C, + OSM_FILE_REMOTE_SM_C, + OSM_FILE_REQ_C, + OSM_FILE_RESP_C, + OSM_FILE_ROUTER_C, + OSM_FILE_SA_C, + OSM_FILE_SA_CLASS_PORT_INFO_C, + OSM_FILE_SA_GUIDINFO_RECORD_C, + OSM_FILE_SA_INFORMINFO_C, + OSM_FILE_SA_LFT_RECORD_C, + OSM_FILE_SA_LINK_RECORD_C, + OSM_FILE_SA_MAD_CTRL_C, + OSM_FILE_SA_MCMEMBER_RECORD_C, + OSM_FILE_SA_MFT_RECORD_C, + OSM_FILE_SA_MULTIPATH_RECORD_C, + OSM_FILE_SA_NODE_RECORD_C, + OSM_FILE_SA_PATH_RECORD_C, + OSM_FILE_SA_PKEY_RECORD_C, + OSM_FILE_SA_PORTINFO_RECORD_C, + OSM_FILE_SA_SERVICE_RECORD_C, + OSM_FILE_SA_SLVL_RECORD_C, + OSM_FILE_SA_SMINFO_RECORD_C, + OSM_FILE_SA_SW_INFO_RECORD_C, + OSM_FILE_SA_VLARB_RECORD_C, + OSM_FILE_SERVICE_C, + OSM_FILE_SLVL_MAP_RCV_C, + OSM_FILE_SM_C, + OSM_FILE_SMINFO_RCV_C, + OSM_FILE_SM_MAD_CTRL_C, + OSM_FILE_SM_STATE_MGR_C, + OSM_FILE_STATE_MGR_C, + OSM_FILE_SUBNET_C, + OSM_FILE_SW_INFO_RCV_C, + OSM_FILE_SWITCH_C, + OSM_FILE_TORUS_C, + OSM_FILE_TRAP_RCV_C, + OSM_FILE_UCAST_CACHE_C, + 
OSM_FILE_UCAST_DNUP_C, + OSM_FILE_UCAST_FILE_C, + OSM_FILE_UCAST_FTREE_C, + OSM_FILE_UCAST_LASH_C, + OSM_FILE_UCAST_MGR_C, + OSM_FILE_UCAST_UPDN_C, + OSM_FILE_VENDOR_IBUMAD_C, + OSM_FILE_VL15INTF_C, + OSM_FILE_VL_ARB_RCV_C, + OSM_FILE_ST_C, + OSM_FILE_UCAST_DFSSSP_C, + OSM_FILE_CONGESTION_CONTROL_C, + OSM_FILE_UCAST_NUE_C, +} osm_file_ids_enum; +/***********/ + +END_C_DECLS +#endif /* _OSM_FILE_ID_H_ */ diff --git a/include/opensm/osm_guid.h b/include/opensm/osm_guid.h new file mode 100644 index 0000000..2fa5f7f --- /dev/null +++ b/include/opensm/osm_guid.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2011 Mellanox Technologies LTD. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef _OSM_GUID_H_ +#define _OSM_GUID_H_ + +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS + +typedef struct osm_guidinfo_work_obj { + cl_list_item_t list_item; + osm_port_t *p_port; + uint8_t block_num; +} osm_guidinfo_work_obj_t; + +osm_guidinfo_work_obj_t *osm_guid_work_obj_new(IN osm_port_t * p_port, + IN uint8_t block_num); + +void osm_guid_work_obj_delete(IN osm_guidinfo_work_obj_t * p_wobj); + +int osm_queue_guidinfo(IN osm_sa_t *sa, IN osm_port_t *p_port, + IN uint8_t block_num); + +END_C_DECLS +#endif /* _OSM_GUID_H_ */ diff --git a/include/opensm/osm_helper.h b/include/opensm/osm_helper.h new file mode 100644 index 0000000..9029d40 --- /dev/null +++ b/include/opensm/osm_helper.h @@ -0,0 +1,936 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _OSM_HELPER_H_ +#define _OSM_HELPER_H_ + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/* + * Abstract: + * Declaration of helpful functions. + */ +/****f* OpenSM: Helper/ib_get_sa_method_str + * NAME + * ib_get_sa_method_str + * + * DESCRIPTION + * Returns a string for the specified SA Method value. + * + * SYNOPSIS + */ +const char *ib_get_sa_method_str(IN uint8_t method); +/* + * PARAMETERS + * method + * [in] Network order METHOD ID value. + * + * RETURN VALUES + * Pointer to the method string. + * + * NOTES + * + * SEE ALSO + *********/ + +/****f* OpenSM: Helper/ib_get_sm_method_str +* NAME +* ib_get_sm_method_str +* +* DESCRIPTION +* Returns a string for the specified SM Method value. +* +* SYNOPSIS +*/ +const char *ib_get_sm_method_str(IN uint8_t method); +/* +* PARAMETERS +* method +* [in] Network order METHOD ID value. +* +* RETURN VALUES +* Pointer to the method string. 
+* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Helper/ib_get_sm_attr_str +* NAME +* ib_get_sm_attr_str +* +* DESCRIPTION +* Returns a string for the specified SM attribute value. +* +* SYNOPSIS +*/ +const char *ib_get_sm_attr_str(IN ib_net16_t attr); +/* +* PARAMETERS +* attr +* [in] Network order attribute ID value. +* +* RETURN VALUES +* Pointer to the attribute string. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Helper/ib_get_sa_attr_str +* NAME +* ib_get_sa_attr_str +* +* DESCRIPTION +* Returns a string for the specified SA attribute value. +* +* SYNOPSIS +*/ +const char *ib_get_sa_attr_str(IN ib_net16_t attr); +/* +* PARAMETERS +* attr +* [in] Network order attribute ID value. +* +* RETURN VALUES +* Pointer to the attribute string. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Helper/ib_get_trap_str +* NAME +* ib_get_trap_str +* +* DESCRIPTION +* Returns a name for the specified trap. +* +* SYNOPSIS +*/ +const char *ib_get_trap_str(uint16_t trap_num); +/* +* PARAMETERS +* trap_num +* [in] Network order trap number. +* +* RETURN VALUES +* Name of the trap. +* +*********/ + +extern const ib_gid_t ib_zero_gid; + +/****f* IBA Base: Types/ib_gid_is_notzero +* NAME +* ib_gid_is_notzero +* +* DESCRIPTION +* Returns a boolean indicating whether the GID differs from ib_zero_gid. +* +* SYNOPSIS +*/ +static inline boolean_t ib_gid_is_notzero(IN const ib_gid_t * p_gid) +{ + return memcmp(p_gid, &ib_zero_gid, sizeof(*p_gid)) != 0; +} + +/* +* PARAMETERS +* p_gid +* [in] Pointer to the GID object. +* +* RETURN VALUES +* Returns TRUE if GID is not zero. +* FALSE otherwise. +* +* NOTES +* +* SEE ALSO +* ib_gid_t +*********/ + +/****f* OpenSM: Helper/osm_dump_port_info +* NAME +* osm_dump_port_info +* +* DESCRIPTION +* Dumps the PortInfo attribute to the log.
+* +* SYNOPSIS +*/ +void osm_dump_port_info(IN osm_log_t * p_log, IN ib_net64_t node_guid, + IN ib_net64_t port_guid, IN uint8_t port_num, + IN const ib_port_info_t * p_pi, + IN osm_log_level_t log_level); + +void osm_dump_port_info_v2(IN osm_log_t * p_log, IN ib_net64_t node_guid, + IN ib_net64_t port_guid, IN uint8_t port_num, + IN const ib_port_info_t * p_pi, + IN const int file_id, + IN osm_log_level_t log_level); + +/* +* PARAMETERS +* p_log +* [in] Pointer to the osm_log_t object +* +* node_guid +* [in] Node GUID that owns this port. +* +* port_guid +* [in] Port GUID for this port. +* +* port_num +* [in] Port number for this port. +* +* p_pi +* [in] Pointer to the PortInfo attribute +* +* log_level +* [in] Log verbosity level with which to dump the data. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Helper/osm_dump_guid_info +* NAME +* osm_dump_guid_info +* +* DESCRIPTION +* Dumps the GUIDInfo attribute to the log. +* +* SYNOPSIS +*/ +void osm_dump_guid_info(IN osm_log_t * p_log, IN ib_net64_t node_guid, + IN ib_net64_t port_guid, IN uint8_t block_num, + IN const ib_guid_info_t * p_gi, + IN osm_log_level_t log_level); + +void osm_dump_guid_info_v2(IN osm_log_t * p_log, IN ib_net64_t node_guid, + IN ib_net64_t port_guid, IN uint8_t block_num, + IN const ib_guid_info_t * p_gi, + IN const int file_id, + IN osm_log_level_t log_level); +/* +* PARAMETERS +* p_log +* [in] Pointer to the osm_log_t object. +* +* node_guid +* [in] Node GUID that owns this port. +* +* port_guid +* [in] Port GUID for this port. +* +* block_num +* [in] Block number. +* +* p_gi +* [in] Pointer to the GUIDInfo attribute. +* +* log_level +* [in] Log verbosity level with which to dump the data. +* +* RETURN VALUES +* None. 
+* +* NOTES +* +* SEE ALSO +*********/ + +void osm_dump_mlnx_ext_port_info(IN osm_log_t * p_log, IN ib_net64_t node_guid, + IN ib_net64_t port_guid, IN uint8_t port_num, + IN const ib_mlnx_ext_port_info_t * p_pi, + IN osm_log_level_t log_level); + +void osm_dump_mlnx_ext_port_info_v2(IN osm_log_t * p_log, IN ib_net64_t node_guid, + IN ib_net64_t port_guid, IN uint8_t port_num, + IN const ib_mlnx_ext_port_info_t * p_pi, + IN const int file_id, + IN osm_log_level_t log_level); + +void osm_dump_path_record(IN osm_log_t * p_log, IN const ib_path_rec_t * p_pr, + IN osm_log_level_t log_level); + +void osm_dump_path_record_v2(IN osm_log_t * p_log, IN const ib_path_rec_t * p_pr, + IN const int file_id, IN osm_log_level_t log_level); + +void osm_dump_multipath_record(IN osm_log_t * p_log, + IN const ib_multipath_rec_t * p_mpr, + IN osm_log_level_t log_level); + +void osm_dump_multipath_record_v2(IN osm_log_t * p_log, + IN const ib_multipath_rec_t * p_mpr, + IN const int file_id, IN osm_log_level_t log_level); + +void osm_dump_node_record(IN osm_log_t * p_log, + IN const ib_node_record_t * p_nr, + IN osm_log_level_t log_level); + +void osm_dump_node_record_v2(IN osm_log_t * p_log, + IN const ib_node_record_t * p_nr, + IN const int file_id, IN osm_log_level_t log_level); + +void osm_dump_mc_record(IN osm_log_t * p_log, IN const ib_member_rec_t * p_mcmr, + IN osm_log_level_t log_level); + +void osm_dump_mc_record_v2(IN osm_log_t * p_log, IN const ib_member_rec_t * p_mcmr, + IN const int file_id, IN osm_log_level_t log_level); + +void osm_dump_link_record(IN osm_log_t * p_log, + IN const ib_link_record_t * p_lr, + IN osm_log_level_t log_level); + +void osm_dump_link_record_v2(IN osm_log_t * p_log, + IN const ib_link_record_t * p_lr, + IN const int file_id, IN osm_log_level_t log_level); + +void osm_dump_service_record(IN osm_log_t * p_log, + IN const ib_service_record_t * p_sr, + IN osm_log_level_t log_level); + +void osm_dump_service_record_v2(IN osm_log_t * p_log, + IN const 
ib_service_record_t * p_sr, + IN const int file_id, IN osm_log_level_t log_level); + +void osm_dump_portinfo_record(IN osm_log_t * p_log, + IN const ib_portinfo_record_t * p_pir, + IN osm_log_level_t log_level); + +void osm_dump_portinfo_record_v2(IN osm_log_t * p_log, + IN const ib_portinfo_record_t * p_pir, + IN const int file_id, IN osm_log_level_t log_level); + +void osm_dump_guidinfo_record(IN osm_log_t * p_log, + IN const ib_guidinfo_record_t * p_gir, + IN osm_log_level_t log_level); + +void osm_dump_guidinfo_record_v2(IN osm_log_t * p_log, + IN const ib_guidinfo_record_t * p_gir, + IN const int file_id, IN osm_log_level_t log_level); + +void osm_dump_inform_info(IN osm_log_t * p_log, + IN const ib_inform_info_t * p_ii, + IN osm_log_level_t log_level); + +void osm_dump_inform_info_v2(IN osm_log_t * p_log, + IN const ib_inform_info_t * p_ii, + IN const int file_id, IN osm_log_level_t log_level); + +void osm_dump_inform_info_record(IN osm_log_t * p_log, + IN const ib_inform_info_record_t * p_iir, + IN osm_log_level_t log_level); + +void osm_dump_inform_info_record_v2(IN osm_log_t * p_log, + IN const ib_inform_info_record_t * p_iir, + IN const int file_id, IN osm_log_level_t log_level); + +void osm_dump_switch_info_record(IN osm_log_t * p_log, + IN const ib_switch_info_record_t * p_sir, + IN osm_log_level_t log_level); + +void osm_dump_switch_info_record_v2(IN osm_log_t * p_log, + IN const ib_switch_info_record_t * p_sir, + IN const int file_id, IN osm_log_level_t log_level); + +void osm_dump_sm_info_record(IN osm_log_t * p_log, + IN const ib_sminfo_record_t * p_smir, + IN osm_log_level_t log_level); + +void osm_dump_sm_info_record_v2(IN osm_log_t * p_log, + IN const ib_sminfo_record_t * p_smir, + IN const int file_id, IN osm_log_level_t log_level); + +void osm_dump_pkey_block(IN osm_log_t * p_log, IN uint64_t port_guid, + IN uint16_t block_num, IN uint8_t port_num, + IN const ib_pkey_table_t * p_pkey_tbl, + IN osm_log_level_t log_level); + +void 
osm_dump_pkey_block_v2(IN osm_log_t * p_log, IN uint64_t port_guid, + IN uint16_t block_num, IN uint8_t port_num, + IN const ib_pkey_table_t * p_pkey_tbl, + IN const int file_id, + IN osm_log_level_t log_level); + +void osm_dump_slvl_map_table(IN osm_log_t * p_log, IN uint64_t port_guid, + IN uint8_t in_port_num, IN uint8_t out_port_num, + IN const ib_slvl_table_t * p_slvl_tbl, + IN osm_log_level_t log_level); + +void osm_dump_slvl_map_table_v2(IN osm_log_t * p_log, IN uint64_t port_guid, + IN uint8_t in_port_num, IN uint8_t out_port_num, + IN const ib_slvl_table_t * p_slvl_tbl, + IN const int file_id, + IN osm_log_level_t log_level); + + +void osm_dump_vl_arb_table(IN osm_log_t * p_log, IN uint64_t port_guid, + IN uint8_t block_num, IN uint8_t port_num, + IN const ib_vl_arb_table_t * p_vla_tbl, + IN osm_log_level_t log_level); + +void osm_dump_vl_arb_table_v2(IN osm_log_t * p_log, IN uint64_t port_guid, + IN uint8_t block_num, IN uint8_t port_num, + IN const ib_vl_arb_table_t * p_vla_tbl, + IN const int file_id, + IN osm_log_level_t log_level); + +/****f* OpenSM: Helper/osm_dump_node_info +* NAME +* osm_dump_node_info +* +* DESCRIPTION +* Dumps the NodeInfo attribute to the log. +* +* SYNOPSIS +*/ +void osm_dump_node_info(IN osm_log_t * p_log, + IN const ib_node_info_t * p_ni, + IN osm_log_level_t log_level); + +void osm_dump_node_info_v2(IN osm_log_t * p_log, + IN const ib_node_info_t * p_ni, + IN const int file_id, + IN osm_log_level_t log_level); +/* +* PARAMETERS +* p_log +* [in] Pointer to the osm_log_t object +* +* p_ni +* [in] Pointer to the NodeInfo attribute +* +* log_level +* [in] Log verbosity level with which to dump the data. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Helper/osm_dump_sm_info +* NAME +* osm_dump_sm_info +* +* DESCRIPTION +* Dumps the SMInfo attribute to the log.
+* +* SYNOPSIS +*/ +void osm_dump_sm_info(IN osm_log_t * p_log, IN const ib_sm_info_t * p_smi, + IN osm_log_level_t log_level); + +void osm_dump_sm_info_v2(IN osm_log_t * p_log, IN const ib_sm_info_t * p_smi, + IN const int file_id, IN osm_log_level_t log_level); + +/* +* PARAMETERS +* p_log +* [in] Pointer to the osm_log_t object +* +* p_smi +* [in] Pointer to the SMInfo attribute +* +* log_level +* [in] Log verbosity level with which to dump the data. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Helper/osm_dump_switch_info +* NAME +* osm_dump_switch_info +* +* DESCRIPTION +* Dumps the SwitchInfo attribute to the log. +* +* SYNOPSIS +*/ +void osm_dump_switch_info(IN osm_log_t * p_log, + IN const ib_switch_info_t * p_si, + IN osm_log_level_t log_level); + +void osm_dump_switch_info_v2(IN osm_log_t * p_log, + IN const ib_switch_info_t * p_si, + IN const int file_id, + IN osm_log_level_t log_level); +/* +* PARAMETERS +* p_log +* [in] Pointer to the osm_log_t object +* +* p_si +* [in] Pointer to the SwitchInfo attribute +* +* log_level +* [in] Log verbosity level with which to dump the data. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Helper/osm_dump_notice +* NAME +* osm_dump_notice +* +* DESCRIPTION +* Dumps the Notice attribute to the log. +* +* SYNOPSIS +*/ +void osm_dump_notice(IN osm_log_t * p_log, + IN const ib_mad_notice_attr_t * p_ntci, + IN osm_log_level_t log_level); + +void osm_dump_notice_v2(IN osm_log_t * p_log, + IN const ib_mad_notice_attr_t * p_ntci, + IN const int file_id, + IN osm_log_level_t log_level); +/* +* PARAMETERS +* p_log +* [in] Pointer to the osm_log_t object +* +* p_ntci +* [in] Pointer to the Notice attribute +* +* log_level +* [in] Log verbosity level with which to dump the data. +* +* RETURN VALUES +* None. 
+* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/osm_get_disp_msg_str +* NAME +* osm_get_disp_msg_str +* +* DESCRIPTION +* Returns a string for the specified Dispatcher message. +* +* SYNOPSIS +*/ +const char *osm_get_disp_msg_str(IN cl_disp_msgid_t msg); +/* +* PARAMETERS +* msg +* [in] Dispatcher message ID value. +* +* RETURN VALUES +* Pointer to the message description string. +* +* NOTES +* +* SEE ALSO +*********/ + +void osm_dump_dr_path(IN osm_log_t * p_log, IN const osm_dr_path_t * p_path, + IN osm_log_level_t level); + +void osm_dump_dr_path_v2(IN osm_log_t * p_log, IN const osm_dr_path_t * p_path, + IN const int file_id, IN osm_log_level_t level); + + +void osm_dump_smp_dr_path(IN osm_log_t * p_log, IN const ib_smp_t * p_smp, + IN osm_log_level_t level); + +void osm_dump_smp_dr_path_v2(IN osm_log_t * p_log, IN const ib_smp_t * p_smp, + IN const int file_id, IN osm_log_level_t level); + +void osm_dump_dr_smp(IN osm_log_t * p_log, IN const ib_smp_t * p_smp, + IN osm_log_level_t level); + +void osm_dump_dr_smp_v2(IN osm_log_t * p_log, IN const ib_smp_t * p_smp, + IN const int file_id, IN osm_log_level_t level); + +void osm_dump_sa_mad(IN osm_log_t * p_log, IN const ib_sa_mad_t * p_smp, + IN osm_log_level_t level); + +void osm_dump_sa_mad_v2(IN osm_log_t * p_log, IN const ib_sa_mad_t * p_smp, + IN const int file_id, IN osm_log_level_t level); + +void osm_dump_dr_path_as_buf(IN size_t max_len, IN const osm_dr_path_t * p_path, + OUT char* buf); + + +/****f* IBA Base: Types/osm_get_sm_signal_str +* NAME +* osm_get_sm_signal_str +* +* DESCRIPTION +* Returns a string for the specified SM signal. +* +* SYNOPSIS +*/ +const char *osm_get_sm_signal_str(IN osm_signal_t signal); +/* +* PARAMETERS +* signal +* [in] Signal value +* +* RETURN VALUES +* Pointer to the signal description string.
+* +* NOTES +* +* SEE ALSO +*********/ + +const char *osm_get_port_state_str_fixed_width(IN uint8_t port_state); + +const char *osm_get_node_type_str_fixed_width(IN uint8_t node_type); + +const char *osm_get_manufacturer_str(IN uint64_t guid_ho); + +const char *osm_get_mtu_str(IN uint8_t mtu); + +const char *osm_get_lwa_str(IN uint8_t lwa); + +const char *osm_get_lsa_str(IN uint8_t lsa, IN uint8_t lsea, IN uint8_t state, + IN uint8_t fdr10); + +/****f* IBA Base: Types/osm_get_sm_mgr_signal_str +* NAME +* osm_get_sm_mgr_signal_str +* +* DESCRIPTION +* Returns a string for the specified SM manager signal. +* +* SYNOPSIS +*/ +const char *osm_get_sm_mgr_signal_str(IN osm_sm_signal_t signal); +/* +* PARAMETERS +* signal +* [in] SM manager signal +* +* RETURN VALUES +* Pointer to the signal description string. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/osm_get_sm_mgr_state_str +* NAME +* osm_get_sm_mgr_state_str +* +* DESCRIPTION +* Returns a string for the specified SM manager state. +* +* SYNOPSIS +*/ +const char *osm_get_sm_mgr_state_str(IN uint16_t state); +/* +* PARAMETERS +* state +* [in] SM manager state +* +* RETURN VALUES +* Pointer to the state description string. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_mtu_is_valid +* NAME +* ib_mtu_is_valid +* +* DESCRIPTION +* Validates encoded MTU +* +* SYNOPSIS +*/ +int ib_mtu_is_valid(IN const int mtu); +/* +* PARAMETERS +* mtu +* [in] Encoded path mtu. +* +* RETURN VALUES +* Returns an int indicating mtu is valid (1) +* or invalid (0). +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_rate_is_valid +* NAME +* ib_rate_is_valid +* +* DESCRIPTION +* Validates encoded rate +* +* SYNOPSIS +*/ +int ib_rate_is_valid(IN const int rate); +/* +* PARAMETERS +* rate +* [in] Encoded path rate. +* +* RETURN VALUES +* Returns an int indicating rate is valid (1) +* or invalid (0). 
+* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_path_compare_rates +* NAME +* ib_path_compare_rates +* +* DESCRIPTION +* Compares the encoded values for two path rates and +* return value is based on the ordered comparison of +* the path rates (or path rate equivalents). +* +* SYNOPSIS +*/ +int ib_path_compare_rates(IN const int rate1, IN const int rate2); + +/* +* PARAMETERS +* rate1 +* [in] Encoded path rate 1. +* +* rate2 +* [in] Encoded path rate 2. +* +* RETURN VALUES +* Returns an int indicating less than (-1), equal to (0), or +* greater than (1) rate1 as compared with rate2. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_path_rate_get_prev +* NAME +* ib_path_rate_get_prev +* +* DESCRIPTION +* Obtains encoded rate for the rate previous to the one requested. +* +* SYNOPSIS +*/ +int ib_path_rate_get_prev(IN const int rate); + +/* +* PARAMETERS +* rate +* [in] Encoded path rate. +* +* RETURN VALUES +* Returns an int indicating encoded rate or +* 0 if none can be found. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_path_rate_get_next +* NAME +* ib_path_rate_get_next +* +* DESCRIPTION +* Obtains encoded rate for the rate subsequent to the one requested. +* +* SYNOPSIS +*/ +int ib_path_rate_get_next(IN const int rate); + +/* +* PARAMETERS +* rate +* [in] Encoded path rate. +* +* RETURN VALUES +* Returns an int indicating encoded rate or +* 0 if none can be found. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_path_rate_max_12xedr +* NAME +* ib_path_rate_max_12xedr +* +* DESCRIPTION +* Obtains encoded rate from the set of "original" extended +* SA rates (up through and including 300 Gbps - 12x EDR). +* +* SYNOPSIS +*/ +int ib_path_rate_max_12xedr(IN const int rate); + +/* +* PARAMETERS +* rate +* [in] Encoded path rate. +* +* RETURN VALUES +* Returns an int indicating the encoded rate +* with a maximum of 300 Gbps (12x EDR). 
+* For new rates (relating to 2x and HDR), the +* nearest "original" extended rate lower than +* the 2x or HDR related rate is returned. +* 0 if none can be found. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* IBA Base: Types/ib_path_rate_2x_hdr_fixups +* NAME +* ib_path_rate_2x_hdr_fixups +* +* DESCRIPTION +* Fixes encoded rate based on whether 2x link width +* and/or HDR are supported. +* +* SYNOPSIS +*/ +int ib_path_rate_2x_hdr_fixups(IN const ib_port_info_t * p_pi, + IN const int rate); + +/* +* PARAMETERS +* p_pi +* [in] Pointer to the PortInfo attribute +* rate +* [in] Encoded path rate. +* +* RETURN VALUES +* Returns an int indicating the fixed up encoded rate +* based on whether 2x link width and/or HDR are supported. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Helper/sprint_uint8_arr +* NAME +* sprint_uint8_arr +* +* DESCRIPTION +* Create the comma-separated string of numbers +* from input array of uint8 numbers +* (e.g. "1,2,3,4") +* +* SYNOPSIS +*/ +int sprint_uint8_arr(IN char *buf, IN size_t size, + IN const uint8_t * arr, IN size_t len); + +/* +* PARAMETERS +* buf +* [in] Pointer to the output buffer +* +* size +* [in] Size of the output buffer +* +* arr +* [in] Pointer to the input array of uint8 +* +* len +* [in] Size of the input array +* +* RETURN VALUES +* Return the number of characters printed to the buffer +* +* NOTES +* +* SEE ALSO +*********/ + + +END_C_DECLS +#endif /* _OSM_HELPER_H_ */ diff --git a/include/opensm/osm_inform.h b/include/opensm/osm_inform.h new file mode 100644 index 0000000..8cefc20 --- /dev/null +++ b/include/opensm/osm_inform.h @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_inform_rec_t. + * This object represents an IBA Inform Record. + * This object is part of the OpenSM family of objects. 
+ * + * Author: + * Eitan Zahavi, Mellanox + */ + +#ifndef _OSM_INFR_H_ +#define _OSM_INFR_H_ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/Inform Record +* NAME +* Inform Record +* +* DESCRIPTION +* The Inform record encapsulates the information needed by the +* SA to manage InformInfo registrations and sending Reports(Notice) +* when SM receives Traps for registered LIDs. +* +* The inform record is not thread safe, thus callers must provide +* serialization. +* +* This object should be treated as opaque and should be +* manipulated only through the provided functions. +* +* AUTHOR +* Eitan Zahavi, Mellanox +* +*********/ +/****s* OpenSM: Inform Record/osm_infr_t +* NAME +* osm_infr_t +* +* DESCRIPTION +* Inform Record structure. +* +* The osm_infr_t object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct osm_infr { + cl_list_item_t list_item; + osm_bind_handle_t h_bind; + osm_sa_t *sa; + osm_mad_addr_t report_addr; + ib_inform_info_record_t inform_record; +} osm_infr_t; +/* +* FIELDS +* list_item +* List Item for qlist linkage. Must be first element!! +* +* h_bind +* A handle of lower level mad srvc +* +* sa +* A pointer to osm_sa object +* +* report_addr +* Report address +* +* inform_record +* The Inform Info Record +* +* SEE ALSO +*********/ + +/****f* OpenSM: Inform Record/osm_infr_new +* NAME +* osm_infr_new +* +* DESCRIPTION +* Allocates and initializes an Inform Record for use. +* +* SYNOPSIS +*/ +osm_infr_t *osm_infr_new(IN const osm_infr_t * p_infr_rec); +/* +* PARAMETERS +* p_infr_rec +* [in] Pointer to IB Inform Record +* +* RETURN VALUES +* pointer to osm_infr_t structure. +* +* NOTES +* Allows calling other inform record methods.
+* +* SEE ALSO +* Inform Record, osm_infr_delete +*********/ + +/****f* OpenSM: Inform Record/osm_infr_delete +* NAME +* osm_infr_delete +* +* DESCRIPTION +* Destroys and deallocates the osm_infr_t structure. +* +* SYNOPSIS +*/ +void osm_infr_delete(IN osm_infr_t * p_infr); +/* +* PARAMETERS +* p_infr +* [in] Pointer to osm_infr_t structure +* +* SEE ALSO +* Inform Record, osm_infr_new +*********/ + +/****f* OpenSM: Inform Record/osm_infr_get_by_rec +* NAME +* osm_infr_get_by_rec +* +* DESCRIPTION +* Find a matching osm_infr_t in the subnet DB by inform_info_record +* +* SYNOPSIS +*/ +osm_infr_t *osm_infr_get_by_rec(IN osm_subn_t const *p_subn, + IN osm_log_t * p_log, + IN osm_infr_t * p_infr_rec); +/* +* PARAMETERS +* p_subn +* [in] Pointer to the subnet object +* +* p_log +* [in] Pointer to the log object +* +* p_infr_rec +* [in] Pointer to an inform_info record +* +* RETURN +* The matching osm_infr_t +* SEE ALSO +* Inform Record, osm_infr_new, osm_infr_delete +*********/ + +void osm_infr_insert_to_db(IN osm_subn_t * p_subn, IN osm_log_t * p_log, + IN osm_infr_t * p_infr); + +void osm_infr_remove_from_db(IN osm_subn_t * p_subn, IN osm_log_t * p_log, + IN osm_infr_t * p_infr); + +/****f* OpenSM: Inform Record/osm_infr_remove_subscriptions +* NAME +* osm_infr_remove_subscriptions +* +* DESCRIPTION +* Remove all event subscriptions of a port +* +* SYNOPSIS +*/ +ib_api_status_t +osm_infr_remove_subscriptions(IN osm_subn_t * p_subn, IN osm_log_t * p_log, + IN ib_net64_t port_guid); +/* +* PARAMETERS +* p_subn +* [in] Pointer to the subnet object +* +* p_log +* [in] Pointer to the log object +* +* port_guid +* [in] PortGUID of the subscriber that should be removed +* +* RETURN +* CL_SUCCESS if port_guid had any subscriptions being removed +* CL_NOT_FOUND if port_guid did not have any active subscriptions +* SEE ALSO +*********/ + +/****f* OpenSM: Inform Record/osm_report_notice +* NAME +* osm_report_notice +* +* DESCRIPTION +* Once a Trap was received by the
osm_trap_rcv, or a Trap sourced in +* the SM was sent (Traps 64-67) this routine is called with a copy of +* the notice data. +* Given a notice attribute - compare and see if it matches the InformInfo +* Element and if it does - call the Report(Notice) for the +* target QP registered by the address stored in the InformInfo element +* +* SYNOPSIS +*/ +ib_api_status_t osm_report_notice(IN osm_log_t * p_log, IN osm_subn_t * p_subn, + IN ib_mad_notice_attr_t * p_ntc); +/* +* PARAMETERS +* p_log, p_subn +* [in] Pointers to the log object and the subnet object +* +* p_ntc +* [in] Pointer to a copy of the incoming trap notice attribute. +* +* RETURN +* IB_SUCCESS on good completion +* +* SEE ALSO +* Inform Record, osm_trap_rcv +*********/ + +END_C_DECLS +#endif /* _OSM_INFR_H_ */ diff --git a/include/opensm/osm_lid_mgr.h b/include/opensm/osm_lid_mgr.h new file mode 100644 index 0000000..f95a2a1 --- /dev/null +++ b/include/opensm/osm_lid_mgr.h @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_lid_mgr_t. + * This object represents the LID Manager object. + * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_LID_MGR_H_ +#define _OSM_LID_MGR_H_ + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +#define OSM_LID_MGR_LIST_SIZE_MIN 256 +/****h* OpenSM/LID Manager +* NAME +* LID Manager +* +* DESCRIPTION +* The LID Manager object encapsulates the information +* needed to control LID assignments on the subnet. +* +* The LID Manager object is thread safe. +* +* This object should be treated as opaque and should be +* manipulated only through the provided functions. +* +* AUTHOR +* Steve King, Intel +* +*********/ +struct osm_sm; +/****s* OpenSM: LID Manager/osm_lid_mgr_t +* NAME +* osm_lid_mgr_t +* +* DESCRIPTION +* LID Manager structure. +* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct osm_lid_mgr { + struct osm_sm *sm; + osm_subn_t *p_subn; + osm_db_t *p_db; + osm_log_t *p_log; + cl_plock_t *p_lock; + osm_db_domain_t *p_g2l; + cl_qlist_t free_ranges; + boolean_t dirty; + uint8_t used_lids[IB_LID_UCAST_END_HO + 1]; +} osm_lid_mgr_t; +/* +* FIELDS +* sm +* Pointer to the SM object. 
+* +* p_subn +* Pointer to the Subnet object for this subnet. +* +* p_db +* Pointer to the database (persistency) object +* +* p_log +* Pointer to the log object. +* +* p_lock +* Pointer to the serializing lock. +* +* p_g2l +* Pointer to the database domain storing guid to lid mapping. +* +* free_ranges +* A list of available free lid ranges. The list is initialized +* by the code that initializes the lid assignment and is consumed +* by the procedure that finds a free range. It holds elements of +* type osm_lid_mgr_range_t +* +* dirty +* Indicates that lid table was updated +* +* used_lids +* An array of used lids. keeps track of +* existing and non existing mapping of guid->lid +* +* SEE ALSO +* LID Manager object +*********/ + +/****f* OpenSM: LID Manager/osm_lid_mgr_construct +* NAME +* osm_lid_mgr_construct +* +* DESCRIPTION +* This function constructs a LID Manager object. +* +* SYNOPSIS +*/ +void osm_lid_mgr_construct(IN osm_lid_mgr_t * p_mgr); +/* +* PARAMETERS +* p_mgr +* [in] Pointer to a LID Manager object to construct. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows osm_lid_mgr_destroy +* +* Calling osm_lid_mgr_construct is a prerequisite to calling any other +* method except osm_lid_mgr_init. +* +* SEE ALSO +* LID Manager object, osm_lid_mgr_init, +* osm_lid_mgr_destroy +*********/ + +/****f* OpenSM: LID Manager/osm_lid_mgr_destroy +* NAME +* osm_lid_mgr_destroy +* +* DESCRIPTION +* The osm_lid_mgr_destroy function destroys the object, releasing +* all resources. +* +* SYNOPSIS +*/ +void osm_lid_mgr_destroy(IN osm_lid_mgr_t * p_mgr); +/* +* PARAMETERS +* p_mgr +* [in] Pointer to the object to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs any necessary cleanup of the specified +* LID Manager object. +* Further operations should not be attempted on the destroyed object. +* This function should only be called after a call to +* osm_lid_mgr_construct or osm_lid_mgr_init. 
+* +* SEE ALSO +* LID Manager object, osm_lid_mgr_construct, +* osm_lid_mgr_init +*********/ + +/****f* OpenSM: LID Manager/osm_lid_mgr_init +* NAME +* osm_lid_mgr_init +* +* DESCRIPTION +* The osm_lid_mgr_init function initializes a +* LID Manager object for use. +* +* SYNOPSIS +*/ +ib_api_status_t +osm_lid_mgr_init(IN osm_lid_mgr_t * p_mgr, IN struct osm_sm * sm); +/* +* PARAMETERS +* p_mgr +* [in] Pointer to an osm_lid_mgr_t object to initialize. +* +* sm +* [in] Pointer to the SM object for this subnet. +* +* RETURN VALUES +* CL_SUCCESS if the LID Manager object was initialized +* successfully. +* +* NOTES +* Allows calling other LID Manager methods. +* +* SEE ALSO +* LID Manager object, osm_lid_mgr_construct, +* osm_lid_mgr_destroy +*********/ + +/****f* OpenSM: LID Manager/osm_lid_mgr_process_sm +* NAME +* osm_lid_mgr_process_sm +* +* DESCRIPTION +* Configures the SM's port with its designated LID values. +* +* SYNOPSIS +*/ +int osm_lid_mgr_process_sm(IN osm_lid_mgr_t * p_mgr); +/* +* PARAMETERS +* p_mgr +* [in] Pointer to an osm_lid_mgr_t object. +* +* RETURN VALUES +* Returns 0 on success and non-zero value otherwise. +* +* NOTES +* +* SEE ALSO +* LID Manager +*********/ + +/****f* OpenSM: LID Manager/osm_lid_mgr_process_subnet +* NAME +* osm_lid_mgr_process_subnet +* +* DESCRIPTION +* Configures subnet ports (except the SM port itself) with their +* designated LID values. +* +* SYNOPSIS +*/ +int osm_lid_mgr_process_subnet(IN osm_lid_mgr_t * p_mgr); +/* +* PARAMETERS +* p_mgr +* [in] Pointer to an osm_lid_mgr_t object. +* +* RETURN VALUES +* Returns 0 on success and non-zero value otherwise. +* +* NOTES +* +* SEE ALSO +* LID Manager +*********/ + +END_C_DECLS +#endif /* _OSM_LID_MGR_H_ */ diff --git a/include/opensm/osm_log.h b/include/opensm/osm_log.h new file mode 100644 index 0000000..61ba750 --- /dev/null +++ b/include/opensm/osm_log.h @@ -0,0 +1,592 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. 
+ * Copyright (c) 2002-2012 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_log_t. + * This object represents the log file. + * This object is part of the OpenSM family of objects. 
+ */ + +#ifndef _OSM_LOG_H_ +#define _OSM_LOG_H_ + +#ifndef __WIN__ +#include +#endif +#include +#include +#include +#include + +#ifdef __GNUC__ +#define STRICT_OSM_LOG_FORMAT __attribute__((format(printf, 3, 4))) +#define STRICT_OSM_LOG_V2_FORMAT __attribute__((format(printf, 4, 5))) +#else +#define STRICT_OSM_LOG_FORMAT +#define STRICT_OSM_LOG_V2_FORMAT +#endif + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +#define LOG_ENTRY_SIZE_MAX 4096 +#define BUF_SIZE LOG_ENTRY_SIZE_MAX +#define __func__ __FUNCTION__ +#ifdef FILE_ID +#define OSM_LOG_ENTER( OSM_LOG_PTR ) \ + osm_log_v2( OSM_LOG_PTR, OSM_LOG_FUNCS, FILE_ID, \ + "%s: [\n", __func__); +#define OSM_LOG_EXIT( OSM_LOG_PTR ) \ + osm_log_v2( OSM_LOG_PTR, OSM_LOG_FUNCS, FILE_ID, \ + "%s: ]\n", __func__); +#define OSM_LOG_IS_ACTIVE_V2( OSM_LOG_PTR, OSM_LOG_FUNCS ) \ + osm_log_is_active_v2( OSM_LOG_PTR, OSM_LOG_FUNCS, FILE_ID) +#else +#define OSM_LOG_ENTER( OSM_LOG_PTR ) \ + osm_log( OSM_LOG_PTR, OSM_LOG_FUNCS, \ + "%s: [\n", __func__); +#define OSM_LOG_EXIT( OSM_LOG_PTR ) \ + osm_log( OSM_LOG_PTR, OSM_LOG_FUNCS, \ + "%s: ]\n", __func__); +#endif + +/****h* OpenSM/Log +* NAME +* Log +* +* DESCRIPTION +* +* AUTHOR +* +*********/ +typedef uint8_t osm_log_level_t; + +#define OSM_LOG_NONE 0x00 +#define OSM_LOG_ERROR 0x01 +#define OSM_LOG_INFO 0x02 +#define OSM_LOG_VERBOSE 0x04 +#define OSM_LOG_DEBUG 0x08 +#define OSM_LOG_FUNCS 0x10 +#define OSM_LOG_FRAMES 0x20 +#define OSM_LOG_ROUTING 0x40 +#define OSM_LOG_ALL 0x7f +#define OSM_LOG_SYS 0x80 + +/* + DEFAULT - turn on ERROR and INFO only +*/ +#define OSM_LOG_DEFAULT_LEVEL OSM_LOG_ERROR | OSM_LOG_INFO + +/****s* OpenSM: Log/osm_log_t +* NAME +* osm_log_t +* +* DESCRIPTION +* +* SYNOPSIS +*/ +typedef struct osm_log { + osm_log_level_t level; + cl_spinlock_t lock; + unsigned long count; + unsigned long max_size; + boolean_t 
flush; + FILE *out_port; + boolean_t accum_log_file; + boolean_t daemon; + char *log_file_name; + char *log_prefix; + osm_log_level_t per_mod_log_tbl[256]; +} osm_log_t; +/*********/ + +#define OSM_LOG_MOD_NAME_MAX 32 + +/****f* OpenSM: Log/osm_get_log_per_module + * NAME + * osm_get_log_per_module + * + * DESCRIPTION + * This looks up the given file ID in the per module log table. + * NOTE: this code is not thread safe. Need to grab the lock before + * calling it. + * + * SYNOPSIS + */ +osm_log_level_t osm_get_log_per_module(IN osm_log_t * p_log, + IN const int file_id); +/* + * PARAMETERS + * p_log + * [in] Pointer to a Log object to construct. + * + * file_id + * [in] File ID for module + * + * RETURN VALUES + * The log level from the per module logging structure for this file ID. + *********/ + +/****f* OpenSM: Log/osm_set_log_per_module + * NAME + * osm_set_log_per_module + * + * DESCRIPTION + * This sets log level for the given file ID in the per module log table. + * NOTE: this code is not thread safe. Need to grab the lock before + * calling it. + * + * SYNOPSIS + */ +void osm_set_log_per_module(IN osm_log_t * p_log, IN const int file_id, + IN osm_log_level_t level); +/* + * PARAMETERS + * p_log + * [in] Pointer to a Log object to construct. + * + * file_id + * [in] File ID for module + * + * level + * [in] Log level of the module + * + * RETURN VALUES + * This function does not return a value. + *********/ + +/****f* OpenSM: Log/osm_reset_log_per_module + * NAME + * osm_reset_log_per_module + * + * DESCRIPTION + * This resets log level for the entire per module log table. + * NOTE: this code is not thread safe. Need to grab the lock before + * calling it. + * + * SYNOPSIS + */ +void osm_reset_log_per_module(IN osm_log_t * p_log); +/* + * PARAMETERS + * p_log + * [in] Pointer to a Log object to construct. + * + * RETURN VALUES + * This function does not return a value. 
+ *********/ + +/****f* OpenSM: Log/osm_log_construct +* NAME +* osm_log_construct +* +* DESCRIPTION +* This function constructs a Log object. +* +* SYNOPSIS +*/ +static inline void osm_log_construct(IN osm_log_t * p_log) +{ + cl_spinlock_construct(&p_log->lock); +} + +/* +* PARAMETERS +* p_log +* [in] Pointer to a Log object to construct. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling osm_log_init, osm_log_init_v2, osm_log_destroy +* +* Calling osm_log_construct is a prerequisite to calling any other +* method except osm_log_init or osm_log_init_v2. +* +* SEE ALSO +* Log object, osm_log_init, osm_log_init_v2, +* osm_log_destroy +*********/ + +/****f* OpenSM: Log/osm_log_destroy +* NAME +* osm_log_destroy +* +* DESCRIPTION +* The osm_log_destroy function destroys the object, releasing +* all resources. +* +* SYNOPSIS +*/ +static inline void osm_log_destroy(IN osm_log_t * p_log) +{ + cl_spinlock_destroy(&p_log->lock); + if (p_log->out_port && p_log->out_port != stdout) { /* osm_log_construct alone does not set out_port, so it may be NULL; never fclose(NULL) or stdout */ + fclose(p_log->out_port); + p_log->out_port = stdout; + } + closelog(); +} + +/* +* PARAMETERS +* p_log +* [in] Pointer to the object to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs any necessary cleanup of the specified +* Log object. +* Further operations should not be attempted on the destroyed object. +* This function should only be called after a call to +* osm_log_construct, osm_log_init, or osm_log_init_v2. +* +* SEE ALSO +* Log object, osm_log_construct, +* osm_log_init, osm_log_init_v2 +*********/ + +/****f* OpenSM: Log/osm_log_init_v2 +* NAME +* osm_log_init_v2 +* +* DESCRIPTION +* The osm_log_init_v2 function initializes a +* Log object for use. +* +* SYNOPSIS +*/ +ib_api_status_t osm_log_init_v2(IN osm_log_t * p_log, IN boolean_t flush, + IN uint8_t log_flags, IN const char *log_file, + IN unsigned long max_size, + IN boolean_t accum_log_file); +/* +* PARAMETERS +* p_log +* [in] Pointer to the log object.
+* +* flush +* [in] Set to TRUE directs the log to flush all log messages +* immediately. This severely degrades log performance, +* and is normally used for debugging only. +* +* log_flags +* [in] The log verbosity level to be used. +* +* log_file +* [in] if not NULL defines the name of the log file. Otherwise +* it is stdout. +* +* RETURN VALUES +* CL_SUCCESS if the Log object was initialized +* successfully. +* +* NOTES +* Allows calling other Log methods. +* +* SEE ALSO +* Log object, osm_log_construct, +* osm_log_destroy +*********/ + +/****f* OpenSM: Log/osm_log_reopen_file +* NAME +* osm_log_reopen_file +* +* DESCRIPTION +* The osm_log_reopen_file function reopens the log file +* +* SYNOPSIS +*/ +int osm_log_reopen_file(osm_log_t * p_log); +/* +* PARAMETERS +* p_log +* [in] Pointer to the log object. +* +* RETURN VALUES +* 0 on success or nonzero value otherwise. +*********/ + +/****f* OpenSM: Log/osm_log_init +* NAME +* osm_log_init +* +* DESCRIPTION +* The osm_log_init function initializes a +* Log object for use. It is a wrapper for osm_log_init_v2(). +* +* SYNOPSIS +*/ +ib_api_status_t osm_log_init(IN osm_log_t * p_log, IN boolean_t flush, + IN uint8_t log_flags, IN const char *log_file, + IN boolean_t accum_log_file); +/* + * Same as osm_log_init_v2() but without max_size parameter + */ + +void osm_log(IN osm_log_t * p_log, IN osm_log_level_t verbosity, + IN const char *p_str, ...) STRICT_OSM_LOG_FORMAT; + +void osm_log_v2(IN osm_log_t * p_log, IN osm_log_level_t verbosity, + IN const int file_id, IN const char *p_str, ...) STRICT_OSM_LOG_V2_FORMAT; + +/****f* OpenSM: Log/osm_log_get_level +* NAME +* osm_log_get_level +* +* DESCRIPTION +* Returns the current log level. +* +* SYNOPSIS +*/ +static inline osm_log_level_t osm_log_get_level(IN const osm_log_t * p_log) +{ + return p_log->level; +} + +/* +* PARAMETERS +* p_log +* [in] Pointer to the log object. +* +* RETURN VALUES +* Returns the current log level. 
+* +* NOTES +* +* SEE ALSO +* Log object, osm_log_construct, +* osm_log_destroy +*********/ + +/****f* OpenSM: Log/osm_log_set_level +* NAME +* osm_log_set_level +* +* DESCRIPTION +* Sets the current log level. +* +* SYNOPSIS +*/ +static inline void osm_log_set_level(IN osm_log_t * p_log, + IN osm_log_level_t level) +{ + p_log->level = level; + osm_log(p_log, OSM_LOG_ALL, "Setting log level to: 0x%02x\n", level); +} + +/* +* PARAMETERS +* p_log +* [in] Pointer to the log object. +* +* level +* [in] New level to set. +* +* RETURN VALUES +* This function does not return a value. +* +* NOTES +* +* SEE ALSO +* Log object, osm_log_construct, +* osm_log_destroy +*********/ + +/****f* OpenSM: Log/osm_log_is_active +* NAME +* osm_log_is_active +* +* DESCRIPTION +* Returns TRUE if the specified log level would be logged. +* FALSE otherwise. +* +* SYNOPSIS +*/ +static inline boolean_t osm_log_is_active(IN const osm_log_t * p_log, + IN osm_log_level_t level) +{ + return ((p_log->level & level) != 0); +} + +/* +* PARAMETERS +* p_log +* [in] Pointer to the log object. +* +* level +* [in] Level to check. +* +* RETURN VALUES +* Returns TRUE if the specified log level would be logged. +* FALSE otherwise. +* +* NOTES +* +* SEE ALSO +* Log object, osm_log_construct, +* osm_log_destroy +*********/ + +static inline boolean_t osm_log_is_active_v2(IN const osm_log_t * p_log, + IN osm_log_level_t level, + IN const int file_id) +{ + if ((p_log->level & level) != 0) /* enabled by the global level mask */ + return 1; + if ((unsigned int) file_id < sizeof(p_log->per_mod_log_tbl) / sizeof(p_log->per_mod_log_tbl[0]) && (level & p_log->per_mod_log_tbl[file_id])) /* per-module mask; the unsigned compare rejects negative and too-large file IDs before indexing */ + return 1; + return 0; +} + +extern void osm_log_msg_box(osm_log_t *log, osm_log_level_t level, + const char *func_name, const char *msg); +extern void osm_log_msg_box_v2(osm_log_t *log, osm_log_level_t level, + const int file_id, const char *func_name, + const char *msg); +extern void osm_log_raw(IN osm_log_t * p_log, IN osm_log_level_t verbosity, + IN const char *p_buf); + +#ifdef FILE_ID +#define OSM_LOG(log, level, fmt, ...)
do { \ + if (osm_log_is_active_v2(log, (level), FILE_ID)) \ + osm_log_v2(log, level, FILE_ID, "%s: " fmt, __func__, ## __VA_ARGS__); \ + } while (0) + +#define OSM_LOG_MSG_BOX(log, level, msg) \ + osm_log_msg_box_v2(log, level, FILE_ID, __func__, msg) +#else +#define OSM_LOG(log, level, fmt, ...) do { \ + if (osm_log_is_active(log, (level))) \ + osm_log(log, level, "%s: " fmt, __func__, ## __VA_ARGS__); \ + } while (0) + +#define OSM_LOG_MSG_BOX(log, level, msg) \ + osm_log_msg_box(log, level, __func__, msg) +#endif + +#define DBG_CL_LOCK 0 + +#define CL_PLOCK_EXCL_ACQUIRE( __exp__ ) \ +{ \ + if (DBG_CL_LOCK) \ + printf("cl_plock_excl_acquire: Acquiring %p file %s, line %d\n", \ + __exp__,__FILE__, __LINE__); \ + cl_plock_excl_acquire( __exp__ ); \ + if (DBG_CL_LOCK) \ + printf("cl_plock_excl_acquire: Acquired %p file %s, line %d\n", \ + __exp__,__FILE__, __LINE__); \ +} + +#define CL_PLOCK_ACQUIRE( __exp__ ) \ +{ \ + if (DBG_CL_LOCK) \ + printf("cl_plock_acquire: Acquiring %p file %s, line %d\n", \ + __exp__,__FILE__, __LINE__); \ + cl_plock_acquire( __exp__ ); \ + if (DBG_CL_LOCK) \ + printf("cl_plock_acquire: Acquired %p file %s, line %d\n", \ + __exp__,__FILE__, __LINE__); \ +} + +#define CL_PLOCK_RELEASE( __exp__ ) \ +{ \ + if (DBG_CL_LOCK) \ + printf("cl_plock_release: Releasing %p file %s, line %d\n", \ + __exp__,__FILE__, __LINE__); \ + cl_plock_release( __exp__ ); \ + if (DBG_CL_LOCK) \ + printf("cl_plock_release: Released %p file %s, line %d\n", \ + __exp__,__FILE__, __LINE__); \ +} + +#define DBG_CL_SPINLOCK 0 +#define CL_SPINLOCK_RELEASE( __exp__ ) \ +{ \ + if (DBG_CL_SPINLOCK) \ + printf("cl_spinlock_release: Releasing %p file %s, line %d\n", \ + __exp__,__FILE__, __LINE__); \ + cl_spinlock_release( __exp__ ); \ + if (DBG_CL_SPINLOCK) \ + printf("cl_spinlock_release: Released %p file %s, line %d\n", \ + __exp__,__FILE__, __LINE__); \ +} + +#define CL_SPINLOCK_ACQUIRE( __exp__ ) \ +{ \ + if (DBG_CL_SPINLOCK) \ + printf("cl_spinlock_acquire: Acquiring %p 
file %s, line %d\n", \ + __exp__,__FILE__, __LINE__); \ + cl_spinlock_acquire( __exp__ ); \ + if (DBG_CL_SPINLOCK) \ + printf("cl_spinlock_acquire: Acquired %p file %s, line %d\n", \ + __exp__,__FILE__, __LINE__); \ +} + +/****f* OpenSM: Helper/osm_is_debug +* NAME +* osm_is_debug +* +* DESCRIPTION +* The osm_is_debug function returns TRUE if the opensm was compiled +* in debug mode, and FALSE otherwise. +* +* SYNOPSIS +*/ +boolean_t osm_is_debug(void); +/* +* PARAMETERS +* None +* +* RETURN VALUE +* TRUE if compiled in debug version. FALSE otherwise. +* +* NOTES +* +*********/ + +END_C_DECLS +#endif /* _OSM_LOG_H_ */ diff --git a/include/opensm/osm_mad_pool.h b/include/opensm/osm_mad_pool.h new file mode 100644 index 0000000..ccfdd6e --- /dev/null +++ b/include/opensm/osm_mad_pool.h @@ -0,0 +1,370 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_mad_pool_t. + * This object represents a pool of management datagram (MAD) objects. + * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_MAD_POOL_H_ +#define _OSM_MAD_POOL_H_ + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/MAD Pool +* NAME +* MAD Pool +* +* DESCRIPTION +* The MAD Pool encapsulates the information needed by the +* OpenSM to manage a pool of MAD objects. The OpenSM allocates +* one MAD Pool per IBA subnet. +* +* The MAD Pool is thread safe. +* +* This object should be treated as opaque and should be +* manipulated only through the provided functions. +* +* AUTHOR +* Steve King, Intel +* +*********/ +/****s* OpenSM: MAD Pool/osm_mad_pool_t +* NAME +* osm_mad_pool_t +* +* DESCRIPTION +* MAD Pool structure. +* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct osm_mad_pool { + atomic32_t mads_out; +} osm_mad_pool_t; +/* +* FIELDS +* mads_out +* Running total of the number of MADs outstanding. +* +* SEE ALSO +* MAD Pool +*********/ + +/****f* OpenSM: MAD Pool/osm_mad_pool_construct +* NAME +* osm_mad_pool_construct +* +* DESCRIPTION +* This function constructs a MAD Pool. 
+* +* SYNOPSIS +*/ +void osm_mad_pool_construct(IN osm_mad_pool_t * p_pool); +/* +* PARAMETERS +* p_pool +* [in] Pointer to a MAD Pool to construct. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling osm_mad_pool_init, osm_mad_pool_destroy +* +* Calling osm_mad_pool_construct is a prerequisite to calling any other +* method except osm_mad_pool_init. +* +* SEE ALSO +* MAD Pool, osm_mad_pool_init, osm_mad_pool_destroy +*********/ + +/****f* OpenSM: MAD Pool/osm_mad_pool_destroy +* NAME +* osm_mad_pool_destroy +* +* DESCRIPTION +* The osm_mad_pool_destroy function destroys a MAD Pool, releasing +* all resources. +* +* SYNOPSIS +*/ +void osm_mad_pool_destroy(IN osm_mad_pool_t * p_pool); +/* +* PARAMETERS +* p_pool +* [in] Pointer to a MAD Pool to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs any necessary cleanup of the specified MAD Pool. +* Further operations should not be attempted on the destroyed object. +* This function should only be called after a call to osm_mad_pool_construct or +* osm_mad_pool_init. +* +* SEE ALSO +* MAD Pool, osm_mad_pool_construct, osm_mad_pool_init +*********/ + +/****f* OpenSM: MAD Pool/osm_mad_pool_init +* NAME +* osm_mad_pool_init +* +* DESCRIPTION +* The osm_mad_pool_init function initializes a MAD Pool for use. +* +* SYNOPSIS +*/ +ib_api_status_t osm_mad_pool_init(IN osm_mad_pool_t * p_pool); +/* +* PARAMETERS +* p_pool +* [in] Pointer to an osm_mad_pool_t object to initialize. +* +* RETURN VALUES +* CL_SUCCESS if the MAD Pool was initialized successfully. +* +* NOTES +* Allows calling other MAD Pool methods. +* +* SEE ALSO +* MAD Pool, osm_mad_pool_construct, osm_mad_pool_destroy +*********/ + +/****f* OpenSM: MAD Pool/osm_mad_pool_get +* NAME +* osm_mad_pool_get +* +* DESCRIPTION +* Gets a MAD wrapper and wire MAD from the pool.
+* +* SYNOPSIS +*/ +osm_madw_t *osm_mad_pool_get(IN osm_mad_pool_t * p_pool, + IN osm_bind_handle_t h_bind, + IN uint32_t total_size, + IN const osm_mad_addr_t * p_mad_addr); +/* +* PARAMETERS +* p_pool +* [in] Pointer to an osm_mad_pool_t object. +* +* h_bind +* [in] Handle returned from osm_vendor_bind() call to the +* port over which this mad will be sent. +* +* total_size +* [in] Total size, including MAD header, of the requested MAD. +* +* p_mad_addr +* [in] Pointer to the MAD address structure. This parameter +* may be NULL for directed route MADs. +* +* RETURN VALUES +* Returns a pointer to a MAD wrapper containing the MAD. +* A return value of NULL means no MADs are available. +* +* NOTES +* The MAD must eventually be returned to the pool with a call to +* osm_mad_pool_put. +* +* The osm_mad_pool_construct or osm_mad_pool_init must be called before +* using this function. +* +* SEE ALSO +* MAD Pool, osm_mad_pool_put +*********/ + +/****f* OpenSM: MAD Pool/osm_mad_pool_put +* NAME +* osm_mad_pool_put +* +* DESCRIPTION +* Returns a MAD to the pool. +* +* SYNOPSIS +*/ +void osm_mad_pool_put(IN osm_mad_pool_t * p_pool, IN osm_madw_t * p_madw); +/* +* PARAMETERS +* p_pool +* [in] Pointer to an osm_mad_pool_t object. +* +* p_madw +* [in] Pointer to a MAD Wrapper for a MAD that was previously +* retrieved from the pool. +* +* RETURN VALUES +* This function does not return a value. +* +* NOTES +* The osm_mad_pool_construct or osm_mad_pool_init must be called before +* using this function. +* +* SEE ALSO +* MAD Pool, osm_mad_pool_get +*********/ + +/****f* OpenSM: MAD Pool/osm_mad_pool_get_wrapper +* NAME +* osm_mad_pool_get_wrapper +* +* DESCRIPTION +* Gets only a MAD wrapper from the pool (no wire MAD).
+* +* SYNOPSIS +*/ +osm_madw_t *osm_mad_pool_get_wrapper(IN osm_mad_pool_t * p_pool, + IN osm_bind_handle_t h_bind, + IN uint32_t total_size, + IN const ib_mad_t * p_mad, + IN const osm_mad_addr_t * p_mad_addr); +/* +* PARAMETERS +* p_pool +* [in] Pointer to an osm_mad_pool_t object. +* +* h_bind +* [in] Handle returned from osm_vendor_bind() call to the +* port for which this mad wrapper will be used. +* +* total_size +* [in] Total size, including MAD header, of the MAD that will +* be attached to this wrapper. +* +* p_mad +* [in] Pointer to the MAD to attach to this wrapper. +* +* p_mad_addr +* [in] Pointer to the MAD address structure. This parameter +* may be NULL for directed route MADs. +* +* RETURN VALUES +* Returns a pointer to a MAD wrapper. +* A return value of NULL means no MAD wrappers are available. +* +* NOTES +* The MAD must eventually be returned to the pool with a call to +* osm_mad_pool_put. +* +* The osm_mad_pool_construct or osm_mad_pool_init must be called before +* using this function. +* +* SEE ALSO +* MAD Pool, osm_mad_pool_put +*********/ + +/****f* OpenSM: MAD Pool/osm_mad_pool_get_wrapper_raw +* NAME +* osm_mad_pool_get_wrapper_raw +* +* DESCRIPTION +* Gets only an uninitialized MAD wrapper from the pool (no wire MAD). +* +* SYNOPSIS +*/ +osm_madw_t *osm_mad_pool_get_wrapper_raw(IN osm_mad_pool_t * p_pool); +/* +* PARAMETERS +* p_pool +* [in] Pointer to an osm_mad_pool_t object. +* +* RETURN VALUES +* Returns a pointer to a MAD wrapper. +* A return value of NULL means no MAD wrappers are available. +* +* NOTES +* The MAD must eventually be returned to the pool with a call to +* osm_mad_pool_put. +* +* The osm_mad_pool_construct or osm_mad_pool_init must be called before +* using this function. +* +* SEE ALSO +* MAD Pool, osm_mad_pool_put +*********/ + +/****f* OpenSM: MAD Pool/osm_mad_pool_get_outstanding +* NAME +* osm_mad_pool_get_outstanding +* +* DESCRIPTION +* Returns the running count of MADs currently outstanding from the pool.
+* +* SYNOPSIS +*/ +static inline uint32_t +osm_mad_pool_get_outstanding(IN const osm_mad_pool_t * p_pool) +{ + return p_pool->mads_out; +} + +/* +* PARAMETERS +* p_pool +* [in] Pointer to an osm_mad_pool_t object. +* +* RETURN VALUES +* Returns the running count of MADs currently outstanding from the pool. +* +* NOTES +* The osm_mad_pool_construct or osm_mad_pool_init must be called before +* using this function. +* +* SEE ALSO +* MAD Pool, osm_mad_pool_get +*********/ + +END_C_DECLS +#endif /* _OSM_MAD_POOL_H_ */ diff --git a/include/opensm/osm_madw.h b/include/opensm/osm_madw.h new file mode 100644 index 0000000..9ecd51c --- /dev/null +++ b/include/opensm/osm_madw.h @@ -0,0 +1,1212 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2012 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_mad_wrapper_t. + * This object represents the context wrapper for OpenSM MAD processing. + * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_MADW_H_ +#define _OSM_MADW_H_ + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****s* OpenSM: MAD Wrapper/osm_bind_info_t +* NAME +* osm_bind_info_t +* +* DESCRIPTION +* +* SYNOPSIS +*/ +typedef struct osm_bind_info { + ib_net64_t port_guid; + uint8_t mad_class; + uint8_t class_version; + boolean_t is_responder; + boolean_t is_trap_processor; + boolean_t is_report_processor; + uint32_t send_q_size; + uint32_t recv_q_size; + uint32_t timeout; + uint32_t retries; +} osm_bind_info_t; +/* +* FIELDS +* portguid +* PortGuid of local port +* +* mad_class +* Mgmt Class ID +* +* class_version +* Mgmt Class version +* +* is_responder +* True if this is a GSI Agent +* +* is_trap_processor +* True if GSI Trap msgs are handled +* +* is_report_processor +* True if GSI Report msgs are handled +* +* send_q_size +* SendQueueSize +* +* recv_q_size +* Receive Queue Size +* +* timeout +* Transaction timeout +* +* retries +* Number of retries for transaction +* +* SEE ALSO +*********/ + +/****h* OpenSM/MAD Wrapper +* NAME +* MAD Wrapper +* +* DESCRIPTION +* The MAD Wrapper 
object encapsulates the information needed by the +* OpenSM to manage individual MADs. The OpenSM allocates one MAD Wrapper +* per MAD. +* +* The MAD Wrapper is not thread safe, thus callers must provide +* serialization. +* +* This object should be treated as opaque and should be +* manipulated only through the provided functions. +* +* AUTHOR +* Steve King, Intel +* +*********/ + +/****s* OpenSM: MAD Wrapper/osm_ni_context_t +* NAME +* osm_ni_context_t +* +* DESCRIPTION +* Context needed by recipient of NodeInfo attribute. +* +* SYNOPSIS +*/ +typedef struct osm_ni_context { + ib_net64_t node_guid; + uint8_t port_num; + ib_net64_t dup_node_guid; + uint8_t dup_port_num; + unsigned dup_count; +} osm_ni_context_t; +/* +* FIELDS +* p_node +* Pointer to the node thru which we got to this node. +* +* p_sw +* Pointer to the switch object (if any) of the switch +* thru which we got to this node. +* +* port_num +* Port number on the node or switch thru which we got +* to this node. +* +* SEE ALSO +*********/ + +/****s* OpenSM: MAD Wrapper/osm_pi_context_t +* NAME +* osm_pi_context_t +* +* DESCRIPTION +* Context needed by recipient of PortInfo attribute. +* +* SYNOPSIS +*/ +typedef struct osm_pi_context { + ib_net64_t node_guid; + ib_net64_t port_guid; + boolean_t set_method; + boolean_t light_sweep; + boolean_t active_transition; + boolean_t client_rereg; +} osm_pi_context_t; +/*********/ + +/****s* OpenSM: MAD Wrapper/osm_gi_context_t +* NAME +* osm_gi_context_t +* +* DESCRIPTION +* Context needed by recipient of GUIDInfo attribute. +* +* SYNOPSIS +*/ +typedef struct osm_gi_context { + ib_net64_t node_guid; + ib_net64_t port_guid; + boolean_t set_method; + uint8_t port_num; +} osm_gi_context_t; +/*********/ + +/****s* OpenSM: MAD Wrapper/osm_nd_context_t +* NAME +* osm_nd_context_t +* +* DESCRIPTION +* Context needed by recipient of NodeDescription attribute. 
+* +* SYNOPSIS +*/ +typedef struct osm_nd_context { + ib_net64_t node_guid; +} osm_nd_context_t; +/*********/ + +/****s* OpenSM: MAD Wrapper/osm_si_context_t +* NAME +* osm_si_context_t +* +* DESCRIPTION +* Context needed by recipient of SwitchInfo attribute. +* +* SYNOPSIS +*/ +typedef struct osm_si_context { + ib_net64_t node_guid; + boolean_t set_method; + boolean_t light_sweep; + boolean_t lft_top_change; +} osm_si_context_t; +/*********/ + +/****s* OpenSM: MAD Wrapper/osm_lft_context_t +* NAME +* osm_lft_context_t +* +* DESCRIPTION +* Context needed by recipient of LinearForwardingTable attribute. +* +* SYNOPSIS +*/ +typedef struct osm_lft_context { + ib_net64_t node_guid; + boolean_t set_method; +} osm_lft_context_t; +/*********/ + +/****s* OpenSM: MAD Wrapper/osm_mft_context_t +* NAME +* osm_mft_context_t +* +* DESCRIPTION +* Context needed by recipient of MulticastForwardingTable attribute. +* +* SYNOPSIS +*/ +typedef struct osm_mft_context { + ib_net64_t node_guid; + boolean_t set_method; +} osm_mft_context_t; +/*********/ + +/****s* OpenSM: MAD Wrapper/osm_smi_context_t +* NAME +* osm_smi_context_t +* +* DESCRIPTION +* Context needed by recipient of SMInfo attribute. +* +* SYNOPSIS +*/ +typedef struct osm_smi_context { + ib_net64_t port_guid; + boolean_t set_method; + boolean_t light_sweep; +} osm_smi_context_t; +/*********/ + +/****s* OpenSM: MAD Wrapper/osm_pkey_context_t +* NAME +* osm_pkey_context_t +* +* DESCRIPTION +* Context needed by recipient of P_Key attribute. +* +* SYNOPSIS +*/ +typedef struct osm_pkey_context { + ib_net64_t node_guid; + ib_net64_t port_guid; + boolean_t set_method; +} osm_pkey_context_t; +/*********/ + +/****s* OpenSM: MAD Wrapper/osm_slvl_context_t +* NAME +* osm_slvl_context_t +* +* DESCRIPTION +* Context needed by recipient of PortInfo attribute. 
+* +* SYNOPSIS +*/ +typedef struct osm_slvl_context { + ib_net64_t node_guid; + ib_net64_t port_guid; + boolean_t set_method; +} osm_slvl_context_t; +/*********/ + +/****s* OpenSM: MAD Wrapper/osm_vla_context_t +* NAME +* osm_vla_context_t +* +* DESCRIPTION +* Context needed by recipient of VL Arb attribute. +* +* SYNOPSIS +*/ +typedef struct osm_vla_context { + ib_net64_t node_guid; + ib_net64_t port_guid; + boolean_t set_method; +} osm_vla_context_t; +/*********/ + +/****s* OpenSM: MAD Wrapper/osm_perfmgr_context_t +* DESCRIPTION +* Context for Performance manager queries +*/ +typedef struct osm_perfmgr_context { + uint64_t node_guid; + uint16_t port; + uint8_t mad_method; /* was this a get or a set */ + ib_net16_t mad_attr_id; +#ifdef ENABLE_OSM_PERF_MGR_PROFILE + struct timeval query_start; +#endif +} osm_perfmgr_context_t; +/*********/ + +/****s* OpenSM: MAD Wrapper/osm_cc_context_t +* DESCRIPTION +* Context for Congestion Control MADs +*/ +typedef struct osm_cc_context { + ib_net64_t node_guid; + ib_net64_t port_guid; + uint8_t port; + uint8_t mad_method; /* was this a get or a set */ + ib_net32_t attr_mod; +} osm_cc_context_t; +/*********/ + +#ifndef OSM_VENDOR_INTF_OPENIB +/****s* OpenSM: MAD Wrapper/osm_arbitrary_context_t +* NAME +* osm_arbitrary_context_t +* +* DESCRIPTION +* Context needed by arbitrary recipient. +* +* SYNOPSIS +*/ +typedef struct osm_arbitrary_context { + void *context1; + void *context2; +} osm_arbitrary_context_t; +/*********/ +#endif + +/****s* OpenSM: MAD Wrapper/osm_madw_context_t +* NAME +* osm_madw_context_t +* +* DESCRIPTION +* Context needed by recipients of MAD responses. 
+* +* SYNOPSIS +*/ +typedef union _osm_madw_context { + osm_ni_context_t ni_context; + osm_pi_context_t pi_context; + osm_gi_context_t gi_context; + osm_nd_context_t nd_context; + osm_si_context_t si_context; + osm_lft_context_t lft_context; + osm_mft_context_t mft_context; + osm_smi_context_t smi_context; + osm_slvl_context_t slvl_context; + osm_pkey_context_t pkey_context; + osm_vla_context_t vla_context; + osm_perfmgr_context_t perfmgr_context; + osm_cc_context_t cc_context; +#ifndef OSM_VENDOR_INTF_OPENIB + osm_arbitrary_context_t arb_context; +#endif +} osm_madw_context_t; +/*********/ + +/****s* OpenSM: MAD Wrapper/osm_mad_addr_t +* NAME +* osm_mad_addr_t +* +* DESCRIPTION +* +* SYNOPSIS +*/ +typedef struct osm_mad_addr { + ib_net16_t dest_lid; + uint8_t path_bits; + uint8_t static_rate; + union addr_type { + struct _smi { + ib_net16_t source_lid; + uint8_t port_num; + } smi; + + struct _gsi { + ib_net32_t remote_qp; + ib_net32_t remote_qkey; + uint16_t pkey_ix; + uint8_t service_level; + boolean_t global_route; + ib_grh_t grh_info; + } gsi; + } addr_type; +} osm_mad_addr_t; +/* +* FIELDS +* +* SEE ALSO +*********/ + +/****s* OpenSM: MAD Wrapper/osm_madw_t +* NAME +* osm_madw_t +* +* DESCRIPTION +* Context needed for processing individual MADs +* +* SYNOPSIS +*/ +typedef struct osm_madw { + cl_list_item_t list_item; + osm_bind_handle_t h_bind; + osm_vend_wrap_t vend_wrap; + osm_mad_addr_t mad_addr; + osm_madw_context_t context; + uint32_t mad_size; + ib_api_status_t status; + cl_disp_msgid_t fail_msg; + boolean_t resp_expected; + uint32_t timeout; + const ib_mad_t *p_mad; +} osm_madw_t; +/* +* FIELDS +* list_item +* List linkage for lists. MUST BE FIRST MEMBER! +* +* h_bind +* Bind handle for the port on which this MAD will be sent +* or was received. +* +* vend_wrap +* Transport vendor specific context. This structure is not +* used outside MAD transport vendor specific code. +* +* context +* Union of controller specific contexts needed for this MAD. 
+* This structure allows controllers to indirectly communicate +* with each other through the dispatcher. +* +* mad_size +* Size of this MAD in bytes. +* +* status +* Status of completed operation on the MAD. +* CL_SUCCESS if the operation was successful. +* +* fail_msg +* Dispatcher message with which to post this MAD on failure. +* This value is set by the originator of the MAD. +* If an operation on this MAD fails, for example due to a timeout, +* then the transport layer will dispose of the MAD by sending +* it through the Dispatcher with this message type. Presumably, +* there is a controller listening for the failure message that can +* properly clean up. +* +* resp_expected +* TRUE if a response is expected to this MAD. +* FALSE otherwise. +* +* timeout +* Transaction timeout in msec. +* +* p_mad +* Pointer to the wire MAD. The MAD itself cannot be part of the +* wrapper, since wire MADs typically reside in special memory +* registered with the local HCA. +* +* SEE ALSO +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_init +* NAME +* osm_madw_init +* +* DESCRIPTION +* Initializes a MAD Wrapper object for use. +* +* SYNOPSIS +*/ +static inline void osm_madw_init(IN osm_madw_t * p_madw, + IN osm_bind_handle_t h_bind, + IN uint32_t mad_size, + IN const osm_mad_addr_t * p_mad_addr) +{ + memset(p_madw, 0, sizeof(*p_madw)); + p_madw->h_bind = h_bind; + p_madw->fail_msg = CL_DISP_MSGID_NONE; + p_madw->mad_size = mad_size; + if (p_mad_addr) + p_madw->mad_addr = *p_mad_addr; + p_madw->resp_expected = FALSE; +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object to initialize. +* +* h_bind +* [in] Bind handle to store in the wrapper; mad_size [in] is stored as the wrapper's MAD size. +* +* p_mad_addr +* [in] Pointer to the MAD address structure. This parameter may +* be NULL for directed route MADs. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_get_smp_ptr +* NAME +* osm_madw_get_smp_ptr +* +* DESCRIPTION +* Gets a pointer to the SMP in this MAD.
+* +* SYNOPSIS +*/ +static inline ib_smp_t *osm_madw_get_smp_ptr(IN const osm_madw_t * p_madw) +{ + return ((ib_smp_t *) p_madw->p_mad); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* Pointer to the start of the SMP MAD. +* +* NOTES +* +* SEE ALSO +* MAD Wrapper object +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_get_sa_mad_ptr +* NAME +* osm_madw_get_sa_mad_ptr +* +* DESCRIPTION +* Gets a pointer to the SA MAD in this MAD wrapper. +* +* SYNOPSIS +*/ +static inline ib_sa_mad_t *osm_madw_get_sa_mad_ptr(IN const osm_madw_t * p_madw) +{ + return ((ib_sa_mad_t *) p_madw->p_mad); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* Pointer to the start of the SA MAD. +* +* NOTES +* +* SEE ALSO +* MAD Wrapper object +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_get_perfmgt_mad_ptr +* DESCRIPTION +* Gets a pointer to the PerfMgt MAD in this MAD wrapper. +* +* SYNOPSIS +*/ +static inline ib_perfmgt_mad_t *osm_madw_get_perfmgt_mad_ptr(IN const osm_madw_t + * p_madw) +{ + return ((ib_perfmgt_mad_t *) p_madw->p_mad); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* Pointer to the start of the PerfMgt MAD. +* +* NOTES +* +* SEE ALSO +* MAD Wrapper object +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_get_cc_mad_ptr +* DESCRIPTION +* Gets a pointer to the Congestion Control MAD in this MAD wrapper. +* +* SYNOPSIS +*/ +static inline ib_cc_mad_t *osm_madw_get_cc_mad_ptr(IN const osm_madw_t + * p_madw) +{ + return ((ib_cc_mad_t *) p_madw->p_mad); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* Pointer to the start of the Congestion Control MAD.
+* +* NOTES +* +* SEE ALSO +* MAD Wrapper object +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_get_ni_context_ptr +* NAME +* osm_madw_get_ni_context_ptr +* +* DESCRIPTION +* Gets a pointer to the NodeInfo context in this MAD. +* +* SYNOPSIS +*/ +static inline osm_ni_context_t *osm_madw_get_ni_context_ptr(IN const osm_madw_t + * p_madw) +{ + return ((osm_ni_context_t *) & p_madw->context); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* Address of p_madw->context, cast to a pointer to osm_ni_context_t. +* +* NOTES +* The const qualifier of p_madw is cast away by this accessor. +* SEE ALSO +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_get_pi_context_ptr +* NAME +* osm_madw_get_pi_context_ptr +* +* DESCRIPTION +* Gets a pointer to the PortInfo context in this MAD. +* +* SYNOPSIS +*/ +static inline osm_pi_context_t *osm_madw_get_pi_context_ptr(IN const osm_madw_t + * p_madw) +{ + return ((osm_pi_context_t *) & p_madw->context); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* Address of p_madw->context, cast to a pointer to osm_pi_context_t. +* +* NOTES +* The const qualifier of p_madw is cast away by this accessor. +* SEE ALSO +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_get_gi_context_ptr +* NAME +* osm_madw_get_gi_context_ptr +* +* DESCRIPTION +* Gets a pointer to the GUIDInfo context in this MAD. +* +* SYNOPSIS +*/ +static inline osm_gi_context_t *osm_madw_get_gi_context_ptr(IN const osm_madw_t + * p_madw) +{ + return ((osm_gi_context_t *) & p_madw->context); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* Address of p_madw->context, cast to a pointer to osm_gi_context_t. +* +* NOTES +* The const qualifier of p_madw is cast away by this accessor. +* SEE ALSO +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_get_nd_context_ptr +* NAME +* osm_madw_get_nd_context_ptr +* +* DESCRIPTION +* Gets a pointer to the NodeDescription context in this MAD.
+* +* SYNOPSIS +*/ +static inline osm_nd_context_t *osm_madw_get_nd_context_ptr(IN const osm_madw_t + * p_madw) +{ + return ((osm_nd_context_t *) & p_madw->context); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* Address of p_madw->context, cast to a pointer to osm_nd_context_t. +* +* NOTES +* The const qualifier of p_madw is cast away by this accessor. +* SEE ALSO +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_get_lft_context_ptr +* NAME +* osm_madw_get_lft_context_ptr +* +* DESCRIPTION +* Gets a pointer to the LFT context in this MAD. +* +* SYNOPSIS +*/ +static inline osm_lft_context_t *osm_madw_get_lft_context_ptr(IN const + osm_madw_t * + p_madw) +{ + return ((osm_lft_context_t *) & p_madw->context); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* Address of p_madw->context, cast to a pointer to osm_lft_context_t. +* +* NOTES +* The const qualifier of p_madw is cast away by this accessor. +* SEE ALSO +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_get_mft_context_ptr +* NAME +* osm_madw_get_mft_context_ptr +* +* DESCRIPTION +* Gets a pointer to the MFT context in this MAD. +* +* SYNOPSIS +*/ +static inline osm_mft_context_t *osm_madw_get_mft_context_ptr(IN const + osm_madw_t * + p_madw) +{ + return ((osm_mft_context_t *) & p_madw->context); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* Address of p_madw->context, cast to a pointer to osm_mft_context_t. +* +* NOTES +* The const qualifier of p_madw is cast away by this accessor. +* SEE ALSO +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_get_si_context_ptr +* NAME +* osm_madw_get_si_context_ptr +* +* DESCRIPTION +* Gets a pointer to the SwitchInfo context in this MAD. +* +* SYNOPSIS +*/ +static inline osm_si_context_t *osm_madw_get_si_context_ptr(IN const osm_madw_t + * p_madw) +{ + return ((osm_si_context_t *) & p_madw->context); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* Pointer to the start of the context structure.
+* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_get_smi_context_ptr +* NAME +* osm_madw_get_smi_context_ptr +* +* DESCRIPTION +* Gets a pointer to the SMInfo context in this MAD. +* +* SYNOPSIS +*/ +static inline osm_smi_context_t *osm_madw_get_smi_context_ptr(IN const + osm_madw_t * + p_madw) +{ + return ((osm_smi_context_t *) & p_madw->context); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* Address of p_madw->context, cast to a pointer to osm_smi_context_t. +* +* NOTES +* The const qualifier of p_madw is cast away by this accessor. +* SEE ALSO +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_get_pkey_context_ptr +* NAME +* osm_madw_get_pkey_context_ptr +* +* DESCRIPTION +* Gets a pointer to the P_Key context in this MAD. +* +* SYNOPSIS +*/ +static inline osm_pkey_context_t *osm_madw_get_pkey_context_ptr(IN const + osm_madw_t * + p_madw) +{ + return ((osm_pkey_context_t *) & p_madw->context); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* Address of p_madw->context, cast to a pointer to osm_pkey_context_t. +* +* NOTES +* The const qualifier of p_madw is cast away by this accessor. +* SEE ALSO +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_get_slvl_context_ptr +* NAME +* osm_madw_get_slvl_context_ptr +* +* DESCRIPTION +* Gets a pointer to the SLtoVL (osm_slvl_context_t) context in this MAD. +* +* SYNOPSIS +*/ +static inline osm_slvl_context_t *osm_madw_get_slvl_context_ptr(IN const + osm_madw_t * + p_madw) +{ + return ((osm_slvl_context_t *) & p_madw->context); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* Address of p_madw->context, cast to a pointer to osm_slvl_context_t. +* +* NOTES +* The const qualifier of p_madw is cast away by this accessor. +* SEE ALSO +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_get_vla_context_ptr +* NAME +* osm_madw_get_vla_context_ptr +* +* DESCRIPTION +* Gets a pointer to the Vl Arb context in this MAD.
+* +* SYNOPSIS +*/ +static inline osm_vla_context_t *osm_madw_get_vla_context_ptr(IN const + osm_madw_t * + p_madw) +{ + return ((osm_vla_context_t *) & p_madw->context); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* Address of p_madw->context, cast to a pointer to osm_vla_context_t. +* +* NOTES +* The const qualifier of p_madw is cast away by this accessor. +* SEE ALSO +*********/ + +#ifndef OSM_VENDOR_INTF_OPENIB +/****f* OpenSM: MAD Wrapper/osm_madw_get_arbitrary_context_ptr +* NAME +* osm_madw_get_arbitrary_context_ptr +* +* DESCRIPTION +* Gets a pointer to the arbitrary context in this MAD. +* +* SYNOPSIS +*/ +static inline osm_arbitrary_context_t *osm_madw_get_arbitrary_context_ptr(IN + const + osm_madw_t + * + const + p_madw) +{ + return ((osm_arbitrary_context_t *) & p_madw->context); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* Address of p_madw->context, cast to a pointer to osm_arbitrary_context_t. +* +* NOTES +* Compiled only when OSM_VENDOR_INTF_OPENIB is not defined. +* SEE ALSO +*********/ +#endif + +/****f* OpenSM: MAD Wrapper/osm_madw_get_vend_ptr +* NAME +* osm_madw_get_vend_ptr +* +* DESCRIPTION +* Gets a pointer to the vendor specific MAD wrapper component. +* +* SYNOPSIS +*/ +static inline osm_vend_wrap_t *osm_madw_get_vend_ptr(IN const osm_madw_t * + p_madw) +{ + return ((osm_vend_wrap_t *) & p_madw->vend_wrap); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* Address of p_madw->vend_wrap, cast to a pointer to osm_vend_wrap_t. +* +* NOTES +* The const qualifier of p_madw is cast away by this accessor. +* SEE ALSO +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_get_bind_handle +* NAME +* osm_madw_get_bind_handle +* +* DESCRIPTION +* Returns the bind handle associated with this MAD. +* +* SYNOPSIS +*/ +static inline osm_bind_handle_t +osm_madw_get_bind_handle(IN const osm_madw_t * p_madw) +{ + return ((osm_bind_handle_t) p_madw->h_bind); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* Returns the bind handle associated with this MAD.
+* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_get_mad_addr_ptr +* NAME +* osm_madw_get_mad_addr_ptr +* +* DESCRIPTION +* Returns a pointer to the mad address structure associated with this MAD. +* +* SYNOPSIS +*/ +static inline osm_mad_addr_t *osm_madw_get_mad_addr_ptr(IN const osm_madw_t * + p_madw) +{ + return ((osm_mad_addr_t *) & p_madw->mad_addr); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* Address of p_madw->mad_addr; the const qualifier of p_madw is cast away. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_get_mad_ptr +* NAME +* osm_madw_get_mad_ptr +* +* DESCRIPTION +* Returns a pointer to the wire MAD associated with this MAD wrapper. +* +* SYNOPSIS +*/ +static inline ib_mad_t *osm_madw_get_mad_ptr(IN const osm_madw_t * p_madw) +{ + return ((ib_mad_t *) p_madw->p_mad); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* The p_mad pointer stored in this wrapper, cast to a pointer to ib_mad_t. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_get_err_msg +* NAME +* osm_madw_get_err_msg +* +* DESCRIPTION +* Returns the message with which to post this mad wrapper if +* an error occurs during processing the mad. +* +* SYNOPSIS +*/ +static inline cl_disp_msgid_t osm_madw_get_err_msg(IN const osm_madw_t * p_madw) +{ + return ((cl_disp_msgid_t) p_madw->fail_msg); +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* RETURN VALUES +* Returns the message with which to post this mad wrapper if +* an error occurs during processing the mad. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_set_mad +* NAME +* osm_madw_set_mad +* +* DESCRIPTION +* Associates a wire MAD with this MAD Wrapper object.
+* +* SYNOPSIS +*/ +static inline void osm_madw_set_mad(IN osm_madw_t * p_madw, + IN const ib_mad_t * p_mad) +{ + p_madw->p_mad = p_mad; +} + +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object. +* +* p_mad +* [in] Pointer to the wire MAD to attach to this wrapper. +* +* RETURN VALUES +* None. +* +* NOTES +* Only the pointer is stored in p_madw->p_mad; the MAD itself is not copied. +* SEE ALSO +*********/ + +/****f* OpenSM: MAD Wrapper/osm_madw_copy_context +* NAME +* osm_madw_copy_context +* +* DESCRIPTION +* Copies the controller context from one MAD Wrapper to another. +* +* SYNOPSIS +*/ +static inline void osm_madw_copy_context(IN osm_madw_t * p_dest, + IN const osm_madw_t * p_src) +{ + p_dest->context = p_src->context; +} + +/* +* PARAMETERS +* p_dest +* [in] Pointer to the destination osm_madw_t object; its context member is overwritten. +* +* p_src +* [in] Pointer to the source osm_madw_t object. +* +* RETURN VALUES +* None. +* +* NOTES +* Assigns the context member only; no other field of p_dest is modified. +* SEE ALSO +*********/ + +END_C_DECLS +#endif /* _OSM_MADW_H_ */ diff --git a/include/opensm/osm_mcast_mgr.h b/include/opensm/osm_mcast_mgr.h new file mode 100644 index 0000000..291a478 --- /dev/null +++ b/include/opensm/osm_mcast_mgr.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2009 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009-2011 ZIH, TU Dresden, Federal Republic of Germany. All rights reserved. + * Copyright (C) 2012-2013 Tokyo Institute of Technology. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_mcast_work_obj_t. + * Provide access to a mcast function which searches the root switch for + * a spanning tree.
+ */ + +#ifndef _OSM_MCAST_MGR_H_ +#define _OSM_MCAST_MGR_H_ + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS + +typedef struct osm_mcast_work_obj { + cl_list_item_t list_item; /* list linkage item (cl_list_item_t) */ + osm_port_t *p_port; /* port this work object refers to */ + cl_map_item_t map_item; /* map linkage item (cl_map_item_t) */ +} osm_mcast_work_obj_t; + +int osm_mcast_make_port_list_and_map(cl_qlist_t * list, cl_qmap_t * map, + osm_mgrp_box_t * mbox); /* NOTE(review): presumably fills list and map from mbox; meaning of the int result is not visible here - confirm */ + +void osm_mcast_drop_port_list(cl_qlist_t * list); /* NOTE(review): presumably releases the list built above - confirm */ + +osm_switch_t * osm_mcast_mgr_find_root_switch(osm_sm_t * sm, cl_qlist_t * list); /* root switch search for the spanning tree, per the file Abstract */ + +END_C_DECLS +#endif /* _OSM_MCAST_MGR_H_ */ diff --git a/include/opensm/osm_mcast_tbl.h b/include/opensm/osm_mcast_tbl.h new file mode 100644 index 0000000..37e2c26 --- /dev/null +++ b/include/opensm/osm_mcast_tbl.h @@ -0,0 +1,486 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2009 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_mcast_tbl_t. + * This object represents a multicast forwarding table. + * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_MCAST_TBL_H_ +#define _OSM_MCAST_TBL_H_ + +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****s* OpenSM: Forwarding Table/osm_mcast_tbl_t +* NAME +* osm_mcast_tbl_t +* +* DESCRIPTION +* Multicast Forwarding Table structure. +* +* Callers may directly access this object. +* +* SYNOPSIS +*/ +typedef struct osm_mcast_fwdbl { + uint8_t num_ports; + uint8_t max_position; + uint16_t max_block; + int16_t max_block_in_use; + uint16_t num_entries; + uint16_t max_mlid_ho; + uint16_t mft_depth; + uint16_t(*p_mask_tbl)[][IB_MCAST_POSITION_MAX + 1]; +} osm_mcast_tbl_t; +/* +* FIELDS +* num_ports +* The number of ports in the port mask. This value +* is the same as the number of ports on the switch +* +* max_position +* Maximum bit mask position for this table. This value +* is computed from the number of ports on the switch. +* +* max_block +* Maximum block number supported in the table. This value +* is approximately the number of MLID entries divided by the +* number of MLIDs per block +* max_block_in_use: maximum block ID currently in use, -1 if none. +* num_entries +* Number of entries in the table (aka number of MLIDs supported).
+* +* max_mlid_ho +* Maximum MLID (host order) for the currently allocated multicast +* port mask table. +* +* mft_depth +* Number of MLIDs in the currently allocated multicast port mask +* table. +* +* p_mask_tbl +* Pointer to a two dimensional array of port_masks for this switch. +* The first dimension is MLID offset, second dimension is mask position. +* This pointer is null for switches that do not support multicast. +* +* SEE ALSO +*********/ + +/****f* OpenSM: Forwarding Table/osm_mcast_tbl_init +* NAME +* osm_mcast_tbl_init +* +* DESCRIPTION +* This function initializes a Multicast Forwarding Table object. +* +* SYNOPSIS +*/ +void osm_mcast_tbl_init(IN osm_mcast_tbl_t * p_tbl, IN uint8_t num_ports, + IN uint16_t capacity); +/* +* PARAMETERS +* num_ports +* [in] Number of ports in the switch owning this table. +* +* capacity +* [in] The number of MLID entries (starting at 0xC000) supported +* by this switch. +* +* RETURN VALUE +* None. +* +* NOTES +* p_tbl (not listed under PARAMETERS) is the table object to initialize. +* SEE ALSO +*********/ + +/****f* OpenSM: Forwarding Table/osm_mcast_tbl_delete +* NAME +* osm_mcast_tbl_delete +* +* DESCRIPTION +* This destroys and deallocates a Multicast Forwarding Table object. +* +* SYNOPSIS +*/ +void osm_mcast_tbl_delete(IN osm_mcast_tbl_t ** pp_tbl); +/* +* PARAMETERS +* pp_tbl +* [in] Pointer to a pointer to the Multicast Forwarding Table object. +* +* RETURN VALUE +* This function does not return a value (it is declared void). +* NOTE(review): presumably *pp_tbl is cleared once the table is freed; +* confirm against the implementation. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Forwarding Table/osm_mcast_tbl_realloc +* NAME +* osm_mcast_tbl_realloc +* +* DESCRIPTION +* This function reallocates the multicast port mask table if necessary. +* +* SYNOPSIS +*/ +int osm_mcast_tbl_realloc(IN osm_mcast_tbl_t * p_tbl, IN unsigned mlid_offset); +/* +* PARAMETERS +* +* p_tbl +* [in] Pointer to the Multicast Forwarding Table object. +* +* mlid_offset +* [in] Offset of MLID being accessed.
+* +* RETURN VALUE +* Returns 0 on success and non-zero value otherwise. +* +* NOTES +* +* SEE ALSO +*/ + +/****f* OpenSM: Forwarding Table/osm_mcast_tbl_destroy +* NAME +* osm_mcast_tbl_destroy +* +* DESCRIPTION +* This destroys and deallocates a Multicast Forwarding Table object. +* +* SYNOPSIS +*/ +void osm_mcast_tbl_destroy(IN osm_mcast_tbl_t * p_tbl); +/* +* PARAMETERS +* p_tbl +* [in] Pointer to the Multicast Forwarding Table object. +* +* RETURN VALUE +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Forwarding Table/osm_mcast_tbl_set +* NAME +* osm_mcast_tbl_set +* +* DESCRIPTION +* Adds the port to the multicast group. +* +* SYNOPSIS +*/ +void osm_mcast_tbl_set(IN osm_mcast_tbl_t * p_tbl, IN uint16_t mlid_ho, + IN uint8_t port_num); +/* +* PARAMETERS +* p_tbl +* [in] Pointer to the Multicast Forwarding Table object. +* +* mlid_ho +* [in] MLID value (host order) for which to set the route. +* +* port_num +* [in] Port to add to the multicast group. +* +* RETURN VALUE +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Forwarding Table/osm_mcast_tbl_clear_mlid +* NAME +* osm_mcast_tbl_clear_mlid +* +* DESCRIPTION +* Removes all multicast paths for the specified MLID. +* +* SYNOPSIS +*/ +void osm_mcast_tbl_clear_mlid(IN osm_mcast_tbl_t * p_tbl, IN uint16_t mlid_ho); +/* +* PARAMETERS +* p_tbl +* [in] Pointer to the Multicast Forwarding Table object. +* +* mlid_ho +* [in] MLID value (host order) for which to clear. +* +* RETURN VALUE +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Forwarding Table/osm_mcast_tbl_is_port +* NAME +* osm_mcast_tbl_is_port +* +* DESCRIPTION +* Returns TRUE if the port is in the multicast group. +* +* SYNOPSIS +*/ +boolean_t osm_mcast_tbl_is_port(IN const osm_mcast_tbl_t * p_tbl, + IN uint16_t mlid_ho, IN uint8_t port_num); +/* +* PARAMETERS +* p_tbl +* [in] Pointer to the Multicast Forwarding Table object. +* +* mlid_ho +* [in] MLID value (host order).
+* +* port_num +* [in] Port number on the switch +* +* RETURN VALUE +* Returns TRUE if the port is in the multicast group, FALSE otherwise. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Forwarding Table/osm_mcast_tbl_is_any_port +* NAME +* osm_mcast_tbl_is_any_port +* +* DESCRIPTION +* Returns TRUE if any port is in the multicast group. +* +* SYNOPSIS +*/ +boolean_t osm_mcast_tbl_is_any_port(IN const osm_mcast_tbl_t * p_tbl, + IN uint16_t mlid_ho); +/* +* PARAMETERS +* p_tbl +* [in] Pointer to the Multicast Forwarding Table object. +* +* mlid_ho +* [in] MLID value (host order). +* +* RETURN VALUE +* Returns TRUE if any port is in the multicast group. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Forwarding Table/osm_mcast_tbl_set_block +* NAME +* osm_mcast_tbl_set_block +* +* DESCRIPTION +* Copies the specified block into the Multicast Forwarding Table. +* +* SYNOPSIS +*/ +ib_api_status_t osm_mcast_tbl_set_block(IN osm_mcast_tbl_t * p_tbl, + IN const ib_net16_t * p_block, + IN int16_t block_num, + IN uint8_t position); +/* +* PARAMETERS +* p_tbl +* [in] Pointer to the Multicast Forwarding Table object. +* +* p_block +* [in] Pointer to the Forwarding Table block. +* +* block_num +* [in] Block number of this block. +* +* RETURN VALUE +* Status of the operation as an ib_api_status_t. +* +* NOTES +* position is not listed above; presumably it is the port mask position of the block - confirm. +* SEE ALSO +*********/ + +/****f* OpenSM: Forwarding Table/osm_mcast_tbl_get_block +* NAME +* osm_mcast_tbl_get_block +* +* DESCRIPTION +* Retrieve a multicast forwarding table block. +* +* SYNOPSIS +*/ +boolean_t osm_mcast_tbl_get_block(IN osm_mcast_tbl_t * p_tbl, + IN int16_t block_num, IN uint8_t position, + OUT ib_net16_t * p_block); +/* +* PARAMETERS +* p_tbl +* [in] Pointer to an osm_mcast_tbl_t object. +* +* block_num +* [in] Block number of this block. +* +* position +* [in] Position (presumably the port mask position) of this block. +* +* p_block +* [out] Pointer to the 32 entry array to store the +* forwarding table block specified by block_num.
+* +* RETURN VALUES +* Returns TRUE if there are more blocks necessary to +* configure all the MLIDs reachable from this switch. +* FALSE otherwise. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Forwarding Table/osm_mcast_tbl_get_max_block +* NAME +* osm_mcast_tbl_get_max_block +* +* DESCRIPTION +* Returns the maximum block ID in this table. +* +* SYNOPSIS +*/ +static inline uint16_t osm_mcast_tbl_get_max_block(IN osm_mcast_tbl_t * p_tbl) +{ + return p_tbl->max_block; +} + +/* +* PARAMETERS +* p_tbl +* [in] Pointer to an osm_mcast_tbl_t object. +* +* RETURN VALUES +* Returns the maximum block ID in this table. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Forwarding Table/osm_mcast_tbl_get_max_block_in_use +* NAME +* osm_mcast_tbl_get_max_block_in_use +* +* DESCRIPTION +* Returns the maximum block ID in use in this table. +* A value of -1 indicates no blocks are in use. +* +* SYNOPSIS +*/ +static inline int16_t +osm_mcast_tbl_get_max_block_in_use(IN osm_mcast_tbl_t * p_tbl) +{ + return (p_tbl->max_block_in_use); +} + +/* +* PARAMETERS +* p_tbl +* [in] Pointer to an osm_mcast_tbl_t object. +* +* RETURN VALUES +* Returns the maximum block ID in use in this table. +* A value of -1 indicates no blocks are in use. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Forwarding Table/osm_mcast_tbl_get_max_position +* NAME +* osm_mcast_tbl_get_max_position +* +* DESCRIPTION +* Returns the maximum position in this table. +* +* SYNOPSIS +*/ +static inline uint8_t +osm_mcast_tbl_get_max_position(IN osm_mcast_tbl_t * p_tbl) +{ + return (p_tbl->max_position); +} + +/* +* PARAMETERS +* p_tbl +* [in] Pointer to an osm_mcast_tbl_t object. +* +* RETURN VALUES +* Returns the maximum position in this table.
+* +* NOTES +* +* SEE ALSO +*********/ + +END_C_DECLS +#endif /* _OSM_MCAST_TBL_H_ */ diff --git a/include/opensm/osm_mcm_port.h b/include/opensm/osm_mcm_port.h new file mode 100644 index 0000000..ae0f44f --- /dev/null +++ b/include/opensm/osm_mcm_port.h @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2012 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_mcm_port_t. + * This object represents the membership of a port in a multicast group. 
+ * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_MCM_PORT_H_ +#define _OSM_MCM_PORT_H_ + +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS + +struct osm_mgrp; + +/****s* OpenSM: MCM Port Object/osm_mcm_port_t +* NAME +* osm_mcm_port_t +* +* DESCRIPTION +* This object represents a particular port as a member of a +* multicast group. +* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct osm_mcm_port { + cl_map_item_t map_item; + cl_list_item_t list_item; + osm_port_t *port; + struct osm_mgrp *mgrp; +} osm_mcm_port_t; +/* +* FIELDS +* map_item +* Map Item for qmap linkage. Must be first element!! +* +* list_item +* Linkage structure for cl_qlist. +* +* port +* Reference to the parent port +* +* mgrp +* Pointer to the multicast group of which this port is a member +* +* SEE ALSO +* MCM Port Object +*********/ + +/****f* OpenSM: MCM Port Object/osm_mcm_port_new +* NAME +* osm_mcm_port_new +* +* DESCRIPTION +* The osm_mcm_port_new function allocates and initializes a +* MCM Port Object for use. +* +* SYNOPSIS +*/ +osm_mcm_port_t *osm_mcm_port_new(IN osm_port_t * port, IN struct osm_mgrp *mgrp); +/* +* PARAMETERS +* port +* [in] Pointer to the port object +* +* mgrp +* [in] Pointer to multicast group where this port is joined +* +* RETURN VALUES +* Pointer to the allocated and initialized MCM Port object. +* +* NOTES +* +* SEE ALSO +* MCM Port Object, osm_mcm_port_delete +*********/ + +/****f* OpenSM: MCM Port Object/osm_mcm_port_delete +* NAME +* osm_mcm_port_delete +* +* DESCRIPTION +* The osm_mcm_port_delete function destroys and deallocates an +* MCM Port Object, releasing all resources.
+* +* SYNOPSIS +*/ +void osm_mcm_port_delete(IN osm_mcm_port_t * p_mcm); +/* +* PARAMETERS +* p_mcm +* [in] Pointer to a MCM Port Object to delete. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* +* SEE ALSO +* MCM Port Object, osm_mcm_port_new +*********/ + +/****s* OpenSM: MCM Port Object/osm_mcm_alias_guid_t +* NAME +* osm_mcm_alias_guid_t +* +* DESCRIPTION +* This object represents an alias guid for a mcm port. +* +* The osm_mcm_alias_guid_t object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct osm_mcm_alias_guid { + cl_map_item_t map_item; + ib_net64_t alias_guid; + osm_mcm_port_t *p_base_mcm_port; + ib_gid_t port_gid; + uint8_t scope_state; + boolean_t proxy_join; +} osm_mcm_alias_guid_t; +/* +* FIELDS +* map_item +* Linkage structure for cl_qmap. MUST BE FIRST MEMBER! +* +* alias_guid +* Alias GUID for port obtained from SM GUIDInfo attribute +* +* p_base_mcm_port +* Pointer to osm_mcm_port_t for base port GUID +* +* port_gid +* GID of the member port +* +* scope_state +* NOTE(review): presumably the packed Scope/JoinState byte of the MCMemberRecord - confirm. +* proxy_join +* If FALSE - Join was performed by the endport identified +* by PortGID. If TRUE - Join was performed on behalf of +* the endport identified by PortGID by another port within +* the same partition. +* +* SEE ALSO +* MCM Port, Physical Port, Physical Port Table +*/ + +/****f* OpenSM: MCM Port Object/osm_mcm_alias_guid_new +* NAME +* osm_mcm_alias_guid_new +* +* DESCRIPTION +* This function allocates and initializes an mcm alias guid object.
+* +* SYNOPSIS +*/ +osm_mcm_alias_guid_t *osm_mcm_alias_guid_new(IN osm_mcm_port_t *p_base_mcm_port, + IN ib_member_rec_t *mcmr, + IN boolean_t proxy); +/* +* PARAMETERS +* p_base_mcm_port +* [in] Pointer to the mcm port for this base GUID +* +* mcmr +* [in] Pointer to MCMember record of the join request +* +* proxy +* [in] proxy_join state analyzed from the request +* +* RETURN VALUE +* Pointer to the initialized mcm alias guid object. +* +* NOTES +* Allows calling other mcm alias guid methods. +* +* SEE ALSO +* MCM Port Object +*********/ + +/****f* OpenSM: MCM Port Object/osm_mcm_alias_guid_delete +* NAME +* osm_mcm_alias_guid_delete +* +* DESCRIPTION +* This function destroys and deallocates an mcm alias guid object. +* +* SYNOPSIS +*/ +void osm_mcm_alias_guid_delete(IN OUT osm_mcm_alias_guid_t ** pp_mcm_alias_guid); +/* +* PARAMETERS +* pp_mcm_alias_guid +* [in][out] Pointer to a pointer to an mcm alias guid object to +* delete. On return, the pointed-to pointer (*pp_mcm_alias_guid) is NULL. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs any necessary cleanup of the specified mcm alias guid object. +* +* SEE ALSO +* MCM Port Object +*********/ + +END_C_DECLS +#endif /* _OSM_MCM_PORT_H_ */ diff --git a/include/opensm/osm_mesh.h b/include/opensm/osm_mesh.h new file mode 100644 index 0000000..b9e85bb --- /dev/null +++ b/include/opensm/osm_mesh.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2008,2009 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +/* + * Abstract: + * Declarations for mesh analysis + */ + +#ifndef OSM_MESH_H +#define OSM_MESH_H + +struct _lash; +struct _switch; + +/* + * per switch to switch link info + */ +typedef struct _link { + int switch_id; + int link_id; + int next_port; + int num_ports; + int ports[0]; +} link_t; + +/* + * per switch node mesh info + */ +typedef struct _mesh_node { + int *axes; /* used to hold and reorder assigned axes */ + int *coord; /* mesh coordinates of switch */ + int **matrix; /* distances between adjacent switches */ + int *poly; /* characteristic polynomial of matrix */ + /* used as an invariant classification */ + int dimension; /* apparent dimension of mesh around node */ + int temp; /* temporary holder for distance info */ + int type; /* index of node type in mesh_info array */ + unsigned int num_links; /* number of 'links' to adjacent switches */ + link_t *links[0]; /* per link information */ +} mesh_node_t; + +void osm_mesh_node_delete(struct _lash *p_lash, struct _switch *sw); +int osm_mesh_node_create(struct _lash *p_lash, struct _switch *sw); +int osm_do_mesh_analysis(struct _lash *p_lash); + +#endif diff --git a/include/opensm/osm_msgdef.h b/include/opensm/osm_msgdef.h new file mode 100644 index 0000000..b0d92e0 --- /dev/null +++ b/include/opensm/osm_msgdef.h @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of Dispatcher message values. + */ + +#ifndef _OSM_MSGDEF_H_ +#define _OSM_MSGDEF_H_ + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/Dispatcher Messages +* NAME +* Dispatcher Messages +* +* DESCRIPTION +* These constants define the messages sent between OpenSM controllers +* attached to the Dispatcher. 
+* +* Each message description contains the following information: +* Sent by: which controller(s) send this message +* Received by: which controller receives this message +* Delivery notice: Indicates if the sender requires confirmation +* that the message has been delivered. Typically a "yes" here +* means that some resources associated with sending the +* message must be freed. +* +* AUTHOR +* Steve King, Intel +* +*********/ +/****s* OpenSM: Dispatcher Messages/OSM_MSG_MAD_NODE_INFO +* NAME +* OSM_MSG_MAD_NODE_INFO +* +* DESCRIPTION +* Message for received NodeInfo MADs. +* +* NOTES +* Sent by: osm_mad_ctrl_t +* Received by: osm_ni_rcv_ctrl_t +* Delivery notice: yes +* +* +***********/ +/****s* OpenSM: Dispatcher Messages/OSM_MSG_MAD_PORT_INFO +* NAME +* OSM_MSG_MAD_PORT_INFO +* +* DESCRIPTION +* Message for received PortInfo MADs. +* +* NOTES +* Sent by: osm_mad_ctrl_t +* Received by: osm_pi_rcv_ctrl_t +* Delivery notice: yes +* +* +***********/ +/****s* OpenSM: Dispatcher Messages/OSM_MSG_MAD_SWITCH_INFO +* NAME +* OSM_MSG_MAD_SWITCH_INFO +* +* DESCRIPTION +* Message for received SwitchInfo MADs. +* +* NOTES +* Sent by: osm_mad_ctrl_t +* Received by: osm_si_rcv_ctrl_t +* Delivery notice: yes +* +***********/ +/****s* OpenSM: Dispatcher Messages/OSM_MSG_MAD_NODE_DESC +* NAME +* OSM_MSG_MAD_NODE_DESC +* +* DESCRIPTION +* Message for received NodeDescription MADs. 
+* +* NOTES +* Sent by: osm_mad_ctrl_t +* Received by: osm_nd_rcv_ctrl_t +* Delivery notice: yes +* +* SOURCE +***********/ +enum { + OSM_MSG_NONE = 0, + OSM_MSG_MAD_NODE_INFO, + OSM_MSG_MAD_PORT_INFO, + OSM_MSG_MAD_SWITCH_INFO, + OSM_MSG_MAD_GUID_INFO, + OSM_MSG_MAD_NODE_DESC, + OSM_MSG_MAD_NODE_RECORD, + OSM_MSG_MAD_PORTINFO_RECORD, + OSM_MSG_MAD_SERVICE_RECORD, + OSM_MSG_MAD_PATH_RECORD, + OSM_MSG_MAD_MCMEMBER_RECORD, + OSM_MSG_MAD_LINK_RECORD, + OSM_MSG_MAD_SMINFO_RECORD, + OSM_MSG_MAD_CLASS_PORT_INFO, + OSM_MSG_MAD_INFORM_INFO, + OSM_MSG_MAD_LFT_RECORD, + OSM_MSG_MAD_LFT, + OSM_MSG_MAD_SM_INFO, + OSM_MSG_MAD_NOTICE, + OSM_MSG_LIGHT_SWEEP_FAIL, + OSM_MSG_MAD_MFT, + OSM_MSG_MAD_PKEY_TBL_RECORD, + OSM_MSG_MAD_VL_ARB_RECORD, + OSM_MSG_MAD_SLVL_TBL_RECORD, + OSM_MSG_MAD_PKEY, + OSM_MSG_MAD_VL_ARB, + OSM_MSG_MAD_SLVL, + OSM_MSG_MAD_GUIDINFO_RECORD, + OSM_MSG_MAD_INFORM_INFO_RECORD, + OSM_MSG_MAD_SWITCH_INFO_RECORD, + OSM_MSG_MAD_MFT_RECORD, +#if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) + OSM_MSG_MAD_MULTIPATH_RECORD, +#endif + OSM_MSG_MAD_PORT_COUNTERS, + OSM_MSG_MAD_MLNX_EXT_PORT_INFO, + OSM_MSG_MAD_CC, + OSM_MSG_MAX +}; + +END_C_DECLS +#endif /* _OSM_MSGDEF_H_ */ diff --git a/include/opensm/osm_mtree.h b/include/opensm/osm_mtree.h new file mode 100644 index 0000000..ad47af6 --- /dev/null +++ b/include/opensm/osm_mtree.h @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_mtree_t. + * This object represents multicast spanning tree. + * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_MTREE_H_ +#define _OSM_MTREE_H_ + +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +#define OSM_MTREE_LEAF ((void*)-1) +/****h* OpenSM/Multicast Tree +* NAME +* Multicast Tree +* +* DESCRIPTION +* The Multicast Tree object encapsulates the information needed by the +* OpenSM to manage multicast fabric routes. 
It is a tree structure +* in which each node in the tree represents a switch, and may have a +* varying number of children. +* +* Multicast trees do not contain loops. +* +* The Multicast Tree is not thread safe, thus callers must provide +* serialization. +* +* This object should be treated as opaque and should be +* manipulated only through the provided functions. +* +* AUTHOR +* Steve King, Intel +* +*********/ +/****s* OpenSM: Multicast Tree/osm_mtree_node_t +* NAME +* osm_mtree_node_t +* +* DESCRIPTION +* The MTree Node object encapsulates the information needed by the +* OpenSM for a particular switch in the multicast tree. +* +* The MTree Node object is not thread safe, thus callers must provide +* serialization. +* +* This object should be treated as opaque and should be +* manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct osm_mtree_node { + cl_map_item_t map_item; + const osm_switch_t *p_sw; + uint8_t max_children; + struct osm_mtree_node *p_up; + struct osm_mtree_node *child_array[1]; +} osm_mtree_node_t; +/* +* FIELDS +* map_item +* Linkage for quick map. MUST BE FIRST ELEMENT!!! +* +* p_sw +* Pointer to the switch represented by this tree node. +* +* max_children +* Maximum number of child nodes of this node. Equal to the +* number of ports on the switch if the switch supports +* multicast. Equal to 1 (default route) if the switch does +* not support multicast. +* +* p_up +* Pointer to the parent of this node. If this pointer is +* NULL, the node is at the root of the tree. +* +* child_array +* Array (indexed by port number) of pointers to the +* child osm_mtree_node_t objects of this tree node, if any. +* MUST BE LAST ELEMENT!!! +* +* SEE ALSO +*********/ + +/****f* OpenSM: Multicast Tree/osm_mtree_node_new +* NAME +* osm_mtree_node_new +* +* DESCRIPTION +* Returns an initialized Multicast Tree node object for use.
+* +* SYNOPSIS +*/ +osm_mtree_node_t *osm_mtree_node_new(IN const osm_switch_t * p_sw); +/* +* PARAMETERS +* p_sw +* [in] Pointer to the switch represented by this node. +* +* RETURN VALUES +* Pointer to an initialized tree node. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Multicast Tree/osm_mtree_destroy +* NAME +* osm_mtree_destroy +* +* DESCRIPTION +* Destroys the Multicast Tree object given by p_mtn. +* +* SYNOPSIS +*/ +void osm_mtree_destroy(IN osm_mtree_node_t * p_mtn); +/* +* PARAMETERS +* p_mtn +* [in] Pointer to an osm_mtree_node_t object to destroy. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Multicast Tree/osm_mtree_node_get_max_children +* NAME +* osm_mtree_node_get_max_children +* +* DESCRIPTION +* Returns the maximum number of children of this node. +* The return value is 1 greater than the highest valid port +* number on the switch. +* +* +* SYNOPSIS +*/ +static inline uint8_t +osm_mtree_node_get_max_children(IN const osm_mtree_node_t * p_mtn) +{ + return (p_mtn->max_children); +} +/* +* PARAMETERS +* p_mtn +* [in] Pointer to the multicast tree node. +* +* RETURN VALUES +* See description. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Multicast Tree/osm_mtree_node_get_child +* NAME +* osm_mtree_node_get_child +* +* DESCRIPTION +* Returns the specified child node of this node. +* +* SYNOPSIS +*/ +static inline osm_mtree_node_t *osm_mtree_node_get_child(IN const + osm_mtree_node_t * + p_mtn, + IN uint8_t child) +{ + CL_ASSERT(child < p_mtn->max_children); + return (p_mtn->child_array[child]); +} +/* +* PARAMETERS +* p_mtn +* [in] Pointer to the multicast tree node. +* +* child +* [in] Index of the child to retrieve; must be less than max_children (CL_ASSERT). +* +* RETURN VALUES +* Returns the specified child node of this node.
+* +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Multicast Tree/osm_mtree_node_get_switch_ptr +* NAME +* osm_mtree_node_get_switch_ptr +* +* DESCRIPTION +* Returns a pointer to the switch object represented by this tree node. +* +* SYNOPSIS +*/ +static inline const osm_switch_t *osm_mtree_node_get_switch_ptr(IN const + osm_mtree_node_t * + p_mtn) +{ + return p_mtn->p_sw; +} +/* +* PARAMETERS +* p_mtn +* [in] Pointer to the multicast tree node. +* +* RETURN VALUES +* Returns a pointer to the switch object represented by this tree node. +* +* +* NOTES +* +* SEE ALSO +*********/ + +END_C_DECLS +#endif /* _OSM_MTREE_H_ */ diff --git a/include/opensm/osm_multicast.h b/include/opensm/osm_multicast.h new file mode 100644 index 0000000..380f306 --- /dev/null +++ b/include/opensm/osm_multicast.h @@ -0,0 +1,465 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2012 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_mgrp_t. + * This object represents an IBA Multicast Group. + * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_MULTICAST_H_ +#define _OSM_MULTICAST_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/Multicast Group +* NAME +* Multicast Group +* +* DESCRIPTION +* The Multicast Group encapsulates the information needed by the +* OpenSM to manage Multicast Groups. The OpenSM allocates one +* Multicast Group object per Multicast Group in the IBA subnet. +* +* The Multicast Group is not thread safe, thus callers must provide +* serialization. +* +* This object should be treated as opaque and should be +* manipulated only through the provided functions. +* +* AUTHOR +* Steve King, Intel +* +*********/ + +/****s* OpenSM: Multicast Group/osm_mgrp_t +* NAME +* osm_mgrp_t +* +* DESCRIPTION +* Multicast Group structure. +* +* The osm_mgrp_t object should be treated as opaque and should +* be manipulated only through the provided functions. 
+* +* SYNOPSIS +*/ +typedef struct osm_mgrp { + cl_fmap_item_t map_item; + cl_list_item_t list_item; + ib_net16_t mlid; + cl_qmap_t mcm_port_tbl; + cl_qmap_t mcm_alias_port_tbl; + ib_member_rec_t mcmember_rec; + boolean_t well_known; + unsigned full_members; +} osm_mgrp_t; +/* +* FIELDS +* map_item +* Map Item for fmap linkage. Must be first element!! +* +* list_item +* List item for linkage in osm_mgrp_box's mgrp_list qlist. +* +* mlid +* The network ordered LID of this Multicast Group (must be +* >= 0xC000). +* +* mcm_port_tbl +* Table (sorted by port GUID) of osm_mcm_port_t objects +* representing the member ports of this multicast group. +* +* mcm_alias_port_tbl +* Table (sorted by port alias GUID) of osm_mcm_port_t +* objects representing the member ports of this multicast +* group. +* +* mcmember_rec +* Holds the parameters of the Multicast Group. +* +* well_known +* Indicates that this is the well-known multicast group which +* is created during the initialization of SM/SA and will be +* present even if there are no ports for this group. +* +* full_members +* Number of full members in the Multicast Group. +* +* SEE ALSO +*********/ + +/****s* OpenSM: Multicast Group/osm_mgrp_box_t +* NAME +* osm_mgrp_box_t +* +* DESCRIPTION +* Multicast structure which holds all multicast groups with same MLID. +* +* SYNOPSIS +*/ +typedef struct osm_mgrp_box { + uint16_t mlid; + cl_qlist_t mgrp_list; + osm_mtree_node_t *root; +} osm_mgrp_box_t; +/* +* FIELDS +* mlid +* The host ordered LID of this Multicast Group (must be +* >= 0xC000). +* +* mgrp_list +* List of multicast groups (osm_mgrp_t objects) having same MLID value. +* +* root +* Pointer to the root "tree node" in the single spanning tree +* for this multicast group. The nodes of the tree represent +* switches. Member ports are not represented in the tree. +* +* SEE ALSO +*********/ + +/****f* OpenSM: Multicast Group/osm_mgrp_new +* NAME +* osm_mgrp_new +* +* DESCRIPTION +* Allocates and initializes a Multicast Group for use.
+* +* SYNOPSIS +*/ +osm_mgrp_t *osm_mgrp_new(IN osm_subn_t * subn, IN ib_net16_t mlid, + IN ib_member_rec_t * mcmr); +/* +* PARAMETERS +* subn +* [in] Pointer to osm_subn_t object. +* +* mlid +* [in] Multicast LID for this multicast group. +* +* mcmr +* [in] MCMember Record for this multicast group. +* +* RETURN VALUES +* Pointer to the new osm_mgrp_t object; presumably NULL on failure (confirm in the implementation). +* +* NOTES +* Allows calling other Multicast Group methods. +* +* SEE ALSO +* Multicast Group, osm_mgrp_delete +*********/ + +/* + * Need a forward declaration to work around include loop: + * osm_sm.h <- osm_multicast.h + */ +struct osm_sm; + +/****f* OpenSM: Multicast Tree/osm_purge_mtree +* NAME +* osm_purge_mtree +* +* DESCRIPTION +* Frees all the nodes in a multicast spanning tree +* +* SYNOPSIS +*/ +void osm_purge_mtree(IN struct osm_sm * sm, IN osm_mgrp_box_t * mgb); +/* +* PARAMETERS +* sm +* [in] Pointer to osm_sm_t object. +* mgb +* [in] Pointer to an osm_mgrp_box_t object. +* +* RETURN VALUES +* None. +* +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Multicast Group/osm_mgrp_is_guid +* NAME +* osm_mgrp_is_guid +* +* DESCRIPTION +* Indicates if the specified port GUID is a member of the Multicast Group. +* +* SYNOPSIS +*/ +static inline boolean_t osm_mgrp_is_guid(IN const osm_mgrp_t * p_mgrp, + IN ib_net64_t port_guid) +{ + return (cl_qmap_get(&p_mgrp->mcm_port_tbl, port_guid) != + cl_qmap_end(&p_mgrp->mcm_port_tbl)); +} + +/* +* PARAMETERS +* p_mgrp +* [in] Pointer to an osm_mgrp_t object. +* +* port_guid +* [in] Port GUID. +* +* RETURN VALUES +* TRUE if the port GUID is a member of the group, +* FALSE otherwise. +* +* NOTES +* +* SEE ALSO +* Multicast Group +*********/ + +/****f* OpenSM: Multicast Group/osm_mgrp_is_empty +* NAME +* osm_mgrp_is_empty +* +* DESCRIPTION +* Indicates if the multicast group has any member ports.
+* +* SYNOPSIS +*/ +static inline boolean_t osm_mgrp_is_empty(IN const osm_mgrp_t * p_mgrp) +{ + return (cl_qmap_count(&p_mgrp->mcm_port_tbl) == 0); +} + +/* +* PARAMETERS +* p_mgrp +* [in] Pointer to an osm_mgrp_t object. +* +* RETURN VALUES +* TRUE if there are no ports in the multicast group. +* FALSE otherwise. +* +* NOTES +* +* SEE ALSO +* Multicast Group +*********/ + +/****f* OpenSM: Multicast Group/osm_mgrp_get_mlid +* NAME +* osm_mgrp_get_mlid +* +* DESCRIPTION +* The osm_mgrp_get_mlid function returns the multicast LID of this group. +* +* SYNOPSIS +*/ +static inline ib_net16_t osm_mgrp_get_mlid(IN const osm_mgrp_t * p_mgrp) +{ + return p_mgrp->mlid; +} + +/* +* PARAMETERS +* p_mgrp +* [in] Pointer to an osm_mgrp_t object. +* +* RETURN VALUES +* MLID of the Multicast Group. +* +* NOTES +* +* SEE ALSO +* Multicast Group +*********/ + +/****f* OpenSM: Multicast Group/osm_mgrp_add_port +* NAME +* osm_mgrp_add_port +* +* DESCRIPTION +* Adds a port to the multicast group. +* +* SYNOPSIS +*/ +osm_mcm_port_t *osm_mgrp_add_port(osm_subn_t *subn, osm_log_t *log, + IN osm_mgrp_t * mgrp, IN osm_port_t *port, + IN ib_member_rec_t *mcmr, IN boolean_t proxy); +/* +* PARAMETERS +* subn +* [in] Pointer to the subnet object +* +* log +* [in] The log object pointer +* +* mgrp +* [in] Pointer to the osm_mgrp_t object to add the port to. +* +* port +* [in] Pointer to an osm_port_t object +* +* mcmr +* [in] Pointer to MCMember record received for the join +* +* proxy +* [in] The proxy join state for this port in the group. +* +* RETURN VALUES +* Pointer to the osm_mcm_port_t object for the added port. +* Presumably NULL on failure (confirm in the implementation). +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Multicast Group/osm_mgrp_get_mcm_port +* NAME +* osm_mgrp_get_mcm_port +* +* DESCRIPTION +* Finds a port in the multicast group. +* +* SYNOPSIS +*/ +osm_mcm_port_t *osm_mgrp_get_mcm_port(IN const osm_mgrp_t * p_mgrp, + IN ib_net64_t port_guid); +/* +* PARAMETERS +* p_mgrp +* [in] Pointer to an osm_mgrp_t object. +* +* port_guid +* [in] Port guid.
+* +* RETURN VALUES +* Pointer to the mcm port object when present or NULL otherwise. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Multicast Group/osm_mgrp_get_mcm_alias_guid +* NAME +* osm_mgrp_get_mcm_alias_guid +* +* DESCRIPTION +* Finds an mcm alias GUID in the multicast group based on an alias GUID. +* +* SYNOPSIS +*/ +osm_mcm_alias_guid_t *osm_mgrp_get_mcm_alias_guid(IN const osm_mgrp_t * p_mgrp, + IN ib_net64_t port_guid); +/* +* PARAMETERS +* p_mgrp +* [in] Pointer to an osm_mgrp_t object. +* +* port_guid +* [in] Alias port guid. +* +* RETURN VALUES +* Pointer to the mcm alias GUID object when present or NULL otherwise. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Multicast Group/osm_mgrp_delete_port +* NAME +* osm_mgrp_delete_port +* +* DESCRIPTION +* Removes a port from the multicast group. +* +* SYNOPSIS +*/ +void osm_mgrp_delete_port(IN osm_subn_t * subn, IN osm_log_t * log, + IN osm_mgrp_t * mgrp, IN osm_port_t * port); +/* +* PARAMETERS +* subn +* [in] Pointer to the subnet object +* +* log +* [in] The log object pointer +* +* mgrp +* [in] Pointer to an osm_mgrp_t object. +* +* port +* [in] Pointer to an osm_port_t object for the departing port. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +boolean_t osm_mgrp_remove_port(osm_subn_t * subn, osm_log_t * log, osm_mgrp_t * mgrp, + osm_mcm_alias_guid_t * mcm_alias_guid, + ib_member_rec_t * mcmr); +void osm_mgrp_cleanup(osm_subn_t * subn, osm_mgrp_t * mpgr); +void osm_mgrp_box_delete(osm_mgrp_box_t *mbox); + +END_C_DECLS +#endif /* _OSM_MULTICAST_H_ */ diff --git a/include/opensm/osm_node.h b/include/opensm/osm_node.h new file mode 100644 index 0000000..b6e5e00 --- /dev/null +++ b/include/opensm/osm_node.h @@ -0,0 +1,684 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_node_t. + * This object represents an IBA node. + * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_NODE_H_ +#define _OSM_NODE_H_ + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS + +struct osm_switch; + +/****h* OpenSM/Node +* NAME +* Node +* +* DESCRIPTION +* The Node object encapsulates the information needed by the +* OpenSM to manage nodes. 
The OpenSM allocates one Node object +* per node in the IBA subnet. +* +* The Node object is not thread safe, thus callers must provide +* serialization. +* +* This object should be treated as opaque and should be +* manipulated only through the provided functions. +* +* AUTHOR +* Steve King, Intel +* +*********/ + +/****s* OpenSM: Node/osm_node_t +* NAME +* osm_node_t +* +* DESCRIPTION +* Node structure. +* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct osm_node { + cl_map_item_t map_item; + struct osm_switch *sw; + ib_node_info_t node_info; + ib_node_desc_t node_desc; + uint32_t discovery_count; + uint32_t physp_tbl_size; + char *print_desc; + uint8_t *physp_discovered; + osm_physp_t physp_table[1]; +} osm_node_t; +/* +* FIELDS +* map_item +* Linkage structure for cl_qmap. MUST BE FIRST MEMBER! +* +* sw +* For switch node contains pointer to appropriate osm_switch +* structure. NULL for non-switch nodes. Can be used for fast +* access to switch object and for simple node type detection +* +* node_info +* The IBA defined NodeInfo data for this node. +* +* node_desc +* The IBA defined NodeDescription data for this node. +* +* discovery_count +* The number of times this node has been discovered +* during the current fabric sweep. This number is reset +* to zero at the start of a sweep. +* +* physp_tbl_size +* The size of the physp_table array. This value is one greater +* than the number of ports in the node, since port numbers +* start with 1 for some bizarre reason. +* +* print_desc +* A printable version of the node description. +* +* physp_discovered +* Array of physp_discovered flags for all ports of this node. +* Each flag indicates whether the port has been discovered +* during the sweep or not. 1 means that the port has been discovered. +* +* physp_table +* Array of physical port objects belonging to this node. +* Index is contiguous by local port number.
+* For switches, port 0 is always the management port (14.2.5.6). +* MUST BE LAST MEMBER! - Since it grows !!!! +* +* SEE ALSO +* Node object +*********/ + +/****f* OpenSM: Node/osm_node_delete +* NAME +* osm_node_delete +* +* DESCRIPTION +* The osm_node_delete function destroys a node, releasing +* all resources. +* +* SYNOPSIS +*/ +void osm_node_delete(IN OUT osm_node_t ** p_node); +/* +* PARAMETERS +* p_node +* [in][out] Pointer to a pointer to a Node object to destroy. +* On return, the pointer is set to NULL. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs any necessary cleanup of the specified Node object. +* This function should only be called after a call to osm_node_new. +* +* SEE ALSO +* Node object, osm_node_new +*********/ + +/****f* OpenSM: Node/osm_node_new +* NAME +* osm_node_new +* +* DESCRIPTION +* The osm_node_new function initializes a Node object for use. +* +* SYNOPSIS +*/ +osm_node_t *osm_node_new(IN const osm_madw_t * p_madw); +/* +* PARAMETERS +* p_madw +* [in] Pointer to an osm_madw_t object containing a mad with +* the node's NodeInfo attribute. The caller may discard the +* osm_madw_t structure after calling osm_node_new. +* +* RETURN VALUES +* On success, a pointer to the new initialized osm_node_t structure. +* NULL otherwise. +* +* NOTES +* +* SEE ALSO +* Node object +*********/ + +/****f* OpenSM: Node/osm_node_get_physp_ptr +* NAME +* osm_node_get_physp_ptr +* +* DESCRIPTION +* Returns a pointer to the physical port object at the +* specified local port number. +* +* SYNOPSIS +*/ +static inline osm_physp_t *osm_node_get_physp_ptr(IN osm_node_t * p_node, + IN uint32_t port_num) +{ + + CL_ASSERT(port_num < p_node->physp_tbl_size); + return osm_physp_is_valid(&p_node->physp_table[port_num]) ? + &p_node->physp_table[port_num] : NULL; +} + +/* +* PARAMETERS +* p_node +* [in] Pointer to an osm_node_t object. +* +* port_num +* [in] Local port number; must be less than physp_tbl_size (CL_ASSERT).
+* +* RETURN VALUES +* Returns a pointer to the physical port object at the +* specified local port number. +* A return value of NULL means the physical port object at that port number is not valid. +* +* NOTES +* +* SEE ALSO +* Node object +*********/ + +/****f* OpenSM: Node/osm_node_get_type +* NAME +* osm_node_get_type +* +* DESCRIPTION +* Returns the type of this node. +* +* SYNOPSIS +*/ +static inline uint8_t osm_node_get_type(IN const osm_node_t * p_node) +{ + return p_node->node_info.node_type; +} + +/* +* PARAMETERS +* p_node +* [in] Pointer to an osm_node_t object. +* +* RETURN VALUES +* Returns the IBA defined type of this node. +* +* NOTES +* +* SEE ALSO +* Node object +*********/ + +/****f* OpenSM: Node/osm_node_get_num_physp +* NAME +* osm_node_get_num_physp +* +* DESCRIPTION +* Returns the number of osm_physp ports allocated for this node. +* For switches, it is the number of external physical ports plus +* port 0. For CAs and routers, it is the number of external physical +* ports plus 1. +* +* SYNOPSIS +*/ +static inline uint8_t osm_node_get_num_physp(IN const osm_node_t * p_node) +{ + return (uint8_t) p_node->physp_tbl_size; +} + +/* +* PARAMETERS +* p_node +* [in] Pointer to an osm_node_t object. +* +* RETURN VALUES +* Returns the size of the node's physp_table (physp_tbl_size cast to uint8_t). +* +* NOTES +* +* SEE ALSO +* Node object +*********/ + +/****f* OpenSM: Node/osm_node_get_remote_node +* NAME +* osm_node_get_remote_node +* +* DESCRIPTION +* Returns a pointer to the node on the other end of the +* specified port. +* Returns NULL if no remote node exists. +* +* SYNOPSIS +*/ +osm_node_t *osm_node_get_remote_node(IN osm_node_t * p_node, + IN uint8_t port_num, + OUT uint8_t * p_remote_port_num); +/* +* PARAMETERS +* p_node +* [in] Pointer to an osm_node_t object. +* +* port_num +* [in] Port number in p_node through which to get the remote node. +* +* p_remote_port_num +* [out] Port number in the remote's node through which this +* link exists.
The caller may specify NULL for this pointer +* if the port number isn't needed. +* +* RETURN VALUES +* Returns a pointer to the node on the other end of the +* specified port. +* Returns NULL if no remote node exists. +* +* NOTES +* +* SEE ALSO +* Node object +*********/ + +/****f* OpenSM: Node/osm_node_get_base_lid +* NAME +* osm_node_get_base_lid +* +* DESCRIPTION +* Returns the base LID value of the specified port on this node. +* +* SYNOPSIS +*/ +static inline ib_net16_t osm_node_get_base_lid(IN const osm_node_t * p_node, + IN uint32_t port_num) +{ + CL_ASSERT(port_num < p_node->physp_tbl_size); + return osm_physp_get_base_lid(&p_node->physp_table[port_num]); +} + +/* +* PARAMETERS +* p_node +* [in] Pointer to an osm_node_t object. +* +* port_num +* [in] Local port number. +* +* RETURN VALUES +* Returns the result of osm_physp_get_base_lid() for the physical +* port object at the specified local port number. +* port_num must be less than physp_tbl_size (checked by CL_ASSERT). +* +* NOTES +* +* SEE ALSO +* Node object +*********/ + +/****f* OpenSM: Node/osm_node_get_remote_base_lid +* NAME +* osm_node_get_remote_base_lid +* +* DESCRIPTION +* Returns the base LID value of the port on the other side +* of the wire from the specified port on this node. +* +* SYNOPSIS +*/ +ib_net16_t osm_node_get_remote_base_lid(IN osm_node_t * p_node, + IN uint32_t port_num); +/* +* PARAMETERS +* p_node +* [in] Pointer to an osm_node_t object. +* +* port_num +* [in] Local port number. +* +* RETURN VALUES +* Returns the base LID value of the port on the other side +* of the wire from the specified local port. +* A return value of zero means the port number was out of range. +* +* NOTES +* +* SEE ALSO +* Node object +*********/ + +/****f* OpenSM: Node/osm_node_get_lmc +* NAME +* osm_node_get_lmc +* +* DESCRIPTION +* Returns the LMC value of the specified port on this node.
+* +* SYNOPSIS +*/ +static inline uint8_t osm_node_get_lmc(IN const osm_node_t * p_node, + IN uint32_t port_num) +{ + CL_ASSERT(port_num < p_node->physp_tbl_size); + return osm_physp_get_lmc(&p_node->physp_table[port_num]); +} + +/* +* PARAMETERS +* p_node +* [in] Pointer to an osm_node_t object. +* +* port_num +* [in] Local port number. +* +* RETURN VALUES +* Returns the LMC value of the specified port on this node. +* +* NOTES +* +* SEE ALSO +* Node object +*********/ + +/****f* OpenSM: Node/osm_node_init_physp +* NAME +* osm_node_init_physp +* +* DESCRIPTION +* Initializes a physical port for the given node. +* +* SYNOPSIS +*/ +void osm_node_init_physp(IN osm_node_t * p_node, uint8_t port_num, + IN const osm_madw_t * p_madw); +/* +* PARAMETERS +* p_node +* [in] Pointer to an osm_node_t object. +* +* port_num +* [in] Local port number. +* +* p_madw +* [in] Pointer to a osm_madw_t object containing a mad with +* the node's NodeInfo attribute as discovered through the +* Physical Port to add to the node. The caller may discard the +* osm_madw_t structure after calling osm_node_new. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +* Node object, Physical Port object. +*********/ + +/****f* OpenSM: Node/osm_node_get_node_guid +* NAME +* osm_node_get_node_guid +* +* DESCRIPTION +* Returns the node GUID of this node. +* +* SYNOPSIS +*/ +static inline ib_net64_t osm_node_get_node_guid(IN const osm_node_t * p_node) +{ + return p_node->node_info.node_guid; +} + +/* +* PARAMETERS +* p_node +* [in] Pointer to an osm_node_t object. +* +* RETURN VALUES +* Returns the node GUID of this node. +* +* NOTES +* +* SEE ALSO +* Node object +*********/ + +/****f* OpenSM: Node/osm_node_link +* NAME +* osm_node_link +* +* DESCRIPTION +* Logically connects a node to another node through the specified port. 
+* +* SYNOPSIS +*/ +void osm_node_link(IN osm_node_t * p_node, IN uint8_t port_num, + IN osm_node_t * p_remote_node, IN uint8_t remote_port_num); +/* +* PARAMETERS +* p_node +* [in] Pointer to an osm_node_t object. +* +* port_num +* [in] Port number in p_node through which to create the link. +* +* p_remote_node +* [in] Pointer to the remote node object. +* +* remote_port_num +* [in] Port number in the remote's node through which to +* create this link. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +* Node object +*********/ + +/****f* OpenSM: Node/osm_node_unlink +* NAME +* osm_node_unlink +* +* DESCRIPTION +* Logically disconnects a node from another node through +* the specified port. +* +* SYNOPSIS +*/ +void osm_node_unlink(IN osm_node_t * p_node, IN uint8_t port_num, + IN osm_node_t * p_remote_node, IN uint8_t remote_port_num); +/* +* PARAMETERS +* p_node +* [in] Pointer to an osm_node_t object. +* +* port_num +* [in] Port number in p_node through which to unlink. +* +* p_remote_node +* [in] Pointer to the remote node object. +* +* remote_port_num +* [in] Port number in the remote's node through which to unlink. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +* Node object +*********/ + +/****f* OpenSM: Node/osm_node_link_exists +* NAME +* osm_node_link_exists +* +* DESCRIPTION +* Return TRUE if a link exists between the specified nodes on +* the specified ports. +* Returns FALSE otherwise. +* +* SYNOPSIS +*/ +boolean_t osm_node_link_exists(IN osm_node_t * p_node, IN uint8_t port_num, + IN osm_node_t * p_remote_node, + IN uint8_t remote_port_num); +/* +* PARAMETERS +* p_node +* [in] Pointer to an osm_node_t object. +* +* port_num +* [in] Port number in p_node through which to check the link. +* +* p_remote_node +* [in] Pointer to the remote node object. +* +* remote_port_num +* [in] Port number in the remote's node through which to +* check this link. 
+* +* RETURN VALUES +* Return TRUE if a link exists between the specified nodes on +* the specified ports. +* Returns FALSE otherwise. +* +* NOTES +* +* SEE ALSO +* Node object +*********/ + +/****f* OpenSM: Node/osm_node_has_any_link +* NAME +* osm_node_has_any_link +* +* DESCRIPTION +* Returns TRUE if any link exists from the specified node on +* the specified port. +* Returns FALSE otherwise. +* +* SYNOPSIS +*/ +boolean_t osm_node_has_any_link(IN osm_node_t * p_node, IN uint8_t port_num); +/* +* PARAMETERS +* p_node +* [in] Pointer to an osm_node_t object. +* +* port_num +* [in] Port number in p_node through which to check the link. +* +* RETURN VALUES +* Returns TRUE if any link exists from the specified node on +* the specified port. +* Returns FALSE otherwise. +* +* NOTES +* +* SEE ALSO +* Node object +*********/ + +/****f* OpenSM: Node/osm_node_link_has_valid_ports +* NAME +* osm_node_link_has_valid_ports +* +* DESCRIPTION +* Return TRUE if both ports in the link are valid (initialized). +* Returns FALSE otherwise. +* +* SYNOPSIS +*/ +boolean_t osm_node_link_has_valid_ports(IN osm_node_t * p_node, + IN uint8_t port_num, + IN osm_node_t * p_remote_node, + IN uint8_t remote_port_num); +/* +* PARAMETERS +* p_node +* [in] Pointer to an osm_node_t object. +* +* port_num +* [in] Port number in p_node through which to check the link. +* +* p_remote_node +* [in] Pointer to the remote node object. +* +* remote_port_num +* [in] Port number in the remote's node through which to +* check this link. +* +* RETURN VALUES +* Return TRUE if both ports in the link are valid (initialized). +* Returns FALSE otherwise. +* +* NOTES +* +* SEE ALSO +* Node object +*********/ + +END_C_DECLS +#endif /* _OSM_NODE_H_ */ diff --git a/include/opensm/osm_opensm.h b/include/opensm/osm_opensm.h new file mode 100644 index 0000000..5c8dcf4 --- /dev/null +++ b/include/opensm/osm_opensm.h @@ -0,0 +1,736 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009-2011 ZIH, TU Dresden, Federal Republic of Germany. All rights reserved. + * Copyright (C) 2012-2017 Tokyo Institute of Technology. All rights reserved. + * Copyright (c) 2019 Fabriscale Technologies AS. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_opensm_t. + * This object represents the OpenSM super object. + * This object is part of the OpenSM family of objects. 
+ */ + +#ifndef _OSM_OPENSM_H_ +#define _OSM_OPENSM_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/OpenSM +* NAME +* OpenSM +* +* DESCRIPTION +* The OpenSM object encapsulates the information needed by the +* OpenSM to govern itself. The OpenSM is one OpenSM object. +* +* The OpenSM object is thread safe. +* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* AUTHOR +* Steve King, Intel +* +*********/ +/****d* OpenSM: OpenSM/osm_routing_engine_type_t +* NAME +* osm_routing_engine_type_t +* +* DESCRIPTION +* Enumerates the possible routing engines that +* could be used to route a subnet. +* +* SYNOPSIS +*/ +typedef enum _osm_routing_engine_type { + OSM_ROUTING_ENGINE_TYPE_NONE = 0, + OSM_ROUTING_ENGINE_TYPE_MINHOP, + OSM_ROUTING_ENGINE_TYPE_UPDN, + OSM_ROUTING_ENGINE_TYPE_DNUP, + OSM_ROUTING_ENGINE_TYPE_FILE, + OSM_ROUTING_ENGINE_TYPE_FTREE, + OSM_ROUTING_ENGINE_TYPE_LASH, + OSM_ROUTING_ENGINE_TYPE_DOR, + OSM_ROUTING_ENGINE_TYPE_TORUS_2QOS, + OSM_ROUTING_ENGINE_TYPE_NUE, + OSM_ROUTING_ENGINE_TYPE_SSSP, + OSM_ROUTING_ENGINE_TYPE_DFSSSP, + OSM_ROUTING_ENGINE_TYPE_UNKNOWN, + OSM_ROUTING_ENGINE_TYPE_EXTERNAL +} osm_routing_engine_type_t; +/***********/ + +/****s* OpenSM: OpenSM/osm_routing_engine +* NAME +* struct osm_routing_engine +* +* DESCRIPTION +* OpenSM routing engine module definition. +* NOTES +* routing engine structure - multicast callbacks may be +* added later. 
+*/ +typedef struct osm_routing_engine { + osm_routing_engine_type_t type; + const char *name; + void *context; + int (*build_lid_matrices) (void *context); + int (*ucast_build_fwd_tables) (void *context); + void (*ucast_dump_tables) (void *context); + void (*update_sl2vl)(void *context, IN osm_physp_t *port, + IN uint8_t in_port_num, IN uint8_t out_port_num, + IN OUT ib_slvl_table_t *t); + void (*update_vlarb)(void *context, IN osm_physp_t *port, + IN uint8_t port_num, + IN OUT ib_vl_arb_table_t *block, + unsigned block_length, unsigned block_num); + uint8_t (*path_sl)(void *context, IN uint8_t path_sl_hint, + IN const ib_net16_t slid, IN const ib_net16_t dlid); + ib_api_status_t (*mcast_build_stree)(void *context, + IN OUT osm_mgrp_box_t *mgb); + void (*destroy) (void *context); + struct osm_routing_engine *next; +} osm_routing_engine_t; +/* +* FIELDS +* name +* The routing engine name (will be used in logs). +* +* context +* The routing engine context. Will be passed as parameter +* to the callback functions. +* +* build_lid_matrices +* The callback for lid matrices generation. +* +* ucast_build_fwd_tables +* The callback for unicast forwarding table generation. +* +* ucast_dump_tables +* The callback for dumping unicast routing tables. +* +* update_sl2vl(void *context, IN osm_physp_t *port, +* IN uint8_t in_port_num, IN uint8_t out_port_num, +* OUT ib_slvl_table_t *t) +* The callback to allow routing engine input for SL2VL maps. +* *port is the physical port for which the SL2VL map is to be +* updated. For switches, in_port_num/out_port_num identify +* which part of the SL2VL map to update. For router/HCA ports, +* in_port_num/out_port_num should be ignored. +* +* update_vlarb(void *context, IN osm_physp_t *port, +* IN uint8_t port_num, +* IN OUT ib_vl_arb_table_t *block, +* unsigned block_length, unsigned block_num) +* The callback to allow routing engine input for VLArbitration. +* *port is the physical port for which the VLArb table is to be +* updated. 
+* +* path_sl +* The callback for computing path SL. +* +* mcast_build_stree +* The callback for building the spanning tree for multicast +* forwarding, called per MLID. +* +* destroy +* The destroy method, may be used for routing engine +* internals cleanup. +* +* next +* Pointer to next routing engine in the list. +*/ + +/****s* OpenSM: OpenSM/external_routing_engine_module_t + * NAME + * external_routing_engine_module_t + * + * DESCRIPTION + * External routing engine module structure. + * + * This structure is used to register a new external routing engine + * + * SYNOPSIS + */ +typedef struct external_routing_engine_module { + const char *name; + int (*setup)(struct osm_routing_engine *re, struct osm_opensm *osm); + void *context; +} external_routing_engine_module_t; +/* + * FIELDS + * name + * Name of the external routing engine + * + * setup + * function to setup the external routing engine's callbacks + * + * context + * User defined context + * + * SEE ALSO + * osm_register_external_routing_engine + *********/ + +/****s* OpenSM: OpenSM/osm_opensm_t +* NAME +* osm_opensm_t +* +* DESCRIPTION +* OpenSM structure. +* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. 
+* +* SYNOPSIS +*/ +typedef struct osm_opensm { + const char *osm_version; + osm_subn_t subn; + osm_sm_t sm; + osm_sa_t sa; +#ifdef ENABLE_OSM_PERF_MGR + osm_perfmgr_t perfmgr; +#endif /* ENABLE_OSM_PERF_MGR */ + osm_congestion_control_t cc; + cl_qlist_t plugin_list; + osm_db_t db; + boolean_t mad_pool_constructed; + osm_mad_pool_t mad_pool; + osm_vendor_t *p_vendor; + boolean_t vl15_constructed; + osm_vl15_t vl15; + osm_log_t log; + cl_dispatcher_t disp; + cl_dispatcher_t sa_set_disp; + boolean_t sa_set_disp_initialized; + cl_plock_t lock; + struct osm_routing_engine *routing_engine_list; + struct osm_routing_engine *routing_engine_used; + struct osm_routing_engine *default_routing_engine; + boolean_t no_fallback_routing_engine; + osm_stats_t stats; + osm_console_t console; + nn_map_t *node_name_map; +} osm_opensm_t; +/* +* FIELDS +* osm_version +* OpenSM version (as generated in osm_version.h) +* +* subn +* Subnet object for this subnet. +* +* sm +* The Subnet Manager (SM) object for this subnet. +* +* sa +* The Subnet Administration (SA) object for this subnet. +* +* db +* Persistent storage of some data required between sessions. +* +* mad_pool +* Pool of Management Datagram (MAD) objects. +* +* p_vendor +* Pointer to the Vendor specific adapter for various +* transport interfaces, such as UMADT, AL, etc. The +* particular interface is set at compile time. +* +* vl15 +* The VL15 interface. +* +* log +* Log facility used by all OpenSM components. +* +* disp +* Central dispatcher containing the OpenSM worker threads. +* +* sa_set_disp +* Dispatcher for SA Set and Delete requests. +* +* sa_set_disp_initialized +* Indicator that sa_set_disp dispatcher was initialized. +* +* lock +* Shared lock guarding most OpenSM structures. +* +* routing_engine_list +* List of routing engines that should be tried for use. +* +* routing_engine_used +* Indicates which routing engine was used to route a subnet.
+* +* no_fallback_routing_engine +* Indicates if default routing engine should not be used. +* +* stats +* Open SM statistics block +* +* SEE ALSO +*********/ + +/****f* OpenSM: OpenSM/osm_opensm_construct +* NAME +* osm_opensm_construct +* +* DESCRIPTION +* This function constructs an OpenSM object. +* +* SYNOPSIS +*/ +void osm_opensm_construct(IN osm_opensm_t * p_osm); +/* +* PARAMETERS +* p_osm +* [in] Pointer to an OpenSM object to construct. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling osm_opensm_init, osm_opensm_destroy +* +* Calling osm_opensm_construct is a prerequisite to calling any other +* method except osm_opensm_init. +* +* SEE ALSO +* SM object, osm_opensm_init, osm_opensm_destroy +*********/ + +/****f* OpenSM: OpenSM/osm_opensm_construct_finish +* NAME +* osm_opensm_construct_finish +* +* DESCRIPTION +* The osm_opensm_construct_finish function completes +* the second phase of constructing an OpenSM object. +* +* SYNOPSIS +*/ +void osm_opensm_construct_finish(IN osm_opensm_t * p_osm); +/* +* PARAMETERS +* p_osm +* [in] Pointer to an OpenSM object to construct. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Calling osm_opensm_construct/osm_opensm_construct_finish is a prerequisite +* to calling any other method except osm_opensm_init/osm_opensm_init_finish. +* +* SEE ALSO +* SM object, osm_opensm_init, osm_opensm_construct_finish, +* osm_opensm_destroy, osm_opensm_destroy_finish +*********/ + +/****f* OpenSM: OpenSM/osm_opensm_destroy +* NAME +* osm_opensm_destroy +* +* DESCRIPTION +* The osm_opensm_destroy function destroys an SM, releasing +* all resources. +* +* SYNOPSIS +*/ +void osm_opensm_destroy(IN osm_opensm_t * p_osm); +/* +* PARAMETERS +* p_osm +* [in] Pointer to an OpenSM object to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs any necessary cleanup of the specified OpenSM object.
+* Further operations should not be attempted on the destroyed object. +* This function should only be called after a call to osm_opensm_construct or +* osm_opensm_init. +* +* SEE ALSO +* SM object, osm_opensm_construct, osm_opensm_init +*********/ + +/****f* OpenSM: OpenSM/osm_opensm_destroy_finish +* NAME +* osm_opensm_destroy_finish +* +* DESCRIPTION +* The osm_opensm_destroy_finish function handles the second phase +* of destroying an SM, releasing all resources. +* +* SYNOPSIS +*/ +void osm_opensm_destroy_finish(IN osm_opensm_t * p_osm); +/* +* PARAMETERS +* p_osm +* [in] Pointer to a OpenSM object to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs second phase of any necessary cleanup of the specified OpenSM object. +* Further operations should not be attempted on the destroyed object. +* This function should only be called after a call to +* osm_opensm_construct_finish or osm_opensm_init_finish. +* +* SEE ALSO +* SM object, osm_opensm_construct, osm_opensm_construct_finish, +* osm_opensm_init, osm_opensm_init_finish +*********/ + +/****f* OpenSM: OpenSM/osm_opensm_init +* NAME +* osm_opensm_init +* +* DESCRIPTION +* The osm_opensm_init function initializes a OpenSM object for use. +* +* SYNOPSIS +*/ +ib_api_status_t osm_opensm_init(IN osm_opensm_t * p_osm, + IN const osm_subn_opt_t * p_opt); +/* +* PARAMETERS +* p_osm +* [in] Pointer to an osm_opensm_t object to initialize. +* +* p_opt +* [in] Pointer to the subnet options structure. +* +* RETURN VALUES +* IB_SUCCESS if the OpenSM object was initialized successfully. +* +* NOTES +* Allows calling other OpenSM methods. +* +* SEE ALSO +* SM object, osm_opensm_construct, osm_opensm_destroy +*********/ + +/****f* OpenSM: OpenSM/osm_opensm_init_finish +* NAME +* osm_opensm_init_finish +* +* DESCRIPTION +* The osm_opensm_init_finish function performs the second phase +* of initialization of an OpenSM object. 
+* +* SYNOPSIS +*/ +ib_api_status_t osm_opensm_init_finish(IN osm_opensm_t * p_osm, + IN const osm_subn_opt_t * p_opt); +/* +* PARAMETERS +* p_osm +* [in] Pointer to an osm_opensm_t object to initialize. +* +* p_opt +* [in] Pointer to the subnet options structure. +* +* RETURN VALUES +* IB_SUCCESS if the OpenSM object was initialized successfully. +* +* NOTES +* Allows calling other OpenSM methods. +* +* SEE ALSO +* SM object, osm_opensm_construct, osm_opensm_construct_finish, +* osm_opensm_destroy, osm_opensm_destroy_finish +*********/ + +/****f* OpenSM: OpenSM/osm_opensm_sweep +* NAME +* osm_opensm_sweep +* +* DESCRIPTION +* Initiates a subnet sweep. +* +* SYNOPSIS +*/ +static inline void osm_opensm_sweep(IN osm_opensm_t * p_osm) +{ + osm_sm_sweep(&p_osm->sm); +} + +/* +* PARAMETERS +* p_osm +* [in] Pointer to an osm_opensm_t object on which to +* initiate a sweep. +* +* RETURN VALUES +* None +* +* NOTES +* If the OpenSM object is not bound to a port, this function +* does nothing. +* +* SEE ALSO +*********/ + +/****f* OpenSM: OpenSM/osm_opensm_set_log_flags +* NAME +* osm_opensm_set_log_flags +* +* DESCRIPTION +* Sets the log level. +* +* SYNOPSIS +*/ +static inline void osm_opensm_set_log_flags(IN osm_opensm_t * p_osm, + IN osm_log_level_t log_flags) +{ + osm_log_set_level(&p_osm->log, log_flags); +} + +/* +* PARAMETERS +* p_osm +* [in] Pointer to an osm_opensm_t object. +* +* log_flags +* [in] Log level flags to set. +* +* RETURN VALUES +* None +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: OpenSM/osm_opensm_bind +* NAME +* osm_opensm_bind +* +* DESCRIPTION +* Binds the opensm object to a port guid. +* +* SYNOPSIS +*/ +ib_api_status_t osm_opensm_bind(IN osm_opensm_t * p_osm, IN ib_net64_t guid); +/* +* PARAMETERS +* p_osm +* [in] Pointer to an osm_opensm_t object to bind. +* +* guid +* [in] Local port GUID with which to bind. +* +* RETURN VALUES +* An ib_api_status_t status code for the bind operation. +* +* NOTES +* A given opensm object can only be bound to one port at a time.
+* +* SEE ALSO +*********/ + +/****f* OpenSM: OpenSM/osm_opensm_wait_for_subnet_up +* NAME +* osm_opensm_wait_for_subnet_up +* +* DESCRIPTION +* Blocks the calling thread until the subnet is up. +* +* SYNOPSIS +*/ +static inline cl_status_t +osm_opensm_wait_for_subnet_up(IN osm_opensm_t * p_osm, IN uint32_t wait_us, + IN boolean_t interruptible) +{ + return osm_sm_wait_for_subnet_up(&p_osm->sm, wait_us, interruptible); +} + +/* +* PARAMETERS +* p_osm +* [in] Pointer to an osm_opensm_t object. +* +* wait_us +* [in] Number of microseconds to wait. +* +* interruptible +* [in] Indicates whether the wait operation can be interrupted +* by external signals. +* +* RETURN VALUES +* CL_SUCCESS if the wait operation succeeded in response to the event +* being set. +* +* CL_TIMEOUT if the specified time period elapses. +* +* CL_NOT_DONE if the wait was interrupted by an external signal. +* +* CL_ERROR if the wait operation failed. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: OpenSM/osm_register_external_routing_engine + * NAME + * osm_register_external_routing_engine + * + * DESCRIPTION + * Register a new external routing engine. + * + * SYNOPSIS + */ +cl_status_t osm_register_external_routing_engine( + IN osm_opensm_t *osm, + IN const external_routing_engine_module_t *module, + IN void *context); +/* + * PARAMETERS + * osm - [in] Pointer to an osm_opensm_t object. + * module - [in] Pointer to an external_routing_engine_module_t object + * to be registered. + * context - [in] Pointer to a user context that will be set in osm_routing_engine_t. + * + * RETURN VALUES + * CL_SUCCESS if the routing engine was registered successfully. + * CL_DUPLICATE if a routing engine with the same name + * or type was already registered. + * + * NOTES + * + * SEE ALSO + * external_routing_engine_module_t + *********/ + +/****f* OpenSM: OpenSM/osm_routing_engine_type_str +* NAME +* osm_routing_engine_type_str +* +* DESCRIPTION +* Returns a string for the specified routing engine type.
+* +* SYNOPSIS +*/ +const char *osm_routing_engine_type_str(IN osm_routing_engine_type_t type); +/* +* PARAMETERS +* type +* [in] routing engine type. +* +* RETURN VALUES +* Pointer to routing engine name. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: OpenSM/osm_routing_engine_type +* NAME +* osm_routing_engine_type +* +* DESCRIPTION +* Returns a routing engine type specified routing engine name string. +* +* SYNOPSIS +*/ +osm_routing_engine_type_t osm_routing_engine_type(IN const char *str); +/* +* PARAMETERS +* str +* [in] routing engine name string. +* +* RETURN VALUES +* Routing engine type. +* +* NOTES +* +* SEE ALSO +*********/ + +void osm_opensm_report_event(osm_opensm_t *osm, osm_epi_event_id_t event_id, + void *event_data); + +/* dump helpers */ +void osm_dump_mcast_routes(osm_opensm_t * osm); +void osm_dump_all(osm_opensm_t * osm); +void osm_dump_qmap_to_file(osm_opensm_t * p_osm, const char *file_name, + cl_qmap_t * map, + void (*func) (cl_map_item_t *, FILE *, void *), + void *cxt); + +/****v* OpenSM/osm_exit_flag +*/ +extern volatile unsigned int osm_exit_flag; +/* +* DESCRIPTION +* Set to one to cause all threads to leave +*********/ + +END_C_DECLS +#endif /* _OSM_OPENSM_H_ */ diff --git a/include/opensm/osm_partition.h b/include/opensm/osm_partition.h new file mode 100644 index 0000000..515e8d5 --- /dev/null +++ b/include/opensm/osm_partition.h @@ -0,0 +1,279 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_prtn_t. + * This object represents an IBA Partition. + * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_PARTITION_H_ +#define _OSM_PARTITION_H_ + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/Partition +* NAME +* Partition +* +* DESCRIPTION +* The Partition object encapsulates the information needed by the +* OpenSM to manage Partitions. The OpenSM allocates one Partition +* object per Partition in the IBA subnet. 
+* +* The Partition is not thread safe, thus callers must provide +* serialization. +* +* This object should be treated as opaque and should be +* manipulated only through the provided functions. +* +* AUTHOR +* Steve King, Intel +* +*********/ +/****s* OpenSM: Partition/osm_prtn_t +* NAME +* osm_prtn_t +* +* DESCRIPTION +* Partition structure. +* +* The osm_prtn_t object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct osm_prtn { + cl_map_item_t map_item; + ib_net16_t pkey; + uint8_t sl; + cl_map_t full_guid_tbl; + cl_map_t part_guid_tbl; + char name[32]; + osm_mgrp_t **mgrps; + int nmgrps; +} osm_prtn_t; +/* +* FIELDS +* map_item +* Linkage structure for cl_qmap. MUST BE FIRST MEMBER! +* +* pkey +* The IBA defined P_KEY of this Partition. +* +* sl +* The Service Level (SL) associated with this Partition. +* +* full_guid_tbl +* Container of pointers to all Port objects in the Partition +* with full membership, indexed by port GUID. +* +* part_guid_tbl +* Container of pointers to all Port objects in the Partition +* with limited membership, indexed by port GUID. +* +* name +* Name of the Partition as specified in partition +* configuration. +* +* mgrps +* List of well known Multicast Groups +* that were created for this partition (when configured). +* This includes the IPoIB broadcast group. +* +* nmgrps +* Number of known Multicast Groups. +* +* SEE ALSO +* Partition +*********/ + +/****f* OpenSM: Partition/osm_prtn_delete +* NAME +* osm_prtn_delete +* +* DESCRIPTION +* This function destroys and deallocates a Partition object. +* +* SYNOPSIS +*/ +void osm_prtn_delete(IN osm_subn_t * p_subn, IN OUT osm_prtn_t ** pp_prtn); +/* +* PARAMETERS +* pp_prtn +* [in][out] Pointer to a pointer to a Partition object to +* delete. On return, this pointer is NULL. +* +* RETURN VALUE +* This function does not return a value.
+* +* NOTES +* Performs any necessary cleanup of the specified Partition object. +* +* SEE ALSO +* Partition, osm_prtn_new +*********/ + +/****f* OpenSM: Partition/osm_prtn_new +* NAME +* osm_prtn_new +* +* DESCRIPTION +* This function allocates and initializes a Partition object. +* +* SYNOPSIS +*/ +osm_prtn_t *osm_prtn_new(IN const char *name, IN uint16_t pkey); +/* +* PARAMETERS +* name +* [in] Partition name string +* +* pkey +* [in] Partition P_Key value +* +* RETURN VALUE +* Pointer to the initialized Partition object. +* +* NOTES +* Allows calling other partition methods. +* +* SEE ALSO +* Partition +*********/ + +/****f* OpenSM: Partition/osm_prtn_is_guid +* NAME +* osm_prtn_is_guid +* +* DESCRIPTION +* Indicates if a port is a member of the partition. +* +* SYNOPSIS +*/ +static inline boolean_t osm_prtn_is_guid(IN const osm_prtn_t * p_prtn, + IN ib_net64_t guid) +{ + return (cl_map_get(&p_prtn->full_guid_tbl, guid) != NULL) || + (cl_map_get(&p_prtn->part_guid_tbl, guid) != NULL); +} + +/* +* PARAMETERS +* p_prtn +* [in] Pointer to an osm_prtn_t object. +* +* guid +* [in] Port GUID. +* +* RETURN VALUES +* TRUE if the specified port GUID is a member of the partition, +* FALSE otherwise. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Partition/osm_prtn_make_partitions +* NAME +* osm_prtn_make_partitions +* +* DESCRIPTION +* Makes all partitions in subnet. +* +* SYNOPSIS +*/ +ib_api_status_t osm_prtn_make_partitions(IN osm_log_t * p_log, + IN osm_subn_t * p_subn); +/* +* PARAMETERS +* p_log +* [in] Pointer to a log object. +* +* p_subn +* [in] Pointer to subnet object. +* +* RETURN VALUES +* IB_SUCCESS value on success. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Partition/osm_prtn_find_by_name +* NAME +* osm_prtn_find_by_name +* +* DESCRIPTION +* Finds partition by name. +* +* SYNOPSIS +*/ +osm_prtn_t *osm_prtn_find_by_name(IN osm_subn_t * p_subn, IN const char *name); +/* +* PARAMETERS +* p_subn +* [in] Pointer to a subnet object.
+* +* name +* [in] Required partition name. +* +* RETURN VALUES +* Pointer to the partition object on success. +* +* NOTES +* +* SEE ALSO +*********/ + +END_C_DECLS +#endif /* _OSM_PARTITION_H_ */ diff --git a/include/opensm/osm_path.h b/include/opensm/osm_path.h new file mode 100644 index 0000000..1c2d32d --- /dev/null +++ b/include/opensm/osm_path.h @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef _OSM_PATH_H_ +#define _OSM_PATH_H_ + +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/* + * Abstract: + * Declaration of path related objects. + * These objects are part of the OpenSM family of objects. + */ +/****h* OpenSM/DR Path +* NAME +* DR Path +* +* DESCRIPTION +* The DR Path structure encapsulates a directed route through the subnet. +* +* This structure allows direct access to member variables. +* +* AUTHOR +* Steve King, Intel +* +*********/ +/****s* OpenSM: DR Path/osm_dr_path_t +* NAME +* osm_dr_path_t +* +* DESCRIPTION +* Directed Route structure. +* +* This structure allows direct access to member variables. +* +* SYNOPSIS +*/ +typedef struct osm_dr_path { + uint8_t hop_count; + uint8_t path[IB_SUBNET_PATH_HOPS_MAX]; +} osm_dr_path_t; +/* +* FIELDS +* hop_count +* The number of hops in this path. +* +* path +* The array of port numbers that comprise this path. +* +* SEE ALSO +* DR Path structure +*********/ +/****f* OpenSM: DR Path/osm_dr_path_construct +* NAME +* osm_dr_path_construct +* +* DESCRIPTION +* This function constructs a directed route path object. +* +* SYNOPSIS +*/ +static inline void osm_dr_path_construct(IN osm_dr_path_t * p_path) +{ + memset(p_path, 0, sizeof(*p_path)); +} + +/* +* PARAMETERS +* p_path +* [in] Pointer to a directed route path object to initialize. +* +* RETURN VALUE +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: DR Path/osm_dr_path_init +* NAME +* osm_dr_path_init +* +* DESCRIPTION +* This function initializes a directed route path object. +* +* SYNOPSIS +*/ +static inline void +osm_dr_path_init(IN osm_dr_path_t * p_path, IN uint8_t hop_count, + IN const uint8_t path[IB_SUBNET_PATH_HOPS_MAX]) +{ + /* The first location in the path array is reserved. 
*/ + CL_ASSERT(path[0] == 0); + CL_ASSERT(hop_count < IB_SUBNET_PATH_HOPS_MAX); + p_path->hop_count = hop_count; + memcpy(p_path->path, path, hop_count + 1); +} + +/* +* PARAMETERS +* p_path +* [in] Pointer to a directed route path object to initialize. +* +* hop_count +* [in] Hop count needed to reach this node. +* +* path +* [in] Directed route path to reach this node. +* +* RETURN VALUE +* None. +* +* NOTES +* +* SEE ALSO +*********/ +/****f* OpenSM: DR Path/osm_dr_path_extend +* NAME +* osm_dr_path_extend +* +* DESCRIPTION +* Adds a new hop to a path. +* +* SYNOPSIS +*/ +static inline int osm_dr_path_extend(IN osm_dr_path_t * p_path, + IN uint8_t port_num) +{ + /* Reject a full path before modifying it, so failure leaves it intact. */ + if (p_path->hop_count >= IB_SUBNET_PATH_HOPS_MAX - 1) + return -1; + + /* + Location 0 in the path array is reserved per IB spec. + */ + p_path->path[++p_path->hop_count] = port_num; + return 0; +} + +/* +* PARAMETERS +* p_path +* [in] Pointer to a directed route path object to extend. +* +* port_num +* [in] Additional port to add to the DR path. +* +* RETURN VALUES +* 0 indicates path was extended. +* Other than 0 indicates path was not extended (and is left unchanged). +* +* NOTES +* +* SEE ALSO +*********/ + +END_C_DECLS +#endif /* _OSM_PATH_H_ */ diff --git a/include/opensm/osm_perfmgr.h b/include/opensm/osm_perfmgr.h new file mode 100644 index 0000000..ec12eb6 --- /dev/null +++ b/include/opensm/osm_perfmgr.h @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2007 The Regents of the University of California. + * Copyright (c) 2007-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2009,2010 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _OSM_PERFMGR_H_ +#define _OSM_PERFMGR_H_ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#ifdef ENABLE_OSM_PERF_MGR + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/****h* OpenSM/PerfMgr +* NAME +* PerfMgr +* +* DESCRIPTION +* Performance manager thread which takes care of polling the fabric for +* Port counters values. +* +* The PerfMgr object is thread safe. 
+* +* AUTHOR +* Ira Weiny, LLNL +* +*********/ + +#define OSM_PERFMGR_DEFAULT_SWEEP_TIME_S 180 +#define OSM_PERFMGR_DEFAULT_DUMP_FILE "opensm_port_counters.log" +#define OSM_PERFMGR_DEFAULT_MAX_OUTSTANDING_QUERIES 500 +#define OSM_PERFMGR_DEFAULT_XMIT_WAIT_THRESHOLD 0x0000FFFF + +/****s* OpenSM: PerfMgr/osm_perfmgr_state_t */ +typedef enum { + PERFMGR_STATE_DISABLE, + PERFMGR_STATE_ENABLED, + PERFMGR_STATE_NO_DB +} osm_perfmgr_state_t; + +/****s* OpenSM: PerfMgr/osm_perfmgr_sweep_state_t */ +typedef enum { + PERFMGR_SWEEP_SLEEP, + PERFMGR_SWEEP_ACTIVE, + PERFMGR_SWEEP_SUSPENDED, + PERFMGR_SWEEP_POST_PROCESSING +} osm_perfmgr_sweep_state_t; + +typedef struct monitored_port { + uint16_t pkey_ix; + ib_net16_t orig_lid; + boolean_t redirection; + boolean_t valid; + /* Redirection fields from ClassPortInfo */ + ib_gid_t gid; + ib_net16_t lid; + ib_net16_t pkey; + ib_net32_t qp; + /* ClassPortInfo fields */ + boolean_t cpi_valid; + ib_net16_t cap_mask; + /* Remote end connected to */ + boolean_t remote_valid; + uint64_t remote_guid; + char *remote_name; + uint8_t remote_port; +} monitored_port_t; + +/* Node to store information about nodes being monitored */ +typedef struct monitored_node { + cl_map_item_t map_item; + struct monitored_node *next; + uint64_t guid; + uint8_t node_type; + boolean_t esp0; + char *name; + uint32_t num_ports; + monitored_port_t port[1]; +} monitored_node_t; + +struct osm_opensm; + +/****s* OpenSM: PerfMgr/osm_perfmgr_t +* This object should be treated as opaque and should +* be manipulated only through the provided functions. 
+*/ +typedef struct osm_perfmgr { + cl_timer_t sweep_timer; + struct osm_opensm *osm; + osm_subn_t *subn; + osm_sm_t *sm; + osm_log_t *log; + osm_mad_pool_t *mad_pool; + atomic32_t trans_id; + osm_vendor_t *vendor; + osm_bind_handle_t bind_handle; + cl_disp_reg_handle_t pc_disp_h; + osm_perfmgr_state_t state; + osm_perfmgr_sweep_state_t sweep_state; + cl_spinlock_t lock; + uint16_t sweep_time_s; + perfmgr_db_t *db; + atomic32_t outstanding_queries; /* this along with sig_query */ + cl_event_t sig_query; /* will throttle our queries */ + uint32_t max_outstanding_queries; + boolean_t ignore_cas; + cl_qmap_t monitored_map; /* map the nodes being tracked */ + monitored_node_t *remove_list; + ib_net64_t port_guid; + int16_t local_port; + int rm_nodes; + boolean_t query_cpi; + boolean_t xmit_wait_log; + uint32_t xmit_wait_threshold; +} osm_perfmgr_t; +/* +* FIELDS +* subn +* Subnet object for this subnet. +* +* log +* Pointer to the log object. +* +* mad_pool +* Pointer to the MAD pool. +* +* mad_ctrl +* Mad Controller +*********/ + +/****f* OpenSM: Creation Functions */ +void osm_perfmgr_shutdown(osm_perfmgr_t * p_perfmgr); +void osm_perfmgr_destroy(osm_perfmgr_t * p_perfmgr); + +/****f* OpenSM: Inline accessor functions */ +inline static void osm_perfmgr_set_state(osm_perfmgr_t * p_perfmgr, + osm_perfmgr_state_t state) +{ + p_perfmgr->state = state; + if (state == PERFMGR_STATE_ENABLED) { + cl_timer_start(&p_perfmgr->sweep_timer, p_perfmgr->sweep_time_s * 1000); + } else { + cl_timer_stop(&p_perfmgr->sweep_timer); + } +} + +inline static osm_perfmgr_state_t osm_perfmgr_get_state(osm_perfmgr_t * perfmgr) +{ + return perfmgr->state; +} + +inline static void osm_perfmgr_set_rm_nodes(osm_perfmgr_t *perfmgr, + int rm_nodes) +{ + perfmgr->rm_nodes = rm_nodes; +} + +inline static int osm_perfmgr_get_rm_nodes(osm_perfmgr_t *perfmgr) +{ + return perfmgr->rm_nodes; +} + +inline static void osm_perfmgr_set_query_cpi(osm_perfmgr_t *perfmgr, + int query_cpi) +{ + perfmgr->query_cpi 
= query_cpi; +} + +inline static int osm_perfmgr_get_query_cpi(osm_perfmgr_t *perfmgr) +{ + return perfmgr->query_cpi; +} + +inline static const char *osm_perfmgr_get_state_str(osm_perfmgr_t * p_perfmgr) +{ + switch (p_perfmgr->state) { + case PERFMGR_STATE_DISABLE: + return "Disabled"; + break; + case PERFMGR_STATE_ENABLED: + return "Enabled"; + break; + case PERFMGR_STATE_NO_DB: + return "No Database"; + break; + } + return "UNKNOWN"; +} + +inline static const char *osm_perfmgr_get_sweep_state_str(osm_perfmgr_t * perfmgr) +{ + switch (perfmgr->sweep_state) { + case PERFMGR_SWEEP_SLEEP: + return "Sleeping"; + break; + case PERFMGR_SWEEP_ACTIVE: + return "Active"; + break; + case PERFMGR_SWEEP_SUSPENDED: + return "Suspended"; + break; + case PERFMGR_SWEEP_POST_PROCESSING: + return "PostProcessing"; + break; + } + return "UNKNOWN"; +} + +inline static void osm_perfmgr_set_sweep_time_s(osm_perfmgr_t * p_perfmgr, + uint16_t time_s) +{ + p_perfmgr->sweep_time_s = time_s; + osm_sm_signal(p_perfmgr->sm, OSM_SIGNAL_PERFMGR_SWEEP); +} + +inline static uint16_t osm_perfmgr_get_sweep_time_s(osm_perfmgr_t * p_perfmgr) +{ + return p_perfmgr->sweep_time_s; +} + +inline static unsigned osm_perfmgr_delete_inactive(osm_perfmgr_t * pm) +{ + unsigned rc; + perfmgr_db_delete_inactive(pm->db, &rc); + return (rc); +} + +void osm_perfmgr_clear_counters(osm_perfmgr_t * p_perfmgr); +void osm_perfmgr_dump_counters(osm_perfmgr_t * p_perfmgr, + perfmgr_db_dump_t dump_type); +void osm_perfmgr_print_counters(osm_perfmgr_t *pm, char *nodename, FILE *fp, + char *port, int err_only); +void osm_perfmgr_update_nodename(osm_perfmgr_t *pm, uint64_t node_guid, + char *nodename); + +ib_api_status_t osm_perfmgr_bind(osm_perfmgr_t * p_perfmgr, + ib_net64_t port_guid); + +void osm_perfmgr_process(osm_perfmgr_t * pm); + +/****f* OpenSM: PerfMgr/osm_perfmgr_init */ +ib_api_status_t osm_perfmgr_init(osm_perfmgr_t * perfmgr, + struct osm_opensm *osm, + const osm_subn_opt_t * p_opt); +/* +* PARAMETERS +* 
perfmgr +* [in] Pointer to an osm_perfmgr_t object to initialize. +* +* osm +* [in] Pointer to the OpenSM object. +* +* p_opt +* [in] Pointer to the subnet options structure. +* +* RETURN VALUES +* IB_SUCCESS if the PerfMgr object was initialized successfully. +*********/ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* ENABLE_OSM_PERF_MGR */ + +#endif /* _OSM_PERFMGR_H_ */ diff --git a/include/opensm/osm_perfmgr_db.h b/include/opensm/osm_perfmgr_db.h new file mode 100644 index 0000000..25d9c58 --- /dev/null +++ b/include/opensm/osm_perfmgr_db.h @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2007 The Regents of the University of California. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _PERFMGR_EVENT_DB_H_ +#define _PERFMGR_EVENT_DB_H_ + +#ifdef ENABLE_OSM_PERF_MGR + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS + +struct osm_perfmgr; +/****h* OpenSM/PerfMgr Event Database +* DESCRIPTION +* Database interface to record subnet events +* +* Implementations of this object _MUST_ be thread safe. +* +* AUTHOR +* Ira Weiny, LLNL +* +*********/ +typedef enum { + PERFMGR_EVENT_DB_SUCCESS = 0, + PERFMGR_EVENT_DB_FAIL, + PERFMGR_EVENT_DB_NOMEM, + PERFMGR_EVENT_DB_GUIDNOTFOUND, + PERFMGR_EVENT_DB_PORTNOTFOUND, + PERFMGR_EVENT_DB_NOT_IMPL +} perfmgr_db_err_t; + +/** ========================================================================= + * Port error reading + */ +typedef struct { + uint64_t symbol_err_cnt; + uint64_t link_err_recover; + uint64_t link_downed; + uint64_t rcv_err; + uint64_t rcv_rem_phys_err; + uint64_t rcv_switch_relay_err; + uint64_t xmit_discards; + uint64_t xmit_constraint_err; + uint64_t rcv_constraint_err; + uint64_t link_integrity; + uint64_t buffer_overrun; + uint64_t vl15_dropped; + uint64_t xmit_wait; + time_t time; +} perfmgr_db_err_reading_t; + +/** ========================================================================= + * Port data count reading + */ +typedef struct { + uint64_t xmit_data; /* can be used for std or extended */ + uint64_t rcv_data; /* can be used for std or extended */ + uint64_t xmit_pkts; /* can be used for std or extended */ + uint64_t rcv_pkts; /* can be used for std or extended */ + uint64_t unicast_xmit_pkts; + 
uint64_t unicast_rcv_pkts; + uint64_t multicast_xmit_pkts; + uint64_t multicast_rcv_pkts; + time_t time; +} perfmgr_db_data_cnt_reading_t; + +/** ========================================================================= + * Dump output options + */ +typedef enum { + PERFMGR_EVENT_DB_DUMP_HR = 0, /* Human readable */ + PERFMGR_EVENT_DB_DUMP_MR /* Machine readable */ +} perfmgr_db_dump_t; + +/** ========================================================================= + * Port counter object. + * Store all the port counters for a single port. + */ +typedef struct db_port { + perfmgr_db_err_reading_t err_total; + perfmgr_db_err_reading_t err_previous; + perfmgr_db_data_cnt_reading_t dc_total; + perfmgr_db_data_cnt_reading_t dc_previous; + time_t last_reset; + boolean_t valid; +} db_port_t; + +/** ========================================================================= + * group port counters for ports into the nodes + */ +#define NODE_NAME_SIZE (IB_NODE_DESCRIPTION_SIZE + 1) +typedef struct db_node { + cl_map_item_t map_item; /* must be first */ + uint64_t node_guid; + boolean_t active; /* actively being monitored */ + boolean_t esp0; + db_port_t *ports; + uint8_t num_ports; + char node_name[NODE_NAME_SIZE]; +} db_node_t; + +/** ========================================================================= + * all nodes in the subnet. 
+ */ +typedef struct perfmgr_db { + cl_qmap_t pc_data; /* stores type (db_node_t *) */ + cl_plock_t lock; + struct osm_perfmgr *perfmgr; +} perfmgr_db_t; + +/** + * functions + */ +perfmgr_db_t *perfmgr_db_construct(struct osm_perfmgr *perfmgr); +void perfmgr_db_destroy(perfmgr_db_t * db); + +perfmgr_db_err_t perfmgr_db_create_entry(perfmgr_db_t * db, uint64_t guid, + boolean_t esp0, uint8_t num_ports, + char *node_name); +perfmgr_db_err_t perfmgr_db_delete_entry(perfmgr_db_t * db, uint64_t guid); +perfmgr_db_err_t perfmgr_db_delete_inactive(perfmgr_db_t * db, unsigned *cnt); + +perfmgr_db_err_t perfmgr_db_update_name(perfmgr_db_t * db, uint64_t node_guid, + char *name); + +perfmgr_db_err_t perfmgr_db_add_err_reading(perfmgr_db_t * db, uint64_t guid, + uint8_t port, + perfmgr_db_err_reading_t * reading); +perfmgr_db_err_t perfmgr_db_get_prev_err(perfmgr_db_t * db, uint64_t guid, + uint8_t port, + perfmgr_db_err_reading_t * reading); +perfmgr_db_err_t perfmgr_db_clear_prev_err(perfmgr_db_t * db, uint64_t guid, + uint8_t port); + +perfmgr_db_err_t perfmgr_db_add_dc_reading(perfmgr_db_t * db, uint64_t guid, + uint8_t port, + perfmgr_db_data_cnt_reading_t * + reading, + int ietf_sup); +perfmgr_db_err_t perfmgr_db_get_prev_dc(perfmgr_db_t * db, uint64_t guid, + uint8_t port, + perfmgr_db_data_cnt_reading_t * + reading); +perfmgr_db_err_t perfmgr_db_clear_prev_dc(perfmgr_db_t * db, uint64_t guid, + uint8_t port); + +perfmgr_db_err_t perfmgr_db_mark_active(perfmgr_db_t *db, uint64_t guid, + boolean_t active); + +void perfmgr_db_clear_counters(perfmgr_db_t * db); +perfmgr_db_err_t perfmgr_db_dump(perfmgr_db_t * db, char *file, + perfmgr_db_dump_t dump_type); +void perfmgr_db_print_all(perfmgr_db_t * db, FILE *fp, int err_only); +void perfmgr_db_print_by_name(perfmgr_db_t * db, char *nodename, FILE *fp, + char *port, int err_only); +void perfmgr_db_print_by_guid(perfmgr_db_t * db, uint64_t guid, FILE *fp, + char *port, int err_only); + +/** 
========================================================================= + * helper functions to fill in the various db objects from wire objects + */ + +void perfmgr_db_fill_err_read(ib_port_counters_t * wire_read, + perfmgr_db_err_reading_t * reading, + boolean_t xmit_wait_sup); +void perfmgr_db_fill_data_cnt_read_pc(ib_port_counters_t * wire_read, + perfmgr_db_data_cnt_reading_t * reading); +void perfmgr_db_fill_data_cnt_read_pce(ib_port_counters_ext_t * wire_read, + perfmgr_db_data_cnt_reading_t * reading, + int ietf_sup); + +END_C_DECLS + +#endif /* ENABLE_OSM_PERF_MGR */ + +#endif /* _PERFMGR_EVENT_DB_H_ */ diff --git a/include/opensm/osm_pkey.h b/include/opensm/osm_pkey.h new file mode 100644 index 0000000..de4f2f9 --- /dev/null +++ b/include/opensm/osm_pkey.h @@ -0,0 +1,730 @@ +/* + * Copyright (c) 2010 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2012 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _OSM_PKEY_H_ +#define _OSM_PKEY_H_ + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/* + Forward references. +*/ +struct osm_physp; +struct osm_port; +struct osm_subn; +struct osm_node; +struct osm_physp; + +/* + * Abstract: + * Declaration of pkey manipulation functions. + */ + +/****s* OpenSM: osm_pkey_tbl_t +* NAME +* osm_pkey_tbl_t +* +* DESCRIPTION +* This object represents a pkey table. The need for a special object +* is required to optimize search performance of a PKey in the IB standard +* non sorted table. +* +* The osm_pkey_tbl_t object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct osm_pkeybl { + cl_map_t accum_pkeys; + cl_ptr_vector_t blocks; + cl_ptr_vector_t new_blocks; + cl_map_t keys; + cl_qlist_t pending; + uint16_t last_pkey_idx; + uint16_t used_blocks; + uint16_t max_blocks; + uint16_t rcv_blocks_cnt; + uint16_t indx0_pkey; +} osm_pkey_tbl_t; +/* +* FIELDS +* accum_pkeys +* Accumulated pkeys with pkey index. Used to +* preserve pkey index. 
+* +* blocks +* The IBA defined blocks of pkey values, updated from the subnet +* +* new_blocks +* The blocks of pkey values, will be used for updates by SM +* +* keys +* A set holding all keys +* +* pending +* A list of osm_pending_pkey structs that is temporarily set by +* the pkey mgr and used during pkey mgr algorithm only +* +* used_blocks +* Tracks the number of blocks having non-zero pkeys +* +* max_blocks +* The maximal number of blocks this partition table might hold +* this value is based on node_info (for port 0 or CA) or +* switch_info updated on receiving the node_info or switch_info +* GetResp +* +* rcv_blocks_cnt +* Counter for the received GetPKeyTable mads. +* For every GetPKeyTable mad we send, increase the counter, +* and for every GetRespPKeyTable we decrease the counter. +* +* indx0_pkey +* stores the pkey to be inserted at block 0 index 0. +* if this field is 0, the default pkey will be inserted. +* +* NOTES +* 'blocks' vector should be used to store pkey values obtained from +* the port and SM pkey manager should not change it directly, for this +* purpose 'new_blocks' should be used. 
+* +* The only pkey values stored in 'blocks' vector will be mapped with +* 'keys' map +* +*********/ + +/****s* OpenSM: osm_pending_pkey_t +* NAME +* osm_pending_pkey_t +* +* DESCRIPTION +* This objects stores temporary information on pkeys, their target block, +* and index during the pkey manager operation +* +* SYNOPSIS +*/ +typedef struct osm_pending_pkey { + cl_list_item_t list_item; + uint16_t pkey; + uint16_t block; + uint8_t index; + boolean_t is_new; +} osm_pending_pkey_t; +/* +* FIELDS +* pkey +* The actual P_Key +* +* block +* The block index based on the previous table extracted from the +* device +* +* index +* The index of the pkey within the block +* +* is_new +* TRUE for new P_Keys such that the block and index are invalid +* in that case +* +*********/ + +/****f* OpenSM: osm_pkey_tbl_construct +* NAME +* osm_pkey_tbl_construct +* +* DESCRIPTION +* Constructs the PKey table object +* +* SYNOPSIS +*/ +void osm_pkey_tbl_construct(IN osm_pkey_tbl_t * p_pkey_tbl); +/* +* p_pkey_tbl +* [in] Pointer to osm_pkey_tbl_t object. +* +* NOTES +* +*********/ + +/****f* OpenSM: osm_pkey_tbl_init +* NAME +* osm_pkey_tbl_init +* +* DESCRIPTION +* Inits the PKey table object +* +* SYNOPSIS +*/ +ib_api_status_t osm_pkey_tbl_init(IN osm_pkey_tbl_t * p_pkey_tbl); +/* +* p_pkey_tbl +* [in] Pointer to osm_pkey_tbl_t object. +* +* NOTES +* +*********/ + +/****f* OpenSM: osm_pkey_tbl_destroy +* NAME +* osm_pkey_tbl_destroy +* +* DESCRIPTION +* Destroys the PKey table object +* +* SYNOPSIS +*/ +void osm_pkey_tbl_destroy(IN osm_pkey_tbl_t * p_pkey_tbl); +/* +* p_pkey_tbl +* [in] Pointer to osm_pkey_tbl_t object. 
+* +* NOTES +* +*********/ + +/****f* OpenSM: osm_pkey_tbl_get_num_blocks +* NAME +* osm_pkey_tbl_get_num_blocks +* +* DESCRIPTION +* Obtain the number of blocks in IB PKey table +* +* SYNOPSIS +*/ +static inline uint16_t +osm_pkey_tbl_get_num_blocks(IN const osm_pkey_tbl_t * p_pkey_tbl) +{ + return ((uint16_t) (cl_ptr_vector_get_size(&p_pkey_tbl->blocks))); +} + +/* +* p_pkey_tbl +* [in] Pointer to osm_pkey_tbl_t object. +* +* RETURN VALUES +* The IB pkey table of that pkey table element +* +* NOTES +* +*********/ + +/****f* OpenSM: osm_pkey_tbl_block_get +* NAME +* osm_pkey_tbl_block_get +* +* DESCRIPTION +* Obtain the pointer to the IB PKey table block stored in the object +* +* SYNOPSIS +*/ +static inline ib_pkey_table_t *osm_pkey_tbl_block_get(const osm_pkey_tbl_t * + p_pkey_tbl, + uint16_t block) +{ + return ((block < cl_ptr_vector_get_size(&p_pkey_tbl->blocks)) ? + (ib_pkey_table_t *)cl_ptr_vector_get( + &p_pkey_tbl->blocks, block) : NULL); +}; + +/* +* p_pkey_tbl +* [in] Pointer to osm_pkey_tbl_t object. +* +* block +* [in] The block number to get +* +* RETURN VALUES +* The IB pkey table of that pkey table element +* +* NOTES +* +*********/ + +/****f* OpenSM: osm_pkey_tbl_new_block_get +* NAME +* osm_pkey_tbl_new_block_get +* +* DESCRIPTION +* The same as above but for new block +* +* SYNOPSIS +*/ +static inline ib_pkey_table_t *osm_pkey_tbl_new_block_get(const osm_pkey_tbl_t * + p_pkey_tbl, + uint16_t block) +{ + return ((block < cl_ptr_vector_get_size(&p_pkey_tbl->new_blocks)) ? 
+ (ib_pkey_table_t *)cl_ptr_vector_get( + &p_pkey_tbl->new_blocks, block) : NULL); +}; + +/****f* OpenSM: osm_pkey_find_last_accum_pkey_index + * NAME + * osm_pkey_find_last_accum_pkey_index + * + * DESCRIPTION + * Finds the next last accumulated pkey + * + * SYNOPSIS + */ +void osm_pkey_find_last_accum_pkey_index(IN osm_pkey_tbl_t * p_pkey_tbl); + + +/****f* OpenSM: osm_pkey_tbl_set_accum_pkeys +* NAME +* osm_pkey_tbl_set_accum_pkeys +* +* DESCRIPTION +* Stores the given pkey and pkey index in the "accum_pkeys" array +* +* SYNOPSIS +*/ +cl_status_t +osm_pkey_tbl_set_accum_pkeys(IN osm_pkey_tbl_t * p_pkey_tbl, + IN uint16_t pkey, IN uint16_t pkey_idx); +/* +* p_pkey_tbl +* [in] Pointer to the PKey table +* +* pkey +* [in] PKey to store +* +* pkey_idx +* [in] The overall index +* +* RETURN VALUES +* CL_SUCCESS if OK +* CL_INSUFFICIENT_MEMORY if failed +* +*********/ + +/****f* OpenSM: osm_pkey_tbl_set_new_entry +* NAME +* osm_pkey_tbl_set_new_entry +* +* DESCRIPTION +* Stores the given pkey in the "new" blocks array and update +* the "map" to show that on the "old" blocks +* +* SYNOPSIS +*/ +ib_api_status_t +osm_pkey_tbl_set_new_entry(IN osm_pkey_tbl_t * p_pkey_tbl, + IN uint16_t block_idx, + IN uint8_t pkey_idx, IN uint16_t pkey); +/* +* p_pkey_tbl +* [in] Pointer to the PKey table +* +* block_idx +* [in] The block index to use +* +* pkey_idx +* [in] The index within the block +* +* pkey +* [in] PKey to store +* +* RETURN VALUES +* IB_SUCCESS if OK +* IB_ERROR if failed +* +*********/ + +/****f* OpenSM: osm_pkey_find_next_free_entry +* NAME +* osm_pkey_find_next_free_entry +* +* DESCRIPTION +* Find the next free entry in the PKey table starting at the given +* index and block number. The user should increment pkey_idx before +* next call +* Inspect the "new" blocks array for empty space. 
+* +* SYNOPSIS +*/ +boolean_t +osm_pkey_find_next_free_entry(IN osm_pkey_tbl_t * p_pkey_tbl, + OUT uint16_t * p_block_idx, + OUT uint8_t * p_pkey_idx); +/* +* p_pkey_tbl +* [in] Pointer to the PKey table +* +* p_block_idx +* [out] The block index to use +* +* p_pkey_idx +* [out] The index within the block to use +* +* RETURN VALUES +* TRUE if found +* FALSE if did not find +* +*********/ + +/****f* OpenSM: osm_pkey_tbl_init_new_blocks +* NAME +* osm_pkey_tbl_init_new_blocks +* +* DESCRIPTION +* Initializes new_blocks vector content (allocate and clear) +* +* SYNOPSIS +*/ +void osm_pkey_tbl_init_new_blocks(const osm_pkey_tbl_t * p_pkey_tbl); +/* +* p_pkey_tbl +* [in] Pointer to osm_pkey_tbl_t object. +* +* NOTES +* +*********/ + +/****f* OpenSM: osm_pkey_tbl_get_block_and_idx +* NAME +* osm_pkey_tbl_get_block_and_idx +* +* DESCRIPTION +* Set the block index and pkey index the given +* pkey is found in. Return IB_NOT_FOUND if could +* not find it, IB_SUCCESS if OK +* +* SYNOPSIS +*/ +ib_api_status_t +osm_pkey_tbl_get_block_and_idx(IN osm_pkey_tbl_t * p_pkey_tbl, + IN uint16_t * p_pkey, + OUT uint16_t * block_idx, + OUT uint8_t * pkey_index); +/* +* p_pkey_tbl +* [in] Pointer to osm_pkey_tbl_t object. +* +* p_pkey +* [in] Pointer to the P_Key entry searched +* +* block_idx +* [out] Pointer to the block index to be updated +* +* pkey_index +* [out] Pointer to the pkey index (in the block) to be updated +* +* NOTES +* +*********/ + +/****f* OpenSM: osm_pkey_tbl_set +* NAME +* osm_pkey_tbl_set +* +* DESCRIPTION +* Set the PKey table block provided in the PKey object. 
+* +* SYNOPSIS +*/ +ib_api_status_t +osm_pkey_tbl_set(IN osm_pkey_tbl_t * p_pkey_tbl, + IN uint16_t block, IN ib_pkey_table_t * p_tbl, + IN boolean_t allow_both_pkeys); +/* +* p_pkey_tbl +* [in] Pointer to osm_pkey_tbl_t object +* +* block +* [in] The block number to set +* +* p_tbl +* [in] The IB PKey block to copy to the object +* +* allow_both_pkeys +* [in] Whether both full and limited membership on same partition +* are allowed +* +* RETURN VALUES +* IB_SUCCESS or IB_ERROR +* +* NOTES +* +*********/ + +/****f* OpenSM: osm_physp_share_this_pkey +* NAME +* osm_physp_share_this_pkey +* +* DESCRIPTION +* Checks if the given physical ports share the specified pkey. +* +* SYNOPSIS +*/ +boolean_t osm_physp_share_this_pkey(IN const struct osm_physp * p_physp1, + IN const struct osm_physp * p_physp2, + IN ib_net16_t pkey, + IN boolean_t allow_both_pkeys); +/* +* PARAMETERS +* +* p_physp1 +* [in] Pointer to an osm_physp_t object. +* +* p_physp2 +* [in] Pointer to an osm_physp_t object. +* +* pkey +* [in] value of P_Key to check. +* +* allow_both_pkeys +* [in] whether both pkeys allowed policy is being used. +* +* RETURN VALUES +* Returns TRUE if the two ports are matching. +* FALSE otherwise. +* +* NOTES +* +*********/ + +/****f* OpenSM: osm_physp_find_common_pkey +* NAME +* osm_physp_find_common_pkey +* +* DESCRIPTION +* Returns first matching P_Key values for specified physical ports. +* +* SYNOPSIS +*/ +ib_net16_t osm_physp_find_common_pkey(IN const struct osm_physp *p_physp1, + IN const struct osm_physp *p_physp2, + IN boolean_t allow_both_pkeys); +/* +* PARAMETERS +* +* p_physp1 +* [in] Pointer to an osm_physp_t object. +* +* p_physp2 +* [in] Pointer to an osm_physp_t object. +* +* allow_both_pkeys +* [in] Whether both full and limited membership on same partition +* are allowed +* +* RETURN VALUES +* Returns value of first shared P_Key or INVALID P_Key (0x0) if not +* found. 
+* +* NOTES +* +*********/ + +/****f* OpenSM: osm_physp_share_pkey +* NAME +* osm_physp_share_pkey +* +* DESCRIPTION +* Checks if the given physical ports share a pkey. +* The meaning P_Key matching: +* 10.9.3 : +* In the following, let M_P_Key(Message P_Key) be the P_Key in the incoming +* packet and E_P_Key(Endnode P_Key) be the P_Key it is being compared against +* in the packet's destination endnode. +* +* If: +* * neither M_P_Key nor E_P_Key are the invalid P_Key +* * and the low-order 15 bits of the M_P_Key match the low order 15 +* bits of the E_P_Key +* * and the high order bit(membership type) of both the M_P_Key and +* E_P_Key are not both 0 (i.e., both are not Limited members of +* the partition) +* +* then the P_Keys are said to match. +* +* SYNOPSIS +*/ +boolean_t osm_physp_share_pkey(IN osm_log_t * p_log, + IN const struct osm_physp * p_physp_1, + IN const struct osm_physp * p_physp_2, + IN boolean_t allow_both_pkeys); + +/* +* PARAMETERS +* p_log +* [in] Pointer to a log object. +* +* p_physp_1 +* [in] Pointer to an osm_physp_t object. +* +* p_physp_2 +* [in] Pointer to an osm_physp_t object. +* +* allow_both_pkeys +* [in] Whether both full and limited membership on same partition +* are allowed +* +* RETURN VALUES +* Returns TRUE if the 2 physical ports are matching. +* FALSE otherwise. +* +* NOTES +* +*********/ + +/****f* OpenSM: osm_port_share_pkey +* NAME +* osm_port_share_pkey +* +* DESCRIPTION +* Checks if the given ports (on their default physical port) share a pkey. +* The meaning P_Key matching: +* 10.9.3 : +* In the following, let M_P_Key(Message P_Key) be the P_Key in the incoming +* packet and E_P_Key(Endnode P_Key) be the P_Key it is being compared against +* in the packet's destination endnode. 
+* +* If: +* * neither M_P_Key nor E_P_Key are the invalid P_Key +* * and the low-order 15 bits of the M_P_Key match the low order 15 +* bits of the E_P_Key +* * and the high order bit(membership type) of both the M_P_Key and +* E_P_Key are not both 0 (i.e., both are not Limited members of +* the partition) +* +* then the P_Keys are said to match. +* +* SYNOPSIS +*/ +boolean_t osm_port_share_pkey(IN osm_log_t * p_log, + IN const struct osm_port * p_port_1, + IN const struct osm_port * p_port_2, + IN boolean_t allow_both_pkeys); + +/* +* PARAMETERS +* p_log +* [in] Pointer to a log object. +* +* p_port_1 +* [in] Pointer to an osm_port_t object. +* +* p_port_2 +* [in] Pointer to an osm_port_t object. +* +* RETURN VALUES +* Returns TRUE if the 2 ports are matching. +* FALSE otherwise. +* +* NOTES +* +*********/ + +/****f* OpenSM: osm_physp_has_pkey +* NAME +* osm_physp_has_pkey +* +* DESCRIPTION +* Given a physp and a pkey, check if pkey exists in physp pkey table +* +* SYNOPSIS +*/ +boolean_t osm_physp_has_pkey(IN osm_log_t * p_log, IN ib_net16_t pkey, + IN const struct osm_physp *p_physp); + +/* +* PARAMETERS +* p_log +* [in] Pointer to a log object. +* +* pkey +* [in] pkey number to look for. +* +* p_physp +* [in] Pointer to osm_physp_t object. +* +* RETURN VALUES +* Returns TRUE if p_physp has the given pkey. FALSE otherwise. +* +* NOTES +* +*********/ + +/****f* OpenSM: osm_pkey_tbl_set_indx0_pkey +* NAME +* osm_pkey_tbl_set_indx0_pkey +* +* DESCRIPTION +* Sets given pkey at index0 in given pkey_tbl. +* +* SYNOPSIS +*/ +void osm_pkey_tbl_set_indx0_pkey(IN osm_log_t * p_log, IN ib_net16_t pkey, + IN boolean_t full, + OUT osm_pkey_tbl_t * p_pkey_tbl); +/* +* PARAMETERS +* p_log +* [in] Pointer to a log object. +* +* pkey +* [in] P_Key. +* +* full +* [in] Indication if this is a full/limited membership pkey. +* +* p_pkey_tbl +* [out] Pointer to osm_pkey_tbl_t object in which to set indx0 pkey.
+* +* RETURN VALUES +* None +* +* NOTES +* +*********/ +END_C_DECLS +#endif /* _OSM_PKEY_H_ */ diff --git a/include/opensm/osm_port.h b/include/opensm/osm_port.h new file mode 100644 index 0000000..570325c --- /dev/null +++ b/include/opensm/osm_port.h @@ -0,0 +1,1735 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (C) 2012-2017 Tokyo Institute of Technology. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of port related objects. 
+ * These objects comprise an IBA port. + * These objects are part of the OpenSM family of objects. + */ + +#ifndef _OSM_PORT_H_ +#define _OSM_PORT_H_ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/* + Forward references. +*/ +struct osm_port; +struct osm_node; +struct osm_mgrp; +struct osm_sm; + +/****h* OpenSM/Physical Port +* NAME +* Physical Port +* +* DESCRIPTION +* The Physical Port object encapsulates the information needed by the +* OpenSM to manage physical ports. The OpenSM allocates one Physical Port +* per physical port in the IBA subnet. +* +* In a switch, multiple Physical Port objects share the same port GUID. +* In an end-point, Physical Ports do not share GUID values. +* +* The Physical Port is not thread safe, thus callers must provide +* serialization. +* +* These objects should be treated as opaque and should be +* manipulated only through the provided functions. +* +* AUTHOR +* Steve King, Intel +* +*********/ + +/****s* OpenSM: Physical Port/osm_physp_t +* NAME +* osm_physp_t +* +* DESCRIPTION +* This object represents a physical port on a switch, router or end-point. +* +* The osm_physp_t object should be treated as opaque and should +* be manipulated only through the provided functions.
+* +* SYNOPSIS +*/ +typedef struct osm_physp { + ib_port_info_t port_info; + ib_mlnx_ext_port_info_t ext_port_info; + ib_net64_t port_guid; + ib_net64_t (*p_guids)[]; + uint8_t port_num; + struct osm_node *p_node; + struct osm_physp *p_remote_physp; + boolean_t healthy; + uint8_t vl_high_limit; + unsigned need_update; + unsigned is_prof_ignored; + osm_dr_path_t dr_path; + osm_pkey_tbl_t pkeys; + ib_vl_arb_table_t vl_arb[4]; + cl_ptr_vector_t slvl_by_port; + uint8_t hop_wf; + union { + struct { + ib_sw_cong_setting_t sw_cong_setting; + } sw; + struct { + ib_ca_cong_setting_t ca_cong_setting; + ib_cc_tbl_t cc_tbl[OSM_CCT_ENTRY_MAD_BLOCKS]; + } ca; + } cc; +} osm_physp_t; +/* +* FIELDS +* port_info +* The IBA defined PortInfo data for this port. +* +* ext_port_info +* Mellanox vendor specific ExtendedPortInfo data for this port. +* +* port_guid +* Port GUID value of this port. For switches, +* all ports share the same GUID value. +* +* p_guids +* Pointer to array of GUIDs obtained from GUIDInfo. +* This pointer is null for switch physical/external ports +* (used for endports only). +* +* port_num +* The port number of this port. The PortInfo also +* contains a port_number, but that number is not +* the port number of this port, but rather the number +* of the port that received the SMP during discovery. +* Therefore, we must keep a separate record for this +* port's port number. +* +* p_node +* Pointer to the parent Node object of this Physical Port. +* +* p_remote_physp +* Pointer to the Physical Port on the other side of the wire. +* If this pointer is NULL no link exists at this port. +* +* healthy +* Tracks the health of the port. Normally should be TRUE but +* might change as a result of incoming traps indicating the port +* health is questionable.
+* +* vl_high_limit +* PortInfo:VLHighLimit value which is installed by the QoS manager +* and should be uploaded to port's PortInfo +* +* need_update +* When set indicates that port was probably reset and port +* related tables (PKey, SL2VL, VLArb) require refreshing. +* +* is_prof_ignored +* When set indicates that switch port will be ignored by +* the link load equalization algorithm. +* +* dr_path +* The directed route path to this port. +* +* pkeys +* osm_pkey_tbl_t object holding the port PKeys. +* +* vl_arb[] +* Each Physical Port has 4 sections of VL Arbitration table. +* +* slvl_by_port +* A vector of pointers to the sl2vl tables (ordered by input port). +* Switches have an entry for every other input port (inc SMA=0). +* On CAs only one per port. +* +* hop_wf +* Hop weighting factor to be used in the routing. +* +* sw_cong_setting +* Physical port switch congestion settings (switches only) +* +* ca_cong_setting +* Physical port ca congestion settings (cas only) +* +* cc_tbl +* Physical port ca congestion control table (cas only) +* +* SEE ALSO +* Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_construct +* NAME +* osm_physp_construct +* +* DESCRIPTION +* Constructs a Physical Port. +* +* SYNOPSIS +*/ +void osm_physp_construct(IN osm_physp_t * p_physp); +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object to initialize. +* +* RETURN VALUES +* This function does not return a value. +* +* NOTES +* +* SEE ALSO +* Port, Physical Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_init +* NAME +* osm_physp_init +* +* DESCRIPTION +* Initializes a Physical Port for use. +* +* SYNOPSIS +*/ +void osm_physp_init(IN osm_physp_t * p_physp, IN ib_net64_t port_guid, + IN uint8_t port_num, IN const struct osm_node *p_node, + IN osm_bind_handle_t h_bind, IN uint8_t hop_count, + IN const uint8_t * p_initial_path); +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object to initialize.
+* +* port_guid +* [in] GUID value of this port. Switch ports all share +* the same value. +* Caller should use 0 if the guid is unknown. +* +* port_num +* [in] The port number of this port. +* +* p_node +* [in] Pointer to the parent Node object of this Physical Port. +* +* h_bind +* [in] Bind handle on which this port is accessed. +* Caller should use OSM_BIND_INVALID_HANDLE if the bind +* handle to this port is unknown. +* +* hop_count +* [in] Directed route hop count to reach this port. +* Caller should use 0 if the hop count is unknown. +* +* p_initial_path +* [in] Pointer to the directed route path to reach this node. +* Caller should use NULL if the path is unknown. +* +* RETURN VALUES +* This function does not return a value. +* +* NOTES +* +* SEE ALSO +* Port, Physical Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_destroy +* NAME +* osm_physp_destroy +* +* DESCRIPTION +* This function destroys a Physical Port object. +* +* SYNOPSIS +*/ +void osm_physp_destroy(IN osm_physp_t * p_physp); +/* +* PARAMETERS +* p_physp +* [in] Pointer to a PhysPort object to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs any necessary cleanup of the specified PhysPort object. +* Further operations should not be attempted on the destroyed object. +* This function should only be called after a call to osm_physp_construct or +* osm_physp_init. +* +* SEE ALSO +* Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_is_valid +* NAME +* osm_physp_is_valid +* +* DESCRIPTION +* Returns TRUE if the Physical Port has been successfully initialized. +* FALSE otherwise. +* +* SYNOPSIS +*/ +static inline boolean_t osm_physp_is_valid(IN const osm_physp_t * p_physp) +{ + CL_ASSERT(p_physp); + return (p_physp->port_guid != 0); +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* RETURN VALUES +* Returns TRUE if the Physical Port has been successfully initialized. +* FALSE otherwise.
+* +* NOTES +* +* SEE ALSO +* Port, Physical Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_is_healthy +* NAME +* osm_physp_is_healthy +* +* DESCRIPTION +* Returns TRUE if the Physical Port has been marked as healthy +* FALSE otherwise. +* +* SYNOPSIS +*/ +static inline boolean_t osm_physp_is_healthy(IN const osm_physp_t * p_physp) +{ + CL_ASSERT(p_physp); + return p_physp->healthy; +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* RETURN VALUES +* Returns TRUE if the Physical Port has been marked as healthy +* FALSE otherwise. +* All physical ports are initialized as "healthy" but may be marked +* otherwise if a received trap claims otherwise. +* +* NOTES +* +* SEE ALSO +* Port, Physical Port +*********/ + +/****f* OpenSM: Physical Port/osm_link_is_healthy +* NAME +* osm_link_is_healthy +* +* DESCRIPTION +* Returns TRUE if the link given by the physical port is healthy, +* and FALSE otherwise. Link is healthy if both its physical ports are +* healthy +* +* SYNOPSIS +*/ +boolean_t osm_link_is_healthy(IN const osm_physp_t * p_physp); +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* RETURN VALUES +* TRUE if both physical ports on the link are healthy, and FALSE otherwise. +* All physical ports are initialized as "healthy" but may be marked +* otherwise if a received trap claims otherwise. +* +* NOTES +* +* SEE ALSO +* Port, Physical Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_set_health +* NAME +* osm_physp_set_health +* +* DESCRIPTION +* Sets the port health flag. TRUE means the port is healthy and +* should be used for packet routing. FALSE means it should be avoided. +* +* SYNOPSIS +*/ +static inline void osm_physp_set_health(IN osm_physp_t * p_physp, + IN boolean_t is_healthy) +{ + CL_ASSERT(p_physp); + p_physp->healthy = is_healthy; +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* is_healthy +* [in] The health value to be assigned to the port.
+* TRUE if the Physical Port should be marked as healthy +* FALSE otherwise. +* +* RETURN VALUES +* NONE +* +* NOTES +* +* SEE ALSO +* Port, Physical Port +*********/ + +/****f* OpenSM: Physical Port/osm_link_is_throttled +* NAME +* osm_link_is_throttled +* +* DESCRIPTION +* Returns TRUE if the link speed/width given by the physical port is +* below the theoretical maximum enabled by both ports, and FALSE +* otherwise. +* +* SYNOPSIS +*/ +boolean_t osm_link_is_throttled(IN osm_physp_t * p_physp, + IN const boolean_t subn_has_fdr10_enabled); +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* subn_has_fdr10_enabled +* [in] TRUE if FDR10 is enabled for subnet, or FALSE otherwise. +* +* RETURN VALUES +* FALSE if both directions of the link have active link speed/width equal +* to the common maximum which both sides have enabled, and TRUE otherwise. +* The return value is independent of whether or not the link was +* throttled intentionally by the admin or throttled automatically by the +* link auto-negotiation. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Physical Port/osm_physp_set_port_info +* NAME +* osm_physp_set_port_info +* +* DESCRIPTION +* Copies the PortInfo attribute into the Physical Port object +* based on the PortState. +* +* SYNOPSIS +*/ +void osm_physp_set_port_info(IN osm_physp_t * p_physp, + IN const ib_port_info_t * p_pi, + IN const struct osm_sm * p_sm); + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* p_pi +* [in] Pointer to the IBA defined PortInfo at this port number. +* +* p_sm +* [in] Pointer to an osm_sm_t object. +* +* RETURN VALUES +* This function does not return a value. +* +* NOTES +* +* SEE ALSO +* Port, Physical Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_set_base_lid +* NAME +* osm_physp_set_base_lid +* +* DESCRIPTION +* Sets the base lid for this Physical Port.
+* +* SYNOPSIS +*/ +static inline void osm_physp_set_base_lid(IN osm_physp_t * p_physp, + IN ib_net16_t base_lid) +{ + CL_ASSERT(p_physp); + CL_ASSERT(osm_physp_is_valid(p_physp)); + p_physp->port_info.base_lid = base_lid; +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* base_lid +* [in] Lid to set. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Physical Port/osm_physp_set_pkey_tbl +* NAME +* osm_physp_set_pkey_tbl +* +* DESCRIPTION +* Copies the P_Key table into the Physical Port object. +* +* SYNOPSIS +*/ +void osm_physp_set_pkey_tbl(IN osm_log_t * p_log, IN const osm_subn_t * p_subn, + IN osm_physp_t * p_physp, + IN ib_pkey_table_t * p_pkey_tbl, + IN uint16_t block_num, + IN boolean_t is_set); +/* +* PARAMETERS +* p_log +* [in] Pointer to a log object. +* +* p_subn +* [in] Pointer to the subnet data structure. +* +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* p_pkey_tbl +* [in] Pointer to the IBA defined P_Key table for this port +* number. +* +* block_num +* [in] The part of the P_Key table as defined in the IBA +* (valid values 0-2047, and is further limited by the +* partitionCap). +* +* RETURN VALUES +* This function does not return a value. +* +* NOTES +* +* SEE ALSO +* Port, Physical Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_get_pkey_tbl +* NAME +* osm_physp_get_pkey_tbl +* +* DESCRIPTION +* Returns a pointer to the P_Key table object of the Physical Port object. +* +* SYNOPSIS +*/ +static inline const osm_pkey_tbl_t *osm_physp_get_pkey_tbl(IN const osm_physp_t + * p_physp) +{ + CL_ASSERT(osm_physp_is_valid(p_physp)); + /* + (14.2.5.7) - the block number valid values are 0-2047, and are + further limited by the size of the P_Key table specified by the + PartitionCap on the node. + */ + return &p_physp->pkeys; +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* RETURN VALUES +* The pointer to the P_Key table object.
+* +* NOTES +* +* SEE ALSO +* Port, Physical Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_set_slvl_tbl +* NAME +* osm_physp_set_slvl_tbl +* +* DESCRIPTION +* Copies the SLtoVL attribute into the Physical Port object. +* +* SYNOPSIS +*/ +static inline void osm_physp_set_slvl_tbl(IN osm_physp_t * p_physp, + IN ib_slvl_table_t * p_slvl_tbl, + IN uint8_t in_port_num) +{ + ib_slvl_table_t *p_tbl; + + CL_ASSERT(p_slvl_tbl); + CL_ASSERT(osm_physp_is_valid(p_physp)); + p_tbl = (ib_slvl_table_t *)cl_ptr_vector_get(&p_physp->slvl_by_port, in_port_num); + *p_tbl = *p_slvl_tbl; +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* p_slvl_tbl +* [in] Pointer to the IBA defined SLtoVL map table for this +* port number. +* +* in_port_num +* [in] Input Port Number for this SLtoVL. +* +* RETURN VALUES +* This function does not return a value. +* +* NOTES +* +* SEE ALSO +* Port, Physical Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_get_slvl_tbl +* NAME +* osm_physp_get_slvl_tbl +* +* DESCRIPTION +* Returns a pointer to the SLtoVL attribute of the Physical Port object. +* +* SYNOPSIS +*/ +static inline ib_slvl_table_t *osm_physp_get_slvl_tbl(IN const osm_physp_t * + p_physp, + IN uint8_t in_port_num) +{ + ib_slvl_table_t *p_tbl; + + CL_ASSERT(osm_physp_is_valid(p_physp)); + p_tbl = (ib_slvl_table_t *)cl_ptr_vector_get(&p_physp->slvl_by_port, in_port_num); + return p_tbl; +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* in_port_num +* [in] Input Port Number for this SLtoVL. +* +* RETURN VALUES +* The pointer to the SLtoVL table stored for the given input port. +* +* NOTES +* +* SEE ALSO +* Port, Physical Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_set_vla_tbl +* NAME +* osm_physp_set_vla_tbl +* +* DESCRIPTION +* Copies the VL Arbitration attribute into the Physical Port object.
+* +* SYNOPSIS +*/ +static inline void osm_physp_set_vla_tbl(IN osm_physp_t * p_physp, + IN ib_vl_arb_table_t * p_vla_tbl, + IN uint8_t block_num) +{ + CL_ASSERT(p_vla_tbl); + CL_ASSERT(osm_physp_is_valid(p_physp)); + CL_ASSERT((1 <= block_num) && (block_num <= 4)); + p_physp->vl_arb[block_num - 1] = *p_vla_tbl; +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* p_vla_tbl +* [in] Pointer to the IBA defined VL Arbitration table for this +* port number. +* +* block_num +* [in] The part of the VL arbitration as defined in the IBA +* (valid values 1-4) +* +* RETURN VALUES +* This function does not return a value. +* +* NOTES +* +* SEE ALSO +* Port, Physical Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_get_vla_tbl +* NAME +* osm_physp_get_vla_tbl +* +* DESCRIPTION +* Returns a pointer to the VL Arbitration table of the Physical Port object. +* +* SYNOPSIS +*/ +static inline ib_vl_arb_table_t *osm_physp_get_vla_tbl(IN osm_physp_t * p_physp, + IN uint8_t block_num) +{ + CL_ASSERT(osm_physp_is_valid(p_physp)); + CL_ASSERT((1 <= block_num) && (block_num <= 4)); + return &(p_physp->vl_arb[block_num - 1]); +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* block_num +* [in] The part of the VL arbitration as defined in the IBA +* (valid values 1-4) +* +* RETURN VALUES +* The pointer to the VL Arbitration table +* +* NOTES +* +* SEE ALSO +* Port, Physical Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_get_remote +* NAME +* osm_physp_get_remote +* +* DESCRIPTION +* Returns a pointer to the Physical Port on the other side of the wire. +* +* SYNOPSIS +*/ +static inline osm_physp_t *osm_physp_get_remote(IN const osm_physp_t * p_physp) +{ + CL_ASSERT(osm_physp_is_valid(p_physp)); + return p_physp->p_remote_physp; +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* RETURN VALUES +* Returns a pointer to the Physical Port on the other side of +* the wire.
A return value of NULL means there is no link at this port. +* +* NOTES +* +* SEE ALSO +* Port, Physical Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_get_port_guid +* NAME +* osm_physp_get_port_guid +* +* DESCRIPTION +* Returns the port guid of this physical port. +* +* SYNOPSIS +*/ +static inline ib_net64_t osm_physp_get_port_guid(IN const osm_physp_t * p_physp) +{ + CL_ASSERT(osm_physp_is_valid(p_physp)); + return p_physp->port_guid; +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* RETURN VALUES +* Returns the port guid of this physical port. +* +* NOTES +* +* SEE ALSO +* Port, Physical Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_get_subnet_prefix +* NAME +* osm_physp_get_subnet_prefix +* +* DESCRIPTION +* Returns the subnet prefix for this physical port. +* +* SYNOPSIS +*/ +static inline ib_net64_t +osm_physp_get_subnet_prefix(IN const osm_physp_t * p_physp) +{ + CL_ASSERT(osm_physp_is_valid(p_physp)); + return p_physp->port_info.subnet_prefix; +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* RETURN VALUES +* Returns the subnet prefix for this physical port. +* +* NOTES +* +* SEE ALSO +* Port, Physical Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_link_exists +* NAME +* osm_physp_link_exists +* +* DESCRIPTION +* Returns TRUE if the two Physical Ports are linked to each other, i.e. each is the other's remote port. +* FALSE otherwise. +* +* SYNOPSIS +*/ +static inline boolean_t osm_physp_link_exists(IN const osm_physp_t * p_physp, + IN const osm_physp_t * p_remote_physp) +{ + CL_ASSERT(p_physp); + CL_ASSERT(osm_physp_is_valid(p_physp)); + CL_ASSERT(p_remote_physp); + CL_ASSERT(osm_physp_is_valid(p_remote_physp)); + return ((p_physp->p_remote_physp == p_remote_physp) && + (p_remote_physp->p_remote_physp == p_physp)); +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* p_remote_physp +* [in] Pointer to an osm_physp_t object.
+* +* RETURN VALUES +* Returns TRUE if the Physical Port has a link to the specified port. +* FALSE otherwise. +* +* NOTES +* +* SEE ALSO +* Port, Physical Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_link +* NAME +* osm_physp_link +* +* DESCRIPTION +* Sets the pointers to the Physical Ports on the other side of the wire. +* +* SYNOPSIS +*/ +static inline void osm_physp_link(IN osm_physp_t * p_physp, + IN osm_physp_t * p_remote_physp) +{ + CL_ASSERT(p_physp); + CL_ASSERT(p_remote_physp); + p_physp->p_remote_physp = p_remote_physp; + p_remote_physp->p_remote_physp = p_physp; +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object to link. +* +* p_remote_physp +* [in] Pointer to the adjacent osm_physp_t object to link. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +* Port, Physical Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_unlink +* NAME +* osm_physp_unlink +* +* DESCRIPTION +* Clears the pointers to the Physical Port on the other side of the wire. +* +* SYNOPSIS +*/ +static inline void osm_physp_unlink(IN osm_physp_t * p_physp, + IN osm_physp_t * p_remote_physp) +{ + CL_ASSERT(p_physp); + CL_ASSERT(p_remote_physp); + CL_ASSERT(osm_physp_link_exists(p_physp, p_remote_physp)); + p_physp->p_remote_physp = NULL; + p_remote_physp->p_remote_physp = NULL; +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object to unlink. +* +* p_remote_physp +* [in] Pointer to the adjacent osm_physp_t object to unlink. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +* Port, Physical Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_has_any_link +* NAME +* osm_physp_has_any_link +* +* DESCRIPTION +* Returns TRUE if the Physical Port has a link to another port. +* FALSE otherwise.
+* +* SYNOPSIS +*/ +static inline boolean_t osm_physp_has_any_link(IN const osm_physp_t * p_physp) +{ + CL_ASSERT(p_physp); + if (osm_physp_is_valid(p_physp)) + return (p_physp->p_remote_physp != NULL); + else + return FALSE; +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* RETURN VALUES +* Returns TRUE if the Physical Port is valid and has a link to another port. +* FALSE otherwise. +* +* NOTES +* +* SEE ALSO +* Port, Physical Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_get_port_num +* NAME +* osm_physp_get_port_num +* +* DESCRIPTION +* Returns the local port number of this Physical Port. +* +* SYNOPSIS +*/ +static inline uint8_t osm_physp_get_port_num(IN const osm_physp_t * p_physp) +{ + CL_ASSERT(p_physp); + CL_ASSERT(osm_physp_is_valid(p_physp)); + return p_physp->port_num; +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* RETURN VALUES +* Returns the local port number of this Physical Port. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Physical Port/osm_physp_get_node_ptr +* NAME +* osm_physp_get_node_ptr +* +* DESCRIPTION +* Returns a pointer to the parent Node object for this port. +* +* SYNOPSIS +*/ +static inline struct osm_node *osm_physp_get_node_ptr(IN const osm_physp_t * + p_physp) +{ + CL_ASSERT(p_physp); + CL_ASSERT(osm_physp_is_valid(p_physp)); + return p_physp->p_node; +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* RETURN VALUES +* Returns a pointer to the parent Node object for this port. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Physical Port/osm_physp_get_port_state +* NAME +* osm_physp_get_port_state +* +* DESCRIPTION +* Returns the port state of this Physical Port.
+* +* SYNOPSIS +*/ +static inline uint8_t osm_physp_get_port_state(IN const osm_physp_t * p_physp) +{ + CL_ASSERT(p_physp); + CL_ASSERT(osm_physp_is_valid(p_physp)); + return ib_port_info_get_port_state(&p_physp->port_info); +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* RETURN VALUES +* Returns the port state of this Physical Port. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Physical Port/osm_physp_get_base_lid +* NAME +* osm_physp_get_base_lid +* +* DESCRIPTION +* Returns the base lid of this Physical Port. +* +* SYNOPSIS +*/ +static inline ib_net16_t osm_physp_get_base_lid(IN const osm_physp_t * p_physp) +{ + CL_ASSERT(p_physp); + CL_ASSERT(osm_physp_is_valid(p_physp)); + return p_physp->port_info.base_lid; +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* RETURN VALUES +* Returns the base lid of this Physical Port. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Physical Port/osm_physp_get_lmc +* NAME +* osm_physp_get_lmc +* +* DESCRIPTION +* Returns the LMC value of this Physical Port. +* +* SYNOPSIS +*/ +static inline uint8_t osm_physp_get_lmc(IN const osm_physp_t * p_physp) +{ + CL_ASSERT(p_physp); + CL_ASSERT(osm_physp_is_valid(p_physp)); + return ib_port_info_get_lmc(&p_physp->port_info); +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* RETURN VALUES +* Returns the LMC value of this Physical Port. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Physical Port/osm_physp_get_dr_path_ptr +* NAME +* osm_physp_get_dr_path_ptr +* +* DESCRIPTION +* Returns a pointer to the directed route path for this port. +* +* SYNOPSIS +*/ +static inline osm_dr_path_t *osm_physp_get_dr_path_ptr(IN const osm_physp_t * + p_physp) +{ + CL_ASSERT(p_physp); + CL_ASSERT(osm_physp_is_valid(p_physp)); + return (osm_dr_path_t *) & p_physp->dr_path; +} + +/* +* PARAMETERS +* p_physp +* [in] Pointer to a Physical Port object.
+* +* RETURN VALUES +* Returns a non-const pointer to the directed route path for this port. +* +* NOTES +* +* SEE ALSO +* Physical Port object +*********/ + +/****h* OpenSM/Port +* NAME +* Port +* +* DESCRIPTION +* The Port object encapsulates the information needed by the +* OpenSM to manage ports. The OpenSM allocates one Port object +* per port in the IBA subnet. +* +* Each Port object is associated with a single port GUID. A Port object +* contains 1 or more Physical Port objects. An end point node has +* one Physical Port per Port. A switch node has more than +* one Physical Port per Port. +* +* The Port object is not thread safe, thus callers must provide +* serialization. +* +* These objects should be treated as opaque and should be +* manipulated only through the provided functions. +* +* AUTHOR +* Steve King, Intel +* +*********/ + +/****s* OpenSM: Port/osm_port_t +* NAME +* osm_port_t +* +* DESCRIPTION +* This object represents a logical port on a switch, router, or CA. +* +* The osm_port_t object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct osm_port { + cl_map_item_t map_item; + cl_list_item_t list_item; + struct osm_node *p_node; + ib_net64_t guid; + uint32_t discovery_count; + unsigned is_new; + osm_physp_t *p_physp; + cl_qlist_t mcm_list; + int flag; + int use_scatter; + unsigned int cc_timeout_count; + int cc_unavailable_flag; + void *priv; + ib_net16_t lid; +} osm_port_t; +/* +* FIELDS +* map_item +* Linkage structure for cl_qmap. MUST BE FIRST MEMBER! +* +* list_item +* Linkage structure for cl_qlist. Used by ucast mgr during +* LFT calculation. +* +* p_node +* Points to the Node object that owns this port. +* +* guid +* Manufacturer assigned GUID for this port. +* +* discovery_count +* The number of times this port has been discovered +* during the current fabric sweep. This number is reset +* to zero at the start of a sweep.
+* +* p_physp +* The pointer to physical port used when physical +* characteristics contained in the Physical Port are needed. +* +* mcm_list +* Multicast member list +* +* flag +* Utility flag for port management +* +* cc_timeout_count +* Count number of times congestion control config times out. +* +* cc_unavailable_flag +* Flag indicating if congestion control is not supported. +* +* SEE ALSO +* Port, Physical Port, Physical Port Table +*********/ + +/****f* OpenSM: Port/osm_port_delete +* NAME +* osm_port_delete +* +* DESCRIPTION +* This function destroys and deallocates a Port object. +* +* SYNOPSIS +*/ +void osm_port_delete(IN OUT osm_port_t ** pp_port); +/* +* PARAMETERS +* pp_port +* [in][out] Pointer to a pointer to a Port object to delete. +* On return, this pointer is NULL. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs any necessary cleanup of the specified Port object. +* +* SEE ALSO +* Port +*********/ + +/****f* OpenSM: Port/osm_port_new +* NAME +* osm_port_new +* +* DESCRIPTION +* This function allocates and initializes a Port object. +* +* SYNOPSIS +*/ +osm_port_t *osm_port_new(IN const ib_node_info_t * p_ni, + IN struct osm_node *p_parent_node); +/* +* PARAMETERS +* p_ni +* [in] Pointer to the NodeInfo attribute relevant for this port. +* +* p_parent_node +* [in] Pointer to the initialized parent osm_node_t object +* that owns this port. +* +* RETURN VALUE +* Pointer to the initialized Port object. +* +* NOTES +* Allows calling other port methods. +* +* SEE ALSO +* Port +*********/ + +/****f* OpenSM: Port/osm_port_get_base_lid +* NAME +* osm_port_get_base_lid +* +* DESCRIPTION +* Gets the base LID of a port. +* +* SYNOPSIS +*/ +static inline ib_net16_t osm_port_get_base_lid(IN const osm_port_t * p_port) +{ + CL_ASSERT(p_port->p_physp && osm_physp_is_valid(p_port->p_physp)); + return osm_physp_get_base_lid(p_port->p_physp); +} + +/* +* PARAMETERS +* p_port +* [in] Pointer to a Port object.
+* +* RETURN VALUE +* Base LID of the port. +* If the return value is 0, then this port has no assigned LID. +* +* NOTES +* +* SEE ALSO +* Port +*********/ + +/****f* OpenSM: Port/osm_port_get_lmc +* NAME +* osm_port_get_lmc +* +* DESCRIPTION +* Gets the LMC value of a port. +* +* SYNOPSIS +*/ +static inline uint8_t osm_port_get_lmc(IN const osm_port_t * p_port) +{ + CL_ASSERT(p_port->p_physp && osm_physp_is_valid(p_port->p_physp)); + return osm_physp_get_lmc(p_port->p_physp); +} + +/* +* PARAMETERS +* p_port +* [in] Pointer to a Port object. +* +* RETURN VALUE +* Gets the LMC value of a port. +* +* NOTES +* +* SEE ALSO +* Port +*********/ + +/****f* OpenSM: Port/osm_port_get_guid +* NAME +* osm_port_get_guid +* +* DESCRIPTION +* Gets the GUID of a port. +* +* SYNOPSIS +*/ +static inline ib_net64_t osm_port_get_guid(IN const osm_port_t * p_port) +{ + return p_port->guid; +} + +/* +* PARAMETERS +* p_port +* [in] Pointer to a Port object. +* +* RETURN VALUE +* Manufacturer assigned GUID of the port. +* +* NOTES +* +* SEE ALSO +* Port +*********/ + +/****f* OpenSM: Port/osm_port_get_lid_range_ho +* NAME +* osm_port_get_lid_range_ho +* +* DESCRIPTION +* Returns the HOST ORDER lid min and max values for this port, +* based on the lmc value. +* +* SYNOPSIS +*/ +void osm_port_get_lid_range_ho(IN const osm_port_t * p_port, + OUT uint16_t * p_min_lid, + OUT uint16_t * p_max_lid); +/* +* PARAMETERS +* p_port +* [in] Pointer to a Port object. +* +* p_min_lid +* [out] Pointer to the minimum LID value occupied by this port. +* +* p_max_lid +* [out] Pointer to the maximum LID value occupied by this port. +* +* RETURN VALUE +* None. +* +* NOTES +* +* SEE ALSO +* Port +*********/ + +/****f* OpenSM: Physical Port/osm_physp_calc_link_mtu +* NAME +* osm_physp_calc_link_mtu +* +* DESCRIPTION +* Calculate the Port MTU based on current and remote +* physical ports MTU CAP values. 
+* +* SYNOPSIS +*/ +uint8_t osm_physp_calc_link_mtu(IN osm_log_t * p_log, + IN const osm_physp_t * p_physp, + IN uint8_t current_mtu); +/* +* PARAMETERS +* p_log +* [in] Pointer to a log object. +* +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* current_mtu +* [in] Current neighbor mtu on this port +* +* RETURN VALUES +* The MTU of the link to be used. +* +* NOTES +* +* SEE ALSO +* PhysPort object +*********/ + +/****f* OpenSM: Physical Port/osm_physp_calc_link_op_vls +* NAME +* osm_physp_calc_link_op_vls +* +* DESCRIPTION +* Calculate the Port OP_VLS based on current and remote +* physical ports VL CAP values. Allowing user option for a max limit. +* +* SYNOPSIS +*/ +uint8_t osm_physp_calc_link_op_vls(IN osm_log_t * p_log, + IN const osm_subn_t * p_subn, + IN const osm_physp_t * p_physp, + IN uint8_t current_op_vls); +/* +* PARAMETERS +* p_log +* [in] Pointer to a log object. +* +* p_subn +* [in] Pointer to the subnet object for accessing of the options. +* +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* current_op_vls +* [in] Current operational VL on the port +* +* RETURN VALUES +* The OP_VLS of the link to be used. +* +* NOTES +* +* SEE ALSO +* PhysPort object +*********/ + +/****f* OpenSM: Physical Port/osm_physp_replace_dr_path_with_alternate_dr_path +* NAME +* osm_physp_replace_dr_path_with_alternate_dr_path +* +* DESCRIPTION +* Replace the direct route path for the given phys port with an +* alternate path going through a foreign set of phys ports. +* +* SYNOPSIS +*/ +void +osm_physp_replace_dr_path_with_alternate_dr_path(IN osm_log_t * p_log, + IN osm_subn_t const *p_subn, + IN osm_physp_t const *p_physp, + IN osm_bind_handle_t * h_bind); +/* +* PARAMETERS +* p_log +* [in] Pointer to a log object. +* +* p_subn +* [in] Pointer to the subnet object for accessing of the options. +* +* p_physp +* [in] Pointer to an osm_physp_t object. +* +* h_bind +* [in] Pointer to osm_bind_handle_t object.
+* +* RETURN VALUES +* NONE +* +* NOTES +* +* SEE ALSO +* PhysPort object +*********/ + +/****s* OpenSM: Port/osm_alias_guid_t +* NAME +* osm_alias_guid_t +* +* DESCRIPTION +* This object represents an alias guid for an endport. +* +* The osm_alias_guid_t object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct osm_alias_guid { + cl_map_item_t map_item; + ib_net64_t alias_guid; + osm_port_t *p_base_port; +} osm_alias_guid_t; +/* +* FIELDS +* map_item +* Linkage structure for cl_qmap. MUST BE FIRST MEMBER! +* +* alias_guid +* Alias GUID for port obtained from SM GUIDInfo attribute +* +* p_base_port +* Pointer to osm_port_t for base port GUID +* +* SEE ALSO +* Port, Physical Port, Physical Port Table +*/ + +/****f* OpenSM: Port/osm_alias_guid_new +* NAME +* osm_alias_guid_new +* +* DESCRIPTION +* This function allocates and initializes an alias guid object. +* +* SYNOPSIS +*/ +osm_alias_guid_t *osm_alias_guid_new(IN const ib_net64_t alias_guid, + IN osm_port_t *p_base_port); +/* +* PARAMETERS +* alias_guid +* [in] Alias GUID in network order +* +* p_base_port +* [in] Pointer to the port for this base GUID +* +* RETURN VALUE +* Pointer to the initialized alias guid object. +* +* NOTES +* Allows calling other alias guid methods. +* +* SEE ALSO +* Port +*********/ + +/****f* OpenSM: Port/osm_alias_guid_delete +* NAME +* osm_alias_guid_delete +* +* DESCRIPTION +* This function destroys and deallocates an alias guid object. +* +* SYNOPSIS +*/ +void osm_alias_guid_delete(IN OUT osm_alias_guid_t ** pp_alias_guid); +/* +* PARAMETERS +* pp_alias_guid +* [in][out] Pointer to a pointer to an alias guid object to delete. +* On return, this pointer is NULL. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs any necessary cleanup of the specified alias guid object. 
+* +* SEE ALSO +* Port +*********/ + +/****f* OpenSM: Port/osm_alias_guid_get_alias_guid +* NAME +* osm_alias_guid_get_alias_guid +* +* DESCRIPTION +* This function retrieves alias guid from alias guid object. +* +* SYNOPSIS +*/ +static inline ib_net64_t osm_alias_guid_get_alias_guid(IN osm_alias_guid_t *p_alias_guid) +{ + CL_ASSERT(p_alias_guid); + return p_alias_guid->alias_guid; +} +/* +* PARAMETERS +* p_alias_guid +* [in] Pointer to an alias guid object. +* +* RETURN VALUE +* The alias guid stored in the alias guid object. +* +* NOTES +* +* SEE ALSO +* Port +*********/ + +/****f* OpenSM: Port/osm_alias_guid_get_base_guid +* NAME +* osm_alias_guid_get_base_guid +* +* DESCRIPTION +* This function retrieves base guid from alias guid object. +* +* SYNOPSIS +*/ +static inline ib_net64_t osm_alias_guid_get_base_guid(IN osm_alias_guid_t *p_alias_guid) +{ + CL_ASSERT(p_alias_guid); + return osm_port_get_guid(p_alias_guid->p_base_port); +} +/* +* PARAMETERS +* p_alias_guid +* [in] Pointer to an alias guid object. +* +* RETURN VALUE +* The guid of the base port (p_base_port) of the alias guid object. +* +* NOTES +* +* SEE ALSO +* Port +*********/ + +/****f* OpenSM: Port/osm_port_clear_base_lid +* NAME +* osm_port_clear_base_lid +* +* DESCRIPTION +* Clear the base lid of a given port. +* +* SYNOPSIS +*/ +static inline void osm_port_clear_base_lid(OUT osm_port_t * p_port) +{ + CL_ASSERT(p_port); + p_port->lid = 0; + osm_physp_set_base_lid(p_port->p_physp, 0); +} + +/* +* PARAMETERS +* p_port +* [out] Pointer to a Port object. +* +* RETURN VALUE +* None +* +* NOTES +* +* SEE ALSO +* Port +*********/ + +END_C_DECLS +#endif /* _OSM_PORT_H_ */ diff --git a/include/opensm/osm_port_profile.h b/include/opensm/osm_port_profile.h new file mode 100644 index 0000000..dc5b426 --- /dev/null +++ b/include/opensm/osm_port_profile.h @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2005,2008 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of Switch/osm_port_profile_t. + * This object represents a port profile for an IBA switch. + * This object is part of the OpenSM family of objects. 
+ */ + +#ifndef _OSM_PORT_PROFILE_H_ +#define _OSM_PORT_PROFILE_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/Port Profile +* NAME +* Port Profile +* +* DESCRIPTION +* The Port Profile object contains profiling information for +* each Physical Port on a switch. The profile information +* may be used to optimize path selection. +* +* AUTHOR +* Steve King, Intel +* +*********/ +/****s* OpenSM: Switch/osm_port_profile_t +* NAME +* osm_port_profile_t +* +* DESCRIPTION +* The Port Profile object contains profiling information for +* each Physical Port on the switch. The profile information +* may be used to optimize path selection. +* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct osm_port_profile { + uint32_t num_paths; +} osm_port_profile_t; +/* +* FIELDS +* num_paths +* The number of paths using this port. +* +* SEE ALSO +*********/ + +/****s* OpenSM: Switch/osm_port_mask_t +* NAME +* osm_port_mask_t +* +* DESCRIPTION +* The Port Mask object contains a port numbered bit mask +* for whether the port should be ignored by the link load +* equalization algorithm. +* +* SYNOPSIS +*/ +typedef long osm_port_mask_t[32 / sizeof(long)]; +/* +* FIELDS +* osm_port_mask_t +* Bit mask by port number +* +* SEE ALSO +*********/ + +/****f* OpenSM: Port Profile/osm_port_prof_construct +* NAME +* osm_port_prof_construct +* +* DESCRIPTION +* Zeroes all fields of a Port Profile object. +* +* SYNOPSIS +*/ +static inline void osm_port_prof_construct(IN osm_port_profile_t * p_prof) +{ + CL_ASSERT(p_prof); + memset(p_prof, 0, sizeof(*p_prof)); +} +/* +* PARAMETERS +* p_prof +* [in] Pointer to the Port Profile object to construct. +* +* RETURN VALUE +* None.
+* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Port Profile/osm_port_prof_path_count_inc +* NAME +* osm_port_prof_path_count_inc +* +* DESCRIPTION +* Increments the count of the number of paths going through this port. +* +* +* SYNOPSIS +*/ +static inline void osm_port_prof_path_count_inc(IN osm_port_profile_t * p_prof) +{ + CL_ASSERT(p_prof); + p_prof->num_paths++; +} +/* +* PARAMETERS +* p_prof +* [in] Pointer to the Port Profile object. +* +* RETURN VALUE +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Port Profile/osm_port_prof_path_count_get +* NAME +* osm_port_prof_path_count_get +* +* DESCRIPTION +* Returns the count of the number of paths going through this port. +* +* SYNOPSIS +*/ +static inline uint32_t +osm_port_prof_path_count_get(IN const osm_port_profile_t * p_prof) +{ + return p_prof->num_paths; +} +/* +* PARAMETERS +* p_prof +* [in] Pointer to the Port Profile object. +* +* RETURN VALUE +* The number of paths going through this port. +* +* NOTES +* +* SEE ALSO +*********/ + +END_C_DECLS +#endif /* _OSM_PORT_PROFILE_H_ */ diff --git a/include/opensm/osm_prefix_route.h b/include/opensm/osm_prefix_route.h new file mode 100644 index 0000000..829a9ff --- /dev/null +++ b/include/opensm/osm_prefix_route.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef _OSM_PREFIX_ROUTE_H_ +#define _OSM_PREFIX_ROUTE_H_ + +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS + +typedef struct { + cl_list_item_t list_item; /* must be first */ + ib_net64_t prefix; /* zero means "any" */ + ib_net64_t guid; /* zero means "any" */ +} osm_prefix_route_t; + +#ifdef ROUTER_EXP +#error ROUTER_EXP is deprecated, specify prefix routes at runtime instead (see opensm man page for details) +#endif + +END_C_DECLS +#endif /* _OSM_PREFIX_ROUTE_H_ */ diff --git a/include/opensm/osm_qos_policy.h b/include/opensm/osm_qos_policy.h new file mode 100644 index 0000000..53d56cd --- /dev/null +++ b/include/opensm/osm_qos_policy.h @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2012 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of OSM QoS Policy data types and functions. + * + * Author: + * Yevgeny Kliteynik, Mellanox + */ + +#ifndef OSM_QOS_POLICY_H +#define OSM_QOS_POLICY_H + +#include +#include +#include +#include +#include + +#define YYSTYPE char * +#define OSM_QOS_POLICY_MAX_PORTS_ON_SWITCH 128 +#define OSM_QOS_POLICY_DEFAULT_LEVEL_NAME "default" + +#define OSM_QOS_POLICY_ULP_SDP_SERVICE_ID 0x0000000000010000ULL +#define OSM_QOS_POLICY_ULP_RDS_SERVICE_ID 0x0000000001060000ULL +#define OSM_QOS_POLICY_ULP_RDS_PORT 0x48CA +#define OSM_QOS_POLICY_ULP_ISER_SERVICE_ID 0x0000000001060000ULL +#define OSM_QOS_POLICY_ULP_ISER_PORT 0x0CBC + +#define OSM_QOS_POLICY_NODE_TYPE_CA (((uint8_t)1)< +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/Remote SM +* NAME +* Remote SM +* +* DESCRIPTION +* The Remote SM object encapsulates the information tracked for +* other SM ports on the subnet. +* +* The Remote SM object is thread safe. +* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* AUTHOR +* Steve King, Intel +* +*********/ +/****s* OpenSM: Remote SM/osm_remote_sm_t +* NAME +* osm_remote_sm_t +* +* DESCRIPTION +* Remote Subnet Manager structure. 
+* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct osm_remote_sm { + cl_map_item_t map_item; + ib_sm_info_t smi; +} osm_remote_sm_t; +/* +* FIELDS +* map_item +* Linkage for the cl_qmap container. MUST BE FIRST ELEMENT!! +* +* smi +* The SMInfo attribute for this SM. +* +* SEE ALSO +*********/ + +/****f* OpenSM: SM/osm_remote_sm_construct +* NAME +* osm_remote_sm_construct +* +* DESCRIPTION +* This function constructs a Remote SM object. +* +* SYNOPSIS +*/ +void osm_remote_sm_construct(IN osm_remote_sm_t * p_sm); +/* +* PARAMETERS +* p_sm +* [in] Pointer to a Remote SM object to construct. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling osm_remote_sm_init, osm_remote_sm_destroy +* +* Calling osm_remote_sm_construct is a prerequisite to calling any other +* method except osm_remote_sm_init. +* +* SEE ALSO +* SM object, osm_remote_sm_init, osm_remote_sm_destroy +*********/ + +/****f* OpenSM: SM/osm_remote_sm_destroy +* NAME +* osm_remote_sm_destroy +* +* DESCRIPTION +* The osm_remote_sm_destroy function destroys an SM, releasing +* all resources. +* +* SYNOPSIS +*/ +void osm_remote_sm_destroy(IN osm_remote_sm_t * p_sm); +/* +* PARAMETERS +* p_sm +* [in] Pointer to a Remote SM object to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs any necessary cleanup of the specified Remote SM object. +* Further operations should not be attempted on the destroyed object. +* This function should only be called after a call to +* osm_remote_sm_construct or osm_remote_sm_init. +* +* SEE ALSO +* Remote SM object, osm_remote_sm_construct, osm_remote_sm_init +*********/ + +/****f* OpenSM: SM/osm_remote_sm_init +* NAME +* osm_remote_sm_init +* +* DESCRIPTION +* The osm_remote_sm_init function initializes an Remote SM object for use.
+* +* SYNOPSIS +*/ +void osm_remote_sm_init(IN osm_remote_sm_t * p_sm, IN const ib_sm_info_t * p_smi); +/* +* PARAMETERS +* p_sm +* [in] Pointer to an osm_remote_sm_t object to initialize. +* +* p_smi +* [in] Pointer to the SMInfo attribute for this SM. +* +* RETURN VALUES +* This function does not return a value. +* +* NOTES +* Allows calling other Remote SM methods. +* +* SEE ALSO +* Remote SM object, osm_remote_sm_construct, osm_remote_sm_destroy +*********/ + +END_C_DECLS +#endif /* _OSM_REMOTE_SM_H_ */ diff --git a/include/opensm/osm_router.h b/include/opensm/osm_router.h new file mode 100644 index 0000000..11e34b6 --- /dev/null +++ b/include/opensm/osm_router.h @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005,2008 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_router_t. + * This object represents an IBA router. + * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_ROUTER_H_ +#define _OSM_ROUTER_H_ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/Router +* NAME +* Router +* +* DESCRIPTION +* The Router object encapsulates the information needed by the +* OpenSM to manage routers. The OpenSM allocates one router object +* per router in the IBA subnet. +* +* The Router object is not thread safe, thus callers must provide +* serialization. +* +* This object should be treated as opaque and should be +* manipulated only through the provided functions. +* +* AUTHOR +* Hal Rosenstock, Voltaire +* +*********/ +/****s* OpenSM: Router/osm_router_t +* NAME +* osm_router_t +* +* DESCRIPTION +* Router structure. +* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct osm_router { + cl_map_item_t map_item; + osm_port_t *p_port; +} osm_router_t; +/* +* FIELDS +* map_item +* Linkage structure for cl_qmap. MUST BE FIRST MEMBER! +* +* p_port +* Pointer to the Port object for this router. +* +* SEE ALSO +* Router object +*********/ + +/****f* OpenSM: Router/osm_router_delete +* NAME +* osm_router_delete +* +* DESCRIPTION +* Destroys and deallocates the object. 
+* +* SYNOPSIS +*/ +void osm_router_delete(IN OUT osm_router_t ** pp_rtr); +/* +* PARAMETERS +* pp_rtr +* [in][out] Pointer to a pointer to the object to destroy. +* The pointer will be set to NULL on return. +* +* RETURN VALUE +* None. +* +* NOTES +* +* SEE ALSO +* Router object, osm_router_new +*********/ + +/****f* OpenSM: Router/osm_router_new +* NAME +* osm_router_new +* +* DESCRIPTION +* The osm_router_new function initializes a Router object for use. +* +* SYNOPSIS +*/ +osm_router_t *osm_router_new(IN osm_port_t * p_port); +/* +* PARAMETERS +* p_port +* [in] Pointer to the port object of this router +* +* RETURN VALUES +* Pointer to the newly initialized router object. +* +* NOTES +* +* SEE ALSO +* Router object, osm_router_delete +*********/ + +/****f* OpenSM: Router/osm_router_get_port_ptr +* NAME +* osm_router_get_port_ptr +* +* DESCRIPTION +* Returns a pointer to the Port object for this router. +* +* SYNOPSIS +*/ +static inline osm_port_t *osm_router_get_port_ptr(IN const osm_router_t * p_rtr) +{ + return p_rtr->p_port; +} + +/* +* PARAMETERS +* p_rtr +* [in] Pointer to an osm_router_t object. +* +* RETURN VALUES +* Returns a pointer to the Port object for this router. +* +* NOTES +* +* SEE ALSO +* Router object +*********/ + +/****f* OpenSM: Router/osm_router_get_node_ptr +* NAME +* osm_router_get_node_ptr +* +* DESCRIPTION +* Returns a pointer to the Node object for this router. +* +* SYNOPSIS +*/ +static inline osm_node_t *osm_router_get_node_ptr(IN const osm_router_t * p_rtr) +{ + return p_rtr->p_port->p_node; +} + +/* +* PARAMETERS +* p_rtr +* [in] Pointer to an osm_router_t object. +* +* RETURN VALUES +* Returns a pointer to the Node object for this router.
+* +* NOTES +* +* SEE ALSO +* Router object +*********/ + +END_C_DECLS +#endif /* _OSM_ROUTER_H_ */ diff --git a/include/opensm/osm_sa.h b/include/opensm/osm_sa.h new file mode 100644 index 0000000..aeeaa02 --- /dev/null +++ b/include/opensm/osm_sa.h @@ -0,0 +1,617 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_sa_t. + * This object represents an IBA subnet. + * This object is part of the OpenSM family of objects. 
+ */ + +#ifndef _OSM_SA_H_ +#define _OSM_SA_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/SA +* NAME +* SA +* +* DESCRIPTION +* The SA object encapsulates the information needed by the +* OpenSM to instantiate subnet administration. The OpenSM +* allocates one SA object per subnet manager. +* +* The SA object is thread safe. +* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* AUTHOR +* Ranjit Pandit, Intel +* Anil Keshavamurthy, Intel +* +*********/ + +/****d* OpenSM: SA/osm_sa_state_t +* NAME +* osm_sa_state_t +* +* DESCRIPTION +* Enumerates the possible states of SA object. +* +* SYNOPSIS +*/ +typedef enum _osm_sa_state { + OSM_SA_STATE_INIT = 0, + OSM_SA_STATE_READY +} osm_sa_state_t; +/***********/ + +/****d* OpenSM: SA/osm_mpr_rec_t +* NAME +* osm_mpr_rec_t +* +* DESCRIPTION +* SA MultiPathRecord response. +* +* SYNOPSIS +*/ +typedef struct osm_mpr_rec { + ib_path_rec_t path_rec; + const osm_port_t *p_src_port; + const osm_port_t *p_dest_port; + int hops; +} osm_mpr_rec_t; +/***********/ + +/****d* OpenSM: SA/osm_sa_item_t +* NAME +* osm_sa_item_t +* +* DESCRIPTION +* SA response item. 
+* +* SYNOPSIS +*/ +typedef struct osm_sa_item { + cl_list_item_t list_item; + union { + char data[0]; + ib_guidinfo_record_t guid_rec; + ib_inform_info_t inform; + ib_inform_info_record_t inform_rec; + ib_lft_record_t lft_rec; + ib_link_record_t link_rec; + ib_member_rec_t mc_rec; + ib_mft_record_t mft_rec; + osm_mpr_rec_t mpr_rec; + ib_node_record_t node_rec; + ib_path_rec_t path_rec; + ib_pkey_table_record_t pkey_rec; + ib_portinfo_record_t port_rec; + ib_service_record_t service_rec; + ib_slvl_table_record_t slvl_rec; + ib_sminfo_record_t sminfo_rec; + ib_switch_info_record_t swinfo_rec; + ib_vl_arb_table_record_t vlarb_rec; + } resp; +} osm_sa_item_t; +/* +* NOTES +* Actual structure allocated is based on SA attribute +* type. As such, it is variable sized. The allocation +* occurs in the SA attribute handling code. +* Note also that the size is specified external +* to this structure (It's passed as a parameter to +* osm_sa_respond). The SA_ITEM_RESP_SIZE macro +* facilitates determining the size required. +* +***********/ + +#define SA_ITEM_RESP_SIZE(_m) offsetof(osm_sa_item_t, resp._m) + \ + sizeof(((osm_sa_item_t *)NULL)->resp._m) + +/****s* OpenSM: SM/osm_sa_t +* NAME +* osm_sa_t +* +* DESCRIPTION +* Subnet Administration structure. +* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. 
+* +* SYNOPSIS +*/ +typedef struct osm_sa { + osm_sa_state_t state; + osm_sm_t *sm; + osm_subn_t *p_subn; + osm_vendor_t *p_vendor; + osm_log_t *p_log; + osm_mad_pool_t *p_mad_pool; + cl_dispatcher_t *p_disp; + cl_dispatcher_t *p_set_disp; + cl_plock_t *p_lock; + atomic32_t sa_trans_id; + osm_sa_mad_ctrl_t mad_ctrl; + cl_timer_t sr_timer; + boolean_t dirty; + cl_disp_reg_handle_t cpi_disp_h; + cl_disp_reg_handle_t nr_disp_h; + cl_disp_reg_handle_t pir_disp_h; + cl_disp_reg_handle_t gir_disp_h; + cl_disp_reg_handle_t lr_disp_h; + cl_disp_reg_handle_t pr_disp_h; + cl_disp_reg_handle_t smir_disp_h; + cl_disp_reg_handle_t mcmr_disp_h; + cl_disp_reg_handle_t sr_disp_h; +#if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) + cl_disp_reg_handle_t mpr_disp_h; +#endif + cl_disp_reg_handle_t infr_disp_h; + cl_disp_reg_handle_t infir_disp_h; + cl_disp_reg_handle_t vlarb_disp_h; + cl_disp_reg_handle_t slvl_disp_h; + cl_disp_reg_handle_t pkey_disp_h; + cl_disp_reg_handle_t lft_disp_h; + cl_disp_reg_handle_t sir_disp_h; + cl_disp_reg_handle_t mft_disp_h; + cl_disp_reg_handle_t infr_set_disp_h; + cl_disp_reg_handle_t gir_set_disp_h; + cl_disp_reg_handle_t mcmr_set_disp_h; + cl_disp_reg_handle_t sr_set_disp_h; +} osm_sa_t; +/* +* FIELDS +* state +* State of this SA object +* +* sm +* Pointer to the Subnet Manager object. +* +* p_subn +* Pointer to the Subnet object for this subnet. +* +* p_vendor +* Pointer to the vendor specific interfaces object. +* +* p_log +* Pointer to the log object. +* +* p_mad_pool +* Pointer to the MAD pool. +* +* p_disp +* Pointer to dispatcher +* +* p_set_disp +* Pointer to dispatcher for Set requests. 
+* +* p_lock +* Pointer to Lock for serialization +* +* sa_trans_id +* Transaction ID +* +* mad_ctrl +* Mad Controller +* +* dirty +* A flag that denotes that SA DB is dirty and needs +* to be written to the dump file (if dumping is enabled) +* +* SEE ALSO +* SM object +*********/ + +/****f* OpenSM: SA/osm_sa_construct +* NAME +* osm_sa_construct +* +* DESCRIPTION +* This function constructs an SA object. +* +* SYNOPSIS +*/ +void osm_sa_construct(IN osm_sa_t * p_sa); +/* +* PARAMETERS +* p_sa +* [in] Pointer to a SA object to construct. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling osm_sa_destroy. +* +* Calling osm_sa_construct is a prerequisite to calling any other +* method except osm_sa_init. +* +* SEE ALSO +* SA object, osm_sa_init, osm_sa_destroy +*********/ + +/****f* OpenSM: SA/osm_sa_shutdown +* NAME +* osm_sa_shutdown +* +* DESCRIPTION +* The osm_sa_shutdown function shutdowns an SA, unregistering from all +* dispatcher messages and unbinding the QP1 mad service +* +* SYNOPSIS +*/ +void osm_sa_shutdown(IN osm_sa_t * p_sa); +/* +* PARAMETERS +* p_sa +* [in] Pointer to a SA object to shutdown. +* +* RETURN VALUE +* This function does not return a value. +* +* SEE ALSO +* SA object, osm_sa_construct, osm_sa_init +*********/ + +/****f* OpenSM: SA/osm_sa_destroy +* NAME +* osm_sa_destroy +* +* DESCRIPTION +* The osm_sa_destroy function destroys an SA, releasing +* all resources. +* +* SYNOPSIS +*/ +void osm_sa_destroy(IN osm_sa_t * p_sa); +/* +* PARAMETERS +* p_sa +* [in] Pointer to a SA object to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs any necessary cleanup of the specified SA object. +* Further operations should not be attempted on the destroyed object. +* This function should only be called after a call to osm_sa_construct or +* osm_sa_init. 
+* +* SEE ALSO +* SA object, osm_sa_construct, osm_sa_init +*********/ + +/****f* OpenSM: SA/osm_sa_init +* NAME +* osm_sa_init +* +* DESCRIPTION +* The osm_sa_init function initializes a SA object for use. +* +* SYNOPSIS +*/ +ib_api_status_t osm_sa_init(IN osm_sm_t * p_sm, IN osm_sa_t * p_sa, + IN osm_subn_t * p_subn, IN osm_vendor_t * p_vendor, + IN osm_mad_pool_t * p_mad_pool, + IN osm_log_t * p_log, IN osm_stats_t * p_stats, + IN cl_dispatcher_t * p_disp, + IN cl_dispatcher_t * p_set_disp, + IN cl_plock_t * p_lock); +/* +* PARAMETERS +* p_sa +* [in] Pointer to an osm_sa_t object to initialize. +* +* p_subn +* [in] Pointer to the Subnet object for this subnet. +* +* p_vendor +* [in] Pointer to the vendor specific interfaces object. +* +* p_mad_pool +* [in] Pointer to the MAD pool. +* +* p_log +* [in] Pointer to the log object. +* +* p_stats +* [in] Pointer to the statistics object. +* +* p_disp +* [in] Pointer to the OpenSM central Dispatcher. +* +* p_set_disp +* [in] Pointer to the OpenSM Dispatcher for Set requests. +* +* p_lock +* [in] Pointer to the OpenSM serializing lock. +* +* RETURN VALUES +* CL_SUCCESS if the SA object was initialized successfully. +* +* NOTES +* Allows calling other SA methods. +* +* SEE ALSO +* SA object, osm_sa_construct, osm_sa_destroy +*********/ + +/****f* OpenSM: SA/osm_sa_bind +* NAME +* osm_sa_bind +* +* DESCRIPTION +* Binds the SA object to a port guid. +* +* SYNOPSIS +*/ +ib_api_status_t osm_sa_bind(IN osm_sa_t * p_sa, IN ib_net64_t port_guid); +/* +* PARAMETERS +* p_sa +* [in] Pointer to an osm_sa_t object to bind. +* +* port_guid +* [in] Local port GUID with which to bind. +* +* +* RETURN VALUES +* None +* +* NOTES +* A given SA object can only be bound to one port at a time. 
+* +* SEE ALSO +*********/ + +/****f* OpenSM: SA/osm_sa_send +* NAME +* osm_sa_send +* +* DESCRIPTION +* Sends SA MAD via osm_vendor_send and maintains the QP1 sent statistic +* +* SYNOPSIS +*/ +ib_api_status_t osm_sa_send(osm_sa_t *sa, IN osm_madw_t * p_madw, + IN boolean_t resp_expected); + +/****f* OpenSM: SA/osm_sa_send_error +* NAME +* osm_sa_send_error +* +* DESCRIPTION +* Sends a generic SA response with the specified error status. +* The payload is simply replicated from the request MAD. +* +* SYNOPSIS +*/ +void osm_sa_send_error(IN osm_sa_t * sa, IN const osm_madw_t * p_madw, + IN ib_net16_t sa_status); +/* +* PARAMETERS +* sa +* [in] Pointer to an osm_sa_t object. +* +* p_madw +* [in] Original MAD to which the response must be sent. +* +* sa_status +* [in] Status to send in the response. +* +* RETURN VALUES +* None. +* +* SEE ALSO +* SA object +*********/ + +/****f* OpenSM: SA/osm_sa_respond +* NAME +* osm_sa_respond +* +* DESCRIPTION +* Sends SA MAD response +*/ +void osm_sa_respond(osm_sa_t *sa, osm_madw_t *madw, size_t attr_size, + cl_qlist_t *list); +/* +* PARAMETERS +* sa +* [in] Pointer to an osm_sa_t object. +* +* madw +* [in] Original MAD to which the response must be sent. +* +* attr_size +* [in] Size of this SA attribute. +* +* list +* [in] List of attributes to respond with - it will be freed after +* sending. +* +* RETURN VALUES +* None. +* +* SEE ALSO +* SA object +*********/ + +struct osm_opensm; +/****f* OpenSM: SA/osm_sa_db_file_dump +* NAME +* osm_sa_db_file_dump +* +* DESCRIPTION +* Dumps the SA DB to the dump file. +* +* SYNOPSIS +*/ +int osm_sa_db_file_dump(struct osm_opensm *p_osm); +/* +* PARAMETERS +* p_osm +* [in] Pointer to an osm_opensm_t object. +* +* RETURN VALUES +* 0 if the SA DB was actually dumped +* >0 if there was no need to dump the SA DB +* <0 if some error occurred. +* +*********/ + +/****f* OpenSM: SA/osm_sa_db_file_load +* NAME +* osm_sa_db_file_load +* +* DESCRIPTION +* Loads SA DB from the file.
+* +* SYNOPSIS +*/ +int osm_sa_db_file_load(struct osm_opensm *p_osm); +/* +* PARAMETERS +* p_osm +* [in] Pointer to an osm_opensm_t object. +* +* RETURN VALUES +* 0 on success, other value on failure. +* +*********/ + +/****f* OpenSM: MC Member Record Receiver/osm_mcmr_rcv_find_or_create_new_mgrp +* NAME +* osm_mcmr_rcv_find_or_create_new_mgrp +* +* DESCRIPTION +* Find an existing Multicast group or create a new one +* +* SYNOPSIS +*/ + +osm_mgrp_t *osm_mcmr_rcv_find_or_create_new_mgrp(IN osm_sa_t * sa, + IN ib_net64_t comp_mask, + IN ib_member_rec_t * + p_recvd_mcmember_rec); +/* +* PARAMETERS +* sa +* [in] Pointer to an osm_sa_t object. +* comp_mask +* [in] SA query component mask +* p_recvd_mcmember_rec +* [in] Received Multicast member record +* +* RETURN VALUES +* The pointer to MC group object found or created, NULL in case of errors +* +*********/ + +/** + * The following declarations expose functionality of osm_sa_path_record.c for internal use + * by sub managers; osm_path_parms_t is the OUT argument of osm_get_path_params(). + */ +typedef struct osm_path_parms { + ib_net16_t pkey; + uint8_t mtu; + uint8_t rate; + uint8_t sl; + uint8_t pkt_life; + boolean_t reversible; + int hops; +} osm_path_parms_t; + +ib_api_status_t osm_get_path_params(IN osm_sa_t * sa, + IN const osm_port_t * p_src_port, + IN const uint16_t slid_ho, + IN const osm_port_t * p_dest_port, + IN const uint16_t dlid_ho, + OUT osm_path_parms_t * p_parms); + +ib_net16_t osm_pr_get_end_points(IN osm_sa_t * sa, + IN const ib_sa_mad_t *sa_mad, + OUT const osm_alias_guid_t ** pp_src_alias_guid, + OUT const osm_alias_guid_t ** pp_dest_alias_guid, + OUT const osm_port_t ** pp_src_port, + OUT const osm_port_t ** pp_dest_port, + OUT const ib_gid_t ** pp_sgid, + OUT const ib_gid_t ** pp_dgid); + +void osm_pr_process_pair(IN osm_sa_t * sa, IN const ib_sa_mad_t * sa_mad, + IN const osm_port_t * requester_port, + IN const osm_alias_guid_t * p_src_alias_guid, + IN const osm_alias_guid_t * p_dest_alias_guid, + IN const ib_gid_t * p_sgid, + IN const ib_gid_t * p_dgid, + IN cl_qlist_t * p_list); + +void
osm_pr_process_half(IN osm_sa_t * sa, IN const ib_sa_mad_t * sa_mad, + IN const osm_port_t * requester_port, + IN const osm_alias_guid_t * p_src_alias_guid, + IN const osm_alias_guid_t * p_dest_alias_guid, + IN const ib_gid_t * p_sgid, + IN const ib_gid_t * p_dgid, + IN cl_qlist_t * p_list); + +END_C_DECLS +#endif /* _OSM_SA_H_ */ diff --git a/include/opensm/osm_sa_mad_ctrl.h b/include/opensm/osm_sa_mad_ctrl.h new file mode 100644 index 0000000..d59fae0 --- /dev/null +++ b/include/opensm/osm_sa_mad_ctrl.h @@ -0,0 +1,354 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_sa_mad_ctrl_t. + * This object represents a controller that receives the IBA SA + * attributes from a node. + * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_SA_MAD_CTRL_H_ +#define _OSM_SA_MAD_CTRL_H_ + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/SA MAD Controller +* NAME +* SA MAD Controller +* +* DESCRIPTION +* The SA MAD Controller object encapsulates +* the information needed to receive MADs from the transport layer. +* +* The SA MAD Controller object is thread safe. +* +* This object should be treated as opaque and should be +* manipulated only through the provided functions. +* +* AUTHOR +* Ranjit Pandit, Intel +* +*********/ + +struct osm_sa; /* forward declaration: osm_sa_t embeds an osm_sa_mad_ctrl_t by value, so only a pointer back to it is stored here */ +/****s* OpenSM: SA MAD Controller/osm_sa_mad_ctrl_t +* NAME +* osm_sa_mad_ctrl_t +* +* DESCRIPTION +* SA MAD Controller structure. +* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct osm_sa_mad_ctrl { + struct osm_sa *sa; + osm_log_t *p_log; + osm_mad_pool_t *p_mad_pool; + osm_vendor_t *p_vendor; + osm_bind_handle_t h_bind; /* may be OSM_BIND_INVALID_HANDLE if no port has been bound */ + cl_dispatcher_t *p_disp; + cl_dispatcher_t *p_set_disp; + cl_disp_reg_handle_t h_disp; + cl_disp_reg_handle_t h_set_disp; + osm_stats_t *p_stats; + osm_subn_t *p_subn; +} osm_sa_mad_ctrl_t; +/* +* FIELDS +* sa +* Pointer to the SA object. +* +* p_log +* Pointer to the log object. +* +* p_mad_pool +* Pointer to the MAD pool.
+* +* p_vendor +* Pointer to the vendor specific interfaces object. +* +* h_bind +* Bind handle returned by the transport layer. +* +* p_disp +* Pointer to the Dispatcher. +* +* p_set_disp +* Pointer to the Dispatcher for Set requests. +* +* h_disp +* Handle returned from dispatcher registration. +* +* h_set_disp +* Handle returned from Set requests dispatcher registration. +* +* p_stats +* Pointer to the OpenSM statistics block. +* +* p_subn +* Pointer to the OpenSM Subnet object. +* +* SEE ALSO +* SA MAD Controller object +*********/ + +/****f* OpenSM: SA MAD Controller/osm_sa_mad_ctrl_construct +* NAME +* osm_sa_mad_ctrl_construct +* +* DESCRIPTION +* This function constructs a SA MAD Controller object. +* +* SYNOPSIS +*/ +void osm_sa_mad_ctrl_construct(IN osm_sa_mad_ctrl_t * p_ctrl); +/* +* PARAMETERS +* p_ctrl +* [in] Pointer to a SA MAD Controller +* object to construct. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling osm_sa_mad_ctrl_init, and osm_sa_mad_ctrl_destroy. +* +* Calling osm_sa_mad_ctrl_construct is a prerequisite to calling any other +* method except osm_sa_mad_ctrl_init. +* +* SEE ALSO +* SA MAD Controller object, osm_sa_mad_ctrl_init, +* osm_sa_mad_ctrl_destroy +*********/ + +/****f* OpenSM: SA MAD Controller/osm_sa_mad_ctrl_destroy +* NAME +* osm_sa_mad_ctrl_destroy +* +* DESCRIPTION +* The osm_sa_mad_ctrl_destroy function destroys the object, releasing +* all resources. +* +* SYNOPSIS +*/ +void osm_sa_mad_ctrl_destroy(IN osm_sa_mad_ctrl_t * p_ctrl); +/* +* PARAMETERS +* p_ctrl +* [in] Pointer to the object to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs any necessary cleanup of the specified +* SA MAD Controller object. +* Further operations should not be attempted on the destroyed object. +* This function should only be called after a call to +* osm_sa_mad_ctrl_construct or osm_sa_mad_ctrl_init. 
+* +* SEE ALSO +* SA MAD Controller object, osm_sa_mad_ctrl_construct, +* osm_sa_mad_ctrl_init +*********/ + +/****f* OpenSM: SA MAD Controller/osm_sa_mad_ctrl_init +* NAME +* osm_sa_mad_ctrl_init +* +* DESCRIPTION +* The osm_sa_mad_ctrl_init function initializes a +* SA MAD Controller object for use. +* +* SYNOPSIS +*/ +ib_api_status_t osm_sa_mad_ctrl_init(IN osm_sa_mad_ctrl_t * p_ctrl, + IN struct osm_sa * sa, + IN osm_mad_pool_t * p_mad_pool, + IN osm_vendor_t * p_vendor, + IN osm_subn_t * p_subn, + IN osm_log_t * p_log, + IN osm_stats_t * p_stats, + IN cl_dispatcher_t * p_disp, + IN cl_dispatcher_t * p_set_disp); +/* +* PARAMETERS +* p_ctrl +* [in] Pointer to an osm_sa_mad_ctrl_t object to initialize. +* +* sa +* [in] Pointer to the SA object. +* +* p_mad_pool +* [in] Pointer to the MAD pool. +* +* p_vendor +* [in] Pointer to the vendor specific interfaces object. +* +* p_subn +* [in] Pointer to the OpenSM Subnet object. +* +* p_log +* [in] Pointer to the log object. +* +* p_stats +* [in] Pointer to the OpenSM statistics block. +* +* p_disp +* [in] Pointer to the OpenSM central Dispatcher. +* +* p_set_disp +* [in] Pointer to the OpenSM Dispatcher for Set requests. +* +* RETURN VALUES +* IB_SUCCESS if the SA MAD Controller object was initialized +* successfully. +* +* NOTES +* Allows calling other SA MAD Controller methods. +* +* SEE ALSO +* SA MAD Controller object, osm_sa_mad_ctrl_construct, +* osm_sa_mad_ctrl_destroy +*********/ + +/****f* OpenSM: SA/osm_sa_mad_ctrl_bind +* NAME +* osm_sa_mad_ctrl_bind +* +* DESCRIPTION +* Binds the SA MAD Controller object to a port guid. +* +* SYNOPSIS +*/ +ib_api_status_t osm_sa_mad_ctrl_bind(IN osm_sa_mad_ctrl_t * p_ctrl, + IN ib_net64_t port_guid); +/* +* PARAMETERS +* p_ctrl +* [in] Pointer to an osm_sa_mad_ctrl_t object to bind. +* +* port_guid +* [in] Local port GUID with which to bind. +* +* +* RETURN VALUES +* None +* +* NOTES +* A given SA MAD Controller object can only be bound to one +* port at a time.
+* +* SEE ALSO +*********/ + +/****f* OpenSM: SA/osm_sa_mad_ctrl_unbind +* NAME +* osm_sa_mad_ctrl_unbind +* +* DESCRIPTION +* Unbinds the SA MAD Controller object from the IB port +* +* SYNOPSIS +*/ +ib_api_status_t osm_sa_mad_ctrl_unbind(IN osm_sa_mad_ctrl_t * p_ctrl); +/* +* PARAMETERS +* p_ctrl +* [in] Pointer to an osm_sa_mad_ctrl_t object to unbind. +* +* RETURN VALUES +* An ib_api_status_t status code. +* +* NOTES +* A given SA MAD Controller must have been previously bound to an IB +* port. +* +* SEE ALSO +*********/ + +/****f* OpenSM: SA/osm_sa_mad_ctrl_get_bind_handle +* NAME +* osm_sa_mad_ctrl_get_bind_handle +* +* DESCRIPTION +* Returns the bind handle stored in the controller (its h_bind field). +* +* SYNOPSIS +*/ +static inline osm_bind_handle_t +osm_sa_mad_ctrl_get_bind_handle(IN const osm_sa_mad_ctrl_t * p_ctrl) +{ + return p_ctrl->h_bind; +} + +/* +* PARAMETERS +* p_ctrl +* [in] Pointer to an osm_sa_mad_ctrl_t object. +* +* RETURN VALUES +* Returns the bind handle, which may be OSM_BIND_INVALID_HANDLE +* if no port has been bound. +* +* NOTES +* A given SA MAD Controller object can only be bound to one +* port at a time. +* +* SEE ALSO +*********/ + +END_C_DECLS +#endif /* _OSM_SA_MAD_CTRL_H_ */ diff --git a/include/opensm/osm_service.h b/include/opensm/osm_service.h new file mode 100644 index 0000000..8ed97df --- /dev/null +++ b/include/opensm/osm_service.h @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _OSM_SVCR_H_ +#define _OSM_SVCR_H_ + +/* + * Abstract: + * Declaration of osm_service_rec_t. + * This object represents an IBA Service Record. + * This object is part of the OpenSM family of objects. + */ + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/Service Record +* NAME +* Service Record +* +* DESCRIPTION +* The service record encapsulates the information needed by the +* SA to manage service registrations. +* +* The service records is not thread safe, thus callers must provide +* serialization. 
+* +* This object should be treated as opaque and should be +* manipulated only through the provided functions. +* +* AUTHOR +* Anil S Keshavamurthy, Intel +* +*********/ +/****s* OpenSM: Service Record/osm_svcr_t +* NAME +* osm_svcr_t +* +* DESCRIPTION +* Service Record structure. +* +* The osm_svcr_t object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct osm_svcr { + cl_list_item_t list_item; /* must remain the first member (Quick List linkage) */ + ib_service_record_t service_record; + uint32_t modified_time; + uint32_t lease_period; +} osm_svcr_t; +/* +* FIELDS +* list_item +* List Item for Quick List linkage. Must be first element!! +* +* service_record +* IB Service record structure +* +* modified_time +* Last modified time of this record in milliseconds +* +* lease_period +* Remaining lease period for this record +* +* +* SEE ALSO +*********/ + +/****f* OpenSM: Service Record/osm_svcr_new +* NAME +* osm_svcr_new +* +* DESCRIPTION +* Allocates and initializes a Service Record for use. +* +* SYNOPSIS +*/ +osm_svcr_t *osm_svcr_new(IN const ib_service_record_t * p_svc_rec); +/* +* PARAMETERS +* p_svc_rec +* [in] Pointer to IB Service Record +* +* RETURN VALUES +* Pointer to the newly allocated osm_svcr_t structure. +* +* NOTES +* Allows calling other service record methods. +* +* SEE ALSO +* Service Record, osm_svcr_delete +*********/ + +/****f* OpenSM: Service Record/osm_svcr_init +* NAME +* osm_svcr_init +* +* DESCRIPTION +* Initializes the osm_svcr_t structure. +* +* SYNOPSIS +*/ +void osm_svcr_init(IN osm_svcr_t * p_svcr, + IN const ib_service_record_t * p_svc_rec); +/* +* PARAMETERS +* p_svcr +* [in] Pointer to the osm_svcr_t structure to initialize +* +* p_svc_rec +* [in] Pointer to IB Service Record +* +* SEE ALSO +* Service Record +*********/ + +/****f* OpenSM: Service Record/osm_svcr_delete +* NAME +* osm_svcr_delete +* +* DESCRIPTION +* Deallocates the osm_svcr_t structure.
+* +* SYNOPSIS +*/ +void osm_svcr_delete(IN osm_svcr_t * p_svcr); +/* +* PARAMETERS +* p_svcr +* [in] Pointer to osm_svcr_t structure +* +* SEE ALSO +* Service Record, osm_svcr_new +*********/ + +/****f* OpenSM: Service Record/osm_svcr_get_by_rid +* NAME +* osm_svcr_get_by_rid +* +* DESCRIPTION +* Search the Service Record Database by record service_id, +* service_gid and service_pkey (RID). +* +* SYNOPSIS +*/ +osm_svcr_t *osm_svcr_get_by_rid(IN osm_subn_t const *p_subn, + IN osm_log_t * p_log, + IN ib_service_record_t * p_svc_rec); +/* +* PARAMETERS +* p_subn +* [in] Pointer to Subnet structure +* +* p_log +* [in] Pointer to osm_log_t +* +* p_svc_rec +* [in] Pointer to IB Service Record +* +* RETURN VALUES +* If a matching record is found, pointer to osm_svcr_t structure. +* Otherwise, pointer to NULL. +* +* SEE ALSO +* Service Record +*********/ + +/****f* OpenSM: Service Record/osm_svcr_insert_to_db +* NAME +* osm_svcr_insert_to_db +* +* DESCRIPTION +* Insert new Service Record into Database +* +* SYNOPSIS +*/ +void osm_svcr_insert_to_db(IN osm_subn_t * p_subn, IN osm_log_t * p_log, + IN osm_svcr_t * p_svcr); +/* +* PARAMETERS +* p_subn +* [in] Pointer to Subnet structure +* +* p_log +* [in] Pointer to osm_log_t +* +* p_svcr +* [in] Pointer to IB Service Record to be inserted +* +* RETURN VALUES +* This function does not return a value. +* +* SEE ALSO +* Service Record, osm_svcr_remove_from_db +*********/ + +/****f* OpenSM: Service Record/osm_svcr_remove_from_db +* NAME +* osm_svcr_remove_from_db +* +* DESCRIPTION +* Remove a Service Record from Database +* +* SYNOPSIS +*/ +void osm_svcr_remove_from_db(IN osm_subn_t * p_subn, IN osm_log_t * p_log, + IN osm_svcr_t * p_svcr); +/* +* PARAMETERS +* p_subn +* [in] Pointer to Subnet structure +* +* p_log +* [in] Pointer to osm_log_t +* +* p_svcr +* [in] Pointer to IB Service Record to be removed +* +* RETURN VALUES +* This function does not return a value. 
+* +* SEE ALSO +* Service Record, osm_svcr_insert_to_db +*********/ + +END_C_DECLS +#endif /* _OSM_SVCR_H_ */ diff --git a/include/opensm/osm_sm.h b/include/opensm/osm_sm.h new file mode 100644 index 0000000..d54cf75 --- /dev/null +++ b/include/opensm/osm_sm.h @@ -0,0 +1,844 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_sm_t. + * This object represents an IBA subnet. + * This object is part of the OpenSM family of objects. 
+ */ + +#ifndef _OSM_SM_H_ +#define _OSM_SM_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/SM +* NAME +* SM +* +* DESCRIPTION +* The SM object encapsulates the information needed by the +* OpenSM to instantiate a subnet manager. The OpenSM allocates +* one SM object per subnet manager. +* +* The SM object is thread safe. +* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* AUTHOR +* Steve King, Intel +* +*********/ +/****s* OpenSM: SM/osm_sm_t +* NAME +* osm_sm_t +* +* DESCRIPTION +* Subnet Manager structure. +* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. 
+* +* SYNOPSIS +*/ +typedef struct osm_sm { + osm_thread_state_t thread_state; + unsigned signal_mask; + cl_spinlock_t signal_lock; + cl_spinlock_t state_lock; + cl_event_t signal_event; + cl_event_t subnet_up_event; /* event waited on by osm_sm_wait_for_subnet_up() */ + cl_timer_t sweep_timer; + cl_timer_t polling_timer; + cl_event_wheel_t trap_aging_tracker; + cl_thread_t sweeper; + unsigned master_sm_found; + uint32_t retry_number; + ib_net64_t master_sm_guid; + ib_net64_t polling_sm_guid; + osm_subn_t *p_subn; + osm_db_t *p_db; + osm_vendor_t *p_vendor; + osm_log_t *p_log; + osm_mad_pool_t *p_mad_pool; + osm_vl15_t *p_vl15; + cl_dispatcher_t *p_disp; + cl_plock_t *p_lock; + atomic32_t sm_trans_id; + uint16_t mlids_init_max; + unsigned mlids_req_max; + uint8_t *mlids_req; + osm_sm_mad_ctrl_t mad_ctrl; + osm_lid_mgr_t lid_mgr; + osm_ucast_mgr_t ucast_mgr; + cl_disp_reg_handle_t sweep_fail_disp_h; + cl_disp_reg_handle_t ni_disp_h; + cl_disp_reg_handle_t pi_disp_h; + cl_disp_reg_handle_t gi_disp_h; + cl_disp_reg_handle_t nd_disp_h; + cl_disp_reg_handle_t si_disp_h; + cl_disp_reg_handle_t lft_disp_h; + cl_disp_reg_handle_t mft_disp_h; + cl_disp_reg_handle_t sm_info_disp_h; + cl_disp_reg_handle_t trap_disp_h; + cl_disp_reg_handle_t slvl_disp_h; + cl_disp_reg_handle_t vla_disp_h; + cl_disp_reg_handle_t pkey_disp_h; + cl_disp_reg_handle_t mlnx_epi_disp_h; +} osm_sm_t; +/* +* FIELDS (only a subset of the members is documented here) +* p_subn +* Pointer to the Subnet object for this subnet. +* +* p_db +* Pointer to the database (persistence) object +* +* p_vendor +* Pointer to the vendor specific interfaces object. +* +* p_log +* Pointer to the log object. +* +* p_mad_pool +* Pointer to the MAD pool. +* +* p_vl15 +* Pointer to the VL15 interface. +* +* mad_ctrl +* MAD Controller. +* +* p_disp +* Pointer to the Dispatcher. +* +* p_lock +* Pointer to the serializing lock. +* +* SEE ALSO +* SM object +*********/ + +/****f* OpenSM: SM/osm_sm_construct +* NAME +* osm_sm_construct +* +* DESCRIPTION +* This function constructs an SM object.
+* +* SYNOPSIS +*/ +void osm_sm_construct(IN osm_sm_t * p_sm); +/* +* PARAMETERS +* p_sm +* [in] Pointer to a SM object to construct. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling osm_sm_init, osm_sm_destroy +* +* Calling osm_sm_construct is a prerequisite to calling any other +* method except osm_sm_init. +* +* SEE ALSO +* SM object, osm_sm_init, osm_sm_destroy +*********/ + +/****f* OpenSM: SM/osm_sm_shutdown +* NAME +* osm_sm_shutdown +* +* DESCRIPTION +* The osm_sm_shutdown function shutdowns an SM, stopping the sweeper +* and unregistering all messages from the dispatcher +* +* SYNOPSIS +*/ +void osm_sm_shutdown(IN osm_sm_t * p_sm); +/* +* PARAMETERS +* p_sm +* [in] Pointer to a SM object to shutdown. +* +* RETURN VALUE +* This function does not return a value. +* +* SEE ALSO +* SM object, osm_sm_construct, osm_sm_init +*********/ + +/****f* OpenSM: SM/osm_sm_destroy +* NAME +* osm_sm_destroy +* +* DESCRIPTION +* The osm_sm_destroy function destroys an SM, releasing +* all resources. +* +* SYNOPSIS +*/ +void osm_sm_destroy(IN osm_sm_t * p_sm); +/* +* PARAMETERS +* p_sm +* [in] Pointer to a SM object to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs any necessary cleanup of the specified SM object. +* Further operations should not be attempted on the destroyed object. +* This function should only be called after a call to osm_sm_construct or +* osm_sm_init. +* +* SEE ALSO +* SM object, osm_sm_construct, osm_sm_init +*********/ + +/****f* OpenSM: SM/osm_sm_init +* NAME +* osm_sm_init +* +* DESCRIPTION +* The osm_sm_init function initializes a SM object for use. 
+* +* SYNOPSIS +*/ +ib_api_status_t osm_sm_init(IN osm_sm_t * p_sm, IN osm_subn_t * p_subn, + IN osm_db_t * p_db, IN osm_vendor_t * p_vendor, + IN osm_mad_pool_t * p_mad_pool, + IN osm_vl15_t * p_vl15, IN osm_log_t * p_log, + IN osm_stats_t * p_stats, + IN cl_dispatcher_t * p_disp, IN cl_plock_t * p_lock); +/* +* PARAMETERS +* p_sm +* [in] Pointer to an osm_sm_t object to initialize. +* +* p_subn +* [in] Pointer to the Subnet object for this subnet. +* +* p_vendor +* [in] Pointer to the vendor specific interfaces object. +* +* p_mad_pool +* [in] Pointer to the MAD pool. +* +* p_vl15 +* [in] Pointer to the VL15 interface. +* +* p_log +* [in] Pointer to the log object. +* +* p_stats +* [in] Pointer to the statistics object. +* +* p_disp +* [in] Pointer to the OpenSM central Dispatcher. +* +* p_lock +* [in] Pointer to the OpenSM serializing lock. +* +* RETURN VALUES +* IB_SUCCESS if the SM object was initialized successfully. +* +* NOTES +* Allows calling other SM methods. +* +* SEE ALSO +* SM object, osm_sm_construct, osm_sm_destroy +*********/ + +/****f* OpenSM: SM/osm_sm_signal +* NAME +* osm_sm_signal +* +* DESCRIPTION +* Signal event to SM +* +* SYNOPSIS +*/ +void osm_sm_signal(IN osm_sm_t * p_sm, osm_signal_t signal); +/* +* PARAMETERS +* p_sm +* [in] Pointer to an osm_sm_t object. +* +* signal +* [in] sm signal number. +* +* NOTES +* +* SEE ALSO +* SM object +*********/ + +/****f* OpenSM: SM/osm_sm_sweep +* NAME +* osm_sm_sweep +* +* DESCRIPTION +* Initiates a subnet sweep. +* +* SYNOPSIS +*/ +void osm_sm_sweep(IN osm_sm_t * p_sm); +/* +* PARAMETERS +* p_sm +* [in] Pointer to an osm_sm_t object. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* +* SEE ALSO +* SM object +*********/ + +/****f* OpenSM: SM/osm_sm_bind +* NAME +* osm_sm_bind +* +* DESCRIPTION +* Binds the SM object to a port guid.
+* +* SYNOPSIS +*/ +ib_api_status_t osm_sm_bind(IN osm_sm_t * p_sm, IN ib_net64_t port_guid); +/* +* PARAMETERS +* p_sm +* [in] Pointer to an osm_sm_t object to bind. +* +* port_guid +* [in] Local port GUID with which to bind. +* +* +* RETURN VALUES +* None +* +* NOTES +* A given SM object can only be bound to one port at a time. +* +* SEE ALSO +*********/ + +/****f* OpenSM: SM/osm_req_get +* NAME +* osm_req_get +* +* DESCRIPTION +* Starts the process to transmit a directed route request for +* the attribute. +* +* SYNOPSIS +*/ +ib_api_status_t osm_req_get(IN osm_sm_t * sm, IN const osm_dr_path_t * p_path, + IN ib_net16_t attr_id, IN ib_net32_t attr_mod, + IN boolean_t find_mkey, IN ib_net64_t m_key, + IN uint32_t timeout, IN cl_disp_msgid_t err_msg, + IN const osm_madw_context_t * p_context); +/* +* PARAMETERS +* sm +* [in] Pointer to an osm_sm_t object. +* +* p_path +* [in] Pointer to the directed route path to the node +* from which to retrieve the attribute. +* +* attr_id +* [in] Attribute ID to request. +* +* attr_mod +* [in] Attribute modifier for this request. +* +* find_mkey +* [in] Flag to indicate whether the M_Key should be looked up for +* this MAD. +* m_key +* [in] M_Key value to be send with this MAD. Applied, only when +* find_mkey is FALSE. +* +* timeout +* [in] Transaction timeout in msec. +* +* err_msg +* [in] Message id with which to post this MAD if an error occurs. +* +* p_context +* [in] Mad wrapper context structure to be copied into the wrapper +* context, and thus visible to the recipient of the response. +* +* RETURN VALUES +* IB_SUCCESS if the request was successful. +* +* NOTES +* This function asynchronously requests the specified attribute. +* The response from the node will be routed through the Dispatcher +* to the appropriate receive controller object. 
+*********/ + +/****f* OpenSM: SM/osm_send_req_mad +* NAME +* osm_send_req_mad +* +* DESCRIPTION +* Starts the process to transmit a preallocated/predefined directed route +* Set() request. +* +* SYNOPSIS +*/ +void osm_send_req_mad(IN osm_sm_t * sm, IN osm_madw_t *p_madw); +/* +* PARAMETERS +* sm +* [in] Pointer to an osm_sm_t object. +* p_madw +* [in] Pointer to a preallocated MAD buffer +* +*********/ + +/****f* OpenSM: SM/osm_prepare_req_set +* NAME +* osm_prepare_req_set +* +* DESCRIPTION +* Preallocate and fill a directed route Set() MAD w/o sending it. +* +* SYNOPSIS +*/ +osm_madw_t *osm_prepare_req_set(IN osm_sm_t * sm, IN const osm_dr_path_t * p_path, + IN const uint8_t * p_payload, + IN size_t payload_size, IN ib_net16_t attr_id, + IN ib_net32_t attr_mod, IN boolean_t find_mkey, + IN ib_net64_t m_key, IN uint32_t timeout, + IN cl_disp_msgid_t err_msg, + IN const osm_madw_context_t * p_context); +/* +* PARAMETERS +* sm +* [in] Pointer to an osm_sm_t object. +* +* p_path +* [in] Pointer to the directed route path of the recipient. +* +* p_payload +* [in] Pointer to the SMP payload to send. +* +* payload_size +* [in] The size of the payload to be copied to the SMP data field. +* +* attr_id +* [in] Attribute ID to request. +* +* attr_mod +* [in] Attribute modifier for this request. +* +* find_mkey +* [in] Flag to indicate whether the M_Key should be looked up for +* this MAD. +* m_key +* [in] M_Key value to be sent with this MAD. Applied only when +* find_mkey is FALSE. +* +* timeout +* [in] Transaction timeout in msec. +* +* err_msg +* [in] Message id with which to post this MAD if an error occurs. +* +* p_context +* [in] Mad wrapper context structure to be copied into the wrapper +* context, and thus visible to the recipient of the response. +* +* RETURN VALUES +* Pointer to the MAD buffer in case of success and NULL in case of failure.
+* +*********/ + +/****f* OpenSM: SM/osm_req_set +* NAME +* osm_req_set +* +* DESCRIPTION +* Starts the process to transmit a directed route Set() request. +* +* SYNOPSIS +*/ +ib_api_status_t osm_req_set(IN osm_sm_t * sm, IN const osm_dr_path_t * p_path, + IN const uint8_t * p_payload, + IN size_t payload_size, IN ib_net16_t attr_id, + IN ib_net32_t attr_mod, IN boolean_t find_mkey, + IN ib_net64_t m_key, IN uint32_t timeout, + IN cl_disp_msgid_t err_msg, + IN const osm_madw_context_t * p_context); +/* +* PARAMETERS +* sm +* [in] Pointer to an osm_sm_t object. +* +* p_path +* [in] Pointer to the directed route path of the recipient. +* +* p_payload +* [in] Pointer to the SMP payload to send. +* +* payload_size +* [in] The size of the payload to be copied to the SMP data field. +* +* attr_id +* [in] Attribute ID to request. +* +* attr_mod +* [in] Attribute modifier for this request. +* +* find_mkey +* [in] Flag to indicate whether the M_Key should be looked up for +* this MAD. +* +* m_key +* [in] M_Key value to be sent with this MAD. Applied only when +* find_mkey is FALSE. +* +* timeout +* [in] Transaction timeout in msec. +* +* err_msg +* [in] Message id with which to post this MAD if an error occurs. +* +* p_context +* [in] Mad wrapper context structure to be copied into the wrapper +* context, and thus visible to the recipient of the response. +* +* RETURN VALUES +* IB_SUCCESS if the request was successful. +* +* NOTES +* This function asynchronously requests the specified attribute. +* The response from the node will be routed through the Dispatcher +* to the appropriate receive controller object. +*********/ +/****f* OpenSM: SM/osm_resp_send +* NAME +* osm_resp_send +* +* DESCRIPTION +* Starts the process to transmit a directed route response. 
+* +* SYNOPSIS +*/ +ib_api_status_t osm_resp_send(IN osm_sm_t * sm, + IN const osm_madw_t * p_req_madw, + IN ib_net16_t status, + IN const uint8_t * p_payload); +/* +* PARAMETERS +* sm +* [in] Pointer to an osm_sm_t object. +* +* p_req_madw +* [in] Pointer to the MAD Wrapper object for the requesting MAD +* to which this response is generated. +* +* status +* [in] Status for this response. +* +* p_payload +* [in] Pointer to the payload of the response MAD. +* +* RETURN VALUES +* IB_SUCCESS if the response was successful. +* +*********/ + +/****f* OpenSM: SM/osm_sm_reroute_mlid +* NAME +* osm_sm_reroute_mlid +* +* DESCRIPTION +* Requests (schedules) MLID rerouting +* +* SYNOPSIS +*/ +void osm_sm_reroute_mlid(osm_sm_t * sm, ib_net16_t mlid); + +/* +* PARAMETERS +* sm +* [in] Pointer to an osm_sm_t object. +* +* mlid +* [in] MLID value +* +* RETURN VALUES +* None +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: OpenSM/osm_sm_wait_for_subnet_up +* NAME +* osm_sm_wait_for_subnet_up +* +* DESCRIPTION +* Blocks the calling thread until the subnet is up. +* +* SYNOPSIS +*/ +static inline cl_status_t osm_sm_wait_for_subnet_up(IN osm_sm_t * p_sm, + IN uint32_t wait_us, + IN boolean_t interruptible) +{ + return cl_event_wait_on(&p_sm->subnet_up_event, wait_us, interruptible); +} + +/* +* PARAMETERS +* p_sm +* [in] Pointer to an osm_sm_t object. +* +* wait_us +* [in] Number of microseconds to wait. +* +* interruptible +* [in] Indicates whether the wait operation can be interrupted +* by external signals. +* +* RETURN VALUES +* CL_SUCCESS if the wait operation succeeded in response to the event +* being set. +* +* CL_TIMEOUT if the specified time period elapses. +* +* CL_NOT_DONE if the wait was interrupted by an external signal. +* +* CL_ERROR if the wait operation failed. 
+* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: State Manager/osm_sm_is_greater_than +* NAME +* osm_sm_is_greater_than +* +* DESCRIPTION +* Compares two SM's (14.4.1.2) +* +* SYNOPSIS +*/ +static inline boolean_t osm_sm_is_greater_than(IN uint8_t l_priority, + IN ib_net64_t l_guid, + IN uint8_t r_priority, + IN ib_net64_t r_guid) +{ + return (l_priority > r_priority + || (l_priority == r_priority + && cl_ntoh64(l_guid) < cl_ntoh64(r_guid))); +} + +/* +* PARAMETERS +* l_priority +* [in] Priority of the SM on the "left" +* +* l_guid +* [in] GUID of the SM on the "left" +* +* r_priority +* [in] Priority of the SM on the "right" +* +* r_guid +* [in] GUID of the SM on the "right" +* +* RETURN VALUES +* Return TRUE if an sm with l_priority and l_guid is higher than an sm +* with r_priority and r_guid, return FALSE otherwise. +* +* NOTES +* +* SEE ALSO +* State Manager +*********/ + +/****f* OpenSM: SM State Manager/osm_sm_state_mgr_process +* NAME +* osm_sm_state_mgr_process +* +* DESCRIPTION +* Processes and maintains the states of the SM. +* +* SYNOPSIS +*/ +ib_api_status_t osm_sm_state_mgr_process(IN osm_sm_t *sm, + IN osm_sm_signal_t signal); +/* +* PARAMETERS +* sm +* [in] Pointer to an osm_sm_t object. +* +* signal +* [in] Signal to the state SM engine. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +* State Manager +*********/ + +/****f* OpenSM: SM State Manager/osm_sm_state_mgr_signal_master_is_alive +* NAME +* osm_sm_state_mgr_signal_master_is_alive +* +* DESCRIPTION +* Signals that the remote Master SM is alive. +* Need to clear the retry_number variable. +* +* SYNOPSIS +*/ +void osm_sm_state_mgr_signal_master_is_alive(IN osm_sm_t *sm); +/* +* PARAMETERS +* sm +* [in] Pointer to an osm_sm_t object. +* +* RETURN VALUES +* None. 
+* +* NOTES +* +* SEE ALSO +* State Manager +*********/ + +/****f* OpenSM: SM State Manager/osm_sm_state_mgr_check_legality +* NAME +* osm_sm_state_mgr_check_legality +* +* DESCRIPTION +* Checks the legality of the signal received, according to the +* current state of the SM state machine. +* +* SYNOPSIS +*/ +ib_api_status_t osm_sm_state_mgr_check_legality(IN osm_sm_t *sm, + IN osm_sm_signal_t signal); +/* +* PARAMETERS +* sm +* [in] Pointer to an osm_sm_t object. +* +* signal +* [in] Signal to the state SM engine. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +* State Manager +*********/ + +void osm_report_sm_state(osm_sm_t *sm); + +/****f* OpenSM: SM State Manager/osm_send_trap144 +* NAME +* osm_send_trap144 +* +* DESCRIPTION +* Send trap 144 to the master SM. +* +* SYNOPSIS +*/ +int osm_send_trap144(osm_sm_t *sm, ib_net16_t local); +/* +* PARAMETERS +* sm +* [in] Pointer to an osm_sm_t object. +* +* local +* [in] OtherLocalChanges mask in network byte order. +* +* RETURN VALUES +* 0 on success, non-zero value otherwise. +* +*********/ + +void osm_set_sm_priority(osm_sm_t *sm, uint8_t priority); + +END_C_DECLS +#endif /* _OSM_SM_H_ */ diff --git a/include/opensm/osm_sm_mad_ctrl.h b/include/opensm/osm_sm_mad_ctrl.h new file mode 100644 index 0000000..4ec1e83 --- /dev/null +++ b/include/opensm/osm_sm_mad_ctrl.h @@ -0,0 +1,326 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_sm_mad_ctrl_t. + * This object represents a controller that receives SM MADs + * from the transport layer. + * This object is part of the OpenSM family of objects. 
+ */ + +#ifndef _OSM_SM_MAD_CTRL_H_ +#define _OSM_SM_MAD_CTRL_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/SM MAD Controller +* NAME +* SM MAD Controller +* +* DESCRIPTION +* The SM MAD Controller object encapsulates +* the information needed to receive MADs from the transport layer. +* +* The SM MAD Controller object is thread safe. +* +* This object should be treated as opaque and should be +* manipulated only through the provided functions. +* +* AUTHOR +* Steve King, Intel +* +*********/ +/****s* OpenSM: SM MAD Controller/osm_sm_mad_ctrl_t +* NAME +* osm_sm_mad_ctrl_t +* +* DESCRIPTION +* SM MAD Controller structure. +* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct osm_sm_mad_ctrl { + osm_log_t *p_log; + osm_subn_t *p_subn; + osm_mad_pool_t *p_mad_pool; + osm_vl15_t *p_vl15; + osm_vendor_t *p_vendor; + osm_bind_handle_t h_bind; + cl_plock_t *p_lock; + cl_dispatcher_t *p_disp; + cl_disp_reg_handle_t h_disp; + osm_stats_t *p_stats; +} osm_sm_mad_ctrl_t; +/* +* FIELDS +* p_log +* Pointer to the log object. +* +* p_subn +* Pointer to the subnet object. +* +* p_mad_pool +* Pointer to the MAD pool. +* +* p_vl15 +* Pointer to the VL15 interface object. +* +* p_vendor +* Pointer to the vendor specific interfaces object. +* +* h_bind +* Bind handle returned by the transport layer. +* +* p_lock +* Pointer to the serializing lock. +* +* p_disp +* Pointer to the Dispatcher. +* +* h_disp +* Handle returned from dispatcher registration. +* +* p_stats +* Pointer to the OpenSM statistics block. 
+* +* SEE ALSO +* SM MAD Controller object +*********/ + +/****f* OpenSM: SM MAD Controller/osm_sm_mad_ctrl_construct +* NAME +* osm_sm_mad_ctrl_construct +* +* DESCRIPTION +* This function constructs a SM MAD Controller object. +* +* SYNOPSIS +*/ +void osm_sm_mad_ctrl_construct(IN osm_sm_mad_ctrl_t * p_ctrl); +/* +* PARAMETERS +* p_ctrl +* [in] Pointer to a SM MAD Controller +* object to construct. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling osm_sm_mad_ctrl_init, and osm_sm_mad_ctrl_destroy. +* +* Calling osm_sm_mad_ctrl_construct is a prerequisite to calling any other +* method except osm_sm_mad_ctrl_init. +* +* SEE ALSO +* SM MAD Controller object, osm_sm_mad_ctrl_init, +* osm_sm_mad_ctrl_destroy +*********/ + +/****f* OpenSM: SM MAD Controller/osm_sm_mad_ctrl_destroy +* NAME +* osm_sm_mad_ctrl_destroy +* +* DESCRIPTION +* The osm_sm_mad_ctrl_destroy function destroys the object, releasing +* all resources. +* +* SYNOPSIS +*/ +void osm_sm_mad_ctrl_destroy(IN osm_sm_mad_ctrl_t * p_ctrl); +/* +* PARAMETERS +* p_ctrl +* [in] Pointer to the object to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs any necessary cleanup of the specified +* SM MAD Controller object. +* Further operations should not be attempted on the destroyed object. +* This function should only be called after a call to +* osm_sm_mad_ctrl_construct or osm_sm_mad_ctrl_init. +* +* SEE ALSO +* SM MAD Controller object, osm_sm_mad_ctrl_construct, +* osm_sm_mad_ctrl_init +*********/ + +/****f* OpenSM: SM MAD Controller/osm_sm_mad_ctrl_init +* NAME +* osm_sm_mad_ctrl_init +* +* DESCRIPTION +* The osm_sm_mad_ctrl_init function initializes a +* SM MAD Controller object for use. 
+* +* SYNOPSIS +*/ +ib_api_status_t osm_sm_mad_ctrl_init(IN osm_sm_mad_ctrl_t * p_ctrl, + IN osm_subn_t * p_subn, + IN osm_mad_pool_t * p_mad_pool, + IN osm_vl15_t * p_vl15, + IN osm_vendor_t * p_vendor, + IN osm_log_t * p_log, + IN osm_stats_t * p_stats, + IN cl_plock_t * p_lock, + IN cl_dispatcher_t * p_disp); +/* +* PARAMETERS +* p_ctrl +* [in] Pointer to an osm_sm_mad_ctrl_t object to initialize. +* +* p_subn +* [in] Pointer to the subnet object. +* +* p_mad_pool +* [in] Pointer to the MAD pool. +* +* p_vl15 +* [in] Pointer to the VL15 interface object. +* +* p_vendor +* [in] Pointer to the vendor specific interfaces object. +* +* p_log +* [in] Pointer to the log object. +* +* p_stats +* [in] Pointer to the OpenSM statistics block. +* +* p_lock +* [in] Pointer to the OpenSM serializing lock. +* +* p_disp +* [in] Pointer to the OpenSM central Dispatcher. +* +* RETURN VALUES +* IB_SUCCESS if the SM MAD Controller object was initialized +* successfully. +* +* NOTES +* Allows calling other SM MAD Controller methods. +* +* SEE ALSO +* SM MAD Controller object, osm_sm_mad_ctrl_construct, +* osm_sm_mad_ctrl_destroy +*********/ + +/****f* OpenSM: SM/osm_sm_mad_ctrl_bind +* NAME +* osm_sm_mad_ctrl_bind +* +* DESCRIPTION +* Binds the SM MAD Controller object to a port guid. +* +* SYNOPSIS +*/ +ib_api_status_t osm_sm_mad_ctrl_bind(IN osm_sm_mad_ctrl_t * p_ctrl, + IN ib_net64_t port_guid); +/* +* PARAMETERS +* p_ctrl +* [in] Pointer to an osm_sm_mad_ctrl_t object to bind. +* +* port_guid +* [in] Local port GUID with which to bind. +* +* +* RETURN VALUES +* None +* +* NOTES +* A given SM MAD Controller object can only be bound to one +* port at a time. +* +* SEE ALSO +*********/ + +/****f* OpenSM: SM/osm_sm_mad_ctrl_get_bind_handle +* NAME +* osm_sm_mad_ctrl_get_bind_handle +* +* DESCRIPTION +* Returns the bind handle. 
+* +* SYNOPSIS +*/ +static inline osm_bind_handle_t +osm_sm_mad_ctrl_get_bind_handle(IN const osm_sm_mad_ctrl_t * p_ctrl) +{ + return p_ctrl->h_bind; +} + +/* +* PARAMETERS +* p_ctrl +* [in] Pointer to an osm_sm_mad_ctrl_t object. +* +* RETURN VALUES +* Returns the bind handle, which may be OSM_BIND_INVALID_HANDLE +* if no port has been bound. +* +* NOTES +* A given SM MAD Controller object can only be bound to one +* port at a time. +* +* SEE ALSO +*********/ + +END_C_DECLS +#endif /* _OSM_SM_MAD_CTRL_H_ */ diff --git a/include/opensm/osm_stats.h b/include/opensm/osm_stats.h new file mode 100644 index 0000000..4331cfa --- /dev/null +++ b/include/opensm/osm_stats.h @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_stats_t. + * This object represents the OpenSM statistics object. + * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_STATS_H_ +#define _OSM_STATS_H_ + +#ifdef HAVE_LIBPTHREAD +#include +#else +#include +#endif +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/Statistics +* NAME +* OpenSM +* +* DESCRIPTION +* The OpenSM object encapsulates the information needed by the +* OpenSM to track interesting traffic and internal statistics. +* +* AUTHOR +* Steve King, Intel +* +*********/ +/****s* OpenSM: Statistics/osm_stats_t +* NAME +* osm_stats_t +* +* DESCRIPTION +* OpenSM statistics block. +* +* SYNOPSIS +*/ +typedef struct osm_stats { + atomic32_t qp0_mads_outstanding; + atomic32_t qp0_mads_outstanding_on_wire; + atomic32_t qp0_mads_rcvd; + atomic32_t qp0_mads_sent; + atomic32_t qp0_unicasts_sent; + atomic32_t qp0_mads_rcvd_unknown; + atomic32_t sa_mads_outstanding; + atomic32_t sa_mads_rcvd; + atomic32_t sa_mads_sent; + atomic32_t sa_mads_rcvd_unknown; + atomic32_t sa_mads_ignored; +#ifdef HAVE_LIBPTHREAD + pthread_mutex_t mutex; + pthread_cond_t cond; +#else + cl_event_t event; +#endif +} osm_stats_t; +/* +* FIELDS +* qp0_mads_outstanding +* Contains the number of MADs outstanding on QP0. +* When this value reaches zero, OpenSM has discovered all +* nodes on the subnet, and finished retrieving attributes. +* At that time, subnet configuration may begin. +* This variable must be manipulated using atomic instructions. 
+* +* qp0_mads_outstanding_on_wire +* The number of MADs outstanding on the wire at any moment. +* +* qp0_mads_rcvd +* Total number of QP0 MADs received. +* +* qp0_mads_sent +* Total number of QP0 MADs sent. +* +* qp0_unicasts_sent +* Total number of response-less MADs sent on the wire. This count +* includes getresp(), send() and trap() methods. +* +* qp0_mads_rcvd_unknown +* Total number of unknown QP0 MADs received. This includes +* unrecognized attribute IDs and methods. +* +* sa_mads_outstanding +* Contains the number of SA MADs outstanding on QP1. +* +* sa_mads_rcvd +* Total number of SA MADs received. +* +* sa_mads_sent +* Total number of SA MADs sent. +* +* sa_mads_rcvd_unknown +* Total number of unknown SA MADs received. This includes +* unrecognized attribute IDs and methods. +* +* sa_mads_ignored +* Total number of SA MADs ignored because SM is not +* master or SM is in first time sweep. +* +* SEE ALSO +***************/ + +static inline uint32_t osm_stats_inc_qp0_outstanding(osm_stats_t *stats) +{ + uint32_t outstanding; + +#ifdef HAVE_LIBPTHREAD + pthread_mutex_lock(&stats->mutex); + outstanding = ++stats->qp0_mads_outstanding; + pthread_mutex_unlock(&stats->mutex); +#else + outstanding = cl_atomic_inc(&stats->qp0_mads_outstanding); +#endif + + return outstanding; +} + +static inline uint32_t osm_stats_dec_qp0_outstanding(osm_stats_t *stats) +{ + uint32_t outstanding; + +#ifdef HAVE_LIBPTHREAD + pthread_mutex_lock(&stats->mutex); + outstanding = --stats->qp0_mads_outstanding; + if (!outstanding) + pthread_cond_signal(&stats->cond); + pthread_mutex_unlock(&stats->mutex); +#else + outstanding = cl_atomic_dec(&stats->qp0_mads_outstanding); + if (!outstanding) + cl_event_signal(&stats->event); +#endif + + return outstanding; +} + +END_C_DECLS +#endif /* _OSM_STATS_H_ */ diff --git a/include/opensm/osm_subnet.h b/include/opensm/osm_subnet.h new file mode 100644 index 0000000..78111b5 --- /dev/null +++ b/include/opensm/osm_subnet.h @@ -0,0 +1,1647 @@ +/* + * 
Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved. + * Copyright (c) 2009 System Fabric Works, Inc. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2009-2015 ZIH, TU Dresden, Federal Republic of Germany. All rights reserved. + * Copyright (C) 2012-2017 Tokyo Institute of Technology. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +/* + * Abstract: + * Declaration of osm_subn_t. + * This object represents an IBA subnet. + * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_SUBNET_H_ +#define _OSM_SUBNET_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +#define OSM_SUBNET_VECTOR_MIN_SIZE 0 +#define OSM_SUBNET_VECTOR_GROW_SIZE 1 +#define OSM_SUBNET_VECTOR_CAPACITY 256 + +#define OSM_PARTITION_ENFORCE_BOTH "both" +#define OSM_PARTITION_ENFORCE_IN "in" +#define OSM_PARTITION_ENFORCE_OUT "out" +#define OSM_PARTITION_ENFORCE_OFF "off" + +typedef enum _osm_partition_enforce_type_enum { + OSM_PARTITION_ENFORCE_TYPE_BOTH, + OSM_PARTITION_ENFORCE_TYPE_IN, + OSM_PARTITION_ENFORCE_TYPE_OUT, + OSM_PARTITION_ENFORCE_TYPE_OFF +} osm_partition_enforce_type_enum; + +/* XXX: not actual max, max we're currently going to support */ +#define OSM_CCT_ENTRY_MAX 128 +#define OSM_CCT_ENTRY_MAD_BLOCKS (OSM_CCT_ENTRY_MAX/64) + +struct osm_opensm; +struct osm_qos_policy; + +/****h* OpenSM/Subnet +* NAME +* Subnet +* +* DESCRIPTION +* The Subnet object encapsulates the information needed by the +* OpenSM to manage a subnet. The OpenSM allocates one Subnet object +* per IBA subnet. +* +* The Subnet object is not thread safe, thus callers must provide +* serialization. +* +* This object is essentially a container for the various components +* of a subnet. Callers may directly access the member variables. +* +* AUTHOR +* Steve King, Intel +* +*********/ + +/****s* OpenSM: Subnet/osm_qos_options_t +* NAME +* osm_qos_options_t +* +* DESCRIPTION +* Subnet QoS options structure. This structure contains the various +* QoS specific configuration parameters for the subnet. 
+* +* SYNOPSIS +*/ +typedef struct osm_qos_options { + unsigned max_vls; + int high_limit; + char *vlarb_high; + char *vlarb_low; + char *sl2vl; +} osm_qos_options_t; +/* +* FIELDS +* +* max_vls +* The number of maximum VLs on the Subnet (0 == use default) +* +* high_limit +* The limit of High Priority component of VL Arbitration +* table (IBA 7.6.9) (-1 == use default) +* +* vlarb_high +* High priority VL Arbitration table template. (NULL == use default) +* +* vlarb_low +* Low priority VL Arbitration table template. (NULL == use default) +* +* sl2vl +* SL2VL Mapping table (IBA 7.6.6) template. (NULL == use default) +* +*********/ + +/****s* OpenSM: Subnet/osm_cct_entry_t +* NAME +* osm_cct_entry_t +* +* DESCRIPTION +* Subnet Congestion Control Table entry. See A10.2.2.1.1 for format details. +* +* SYNOPSIS +*/ +typedef struct osm_cct_entry { + uint8_t shift; //Alex: shift 2 bits + uint16_t multiplier; //Alex multiplier 14 bits +} osm_cct_entry_t; +/* +* FIELDS +* +* shift +* shift field in CCT entry. See A10.2.2.1.1. +* +* multiplier +* multiplier field in CCT entry. See A10.2.2.1.1. +* +*********/ + +/****s* OpenSM: Subnet/osm_cacongestion_entry_t +* NAME +* osm_cacongestion_entry_t +* +* DESCRIPTION +* Subnet CA Congestion entry. See A10.4.3.8.4 for format details. +* +* SYNOPSIS +*/ +typedef struct osm_cacongestion_entry { + ib_net16_t ccti_timer; //Alex: ccti_timer and ccti_increase should be replaced + uint8_t ccti_increase; + uint8_t trigger_threshold; + uint8_t ccti_min; +} osm_cacongestion_entry_t; +/* +* FIELDS +* +* ccti_timer +* CCTI Timer +* +* ccti_increase +* CCTI Increase +* +* trigger_threshold +* CCTI trigger for log message +* +* ccti_min +* CCTI Minimum +* +*********/ + +/****s* OpenSM: Subnet/osm_cct_t +* NAME +* osm_cct_t +* +* DESCRIPTION +* Subnet CongestionControlTable. See A10.4.3.9 for format details. 
+* +* SYNOPSIS +*/ +typedef struct osm_cct { + osm_cct_entry_t entries[OSM_CCT_ENTRY_MAX]; + unsigned int entries_len; + char *input_str; +} osm_cct_t; +/* +* FIELDS +* +* entries +* Entries in CCT +* +* entries_len +* Length of entries +* +* input_str +* Original str input +* +*********/ + + +/****s* OpenSM: Subnet/osm_subn_opt_t +* NAME +* osm_subn_opt_t +* +* DESCRIPTION +* Subnet options structure. This structure contains the various +* site specific configuration parameters for the subnet. +* +* SYNOPSIS +*/ +typedef struct osm_subn_opt { + const char *config_file; + ib_net64_t guid; + ib_net64_t m_key; + ib_net64_t sm_key; + ib_net64_t sa_key; + ib_net64_t subnet_prefix; + ib_net16_t m_key_lease_period; + uint8_t m_key_protect_bits; + boolean_t m_key_lookup; + uint32_t sweep_interval; + uint32_t max_wire_smps; + uint32_t max_wire_smps2; + uint32_t max_smps_timeout; + uint32_t transaction_timeout; + uint32_t transaction_retries; + uint32_t long_transaction_timeout; + uint8_t sm_priority; + uint8_t lmc; + boolean_t lmc_esp0; + uint8_t max_op_vls; + uint8_t force_link_speed; + uint8_t force_link_speed_ext; + uint8_t force_link_width; + uint8_t fdr10; + boolean_t reassign_lids; + boolean_t ignore_other_sm; + boolean_t single_thread; + boolean_t disable_multicast; + boolean_t force_log_flush; + uint8_t subnet_timeout; + uint8_t packet_life_time; + uint8_t vl_stall_count; + uint8_t leaf_vl_stall_count; + uint8_t head_of_queue_lifetime; + uint8_t leaf_head_of_queue_lifetime; + uint8_t local_phy_errors_threshold; + uint8_t overrun_errors_threshold; + boolean_t use_mfttop; + uint32_t sminfo_polling_timeout; + uint32_t polling_retry_number; + uint32_t max_msg_fifo_timeout; + boolean_t force_heavy_sweep; + uint8_t log_flags; + char *dump_files_dir; + char *log_file; + uint32_t log_max_size; + char *partition_config_file; + boolean_t no_partition_enforcement; + char *part_enforce; + osm_partition_enforce_type_enum part_enforce_enum; + boolean_t allow_both_pkeys; + 
boolean_t keep_pkey_indexes; + uint8_t sm_assigned_guid; + boolean_t qos; + char *qos_policy_file; + boolean_t suppress_sl2vl_mad_status_errors; + boolean_t accum_log_file; + char *console; + uint16_t console_port; + char *port_prof_ignore_file; + char *hop_weights_file; + char *port_search_ordering_file; + boolean_t port_profile_switch_nodes; + boolean_t sweep_on_trap; + char *routing_engine_names; + boolean_t avoid_throttled_links; + boolean_t use_ucast_cache; + boolean_t connect_roots; + char *lid_matrix_dump_file; + char *lfts_file; + char *root_guid_file; + char *cn_guid_file; + char *io_guid_file; + boolean_t port_shifting; + uint32_t scatter_ports; + uint16_t max_reverse_hops; + char *ids_guid_file; + char *guid_routing_order_file; + boolean_t guid_routing_order_no_scatter; + char *sa_db_file; + boolean_t sa_db_dump; + char *torus_conf_file; + boolean_t do_mesh_analysis; + boolean_t exit_on_fatal; + boolean_t honor_guid2lid_file; + boolean_t daemon; + boolean_t sm_inactive; + boolean_t babbling_port_policy; + boolean_t drop_event_subscriptions; + boolean_t ipoib_mcgroup_creation_validation; + boolean_t mcgroup_join_validation; + boolean_t use_original_extended_sa_rates_only; + boolean_t use_optimized_slvl; + boolean_t fsync_high_avail_files; + osm_qos_options_t qos_options; + osm_qos_options_t qos_ca_options; + osm_qos_options_t qos_sw0_options; + osm_qos_options_t qos_swe_options; + osm_qos_options_t qos_rtr_options; + boolean_t congestion_control; + ib_net64_t cc_key; + uint32_t cc_max_outstanding_mads; + ib_net32_t cc_sw_cong_setting_control_map; + uint8_t cc_sw_cong_setting_victim_mask[IB_CC_PORT_MASK_DATA_SIZE]; + uint8_t cc_sw_cong_setting_credit_mask[IB_CC_PORT_MASK_DATA_SIZE]; + uint8_t cc_sw_cong_setting_threshold; + uint8_t cc_sw_cong_setting_packet_size; + uint8_t cc_sw_cong_setting_credit_starvation_threshold; + osm_cct_entry_t cc_sw_cong_setting_credit_starvation_return_delay; + ib_net16_t cc_sw_cong_setting_marking_rate; + ib_net16_t 
cc_ca_cong_setting_port_control; + ib_net16_t cc_ca_cong_setting_control_map; + osm_cacongestion_entry_t cc_ca_cong_entries[IB_CA_CONG_ENTRY_DATA_SIZE]; + osm_cct_t cc_cct; + boolean_t enable_quirks; + boolean_t no_clients_rereg; +#ifdef ENABLE_OSM_PERF_MGR + boolean_t perfmgr; + boolean_t perfmgr_redir; + uint16_t perfmgr_sweep_time_s; + uint32_t perfmgr_max_outstanding_queries; + boolean_t perfmgr_ignore_cas; + char *event_db_dump_file; + int perfmgr_rm_nodes; + boolean_t perfmgr_log_errors; + boolean_t perfmgr_query_cpi; + boolean_t perfmgr_xmit_wait_log; + uint32_t perfmgr_xmit_wait_threshold; +#endif /* ENABLE_OSM_PERF_MGR */ + char *event_plugin_name; + char *event_plugin_options; + char *node_name_map_name; + char *prefix_routes_file; + char *log_prefix; + boolean_t consolidate_ipv6_snm_req; + struct osm_subn_opt *file_opts; /* used for update */ + uint8_t lash_start_vl; /* starting vl to use in lash */ + uint8_t sm_sl; /* which SL to use for SM/SA communication */ + uint8_t nue_max_num_vls; /* maximum #VLs to use in nue */ + boolean_t nue_include_switches; /* control how nue treats switches */ + char *per_module_logging_file; + boolean_t quasi_ftree_indexing; +} osm_subn_opt_t; +/* +* FIELDS +* +* config_file +* The name of the config file. +* +* guid +* The port guid that the SM is binding to. +* +* m_key +* M_Key value sent to all ports qualifying all Set(PortInfo). +* +* sm_key +* SM_Key value of the SM used for SM authentication. +* +* sa_key +* SM_Key value to qualify rcv SA queries as "trusted". +* +* subnet_prefix +* Subnet prefix used on this subnet. +* +* m_key_lease_period +* The lease period used for the M_Key on this subnet. +* +* sweep_interval +* The number of seconds between subnet sweeps. A value of 0 +* disables sweeping. +* +* max_wire_smps +* The maximum number of SMPs sent in parallel. Default is 4. +* +* max_wire_smps2 +* The maximum number of timeout SMPs allowed to be outstanding. 
+* Default is same as max_wire_smps which disables the timeout +* mechanism. +* +* max_smps_timeout +* The wait time in usec for timeout based SMPs. Default is +* timeout * retries. +* +* transaction_timeout +* The maximum time in milliseconds allowed for a transaction +* to complete. Default is 200. +* +* transaction_retries +* The number of retries for a transaction. Default is 3. +* +* long_transaction_timeout +* The maximum time in milliseconds allowed for "long" transaction +* to complete. Default is 500. +* +* sm_priority +* The priority of this SM as specified by the user. This +* value is made available in the SMInfo attribute. +* +* lmc +* The LMC value used on this subnet. +* +* lmc_esp0 +* Whether LMC value used on subnet should be used for +* enhanced switch port 0 or not. If TRUE, it is used. +* Otherwise (the default), LMC is set to 0 for ESP0. +* +* max_op_vls +* Limit the maximal operational VLs. default is 1. +* +* reassign_lids +* If TRUE cause all lids to be re-assigned. +* Otherwise (the default), +* OpenSM always tries to preserve LIDs as much as possible. +* +* ignore_other_sm +* This flag is TRUE if other SMs on the subnet should be ignored. +* +* disable_multicast +* This flag is TRUE if OpenSM should disable multicast support. +* +* max_msg_fifo_timeout +* The maximal time a message can stay in the incoming message +* queue. If there is more than one message in the queue and the +* last message stayed in the queue more than this value the SA +* request will be immediately returned with a BUSY status. +* +* subnet_timeout +* The subnet_timeout that will be set for all the ports in the +* design (SubnSet(PortInfo.vl_stall_life)) +* +* vl_stall_count +* The number of sequential packets dropped that cause the port +* to enter the VLStalled state. +* +* leaf_vl_stall_count +* The number of sequential packets dropped that cause the port +* to enter the VLStalled state. This is for switch ports driving +* a CA or router port. 
+* +* head_of_queue_lifetime +* The maximal time a packet can live at the head of a VL queue +* on any port not driving a CA or router port. +* +* leaf_head_of_queue_lifetime +* The maximal time a packet can live at the head of a VL queue +* on switch ports driving a CA or router. +* +* local_phy_errors_threshold +* Threshold of local phy errors for sending Trap 129 +* +* overrun_errors_threshold +* Threshold of credits overrun errors for sending Trap 129 +* +* sminfo_polling_timeout +* Specifies the polling timeout (in milliseconds) - the timeout +* between one poll to another. +* +* packet_life_time +* The maximal time a packet can stay in a switch. +* The value is send to all switches as +* SubnSet(SwitchInfo.life_state) +* +* dump_files_dir +* The directory to be used for opensm-subnet.lst, opensm.fdbs, +* opensm.mcfdbs, and default log file (the latter for Windows, +* not Linux). +* +* log_file +* Name of the log file (or NULL) for stdout. +* +* log_max_size +* This option defines maximal log file size in MB. When +* specified the log file will be truncated upon reaching +* this limit. +* +* qos +* Boolean that specifies whether the OpenSM QoS functionality +* should be off or on. +* +* qos_policy_file +* Name of the QoS policy file. +* +* accum_log_file +* If TRUE (default) - the log file will be accumulated. +* If FALSE - the log file will be erased before starting +* current opensm run. +* +* port_prof_ignore_file +* Name of file with port guids to be ignored by port profiling. +* +* port_profile_switch_nodes +* If TRUE will count the number of switch nodes routed through +* the link. If FALSE - only CA/RT nodes are counted. +* +* sweep_on_trap +* Received traps will initiate a new sweep. +* +* routing_engine_names +* Name of routing engine(s) to use. 
+* +* avoid_throttled_links +* This option will enforce that throttled switch-to-switch links +* in the fabric are treated as 'broken' by the routing engines +* (if they support it), and hence no path is assigned to these +* underperforming links and a warning is logged instead. +* +* connect_roots +* The option which will enforce root to root connectivity with +* up/down and fat-tree routing engines (even if this violates +* "pure" deadlock free up/down or fat-tree algorithm) +* +* use_ucast_cache +* When TRUE enables unicast routing cache. +* +* lid_matrix_dump_file +* Name of the lid matrix dump file from where switch +* lid matrices (min hops tables) will be loaded +* +* lfts_file +* Name of the unicast LFTs routing file from where switch +* forwarding tables will be loaded +* +* root_guid_file +* Name of the file that contains list of root guids that +* will be used by fat-tree or up/dn routing (provided by User) +* +* cn_guid_file +* Name of the file that contains list of compute node guids that +* will be used by fat-tree routing (provided by User) +* +* io_guid_file +* Name of the file that contains list of I/O node guids that +* will be used by fat-tree routing (provided by User) +* +* port_shifting +* This option will turn on port_shifting in routing. +* +* ids_guid_file +* Name of the file that contains list of ids which should be +* used by Up/Down algorithm instead of node GUIDs +* +* guid_routing_order_file +* Name of the file that contains list of guids for routing order +* that will be used by minhop and up/dn routing (provided by User). +* +* sa_db_file +* Name of the SA database file. +* +* sa_db_dump +* When TRUE causes OpenSM to dump SA DB at the end of every +* light sweep regardless the current verbosity level. +* +* torus_conf_file +* Name of the file with extra configuration info for torus-2QoS +* routing engine. +* +* exit_on_fatal +* If TRUE (default) - SM will exit on fatal subnet initialization +* issues. +* If FALSE - SM will not exit. 
+* Fatal initialization issues: +* a. SM recognizes 2 different nodes with the same guid, or +* 12x link with lane reversal badly configured. +* +* honor_guid2lid_file +* Always honor the guid2lid file if it exists and is valid. This +* means that the file will be honored when SM is coming out of +* STANDBY. By default this is FALSE. +* +* daemon +* OpenSM will run in daemon mode. +* +* sm_inactive +* OpenSM will start with SM in not active state. +* +* babbling_port_policy +* OpenSM will enforce its "babbling" port policy. +* +* drop_event_subscriptions +* OpenSM will drop event subscriptions if the port goes away. +* +* ipoib_mcgroup_creation_validation +* OpenSM will validate IPoIB non-broadcast group parameters +* against IPoIB broadcast group. +* +* mcgroup_join_validation +* OpenSM will validate multicast join parameters against +* multicast group parameters when MC group already exists. +* +* use_original_extended_sa_rates_only +* Use only original extended SA rates (up through 300 Gbps +* for 12x EDR). Option is needed for subnets with +* old kernels/drivers that don't understand the +* new SA rates for 2x link width and/or HDR link speed (19-22). +* +* use_optimized_slvl +* Use optimized SLtoVLMappingTable programming if +* device indicates it supports this. +* +* fsync_high_avail_files +* Synchronize high availability in memory files +* with storage. 
+* +* perfmgr +* Enable or disable the performance manager +* +* perfmgr_redir +* Enable or disable the saving of redirection by PerfMgr +* +* perfmgr_sweep_time_s +* Define the period (in seconds) of PerfMgr sweeps +* +* event_db_dump_file +* File to dump the event database to +* +* event_plugin_name +* Specify the name(s) of the event plugin(s) +* +* event_plugin_options +* Options string that would be passed to the plugin(s) +* +* qos_options +* Default set of QoS options +* +* qos_ca_options +* QoS options for CA ports +* +* qos_sw0_options +* QoS options for switches' port 0 +* +* qos_swe_options +* QoS options for switches' external ports +* +* qos_rtr_options +* QoS options for router ports +* +* congestion_control +* Boolean that specifies whether OpenSM congestion control configuration +* should be off or on. +* +* cc_key +* CCkey to use when configuring congestion control. +* +* cc_max_outstanding_mads +* Max number of outstanding CC mads that can be on the wire. +* +* cc_sw_cong_setting_control_map +* Congestion Control Switch Congestion Setting Control Map +* configuration setting. +* +* cc_sw_cong_setting_victim_mask +* Congestion Control Switch Congestion Setting Victim Mask +* configuration setting. +* +* cc_sw_cong_setting_credit_mask +* Congestion Control Switch Congestion Setting Credit Mask +* configuration setting. +* +* cc_sw_cong_setting_threshold +* Congestion Control Switch Congestion Setting Threshold +* configuration setting. +* +* cc_sw_cong_setting_packet_size +* Congestion Control Switch Congestion Setting Packet Size +* configuration setting. +* +* cc_sw_cong_setting_credit_starvation_threshold +* Congestion Control Switch Congestion Setting Credit Starvation Threshold +* configuration setting. +* +* cc_sw_cong_setting_credit_starvation_return_delay +* Congestion Control Switch Congestion Setting Credit Starvation Return Delay +* configuration setting. 
+* +* cc_sw_cong_setting_marking_rate +* Congestion Control Switch Congestion Setting Marking Rate +* configuration setting. +* +* cc_ca_cong_setting_port_control +* Congestion Control CA Congestion Setting Port Control +* +* cc_ca_cong_setting_control_map +* Congestion Control CA Congestion Setting Control Map +* +* cc_ca_cong_entries +* Congestion Control CA Congestion Setting Entries +* +* cc_cct +* Congestion Control Table array of entries +* +* enable_quirks +* Enable high risk new features and not fully qualified +* hardware specific workarounds +* +* no_clients_rereg +* When TRUE disables clients reregistration request +* +* scatter_ports +* When not zero, randomize best possible ports chosen +* for a route. The value is used as a random key seed. +* +* per_module_logging_file +* File name of per module logging configuration. +* +* SEE ALSO +* Subnet object +*********/ + +/****s* OpenSM: Subnet/osm_subn_t +* NAME +* osm_subn_t +* +* DESCRIPTION +* Subnet structure. Callers may directly access member components, +* after grabbing a lock. +* +* TO DO +* This structure should probably be volatile. 
+* +* SYNOPSIS +*/ +typedef struct osm_subn { + struct osm_opensm *p_osm; + cl_qmap_t sw_guid_tbl; + cl_qmap_t node_guid_tbl; + cl_qmap_t port_guid_tbl; + cl_qmap_t alias_port_guid_tbl; + cl_qmap_t assigned_guids_tbl; + cl_qmap_t rtr_guid_tbl; + cl_qlist_t prefix_routes_list; + cl_qmap_t prtn_pkey_tbl; + cl_qmap_t sm_guid_tbl; + cl_qlist_t sa_sr_list; + cl_qlist_t sa_infr_list; + cl_qlist_t alias_guid_list; + cl_ptr_vector_t port_lid_tbl; + ib_net16_t master_sm_base_lid; + ib_net16_t sm_base_lid; + ib_net64_t sm_port_guid; + uint8_t last_sm_port_state; + uint8_t sm_state; + osm_subn_opt_t opt; + struct osm_qos_policy *p_qos_policy; + uint16_t max_ucast_lid_ho; + uint16_t max_mcast_lid_ho; + uint8_t min_ca_mtu; + uint8_t min_ca_rate; + uint8_t min_data_vls; + uint8_t min_sw_data_vls; + boolean_t ignore_existing_lfts; + boolean_t subnet_initialization_error; + boolean_t force_heavy_sweep; + boolean_t force_reroute; + boolean_t in_sweep_hop_0; + boolean_t force_first_time_master_sweep; + boolean_t first_time_master_sweep; + boolean_t coming_out_of_standby; + boolean_t sweeping_enabled; + unsigned need_update; + cl_fmap_t mgrp_mgid_tbl; + osm_db_domain_t *p_g2m; + osm_db_domain_t *p_neighbor; + void *mboxes[IB_LID_MCAST_END_HO - IB_LID_MCAST_START_HO + 1]; +} osm_subn_t; +/* +* FIELDS +* sw_guid_tbl +* Container of pointers to all Switch objects in the subnet. +* Indexed by node GUID. +* +* node_guid_tbl +* Container of pointers to all Node objects in the subnet. +* Indexed by node GUID. +* +* port_guid_tbl +* Container of pointers to all Port objects in the subnet. +* Indexed by port GUID. +* +* rtr_guid_tbl +* Container of pointers to all Router objects in the subnet. +* Indexed by node GUID. +* +* prtn_pkey_tbl +* Container of pointers to all Partition objects in the subnet. +* Indexed by P_KEY. +* +* sm_guid_tbl +* Container of pointers to SM objects representing other SMs +* on the subnet. 
+* +* port_lid_tbl +* Container of pointers to all Port objects in the subnet. +* Indexed by port LID. +* +* master_sm_base_lid +* The base LID owned by the subnet's master SM. +* +* sm_base_lid +* The base LID of the local port where the SM is. +* +* sm_port_guid +* This SM's own port GUID. +* +* last_sm_port_state +* Last state of this SM's port. +* 0 is down and 1 is up. +* +* sm_state +* The high-level state of the SM. This value is made available +* in the SMInfo attribute. +* +* opt +* Subnet options structure contains site specific configuration. +* +* p_qos_policy +* Subnet QoS policy structure. +* +* max_ucast_lid_ho +* The minimal max unicast lid reported by all switches +* +* max_mcast_lid_ho +* The minimal max multicast lid reported by all switches +* +* min_ca_mtu +* The minimal MTU reported by all CA ports on the subnet +* +* min_ca_rate +* The minimal rate reported by all CA ports on the subnet +* +* ignore_existing_lfts +* This flag is a dynamic flag to instruct the LFT assignment to +* ignore existing legal LFT settings. +* The value will be set according to : +* - Any change to the list of switches will set it to high +* - Coming out of STANDBY it will be cleared (other SM worked) +* - Set to FALSE upon end of all lft assignments. +* +* subnet_initialization_error +* Similar to the force_heavy_sweep flag. If TRUE - means that +* we had errors during initialization (due to SubnSet requests +* that failed). We want to declare the subnet as unhealthy, and +* force another heavy sweep. +* +* force_heavy_sweep +* If TRUE - we want to force a heavy sweep. This can be done +* either due to receiving of trap - meaning there is some change +* on the subnet, or we received a handover from a remote sm. +* In this case we want to sweep and reconfigure the entire +* subnet. This will cause another heavy sweep to occur when +* the current sweep is done. +* +* force_reroute +* If TRUE - we want to force switches in the fabric to be +* rerouted. 
+* +* in_sweep_hop_0 +* When in_sweep_hop_0 flag is set to TRUE - this means we are +* in sweep_hop_0 - meaning we do not want to continue beyond +* the current node. +* This is relevant for the case of SM on switch, since in the +* switch info we need to signal somehow not to continue +* the sweeping. +* +* force_first_time_master_sweep +* This flag is used to avoid race condition when Master SM being +* in the middle of very long configuration stage of the heavy sweep, +* receives HANDOVER from another MASTER SM. When the current heavy sweep +* is finished, new heavy sweep will be started immediately. +* At the beginning of the sweep, opensm will set first_time_master_sweep, +* force_heavy_sweep and coming_out_of_standby flags in order to allow full +* reconfiguration of the fabric. This is required as another MASTER SM could +* change configuration of the fabric before sending HANDOVER to MASTER SM. +* +* first_time_master_sweep +* This flag is used for the PortInfo setting. On the first +* sweep as master (meaning after moving from Standby|Discovering +* state), the SM must send a PortInfoSet to all ports. After +* that - we want to minimize the number of PortInfoSet requests +* sent, and to send only requests that change the value from +* what is updated in the port (or send a first request if this +* is a new port). We will set this flag to TRUE when entering +* the master state, and set it back to FALSE at the end of the +* drop manager. This is done since at the end of the drop manager +* we have updated all the ports that are reachable, and from now +* on these are the only ports we have data of. We don't want +* to send extra set requests to these ports anymore. +* +* coming_out_of_standby +* TRUE on the first sweep after the SM was in standby. +* Used for nulling any cache of LID and Routing. +* The flag is set true if the SM state was standby and now +* changed to MASTER it is reset at the end of the sweep. 
+* +* sweeping_enabled +* FALSE - sweeping is administratively disabled, all +* sweeping is inhibited, TRUE - sweeping is done +* normally +* +* need_update +* This flag should be on during first non-master heavy +* (including pre-master discovery stage) +* +* mgrp_mgid_tbl +* Container of pointers to all Multicast group objects in +* the subnet. Indexed by MGID. +* +* mboxes +* Array of pointers to all Multicast MLID box objects in the +* subnet. Indexed by MLID offset from base MLID. +* +* SEE ALSO +* Subnet object +*********/ + +/****s* OpenSM: Subnet/osm_assigned_guids_t +* NAME +* osm_assigned_guids_t +* +* DESCRIPTION +* SA assigned GUIDs structure. +* +* SYNOPSIS +*/ +typedef struct osm_assigned_guids { + cl_map_item_t map_item; + ib_net64_t port_guid; + ib_net64_t assigned_guid[1]; +} osm_assigned_guids_t; +/* +* FIELDS +* map_item +* Linkage structure for cl_qmap. MUST BE FIRST MEMBER! +* +* port_guid +* Base port GUID. +* +* assigned_guids +* Table of persistent SA assigned GUIDs. +* +* SEE ALSO +* Subnet object +*********/ + +/****f* OpenSM: Subnet/osm_subn_construct +* NAME +* osm_subn_construct +* +* DESCRIPTION +* This function constructs a Subnet object. +* +* SYNOPSIS +*/ +void osm_subn_construct(IN osm_subn_t * p_subn); +/* +* PARAMETERS +* p_subn +* [in] Pointer to a Subnet object to construct. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling osm_subn_init, and osm_subn_destroy. +* +* Calling osm_subn_construct is a prerequisite to calling any other +* method except osm_subn_init. +* +* SEE ALSO +* Subnet object, osm_subn_init, osm_subn_destroy +*********/ + +/****f* OpenSM: Subnet/osm_subn_destroy +* NAME +* osm_subn_destroy +* +* DESCRIPTION +* The osm_subn_destroy function destroys a subnet, releasing +* all resources. +* +* SYNOPSIS +*/ +void osm_subn_destroy(IN osm_subn_t * p_subn); +/* +* PARAMETERS +* p_subn +* [in] Pointer to a Subnet object to destroy. 
+* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs any necessary cleanup of the specified Subnet object. +* Further operations should not be attempted on the destroyed object. +* This function should only be called after a call to osm_subn_construct +* or osm_subn_init. +* +* SEE ALSO +* Subnet object, osm_subn_construct, osm_subn_init +*********/ + +/****f* OpenSM: Subnet/osm_subn_init +* NAME +* osm_subn_init +* +* DESCRIPTION +* The osm_subn_init function initializes a Subnet object for use. +* +* SYNOPSIS +*/ +ib_api_status_t osm_subn_init(IN osm_subn_t * p_subn, + IN struct osm_opensm *p_osm, + IN const osm_subn_opt_t * p_opt); +/* +* PARAMETERS +* p_subn +* [in] Pointer to an osm_subn_t object to initialize. +* +* p_opt +* [in] Pointer to the subnet options structure. +* +* RETURN VALUES +* IB_SUCCESS if the Subnet object was initialized successfully. +* +* NOTES +* Allows calling other Subnet methods. +* +* SEE ALSO +* Subnet object, osm_subn_construct, osm_subn_destroy +*********/ + +/* + Forward references. +*/ +struct osm_mad_addr; +struct osm_log; +struct osm_switch; +struct osm_physp; +struct osm_port; +struct osm_mgrp; + +/****f* OpenSM: Helper/osm_get_gid_by_mad_addr +* NAME +* osm_get_gid_by_mad_addr +* +* DESCRIPTION +* Looks for the requester gid in the mad address. +* +* Note: This code is not thread safe. Need to grab the lock before +* calling it. +* +* SYNOPSIS +*/ +ib_api_status_t osm_get_gid_by_mad_addr(IN struct osm_log *p_log, + IN const osm_subn_t * p_subn, + IN struct osm_mad_addr *p_mad_addr, + OUT ib_gid_t * p_gid); +/* +* PARAMETERS +* p_log +* [in] Pointer to a log object. +* +* p_subn +* [in] Pointer to subnet object. +* +* p_mad_addr +* [in] Pointer to mad address object. +* +* p_gid +* [out] Pointer to the GID structure to fill in. +* +* RETURN VALUES +* IB_SUCCESS if able to find the GID by address given. 
+* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Helper/osm_get_physp_by_mad_addr +* NAME +* osm_get_physp_by_mad_addr +* +* DESCRIPTION +* Looks for the requester physical port in the mad address. +* +* Note: This code is not thread safe. Need to grab the lock before +* calling it. +* +* SYNOPSIS +*/ +struct osm_physp *osm_get_physp_by_mad_addr(IN struct osm_log *p_log, + IN const osm_subn_t * p_subn, + IN struct osm_mad_addr + *p_mad_addr); +/* +* PARAMETERS +* p_log +* [in] Pointer to a log object. +* +* p_subn +* [in] Pointer to subnet object. +* +* p_mad_addr +* [in] Pointer to mad address object. +* +* RETURN VALUES +* Pointer to requester physical port object if found. Null otherwise. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Helper/osm_get_port_by_mad_addr +* NAME +* osm_get_port_by_mad_addr +* +* DESCRIPTION +* Looks for the requester port in the mad address. +* +* Note: This code is not thread safe. Need to grab the lock before +* calling it. +* +* SYNOPSIS +*/ +struct osm_port *osm_get_port_by_mad_addr(IN struct osm_log *p_log, + IN const osm_subn_t * p_subn, + IN struct osm_mad_addr *p_mad_addr); +/* +* PARAMETERS +* p_log +* [in] Pointer to a log object. +* +* p_subn +* [in] Pointer to subnet object. +* +* p_mad_addr +* [in] Pointer to mad address object. +* +* RETURN VALUES +* Pointer to requester port object if found. Null otherwise. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Subnet/osm_get_switch_by_guid +* NAME +* osm_get_switch_by_guid +* +* DESCRIPTION +* Looks for the given switch guid in the subnet table of switches by guid. +* NOTE: this code is not thread safe. Need to grab the lock before +* calling it. +* +* SYNOPSIS +*/ +struct osm_switch *osm_get_switch_by_guid(IN const osm_subn_t * p_subn, + IN ib_net64_t guid); +/* +* PARAMETERS +* p_subn +* [in] Pointer to an osm_subn_t object +* +* guid +* [in] The node guid in network byte order +* +* RETURN VALUES +* The switch structure pointer if found. 
NULL otherwise. +* +* SEE ALSO +* Subnet object, osm_subn_construct, osm_subn_destroy, +* osm_switch_t +*********/ + +/****f* OpenSM: Subnet/osm_get_node_by_guid +* NAME +* osm_get_node_by_guid +* +* DESCRIPTION +* This looks for the given node guid in the subnet table of nodes by guid. +* NOTE: this code is not thread safe. Need to grab the lock before +* calling it. +* +* SYNOPSIS +*/ +struct osm_node *osm_get_node_by_guid(IN osm_subn_t const *p_subn, + IN ib_net64_t guid); +/* +* PARAMETERS +* p_subn +* [in] Pointer to an osm_subn_t object +* +* guid +* [in] The node guid in network byte order +* +* RETURN VALUES +* The node structure pointer if found. NULL otherwise. +* +* SEE ALSO +* Subnet object, osm_subn_construct, osm_subn_destroy, +* osm_node_t +*********/ + +/****f* OpenSM: Subnet/osm_get_port_by_guid +* NAME +* osm_get_port_by_guid +* +* DESCRIPTION +* This looks for the given port guid in the subnet table of ports by guid. +* NOTE: this code is not thread safe. Need to grab the lock before +* calling it. +* +* SYNOPSIS +*/ +struct osm_port *osm_get_port_by_guid(IN osm_subn_t const *p_subn, + IN ib_net64_t guid); +/* +* PARAMETERS +* p_subn +* [in] Pointer to an osm_subn_t object +* +* guid +* [in] The port guid in network order +* +* RETURN VALUES +* The port structure pointer if found. NULL otherwise. +* +* SEE ALSO +* Subnet object, osm_subn_construct, osm_subn_destroy, +* osm_port_t +*********/ + +/****f* OpenSM: Port/osm_get_port_by_lid_ho +* NAME +* osm_get_port_by_lid_ho +* +* DESCRIPTION +* Returns a pointer of the port object for given lid value. +* +* SYNOPSIS +*/ +struct osm_port *osm_get_port_by_lid_ho(const osm_subn_t * subn, uint16_t lid); +/* +* PARAMETERS +* subn +* [in] Pointer to the subnet data structure. +* +* lid +* [in] LID requested in host byte order. +* +* RETURN VALUES +* The port structure pointer if found. NULL otherwise. 
+* +* SEE ALSO +* Subnet object, osm_port_t +*********/ + +/****f* OpenSM: Subnet/osm_get_alias_guid_by_guid +* NAME +* osm_get_alias_guid_by_guid +* +* DESCRIPTION +* This looks for the given port guid in the subnet table of ports by +* alias guid. +* NOTE: this code is not thread safe. Need to grab the lock before +* calling it. +* +* SYNOPSIS +*/ +struct osm_alias_guid *osm_get_alias_guid_by_guid(IN osm_subn_t const *p_subn, + IN ib_net64_t guid); +/* +* PARAMETERS +* p_subn +* [in] Pointer to an osm_subn_t object +* +* guid +* [in] The alias port guid in network order +* +* RETURN VALUES +* The alias guid structure pointer if found. NULL otherwise. +* +* SEE ALSO +* Subnet object, osm_subn_construct, osm_subn_destroy, +* osm_alias_guid_t +*********/ + +/****f* OpenSM: Subnet/osm_get_port_by_alias_guid +* NAME +* osm_get_port_by_alias_guid +* +* DESCRIPTION +* This looks for the given port guid in the subnet table of ports by +* alias guid. +* NOTE: this code is not thread safe. Need to grab the lock before +* calling it. +* +* SYNOPSIS +*/ +struct osm_port *osm_get_port_by_alias_guid(IN osm_subn_t const *p_subn, + IN ib_net64_t guid); +/* +* PARAMETERS +* p_subn +* [in] Pointer to an osm_subn_t object +* +* guid +* [in] The alias port guid in network order +* +* RETURN VALUES +* The port structure pointer if found. NULL otherwise. +* +* SEE ALSO +* Subnet object, osm_subn_construct, osm_subn_destroy, +* osm_port_t +*********/ + +/****f* OpenSM: Port/osm_assigned_guids_new +* NAME +* osm_assigned_guids_new +* +* DESCRIPTION +* This function allocates and initializes an assigned guids object. +* +* SYNOPSIS +*/ +osm_assigned_guids_t *osm_assigned_guids_new(IN const ib_net64_t port_guid, + IN const uint32_t num_guids); +/* +* PARAMETERS +* port_guid +* [in] Base port GUID in network order +* +* RETURN VALUE +* Pointer to the initialized assigned alias guid object. 
+* +* SEE ALSO +* Subnet object, osm_assigned_guids_t, osm_assigned_guids_delete, +* osm_get_assigned_guids_by_guid +*********/ + +/****f* OpenSM: Port/osm_assigned_guids_delete +* NAME +* osm_assigned_guids_delete +* +* DESCRIPTION +* This function destroys and deallocates an assigned guids object. +* +* SYNOPSIS +*/ +void osm_assigned_guids_delete(IN OUT osm_assigned_guids_t ** pp_assigned_guids); +/* +* PARAMETERS +* pp_assigned_guids +* [in][out] Pointer to a pointer to an assigned guids object to delete. +* On return, this pointer is NULL. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs any necessary cleanup of the specified assigned guids object. +* +* SEE ALSO +* Subnet object, osm_assigned_guids_new, osm_get_assigned_guids_by_guid +*********/ + +/****f* OpenSM: Subnet/osm_get_assigned_guids_by_guid +* NAME +* osm_get_assigned_guids_by_guid +* +* DESCRIPTION +* This looks for the given port guid and returns a pointer +* to the guid table of SA assigned alias guids for that port. +* +* SYNOPSIS +*/ +osm_assigned_guids_t *osm_get_assigned_guids_by_guid(IN osm_subn_t const *p_subn, + IN ib_net64_t port_guid); +/* +* PARAMETERS +* p_subn +* [in] Pointer to an osm_subn_t object +* +* port_guid +* [in] The base port guid in network order +* +* RETURN VALUES +* The osm_assigned_guids structure pointer if found. NULL otherwise. +* +* SEE ALSO +* Subnet object, osm_assigned_guids_new, osm_assigned_guids_delete, +* osm_assigned_guids_t +*********/ + +/****f* OpenSM: Port/osm_get_port_by_lid +* NAME +* osm_get_port_by_lid +* +* DESCRIPTION +* Returns a pointer of the port object for given lid value. +* +* SYNOPSIS +*/ +static inline struct osm_port *osm_get_port_by_lid(IN osm_subn_t const * subn, + IN ib_net16_t lid) +{ + return osm_get_port_by_lid_ho(subn, cl_ntoh16(lid)); +} +/* +* PARAMETERS +* subn +* [in] Pointer to the subnet data structure. +* +* lid +* [in] LID requested in network byte order. 
+* +* RETURN VALUES +* The port structure pointer if found. NULL otherwise. +* +* SEE ALSO +* Subnet object, osm_port_t +*********/ + +/****f* OpenSM: Subnet/osm_get_mgrp_by_mgid +* NAME +* osm_get_mgrp_by_mgid +* +* DESCRIPTION +* This looks for the given multicast group in the subnet table by mgid. +* NOTE: this code is not thread safe. Need to grab the lock before +* calling it. +* +* SYNOPSIS +*/ +struct osm_mgrp *osm_get_mgrp_by_mgid(IN osm_subn_t * subn, IN ib_gid_t * mgid); +/* +* PARAMETERS +* subn +* [in] Pointer to an osm_subn_t object +* +* mgid +* [in] The multicast group MGID value +* +* RETURN VALUES +* The multicast group structure pointer if found. NULL otherwise. +*********/ + +/****f* OpenSM: Subnet/osm_get_mbox_by_mlid +* NAME +* osm_get_mbox_by_mlid +* +* DESCRIPTION +* This looks for the given multicast group in the subnet table by mlid. +* NOTE: this code is not thread safe. Need to grab the lock before +* calling it. +* +* SYNOPSIS +*/ +static inline struct osm_mgrp_box *osm_get_mbox_by_mlid(osm_subn_t const *p_subn, ib_net16_t mlid) +{ + return (struct osm_mgrp_box *)p_subn->mboxes[cl_ntoh16(mlid) - IB_LID_MCAST_START_HO]; +} +/* +* PARAMETERS +* p_subn +* [in] Pointer to an osm_subn_t object +* +* mlid +* [in] The multicast group mlid in network order +* +* RETURN VALUES +* The multicast group structure pointer if found. NULL otherwise. +*********/ + +int is_mlnx_ext_port_info_supported(ib_net32_t vendid, ib_net16_t devid); + +/****f* OpenSM: Subnet/osm_subn_set_default_opt +* NAME +* osm_subn_set_default_opt +* +* DESCRIPTION +* The osm_subn_set_default_opt function sets the default options. +* +* SYNOPSIS +*/ +void osm_subn_set_default_opt(IN osm_subn_opt_t * p_opt); +/* +* PARAMETERS +* +* p_opt +* [in] Pointer to the subnet options structure. 
+* +* RETURN VALUES +* None +* +* NOTES +* +* SEE ALSO +* Subnet object, osm_subn_construct, osm_subn_destroy +*********/ + +/****f* OpenSM: Subnet/osm_subn_parse_conf_file +* NAME +* osm_subn_parse_conf_file +* +* DESCRIPTION +* The osm_subn_parse_conf_file function parses the configuration file +* and sets the defaults accordingly. +* +* SYNOPSIS +*/ +int osm_subn_parse_conf_file(const char *conf_file, osm_subn_opt_t * p_opt); +/* +* PARAMETERS +* +* p_opt +* [in] Pointer to the subnet options structure. +* +* RETURN VALUES +* 0 on success, positive value if file doesn't exist, +* negative value otherwise +*********/ + +/****f* OpenSM: Subnet/osm_subn_rescan_conf_files +* NAME +* osm_subn_rescan_conf_files +* +* DESCRIPTION +* The osm_subn_rescan_conf_files function parses the configuration +* files and update selected subnet options +* +* SYNOPSIS +*/ +int osm_subn_rescan_conf_files(IN osm_subn_t * p_subn); +/* +* PARAMETERS +* +* p_subn +* [in] Pointer to the subnet structure. +* +* RETURN VALUES +* 0 on success, positive value if file doesn't exist, +* negative value otherwise +* +*********/ + +/****f* OpenSM: Subnet/osm_subn_output_conf +* NAME +* osm_subn_output_conf +* +* DESCRIPTION +* Output configuration info +* +* SYNOPSIS +*/ +void osm_subn_output_conf(FILE *out, IN osm_subn_opt_t * p_opt); +/* +* PARAMETERS +* +* out +* [in] File stream to output to. +* +* p_opt +* [in] Pointer to the subnet options structure. +* +* RETURN VALUES +* This method does not return a value +*********/ + +/****f* OpenSM: Subnet/osm_subn_write_conf_file +* NAME +* osm_subn_write_conf_file +* +* DESCRIPTION +* Write the configuration file into the cache +* +* SYNOPSIS +*/ +int osm_subn_write_conf_file(char *file_name, IN osm_subn_opt_t * p_opt); +/* +* PARAMETERS +* +* p_opt +* [in] Pointer to the subnet options structure. 
+* +* RETURN VALUES +* 0 on success, negative value otherwise +* +* NOTES +* Assumes the conf file is part of the cache dir which defaults to +* OSM_DEFAULT_CACHE_DIR or OSM_CACHE_DIR the name is opensm.opts +*********/ +int osm_subn_verify_config(osm_subn_opt_t * p_opt); + +END_C_DECLS +#endif /* _OSM_SUBNET_H_ */ diff --git a/include/opensm/osm_switch.h b/include/opensm/osm_switch.h new file mode 100644 index 0000000..72bda06 --- /dev/null +++ b/include/opensm/osm_switch.h @@ -0,0 +1,1189 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2009 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_switch_t. + * This object represents an IBA switch. + * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_SWITCH_H_ +#define _OSM_SWITCH_H_ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/Switch +* NAME +* Switch +* +* DESCRIPTION +* The Switch object encapsulates the information needed by the +* OpenSM to manage switches. The OpenSM allocates one switch object +* per switch in the IBA subnet. +* +* The Switch object is not thread safe, thus callers must provide +* serialization. +* +* This object should be treated as opaque and should be +* manipulated only through the provided functions. +* +* AUTHOR +* Steve King, Intel +* +*********/ +/****s* OpenSM: Switch/osm_switch_t +* NAME +* osm_switch_t +* +* DESCRIPTION +* Switch structure. +* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. 
+* +* SYNOPSIS +*/ +typedef struct osm_switch { + cl_map_item_t map_item; + osm_node_t *p_node; + ib_switch_info_t switch_info; + uint16_t max_lid_ho; + uint8_t num_ports; + uint16_t num_hops; + uint8_t **hops; + osm_port_profile_t *p_prof; + uint8_t *search_ordering_ports; + uint8_t *lft; + uint8_t *new_lft; + uint16_t lft_size; + osm_mcast_tbl_t mcast_tbl; + int32_t mft_block_num; + uint32_t mft_position; + unsigned endport_links; + unsigned need_update; + void *priv; + cl_map_item_t mgrp_item; + uint32_t num_of_mcm; + uint8_t is_mc_member; +} osm_switch_t; +/* +* FIELDS +* map_item +* Linkage structure for cl_qmap. MUST BE FIRST MEMBER! +* +* p_node +* Pointer to the Node object for this switch. +* +* switch_info +* IBA defined SwitchInfo structure for this switch. +* +* max_lid_ho +* Max LID that is accessible from this switch. +* +* num_ports +* Number of ports for this switch. +* +* num_hops +* Size of hops table for this switch. +* +* hops +* LID Matrix for this switch containing the hop count +* to every LID from every port. +* +* p_prof +* Pointer to array of Port Profile objects for this switch. +* +* search_ordering_ports +* Optional table, indexed by port number, that +* osm_switch_get_dimn_port uses to reorder ports. When NULL +* the port number is used unchanged. +* +* lft +* This switch's linear forwarding table. +* +* new_lft +* This switch's linear forwarding table, as was +* calculated by the last routing engine execution. +* +* lft_size +* Bound, in entries, on writes into lft: +* osm_switch_set_lft_block rejects any block that would +* extend past it. +* +* mcast_tbl +* Multicast forwarding table for this switch. +* +* need_update +* When set, indicates that the switch was probably reset, so +* the forwarding tables and the rest of the cached data +* should be flushed. +* +* mgrp_item +* map item for switch in building mcast tree +* +* num_of_mcm +* number of mcast members(ports) connected to switch +* +* is_mc_member +* whether switch is a mcast member itself +* +* NOTE(review): mft_block_num, mft_position, endport_links and priv +* are not described here; their meaning should be confirmed against +* their users before documenting them. +* +* SEE ALSO +* Switch object +*********/ + +/****s* OpenSM: Switch/struct osm_remote_guids_count +* NAME +* struct osm_remote_guids_count +* +* DESCRIPTION +* Stores array of pointers to remote node and the numbers of +* times a switch has forwarded to it. 
+* +* SYNOPSIS +*/ +struct osm_remote_guids_count { + unsigned count; + struct osm_remote_node { + osm_node_t *node; + unsigned forwarded_to; + uint8_t port; + } guids[0]; +}; +/* +* FIELDS +* count +* Number of used entries in the guids array. +* +* node +* A pointer to node. +* +* forwarded_to +* A count of lids forwarded to this node. +* +* port +* Port number on the node. +*********/ + +/****f* OpenSM: Switch/osm_switch_delete +* NAME +* osm_switch_delete +* +* DESCRIPTION +* Destroys and deallocates the object. +* +* SYNOPSIS +*/ +void osm_switch_delete(IN OUT osm_switch_t ** pp_sw); +/* +* PARAMETERS +* pp_sw +* [in/out] Pointer to the pointer to the object to destroy. +* +* RETURN VALUE +* None. +* +* NOTES +* +* SEE ALSO +* Switch object, osm_switch_new +*********/ + +/****f* OpenSM: Switch/osm_switch_new +* NAME +* osm_switch_new +* +* DESCRIPTION +* The osm_switch_new function initializes a Switch object for use. +* +* SYNOPSIS +*/ +osm_switch_t *osm_switch_new(IN osm_node_t * p_node, + IN const osm_madw_t * p_madw); +/* +* PARAMETERS +* p_node +* [in] Pointer to the node object of this switch +* +* p_madw +* [in] Pointer to the MAD Wrapper containing the switch's +* SwitchInfo attribute. +* +* RETURN VALUES +* Pointer to the newly initialized switch object. +* +* NOTES +* +* SEE ALSO +* Switch object, osm_switch_delete +*********/ + +/****f* OpenSM: Switch/osm_switch_get_hop_count +* NAME +* osm_switch_get_hop_count +* +* DESCRIPTION +* Returns the hop count at the specified LID/Port intersection. +* Returns OSM_NO_PATH when lid_ho is above max_lid_ho or when the +* hops table has no row for lid_ho. port_num is not range checked. +* +* SYNOPSIS +*/ +static inline uint8_t osm_switch_get_hop_count(IN const osm_switch_t * p_sw, + IN uint16_t lid_ho, + IN uint8_t port_num) +{ + return (lid_ho > p_sw->max_lid_ho || !p_sw->hops[lid_ho]) ? + OSM_NO_PATH : p_sw->hops[lid_ho][port_num]; +} +/* +* PARAMETERS +* p_sw +* [in] Pointer to a Switch object. 
+* +* lid_ho +* [in] LID value (host order) for which to return the hop count +* +* port_num +* [in] Port number in the switch +* +* RETURN VALUES +* Returns the hop count at the specified LID/Port intersection. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Switch/osm_switch_set_hops +* NAME +* osm_switch_set_hops +* +* DESCRIPTION +* Sets the hop count at the specified LID/Port intersection. +* +* SYNOPSIS +*/ +cl_status_t osm_switch_set_hops(IN osm_switch_t * p_sw, IN uint16_t lid_ho, + IN uint8_t port_num, IN uint8_t num_hops); +/* +* PARAMETERS +* p_sw +* [in] Pointer to a Switch object. +* +* lid_ho +* [in] LID value (host order) for which to set the count. +* +* port_num +* [in] port number for which to set the count. +* +* num_hops +* [in] value to assign to this entry. +* +* RETURN VALUES +* Returns 0 if successful. -1 if it failed +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Switch/osm_switch_clear_hops +* NAME +* osm_switch_clear_hops +* +* DESCRIPTION +* Cleanup existing hops tables (lid matrix) +* +* SYNOPSIS +*/ +void osm_switch_clear_hops(IN osm_switch_t * p_sw); +/* +* PARAMETERS +* p_sw +* [in] Pointer to a Switch object. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Switch/osm_switch_get_least_hops +* NAME +* osm_switch_get_least_hops +* +* DESCRIPTION +* Returns the number of hops in the short path to this lid from +* any port on the switch. +* +* SYNOPSIS +*/ +static inline uint8_t osm_switch_get_least_hops(IN const osm_switch_t * p_sw, + IN uint16_t lid_ho) +{ + return (lid_ho > p_sw->max_lid_ho || !p_sw->hops[lid_ho]) ? + OSM_NO_PATH : p_sw->hops[lid_ho][0]; +} +/* +* PARAMETERS +* p_sw +* [in] Pointer to an osm_switch_t object. +* +* lid_ho +* [in] LID (host order) for which to retrieve the shortest hop count. +* +* RETURN VALUES +* Returns the number of hops in the short path to this lid from +* any port on the switch. 
+* +* NOTES +* +* SEE ALSO +* Switch object +*********/ + +/****f* OpenSM: Switch/osm_switch_get_port_least_hops +* NAME +* osm_switch_get_port_least_hops +* +* DESCRIPTION +* Returns the number of hops in the shortest path to this port from +* any port on the switch. +* +* SYNOPSIS +*/ +uint8_t osm_switch_get_port_least_hops(IN const osm_switch_t * p_sw, + IN const osm_port_t * p_port); +/* +* PARAMETERS +* p_sw +* [in] Pointer to an osm_switch_t object. +* +* p_port +* [in] Pointer to an osm_port_t object for which to +* retrieve the shortest hop count. +* +* RETURN VALUES +* Returns the number of hops in the shortest path to this port from +* any port on the switch. +* +* NOTES +* +* SEE ALSO +* Switch object +*********/ + +/****d* OpenSM: osm_lft_type_enum +* NAME +* osm_lft_type_enum +* +* DESCRIPTION +* Enumerates the LFT sets of a switch: OSM_LFT selects the lft +* table and OSM_NEW_LFT selects the new_lft table. +* +* SYNOPSIS +*/ +typedef enum osm_lft_type_enum { + OSM_LFT = 0, + OSM_NEW_LFT +} osm_lft_type_enum; +/***********/ + +/****f* OpenSM: Switch/osm_switch_get_port_by_lid +* NAME +* osm_switch_get_port_by_lid +* +* DESCRIPTION +* Returns the switch port number on which the specified LID is routed. +* +* SYNOPSIS +*/ +static inline uint8_t osm_switch_get_port_by_lid(IN const osm_switch_t * p_sw, + IN uint16_t lid_ho, + IN osm_lft_type_enum lft_enum) +{ + if (lid_ho == 0 || lid_ho > p_sw->max_lid_ho) + return OSM_NO_PATH; + return lft_enum == OSM_LFT ? p_sw->lft[lid_ho] : p_sw->new_lft[lid_ho]; +} +/* +* PARAMETERS +* p_sw +* [in] Pointer to an osm_switch_t object. +* +* lid_ho +* [in] LID (host order) for which to retrieve the routed port. +* +* lft_enum +* [in] Use LFT that was calculated by routing engine, or +* current LFT on the switch. +* +* RETURN VALUES +* Returns the switch port on which the specified LID is routed, +* or OSM_NO_PATH if lid_ho is 0 or above max_lid_ho. 
+* +* NOTES +* +* SEE ALSO +* Switch object +*********/ + +/****f* OpenSM: Switch/osm_switch_get_route_by_lid +* NAME +* osm_switch_get_route_by_lid +* +* DESCRIPTION +* Gets the physical port object that routes the specified LID. +* +* SYNOPSIS +*/ +static inline osm_physp_t *osm_switch_get_route_by_lid(IN const osm_switch_t * + p_sw, IN ib_net16_t lid) +{ + uint8_t port_num; + + CL_ASSERT(p_sw); + CL_ASSERT(lid); + + port_num = osm_switch_get_port_by_lid(p_sw, cl_ntoh16(lid), + OSM_NEW_LFT); + + /* + In order to avoid holes in the subnet (usually happens when + running UPDN algorithm), i.e. cases where port is + unreachable through a switch (we put an OSM_NO_PATH value at + the port entry, we do not assert on unreachable lid entries + at the fwd table but return NULL + */ + if (port_num != OSM_NO_PATH) + return (osm_node_get_physp_ptr(p_sw->p_node, port_num)); + else + return NULL; +} +/* +* PARAMETERS +* p_sw +* [in] Pointer to an osm_switch_t object. +* +* lid +* [in] LID for which to find a route. This must be a unicast +* LID value < 0xC000. +* +* RETURN VALUES +* Returns a pointer to the Physical Port Object object that +* routes the specified LID. A return value of zero means +* there is no route for the lid through this switch. +* The lid value must be a unicast LID. +* +* NOTES +* +* SEE ALSO +* Switch object +*********/ + +/****f* OpenSM: Switch/osm_switch_sp0_is_lmc_capable +* NAME +* osm_switch_sp0_is_lmc_capable +* +* DESCRIPTION +* Returns whether switch port 0 (SP0) can support LMC +* +*/ +static inline unsigned +osm_switch_sp0_is_lmc_capable(IN const osm_switch_t * p_sw, + IN osm_subn_t * p_subn) +{ + return (p_subn->opt.lmc_esp0 && + ib_switch_info_is_enhanced_port0(&p_sw->switch_info)) ? 1 : 0; +} +/* +* PARAMETERS +* p_sw +* [in] Pointer to an osm_switch_t object. +* +* p_subn +* [in] Pointer to an osm_subn_t object. +* +* RETURN VALUES +* TRUE if SP0 is enhanced and globally enabled. FALSE otherwise. 
+* +* NOTES +* This is workaround function, it takes into account user defined +* p_subn->opt.lmc_esp0 parameter. +* +* SEE ALSO +*********/ + +/****f* OpenSM: Switch/osm_switch_get_max_block_id_in_use +* NAME +* osm_switch_get_max_block_id_in_use +* +* DESCRIPTION +* Returns the maximum block ID (host order) of this switch that +* is used for unicast routing. +* +* SYNOPSIS +*/ +static inline uint16_t +osm_switch_get_max_block_id_in_use(IN const osm_switch_t * p_sw) +{ + return cl_ntoh16(p_sw->switch_info.lin_top) / IB_SMP_DATA_SIZE; +} +/* +* PARAMETERS +* p_sw +* [in] Pointer to an osm_switch_t object. +* +* RETURN VALUES +* Returns the maximum block ID (host order) of this switch. +* +* NOTES +* +* SEE ALSO +* Switch object +*********/ + +/****f* OpenSM: Switch/osm_switch_get_lft_block +* NAME +* osm_switch_get_lft_block +* +* DESCRIPTION +* Retrieve a linear forwarding table block. +* +* SYNOPSIS +*/ +boolean_t osm_switch_get_lft_block(IN const osm_switch_t * p_sw, + IN uint16_t block_id, OUT uint8_t * p_block); +/* +* PARAMETERS +* p_sw +* [in] Pointer to an osm_switch_t object. +* +* block_id +* [in] The block_id to retrieve. +* +* p_block +* [out] Pointer to the 64 byte array to store the +* forwarding table block specified by block_id. +* +* RETURN VALUES +* Returns true if there are more blocks necessary to +* configure all the LIDs reachable from this switch. +* FALSE otherwise. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Switch/osm_switch_supports_mcast +* NAME +* osm_switch_supports_mcast +* +* DESCRIPTION +* Indicates if a switch supports multicast. +* +* SYNOPSIS +*/ +static inline boolean_t osm_switch_supports_mcast(IN const osm_switch_t * p_sw) +{ + return (p_sw->switch_info.mcast_cap != 0); +} +/* +* PARAMETERS +* p_sw +* [in] Pointer to an osm_switch_t object. +* +* RETURN VALUES +* Returns TRUE if the switch supports multicast. +* FALSE otherwise. 
+* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Switch/osm_switch_set_switch_info +* NAME +* osm_switch_set_switch_info +* +* DESCRIPTION +* Updates the switch info attribute of this switch. +* +* SYNOPSIS +*/ +static inline void osm_switch_set_switch_info(IN osm_switch_t * p_sw, + IN const ib_switch_info_t * p_si) +{ + CL_ASSERT(p_sw); + CL_ASSERT(p_si); + p_sw->switch_info = *p_si; +} +/* +* PARAMETERS +* p_sw +* [in] Pointer to a Switch object. +* +* p_si +* [in] Pointer to the SwitchInfo attribute for this switch. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Switch/osm_switch_count_path +* NAME +* osm_switch_count_path +* +* DESCRIPTION +* Counts this path in port profile. +* +* SYNOPSIS +*/ +static inline void osm_switch_count_path(IN osm_switch_t * p_sw, + IN uint8_t port) +{ + osm_port_prof_path_count_inc(&p_sw->p_prof[port]); +} +/* +* PARAMETERS +* p_sw +* [in] Pointer to the switch object. +* +* port +* [in] Port to count path. +* +* RETURN VALUE +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Switch/osm_switch_set_lft_block +* NAME +* osm_switch_set_lft_block +* +* DESCRIPTION +* Copies the specified block into +* the switch's Linear Forwarding Table. +* +* SYNOPSIS +*/ +static inline ib_api_status_t +osm_switch_set_lft_block(IN osm_switch_t * p_sw, IN const uint8_t * p_block, + IN uint32_t block_num) +{ + uint16_t lid_start; + + CL_ASSERT(p_sw); + /* Check in block units: block_num * IB_SMP_DATA_SIZE may not fit in 16 bits */ + if (block_num >= (uint32_t) (p_sw->lft_size / IB_SMP_DATA_SIZE)) + return IB_INVALID_PARAMETER; + lid_start = (uint16_t) (block_num * IB_SMP_DATA_SIZE); + memcpy(&p_sw->lft[lid_start], p_block, IB_SMP_DATA_SIZE); + return IB_SUCCESS; +} +/* +* PARAMETERS +* p_sw +* [in] Pointer to the switch object. +* +* p_block +* [in] Pointer to the forwarding table block. +* +* block_num +* [in] Block number for this block +* +* RETURN VALUE +* IB_SUCCESS, or IB_INVALID_PARAMETER if the block is outside the LFT. 
+* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Switch/osm_switch_set_mft_block +* NAME +* osm_switch_set_mft_block +* +* DESCRIPTION +* Sets a block of multicast port masks into the multicast table. +* +* SYNOPSIS +*/ +static inline ib_api_status_t +osm_switch_set_mft_block(IN osm_switch_t * p_sw, IN const ib_net16_t * p_block, + IN uint16_t block_num, IN uint8_t position) +{ + CL_ASSERT(p_sw); + return osm_mcast_tbl_set_block(&p_sw->mcast_tbl, p_block, block_num, + position); +} +/* +* PARAMETERS +* p_sw +* [in] Pointer to the switch object. +* +* p_block +* [in] Pointer to the block of port masks to set. +* +* block_num +* [in] Block number (0-511) to set. +* +* position +* [in] Port mask position (0-15) to set. +* +* RETURN VALUE +* IB_SUCCESS on success, otherwise the status returned by +* osm_mcast_tbl_set_block. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Switch/osm_switch_get_mft_block +* NAME +* osm_switch_get_mft_block +* +* DESCRIPTION +* Retrieve a block of multicast port masks from the multicast table. +* +* SYNOPSIS +*/ +static inline boolean_t osm_switch_get_mft_block(IN osm_switch_t * p_sw, + IN uint16_t block_num, + IN uint8_t position, + OUT ib_net16_t * p_block) +{ + CL_ASSERT(p_sw); + return osm_mcast_tbl_get_block(&p_sw->mcast_tbl, block_num, position, + p_block); +} +/* +* PARAMETERS +* p_sw +* [in] Pointer to the switch object. +* +* block_num +* [in] Block number (0-511) to retrieve. +* +* position +* [in] Port mask position (0-15) to retrieve. +* +* p_block +* [out] Pointer to where the retrieved block of port masks +* is stored. +* +* RETURN VALUES +* Returns true if there are more blocks necessary to +* configure all the MLIDs reachable from this switch. +* FALSE otherwise. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Switch/osm_switch_get_mft_max_block +* NAME +* osm_switch_get_mft_max_block +* +* DESCRIPTION +* Get the max_block from the associated multicast table. 
+* +* SYNOPSIS +*/ +static inline uint16_t osm_switch_get_mft_max_block(IN osm_switch_t * p_sw) +{ + CL_ASSERT(p_sw); + return osm_mcast_tbl_get_max_block(&p_sw->mcast_tbl); +} +/* +* PARAMETERS +* p_sw +* [in] Pointer to the switch object. +* +* RETURN VALUE +*/ + +/****f* OpenSM: Switch/osm_switch_get_mft_max_block_in_use +* NAME +* osm_switch_get_mft_max_block_in_use +* +* DESCRIPTION +* Get the max_block_in_use from the associated multicast table. +* +* SYNOPSIS +*/ +static inline int16_t osm_switch_get_mft_max_block_in_use(IN osm_switch_t * p_sw) +{ + CL_ASSERT(p_sw); + return osm_mcast_tbl_get_max_block_in_use(&p_sw->mcast_tbl); +} +/* +* PARAMETERS +* p_sw +* [in] Pointer to the switch object. +* +* RETURN VALUES +* Returns the maximum block ID in use in this switch's mcast table. +* A value of -1 indicates no blocks are in use. +* +* NOTES +* +* SEE ALSO +*/ + +/****f* OpenSM: Switch/osm_switch_get_mft_max_position +* NAME +* osm_switch_get_mft_max_position +* +* DESCRIPTION +* Get the max_position from the associated multicast table. +* +* SYNOPSIS +*/ +static inline uint8_t osm_switch_get_mft_max_position(IN osm_switch_t * p_sw) +{ + CL_ASSERT(p_sw); + return osm_mcast_tbl_get_max_position(&p_sw->mcast_tbl); +} +/* +* PARAMETERS +* p_sw +* [in] Pointer to the switch object. +* +* RETURN VALUE +*/ + +/****f* OpenSM: Switch/osm_switch_get_dimn_port +* NAME +* osm_switch_get_dimn_port +* +* DESCRIPTION +* Get the routing ordered port +* +* SYNOPSIS +*/ +static inline uint8_t osm_switch_get_dimn_port(IN const osm_switch_t * p_sw, + IN uint8_t port_num) +{ + CL_ASSERT(p_sw); + if (p_sw->search_ordering_ports == NULL) + return port_num; + return p_sw->search_ordering_ports[port_num]; +} +/* +* PARAMETERS +* p_sw +* [in] Pointer to the switch object. +* +* port_num +* [in] Port number in the switch +* +* RETURN VALUES +* Returns the port number ordered for routing purposes. 
+*/ + +/****f* OpenSM: Switch/osm_switch_recommend_path +* NAME +* osm_switch_recommend_path +* +* DESCRIPTION +* Returns the recommended port on which to route this LID. +* In cases where LMC > 0, the remote side system and node +* used for the routing are tracked in the provided arrays +* (and counts) such that other lid for the same port will +* try and avoid going through the same remote system/node. +* +* SYNOPSIS +*/ +uint8_t osm_switch_recommend_path(IN const osm_switch_t * p_sw, + IN osm_port_t * p_port, IN uint16_t lid_ho, + IN unsigned start_from, + IN boolean_t ignore_existing, + IN boolean_t routing_for_lmc, + IN boolean_t dor, + IN boolean_t port_shifting, + IN uint32_t scatter_ports, + IN osm_lft_type_enum lft_enum); +/* +* PARAMETERS +* p_sw +* [in] Pointer to the switch object. +* +* p_port +* [in] Pointer to the port object for which to get a path +* advisory. +* +* lid_ho +* [in] LID value (host order) for which to get a path advisory. +* +* start_from +* [in] Port number from where to start balance counting. +* +* ignore_existing +* [in] Set to cause the switch to choose the optimal route +* regardless of existing paths. +* If false, the switch will choose an existing route if one +* exists, otherwise will choose the optimal route. +* +* routing_for_lmc +* [in] We support an enhanced LMC aware routing mode: +* In the case of LMC > 0, we can track the remote side +* system and node for all of the lids of the target +* and try and avoid routing again through the same +* system / node. +* +* Assume if routing_for_lmc is TRUE that this procedure +* was provided with the tracking array and counter via +* p_port->priv, and we can conduct this algorithm. +* +* dor +* [in] If TRUE, Dimension Order Routing will be done. +* +* port_shifting +* [in] If TRUE, port_shifting will be done. +* +* scatter_ports +* [in] If not zero, randomize the selection of the best ports. 
+* +* lft_enum +* [in] Use LFT that was calculated by routing engine, or +* current LFT on the switch. +* +* RETURN VALUE +* Returns the recommended port on which to route this LID. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Switch/osm_switch_recommend_mcast_path +* NAME +* osm_switch_recommend_mcast_path +* +* DESCRIPTION +* Returns the recommended port on which to route this LID. +* +* SYNOPSIS +*/ +uint8_t osm_switch_recommend_mcast_path(IN osm_switch_t * p_sw, + IN osm_port_t * p_port, + IN uint16_t mlid_ho, + IN boolean_t ignore_existing); +/* +* PARAMETERS +* p_sw +* [in] Pointer to the switch object. +* +* p_port +* [in] Pointer to the port object for which to get +* the multicast path. +* +* mlid_ho +* [in] MLID for the multicast group in question. +* +* ignore_existing +* [in] Set to cause the switch to choose the optimal route +* regardless of existing paths. +* If false, the switch will choose an existing route if one exists, +* otherwise will choose the optimal route. +* +* RETURN VALUE +* Returns the recommended port on which to route this LID. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Switch/osm_switch_get_mcast_fwd_tbl_size +* NAME +* osm_switch_get_mcast_fwd_tbl_size +* +* DESCRIPTION +* Returns the number of entries available in the multicast forwarding table. +* +* SYNOPSIS +*/ +static inline uint16_t +osm_switch_get_mcast_fwd_tbl_size(IN const osm_switch_t * p_sw) +{ + return cl_ntoh16(p_sw->switch_info.mcast_cap); +} +/* +* PARAMETERS +* p_sw +* [in] Pointer to the switch. +* +* RETURN VALUE +* Returns the number of entries available in the multicast forwarding table. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Switch/osm_switch_path_count_get +* NAME +* osm_switch_path_count_get +* +* DESCRIPTION +* Returns the count of the number of paths going through this port. 
+* +* SYNOPSIS +*/ +static inline uint32_t osm_switch_path_count_get(IN const osm_switch_t * p_sw, + IN uint8_t port_num) +{ + return osm_port_prof_path_count_get(&p_sw->p_prof[port_num]); +} +/* +* PARAMETERS +* p_sw +* [in] Pointer to the Switch object. +* +* port_num +* [in] Port number for which to get path count. +* +* RETURN VALUE +* Returns the count of the number of paths going through this port. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Switch/osm_switch_prepare_path_rebuild +* NAME +* osm_switch_prepare_path_rebuild +* +* DESCRIPTION +* Prepares a switch to rebuild pathing information. +* +* SYNOPSIS +*/ +int osm_switch_prepare_path_rebuild(IN osm_switch_t * p_sw, + IN uint16_t max_lids); +/* +* PARAMETERS +* p_sw +* [in] Pointer to the Switch object. +* +* max_lids +* [in] Max number of lids in the subnet. +* +* RETURN VALUE +* Returns zero on success, or negative value if an error occurred. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Switch/osm_switch_get_mcast_tbl_ptr +* NAME +* osm_switch_get_mcast_tbl_ptr +* +* DESCRIPTION +* Returns a pointer to the switch's multicast table. +* +* SYNOPSIS +*/ +static inline osm_mcast_tbl_t *osm_switch_get_mcast_tbl_ptr(IN const + osm_switch_t * p_sw) +{ + return (osm_mcast_tbl_t *) & p_sw->mcast_tbl; +} +/* +* PARAMETERS +* p_sw +* [in] Pointer to the switch. +* +* RETURN VALUE +* Returns a pointer to the switch's multicast table. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Switch/osm_switch_is_in_mcast_tree +* NAME +* osm_switch_is_in_mcast_tree +* +* DESCRIPTION +* Returns true if this switch already belongs in the tree for the specified +* multicast group. 
+* +* SYNOPSIS +*/ +static inline boolean_t +osm_switch_is_in_mcast_tree(IN const osm_switch_t * p_sw, IN uint16_t mlid_ho) +{ + /* + * mcast_tbl is embedded in osm_switch_t, so its address can never + * be NULL and needs no check; only the switch pointer is validated, + * as in the other accessors of this header. + */ + CL_ASSERT(p_sw); + return osm_mcast_tbl_is_any_port(&p_sw->mcast_tbl, mlid_ho); +} +/* +* PARAMETERS +* p_sw +* [in] Pointer to the switch. +* +* mlid_ho +* [in] MLID (host order) of the multicast tree to check. +* +* RETURN VALUE +* Returns true if this switch already belongs in the tree for the specified +* multicast group. +* +* NOTES +* +* SEE ALSO +*********/ + +END_C_DECLS +#endif /* _OSM_SWITCH_H_ */ diff --git a/include/opensm/osm_ucast_cache.h b/include/opensm/osm_ucast_cache.h new file mode 100644 index 0000000..316a7f6 --- /dev/null +++ b/include/opensm/osm_ucast_cache.h @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2008 Mellanox Technologies LTD. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Header file that describes Unicast Cache functions. + * + * Environment: + * Linux User Mode + * + * $Revision: 1.4 $ + */ + +#ifndef _OSM_UCAST_CACHE_H_ +#define _OSM_UCAST_CACHE_H_ + +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS + +struct osm_ucast_mgr; + +/****h* OpenSM/Unicast Manager/Unicast Cache +* NAME +* Unicast Cache +* +* DESCRIPTION +* The Unicast Cache object encapsulates the information +* needed to cache and write unicast routing of the subnet. +* +* The Unicast Cache object is NOT thread safe. +* +* This object should be treated as opaque and should be +* manipulated only through the provided functions. +* +* AUTHOR +* Yevgeny Kliteynik, Mellanox +* +*********/ + +/****f* OpenSM: Unicast Cache/osm_ucast_cache_invalidate +* NAME +* osm_ucast_cache_invalidate +* +* DESCRIPTION +* The osm_ucast_cache_invalidate function purges the +* unicast cache and marks the cache as invalid. +* +* SYNOPSIS +*/ +void osm_ucast_cache_invalidate(IN struct osm_ucast_mgr *p_mgr); +/* +* PARAMETERS +* p_mgr +* [in] Pointer to the ucast mgr object. +* +* RETURN VALUE +* This function does not return any value. 
+* +* NOTES +* +* SEE ALSO +* Unicast Manager object +*********/ + +/****f* OpenSM: Unicast Cache/osm_ucast_cache_check_new_link +* NAME +* osm_ucast_cache_check_new_link +* +* DESCRIPTION +* The osm_ucast_cache_check_new_link checks whether +* the newly discovered link still allows us to use +* cached unicast routing. +* +* SYNOPSIS +*/ +void osm_ucast_cache_check_new_link(IN struct osm_ucast_mgr *p_mgr, + IN osm_node_t * p_node_1, + IN uint8_t port_num_1, + IN osm_node_t * p_node_2, + IN uint8_t port_num_2); +/* +* PARAMETERS +* p_mgr +* [in] Pointer to the unicast manager object. +* +* p_node_1 +* [in] Pointer to the first node of the link. +* +* port_num_1 +* [in] Port number of the first node. +* +* p_node_2 +* [in] Pointer to the second node of the link. +* +* port_num_2 +* [in] Port number of the second node. +* +* RETURN VALUE +* This function does not return any value. +* +* NOTES +* The function checks whether the link was previously +* cached/dropped or is this a completely new link. +* If it decides that the new link makes cached routing +* invalid, the cache is purged and marked as invalid. +* +* SEE ALSO +* Unicast Cache object +*********/ + +/****f* OpenSM: Unicast Cache/osm_ucast_cache_add_link +* NAME +* osm_ucast_cache_add_link +* +* DESCRIPTION +* The osm_ucast_cache_add_link adds link to the cache. +* +* SYNOPSIS +*/ +void osm_ucast_cache_add_link(IN struct osm_ucast_mgr *p_mgr, + IN osm_physp_t * physp1, + IN osm_physp_t * physp2); +/* +* PARAMETERS +* p_mgr +* [in] Pointer to the unicast manager object. +* +* physp1 +* [in] Pointer to the first physical port of the link. +* +* physp2 +* [in] Pointer to the second physical port of the link. +* +* RETURN VALUE +* This function does not return any value. +* +* NOTES +* Since the cache operates with ports and not links, +* the function adds two port entries (both sides of the +* link) to the cache. 
+* If it decides that the dropped link makes cached routing +* invalid, the cache is purged and marked as invalid. +* +* SEE ALSO +* Unicast Manager object +*********/ + +/****f* OpenSM: Unicast Cache/osm_ucast_cache_add_node +* NAME +* osm_ucast_cache_add_node +* +* DESCRIPTION +* The osm_ucast_cache_add_node function adds a node and all +* its links to the cache. +* +* SYNOPSIS +*/ +void osm_ucast_cache_add_node(IN struct osm_ucast_mgr *p_mgr, + IN osm_node_t * p_node); +/* +* PARAMETERS +* p_mgr +* [in] Pointer to the unicast manager object. +* +* p_node +* [in] Pointer to the node object that should be cached. +* +* RETURN VALUE +* This function does not return any value. +* +* NOTES +* If the function decides that the dropped node makes cached +* routing invalid, the cache is purged and marked as invalid. +* +* SEE ALSO +* Unicast Manager object +*********/ + +/****f* OpenSM: Unicast Cache/osm_ucast_cache_process +* NAME +* osm_ucast_cache_process +* +* DESCRIPTION +* The osm_ucast_cache_process function writes the +* cached unicast routing to the subnet switches. +* +* SYNOPSIS +*/ +int osm_ucast_cache_process(IN struct osm_ucast_mgr *p_mgr); +/* +* PARAMETERS +* p_mgr +* [in] Pointer to the unicast manager object. +* +* RETURN VALUE +* This function returns zero on success and a non-zero +* value otherwise. +* +* NOTES +* Iterates through all the subnet switches and writes +* the LFTs that were calculated during the last routing +* engine execution to the switches. +* +* SEE ALSO +* Unicast Manager object +*********/ + +END_C_DECLS +#endif /* _OSM_UCAST_CACHE_H_ */ diff --git a/include/opensm/osm_ucast_lash.h b/include/opensm/osm_ucast_lash.h new file mode 100644 index 0000000..dd90d5d --- /dev/null +++ b/include/opensm/osm_ucast_lash.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2008,2009 System Fabric Works, Inc. All rights reserved. + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved. 
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2007 Simula Research Laboratory. All rights reserved. + * Copyright (c) 2007 Silicon Graphics Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +/* + * Abstract: + * Declarations for LASH algorithm + */ + +#ifndef OSM_UCAST_LASH_H +#define OSM_UCAST_LASH_H + +#include + +enum { + UNQUEUED, + Q_MEMBER, + MST_MEMBER, + MAX_INT = 9999, + NONE = MAX_INT +}; + +typedef struct _cdg_vertex { + int from; + int to; + int seen; + int temp; + int visiting_number; + struct _cdg_vertex *next; + int num_temp_depend; + int num_using_vertex; + int num_deps; + struct vertex_deps { + struct _cdg_vertex *v; + int num_used; + } deps[0]; +} cdg_vertex_t; + +typedef struct _switch { + osm_switch_t *p_sw; + int id; + int used_channels; + int *dij_channels; + int q_state; + mesh_node_t *node; + struct routing_table { + unsigned out_link; + unsigned lane; + } routing_table[0]; +} switch_t; + +typedef struct _lash { + osm_opensm_t *p_osm; + int num_switches; + uint8_t vl_min; + int balance_limit; + switch_t **switches; + cdg_vertex_t ****cdg_vertex_matrix; + int num_mst_in_lane[IB_MAX_NUM_VLS]; + int ***virtual_location; +} lash_t; + +#endif diff --git a/include/opensm/osm_ucast_mgr.h b/include/opensm/osm_ucast_mgr.h new file mode 100644 index 0000000..5f4ce46 --- /dev/null +++ b/include/opensm/osm_ucast_mgr.h @@ -0,0 +1,305 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2009 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_ucast_mgr_t. + * This object represents the Unicast Manager object. + * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_UCAST_MGR_H_ +#define _OSM_UCAST_MGR_H_ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/Unicast Manager +* NAME +* Unicast Manager +* +* DESCRIPTION +* The Unicast Manager object encapsulates the information +* needed to control unicast LID forwarding on the subnet. +* +* The Unicast Manager object is thread safe. 
+* +* This object should be treated as opaque and should be +* manipulated only through the provided functions. +* +* AUTHOR +* Steve King, Intel +* +*********/ +struct osm_sm; +/****s* OpenSM: Unicast Manager/osm_ucast_mgr_t +* NAME +* osm_ucast_mgr_t +* +* DESCRIPTION +* Unicast Manager structure. +* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct osm_ucast_mgr { + struct osm_sm *sm; + osm_subn_t *p_subn; + osm_log_t *p_log; + cl_plock_t *p_lock; + uint16_t max_lid; + cl_qlist_t port_order_list; + boolean_t is_dor; + boolean_t some_hop_count_set; + cl_qmap_t cache_sw_tbl; + boolean_t cache_valid; +} osm_ucast_mgr_t; +/* +* FIELDS +* sm +* Pointer to the SM object. +* +* p_subn +* Pointer to the Subnet object for this subnet. +* +* p_log +* Pointer to the log object. +* +* p_lock +* Pointer to the serializing lock. +* +* max_lid +* Max LID of all the switches in the subnet. +* +* port_order_list +* List of ports ordered for routing. +* +* is_dor +* TRUE if Dimension Order Routing (DOR) will be done. +* +* some_hop_count_set +* Initialized to FALSE at the beginning of each min hop +* tables calculation iteration cycle, set to TRUE to indicate +* that some hop count changes were done. +* +* cache_sw_tbl +* Cached switches table. +* +* cache_valid +* TRUE if the unicast cache is valid. +* +* SEE ALSO +* Unicast Manager object +*********/ + +/****f* OpenSM: Unicast Manager/osm_ucast_mgr_construct +* NAME +* osm_ucast_mgr_construct +* +* DESCRIPTION +* This function constructs a Unicast Manager object. +* +* SYNOPSIS +*/ +void osm_ucast_mgr_construct(IN osm_ucast_mgr_t * p_mgr); +/* +* PARAMETERS +* p_mgr +* [in] Pointer to a Unicast Manager object to construct. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling osm_ucast_mgr_destroy. +* +* Calling osm_ucast_mgr_construct is a prerequisite to calling any other +* method except osm_ucast_mgr_init.
+* +* SEE ALSO +* Unicast Manager object, osm_ucast_mgr_init, +* osm_ucast_mgr_destroy +*********/ + +/****f* OpenSM: Unicast Manager/osm_ucast_mgr_destroy +* NAME +* osm_ucast_mgr_destroy +* +* DESCRIPTION +* The osm_ucast_mgr_destroy function destroys the object, releasing +* all resources. +* +* SYNOPSIS +*/ +void osm_ucast_mgr_destroy(IN osm_ucast_mgr_t * p_mgr); +/* +* PARAMETERS +* p_mgr +* [in] Pointer to the object to destroy. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs any necessary cleanup of the specified +* Unicast Manager object. +* Further operations should not be attempted on the destroyed object. +* This function should only be called after a call to +* osm_ucast_mgr_construct or osm_ucast_mgr_init. +* +* SEE ALSO +* Unicast Manager object, osm_ucast_mgr_construct, +* osm_ucast_mgr_init +*********/ + +/****f* OpenSM: Unicast Manager/osm_ucast_mgr_init +* NAME +* osm_ucast_mgr_init +* +* DESCRIPTION +* The osm_ucast_mgr_init function initializes a +* Unicast Manager object for use. +* +* SYNOPSIS +*/ +ib_api_status_t osm_ucast_mgr_init(IN osm_ucast_mgr_t * p_mgr, + IN struct osm_sm * sm); +/* +* PARAMETERS +* p_mgr +* [in] Pointer to an osm_ucast_mgr_t object to initialize. +* +* sm +* [in] Pointer to the SM object. +* +* RETURN VALUES +* IB_SUCCESS if the Unicast Manager object was initialized +* successfully. +* +* NOTES +* Allows calling other Unicast Manager methods. +* +* SEE ALSO +* Unicast Manager object, osm_ucast_mgr_construct, +* osm_ucast_mgr_destroy +*********/ + +/****f* OpenSM: Unicast Manager/osm_ucast_mgr_set_fwd_tables +* NAME +* osm_ucast_mgr_set_fwd_tables +* +* DESCRIPTION +* Setup forwarding table for the switch (from prepared new_lft). +* +* SYNOPSIS +*/ +void osm_ucast_mgr_set_fwd_tables(IN osm_ucast_mgr_t * p_mgr); +/* +* PARAMETERS +* p_mgr +* [in] Pointer to an osm_ucast_mgr_t object. 
+* +* SEE ALSO +* Unicast Manager +*********/ + +/****f* OpenSM: Unicast Manager/osm_ucast_mgr_build_lid_matrices +* NAME +* osm_ucast_mgr_build_lid_matrices +* +* DESCRIPTION +* Build the switches' lid matrices. +* +* SYNOPSIS +*/ +int osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * p_mgr); +/* +* PARAMETERS +* p_mgr +* [in] Pointer to an osm_ucast_mgr_t object. +* +* NOTES +* This function processes the subnet, configuring switches' +* min hops tables (aka lid matrices). +* +* SEE ALSO +* Unicast Manager +*********/ + +/****f* OpenSM: Unicast Manager/osm_ucast_mgr_process +* NAME +* osm_ucast_mgr_process +* +* DESCRIPTION +* Process and configure the subnet's unicast forwarding tables. +* +* SYNOPSIS +*/ +int osm_ucast_mgr_process(IN osm_ucast_mgr_t * p_mgr); +/* +* PARAMETERS +* p_mgr +* [in] Pointer to an osm_ucast_mgr_t object. +* +* RETURN VALUES +* Returns zero on success and a negative value on failure. +* +* NOTES +* This function processes the subnet, configuring switch +* unicast forwarding tables. +* +* SEE ALSO +* Unicast Manager, Node Info Response Controller +*********/ + +int ucast_dummy_build_lid_matrices(void *context); +END_C_DECLS +#endif /* _OSM_UCAST_MGR_H_ */ diff --git a/include/opensm/osm_version.h.in b/include/opensm/osm_version.h.in new file mode 100644 index 0000000..d783245 --- /dev/null +++ b/include/opensm/osm_version.h.in @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _OSM_VERSION_H_ +#define _OSM_VERSION_H_ + +/****s* OpenSM: Base/OSM_VERSION +* NAME +* OSM_VERSION +* +* DESCRIPTION +* The version string for OpenSM +* +* SYNOPSIS +*/ +#define OSM_VERSION "OpenSM @VERSION@" +/********/ + +#endif /* _OSM_VERSION_H_ */ diff --git a/include/opensm/osm_vl15intf.h b/include/opensm/osm_vl15intf.h new file mode 100644 index 0000000..589e03a --- /dev/null +++ b/include/opensm/osm_vl15intf.h @@ -0,0 +1,383 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2010 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_vl15_t. + * This object represents a VL15 interface object. + * This object is part of the OpenSM family of objects. 
+ */ + +#ifndef _OSM_VL15INTF_H_ +#define _OSM_VL15INTF_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/VL15 +* NAME +* VL15 +* +* DESCRIPTION +* The VL15 object encapsulates the information needed by the +* OpenSM to instantiate the VL15 interface. The OpenSM allocates +* one VL15 object per subnet. +* +* The VL15 object transmits MADs to the wire at a throttled rate, +* so as to not overload the VL15 buffering of subnet components. +* OpenSM modules may post VL15 MADs to the VL15 interface as fast +* as possible. +* +* The VL15 object is thread safe. +* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* AUTHOR +* Steve King, Intel +* +*********/ +/****d* OpenSM: SM/osm_vl15_state_t +* NAME +* osm_vl15_state_t +* +* DESCRIPTION +* Enumerates the possible states of OpenSM VL15 object. +* +* SYNOPSIS +*/ +typedef enum _osm_vl15_state { + OSM_VL15_STATE_INIT = 0, + OSM_VL15_STATE_READY +} osm_vl15_state_t; +/***********/ + +/****s* OpenSM: VL15/osm_vl15_t +* NAME +* osm_vl15_t +* +* DESCRIPTION +* VL15 structure. +* +* This object should be treated as opaque and should +* be manipulated only through the provided functions. +* +* SYNOPSIS +*/ +typedef struct osm_vl15 { + osm_thread_state_t thread_state; + osm_vl15_state_t state; + uint32_t max_wire_smps; + uint32_t max_wire_smps2; + uint32_t max_smps_timeout; + cl_event_t signal; + cl_thread_t poller; + cl_qlist_t rfifo; + cl_qlist_t ufifo; + cl_spinlock_t lock; + osm_vendor_t *p_vend; + osm_log_t *p_log; + osm_stats_t *p_stats; + osm_subn_t *p_subn; +} osm_vl15_t; +/* +* FIELDS +* thread_state +* Tracks the thread state of the poller thread. 
+* +* state +* Tracks the state of the VL15 interface itself. +* +* max_wire_smps +* Maximum number of VL15 MADs allowed on the wire at one time. +* +* max_wire_smps2 +* Maximum number of timeout based SMPs allowed to be outstanding. +* +* max_smps_timeout +* Wait time in usec for timeout based SMPs. +* +* signal +* Event on which the poller sleeps. +* +* poller +* Worker thread that services the fifos to transmit VL15 MADs. +* +* rfifo +* First-in First-out queue for outbound VL15 MADs for which +* a response is expected, aka the "response fifo". +* +* ufifo +* First-in First-out queue for outbound VL15 MADs for which +* no response is expected, aka the "unicast fifo". +* +* lock +* Spinlock guarding the FIFOs. +* +* p_vend +* Pointer to the vendor transport object. +* +* p_log +* Pointer to the log object. +* +* p_stats +* Pointer to the OpenSM statistics block. +* +* p_subn +* Pointer to the OpenSM subnet object. +* +* SEE ALSO +* VL15 object +*********/ + +/****f* OpenSM: VL15/osm_vl15_construct +* NAME +* osm_vl15_construct +* +* DESCRIPTION +* This function constructs a VL15 object. +* +* SYNOPSIS +*/ +void osm_vl15_construct(IN osm_vl15_t * p_vl15); +/* +* PARAMETERS +* p_vl15 +* [in] Pointer to a VL15 object to construct. +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Allows calling osm_vl15_destroy. +* +* Calling osm_vl15_construct is a prerequisite to calling any other +* method except osm_vl15_init. +* +* SEE ALSO +* VL15 object, osm_vl15_init, osm_vl15_destroy +*********/ + +/****f* OpenSM: VL15/osm_vl15_destroy +* NAME +* osm_vl15_destroy +* +* DESCRIPTION +* The osm_vl15_destroy function destroys the object, releasing +* all resources. +* +* SYNOPSIS +*/ +void osm_vl15_destroy(IN osm_vl15_t * p_vl15, IN struct osm_mad_pool *p_pool); +/* +* PARAMETERS +* p_vl15 +* [in] Pointer to a VL15 object to destroy.
+* +* p_pool +* [in] The pointer to the mad pool to return outstanding mads to +* +* RETURN VALUE +* This function does not return a value. +* +* NOTES +* Performs any necessary cleanup of the specified VL15 object. +* Further operations should not be attempted on the destroyed object. +* This function should only be called after a call to osm_vl15_construct or +* osm_vl15_init. +* +* SEE ALSO +* VL15 object, osm_vl15_construct, osm_vl15_init +*********/ + +/****f* OpenSM: VL15/osm_vl15_init +* NAME +* osm_vl15_init +* +* DESCRIPTION +* The osm_vl15_init function initializes a VL15 object for use. +* +* SYNOPSIS +*/ +ib_api_status_t osm_vl15_init(IN osm_vl15_t * p_vl15, IN osm_vendor_t * p_vend, + IN osm_log_t * p_log, IN osm_stats_t * p_stats, + IN osm_subn_t * p_subn, + IN int32_t max_wire_smps, + IN int32_t max_wire_smps2, + IN uint32_t max_smps_timeout); +/* +* PARAMETERS +* p_vl15 +* [in] Pointer to an osm_vl15_t object to initialize. +* +* p_vend +* [in] Pointer to the vendor transport object. +* +* p_log +* [in] Pointer to the log object. +* +* p_stats +* [in] Pointer to the OpenSM statistics block. +* +* p_subn +* [in] Pointer to the OpenSM subnet object. +* +* max_wire_smps +* [in] Maximum number of SMPs allowed on the wire at one time. +* +* max_wire_smps2 +* [in] Maximum number of timeout based SMPs allowed to be +* outstanding. +* +* max_smps_timeout +* [in] Wait time in usec for timeout based SMPs. +* +* +* RETURN VALUES +* IB_SUCCESS if the VL15 object was initialized successfully. +* +* NOTES +* Allows calling other VL15 methods. +* +* SEE ALSO +* VL15 object, osm_vl15_construct, osm_vl15_destroy +*********/ + +/****f* OpenSM: VL15/osm_vl15_post +* NAME +* osm_vl15_post +* +* DESCRIPTION +* Posts a MAD to the VL15 interface for transmission. +* +* SYNOPSIS +*/ +void osm_vl15_post(IN osm_vl15_t * p_vl15, IN osm_madw_t * p_madw); +/* +* PARAMETERS +* p_vl15 +* [in] Pointer to an osm_vl15_t object. 
+* +* p_madw +* [in] Pointer to a MAD wrapper structure containing the MAD. +* +* RETURN VALUES +* This function does not return a value. +* +* NOTES +* The osm_vl15_construct or osm_vl15_init must be called before using +* this function. +* +* SEE ALSO +* VL15 object, osm_vl15_construct, osm_vl15_init +*********/ + +/****f* OpenSM: VL15/osm_vl15_poll +* NAME +* osm_vl15_poll +* +* DESCRIPTION +* Causes the VL15 Interface to consider sending another QP0 MAD. +* +* SYNOPSIS +*/ +void osm_vl15_poll(IN osm_vl15_t * p_vl); +/* +* PARAMETERS +* p_vl +* [in] Pointer to an osm_vl15_t object. +* +* RETURN VALUES +* None. +* +* NOTES +* This function signals the VL15 that it may be possible to send +* a SMP. This function checks three criteria before sending a SMP: +* 1) The VL15 worker is IDLE +* 2) There are no QP0 SMPs currently outstanding +* 3) There is something on the VL15 FIFO to send +* +* SEE ALSO +* VL15 object, osm_vl15_construct, osm_vl15_init +*********/ + +/****f* OpenSM: VL15/osm_vl15_shutdown +* NAME +* osm_vl15_shutdown +* +* DESCRIPTION +* Clean up all outstanding MADs on both fifos. +* This is required to return all outstanding MAD resources. +* +* SYNOPSIS +*/ +void osm_vl15_shutdown(IN osm_vl15_t * p_vl, IN osm_mad_pool_t * p_mad_pool); +/* +* PARAMETERS +* p_vl +* [in] Pointer to an osm_vl15_t object. +* +* p_mad_pool +* [in] The MAD pool owning the mads. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +* VL15 object, osm_vl15_construct, osm_vl15_init +*********/ + +END_C_DECLS +#endif /* _OSM_VL15INTF_H_ */ diff --git a/include/opensm/st.h b/include/opensm/st.h new file mode 100644 index 0000000..ad6c289 --- /dev/null +++ b/include/opensm/st.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +/* @(#) st.h 5.1 89/12/14 */ + +#ifndef ST_INCLUDED +#define ST_INCLUDED + +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +#define st_ptr_t uintptr_t +typedef st_ptr_t st_data_t; + +#define ST_DATA_T_DEFINED + +typedef struct st_table st_table; + +struct st_hash_type { + int (*compare) (void *, void *); + st_ptr_t(*hash) (void *); +}; + +struct st_table { + struct st_hash_type *type; + int num_bins; + int num_entries; + struct st_table_entry **bins; +}; + +#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0) + +enum st_retval { ST_CONTINUE, ST_STOP, ST_DELETE }; + +st_table *st_init_table(struct st_hash_type *); +st_table *st_init_table_with_size(struct st_hash_type *, size_t); +st_table *st_init_numtable(void); +st_table *st_init_numtable_with_size(size_t); +st_table *st_init_strtable(void); +st_table *st_init_strtable_with_size(size_t); +int st_delete(st_table *, st_data_t *, st_data_t *); +int st_delete_safe(st_table *, st_data_t *, st_data_t *, st_data_t); +int st_insert(st_table *, st_data_t, st_data_t); +int st_lookup(st_table *, st_data_t, st_data_t *); +void st_foreach(st_table *, + int (*)(st_data_t key, st_data_t val, st_data_t arg), + st_data_t); +void st_add_direct(st_table *, st_data_t, st_data_t); +void st_free_table(st_table *); +void st_cleanup_safe(st_table *, st_data_t); +st_table *st_copy(st_table *); + +#define ST_NUMCMP ((int (*)()) 0) +#define ST_NUMHASH ((int (*)()) -2) + +#define st_numcmp ST_NUMCMP +#define st_numhash ST_NUMHASH + +/* int st_strhash(void); */ + +END_C_DECLS +#endif /* ST_INCLUDED */ diff --git a/include/vendor/osm_mtl_bind.h b/include/vendor/osm_mtl_bind.h new file mode 100644 index 0000000..3994d59 --- /dev/null +++ b/include/vendor/osm_mtl_bind.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. 
All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _OSM_BIND_H_ +#define _OSM_BIND_H_ + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****s* OpenSM: Vendor/osm_vendor_mgt_bind +* NAME +* osm_vendor_mgt_bind_t +* +* DESCRIPTION +* Tracks the handles returned by IB_MGT to the SMI and GSI +* Nulled on init of the vendor obj. 
Populated on first bind. +* +* SYNOPSIS +*/ +typedef struct _osm_vendor_mgt_bind { + boolean_t smi_init, gsi_init; + IB_MGT_mad_hndl_t smi_mads_hdl; + IB_MGT_mad_hndl_t gsi_mads_hdl; + struct _osm_mtl_bind_info *smi_p_bind; +} osm_vendor_mgt_bind_t; + +/* +* FIELDS +* smi_mads_hdl +* Handle returned by IB_MGT_get_handle to the IB_MGT_SMI +* +* gsi_mads_hdl +* Handle returned by IB_MGT_get_handle to the IB_MGT_GSI +* +* SEE ALSO +*********/ + +/****s* OpenSM: Vendor osm_mtl_bind_info_t +* NAME +* osm_mtl_bind_info_t +* +* DESCRIPTION +* Handle to the result of binding a class callbacks to IB_MGT. +* +* SYNOPSIS +*/ +typedef struct _osm_mtl_bind_info { + IB_MGT_mad_hndl_t mad_hndl; + osm_vendor_t *p_vend; + void *client_context; + VAPI_hca_hndl_t hca_hndl; + VAPI_hca_id_t hca_id; + uint8_t port_num; + osm_vend_mad_recv_callback_t rcv_callback; + osm_vend_mad_send_err_callback_t send_err_callback; + osm_mad_pool_t *p_osm_pool; +} osm_mtl_bind_info_t; + +/* +* FIELDS +* mad_hndl +* the handle returned from the registration in IB_MGT +* +* p_vend +* Pointer to the vendor object. +* +* client_context +* User's context passed during osm_bind +* +* hca_id +* HCA Id we bind to. +* +* port_num +* Port number (within the HCA) of the bound port. +* +* rcv_callback +* OSM Callback function to be called on receive of MAD. +* +* send_err_callback +* OSM Callback to be called on send error. +* +* p_osm_pool +* Points to the MAD pool used by OSM +* +* +* SEE ALSO +*********/ +ib_api_status_t +osm_mtl_send_mad(IN osm_mtl_bind_info_t * p_bind, IN osm_madw_t * const p_madw); + +END_C_DECLS +#endif // _OSM_BIND_H_ diff --git a/include/vendor/osm_pkt_randomizer.h b/include/vendor/osm_pkt_randomizer.h new file mode 100644 index 0000000..5a8ef5f --- /dev/null +++ b/include/vendor/osm_pkt_randomizer.h @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. 
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_pkt_randomizer_t. + * This object represents the packet randomizer, used to randomly drop packets for debug. + * This object is part of the OpenSM family of objects.
+ */ + +#ifndef _OSM_PKT_RANDOMIZER_H_ +#define _OSM_PKT_RANDOMIZER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/Packet Randomizer +* NAME +* Packet Randomizer +* +* DESCRIPTION +* The Packet Randomizer object encapsulates the information needed for +* randomly dropping packets for debug. +* +* The Packet Randomizer object is not thread safe, thus callers must +* provide serialization. +* +* AUTHOR +* Yael Kalka, Mellanox +* +*********/ +/****d* OpenSM: Pkt_Randomizer/osm_pkt_randomizer_t +* NAME +* osm_pkt_randomizer_t +* +* DESCRIPTION +* Packet randomizer structure. This structure contains the various +* parameters needed by the packet randomizer. +* +* SYNOPSIS +*/ +typedef struct _osm_pkt_randomizer { + uint8_t osm_pkt_drop_rate; + uint8_t osm_pkt_num_unstable_links; + uint8_t osm_pkt_unstable_link_rate; + osm_dr_path_t *fault_dr_paths; + uint8_t num_paths_initialized; +} osm_pkt_randomizer_t; + +/* +* FIELDS +* +* osm_pkt_drop_rate +* Used by the randomizer whether to drop a packet or not. +* Taken from the global variable OSM_PKT_DROP_RATE. If not given or +* if set to zero, the randomizer will not run. +* +* osm_pkt_num_unstable_links +* The number of unstable links to be drawn. +* Taken from the global variable OSM_PKT_NUM_UNSTABLE_LINKS. default = 1. +* +* osm_pkt_unstable_link_rate +* Used by the randomizer whether to add a packet to the unstable links +* list or not. Taken from the global variable OSM_PKT_UNSTABLE_LINK_RATE. +* default = 20. +* +* fault_dr_path +* Array of osm_dr_path_t objects, that includes all the dr_paths +* that are marked as errored. +* +* num_paths_initialized +* Describes the number of paths from the fault_dr_paths array that +* have already been initialized. 
+* +* SEE ALSO +* Packet Randomizer object +*********/ + +/****f* OpenSM: Pkt_Randomizer/osm_pkt_randomizer_init +* NAME +* osm_pkt_randomizer_init +* +* DESCRIPTION +* The osm_pkt_randomizer_init function initializes the Packet Randomizer object. +* +* SYNOPSIS +*/ +ib_api_status_t +osm_pkt_randomizer_init(IN OUT osm_pkt_randomizer_t ** pp_pkt_randomizer, + IN osm_log_t * p_log); +/* +* PARAMETERS +* pp_pkt_randomizer +* [in/out] Pointer to the Packet Randomizer object pointer to be initialized. +* +* p_log +* [in] Pointer to the log object. +* +* RETURN VALUE +* An ib_api_status_t status code. +* +* NOTES +* +* SEE ALSO +* +*********/ + +/****f* OpenSM: Pkt_Randomizer/osm_pkt_randomizer_destroy +* NAME +* osm_pkt_randomizer_destroy +* +* DESCRIPTION +* The osm_pkt_randomizer_destroy function destroys the Packet Randomizer object. +* +* SYNOPSIS +*/ +void +osm_pkt_randomizer_destroy(IN osm_pkt_randomizer_t ** pp_pkt_randomizer, + IN osm_log_t * p_log); +/* +* PARAMETERS +* pp_pkt_randomizer +* [in] Pointer to the Packet Randomizer object pointer to be destroyed. +* +* p_log +* [in] Pointer to the log object. +* +* RETURN VALUE +* None +* +* NOTES +* +* SEE ALSO +* +*********/ + +/****f* OpenSM: Pkt_Randomizer/osm_pkt_randomizer_mad_drop +* NAME +* osm_pkt_randomizer_mad_drop +* +* DESCRIPTION +* The osm_pkt_randomizer_mad_drop is the base function of the packet +* randomizer. +* It decides according to different random criteria whether or not +* the packet received should be dropped (according to its dr_path). +* This function is relevant both for mads sent by the SM and mads +* received by the SM. +* It returns TRUE if the mad should be dropped, and FALSE otherwise. +* +* SYNOPSIS +*/ +boolean_t +osm_pkt_randomizer_mad_drop(IN osm_log_t * p_log, + IN osm_pkt_randomizer_t * p_pkt_randomizer, + IN const ib_mad_t * p_mad); +/* +* PARAMETERS +* p_log +* [in] Pointer to the log object. +* +* p_pkt_randomizer +* [in] Pointer to the Packet Randomizer object. +* +* p_mad +* [in] Pointer to the ib_mad_t mad to be checked.
+* +* RETURN VALUE +* TRUE if the mad should be dropped. FALSE otherwise. +* +* NOTES +* +* SEE ALSO +* +*********/ + +END_C_DECLS +#endif /* _OSM_PKT_RANDOMIZER_H_ */ diff --git a/include/vendor/osm_ts_useraccess.h b/include/vendor/osm_ts_useraccess.h new file mode 100644 index 0000000..d68c924 --- /dev/null +++ b/include/vendor/osm_ts_useraccess.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ * + */ + +#include "ts_ib_useraccess.h" + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +typedef struct ib_user_mad_filter osm_ts_user_mad_filter; +typedef struct ib_set_port_info_ioctl osm_ts_set_port_info_ioctl; +typedef struct ib_get_port_info_ioctl osm_ts_get_port_info_ioctl; +typedef struct ib_gid_entry_ioctl osm_ts_gid_entry_ioctl; + +END_C_DECLS diff --git a/include/vendor/osm_umadt.h b/include/vendor/osm_umadt.h new file mode 100644 index 0000000..129627d --- /dev/null +++ b/include/vendor/osm_umadt.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_mad_wrapper_t. + * This object represents the context wrapper for OpenSM MAD processing. + * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_UMADT_h_ +#define _OSM_UMADT_h_ + +#include "iba/ib_types.h" +#include +#include +#include "umadt.h" +#include "ibt.h" + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS + +typedef struct _umadt_obj_t { + void *umadt_handle; + UMADT_INTERFACE uMadtInterface; + IBT_INTERFACE IbtInterface; + boolean init_done; + cl_spinlock_t register_lock; + cl_qlist_t register_list; + osm_log_t *p_log; + uint32_t timeout; + +} umadt_obj_t; +/*********/ + +/****s* OpenSM: Umadt MAD Wrapper/osm_bind_info +* NAME +* osm_bind_info +* +* DESCRIPTION +* Context needed for processing individual MADs +* +* SYNOPSIS +*/ + +typedef struct _mad_bind_info_t { + cl_list_item_t list_item; + umadt_obj_t *p_umadt_obj; + osm_mad_pool_t *p_mad_pool; + osm_vend_mad_recv_callback_t mad_recv_callback; + void *client_context; + cl_thread_t recv_processor_thread; + cl_spinlock_t trans_ctxt_lock; + cl_qlist_t trans_ctxt_list; + cl_timer_t timeout_timer; + cl_spinlock_t timeout_list_lock; + cl_qlist_t timeout_list; + RegisterClassStruct umadt_reg_class; + MADT_HANDLE umadt_handle; /* Umadt type */ + +} mad_bind_info_t; + +typedef struct _trans_context_t { + cl_list_item_t list_item; + uint64_t trans_id; + uint64_t sent_time; /* micro secs */ + void *context; +} trans_context_t; + +/* +* FIELDS +* list_item +* List linkage for pools and lists. MUST BE FIRST MEMBER! 
+* +* p_mad_pool +* Pointer to the MAD pool to be used by mads with this bind handle. +* +* mad_recv_callback +* Callback function called by the mad receive processor. +* +* client_context +* context to be passed to the receive callback. +* +* recv_processor_thread +* Thread structure for the receive processor thread. +* +* umadt_reg_class +* Umadt register class struct used to register with Umadt. +* +* umadt_handle +* Umadt returns this handle from a registration call. The transport layer +* uses this handle to talk to Umadt. +* +* SEE ALSO +*********/ + +END_C_DECLS +#endif /*_OSM_UMADT_h_ */ diff --git a/include/vendor/osm_vendor.h b/include/vendor/osm_vendor.h new file mode 100644 index 0000000..31b8eb8 --- /dev/null +++ b/include/vendor/osm_vendor.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Include file used by OpenSM to pull in the correct vendor file. + */ + +/* + this is the generic include file which includes + the proper vendor specific file +*/ +#include + +#if defined( OSM_VENDOR_INTF_TEST ) +#include +#elif defined( OSM_VENDOR_INTF_UMADT ) +#include +#elif defined( OSM_VENDOR_INTF_MTL ) +/* HACK - I do not know how to prevent complib from loading kernel H files */ +#undef __init +#include +#elif defined( OSM_VENDOR_INTF_TS ) +#undef __init +#include +#elif defined( OSM_VENDOR_INTF_ANAFA ) +#undef __init +#include +#elif defined( OSM_VENDOR_INTF_SIM ) +#undef __init +#include +#elif defined( OSM_VENDOR_INTF_OPENIB ) +#include +#elif defined( OSM_VENDOR_INTF_AL ) +#include +#else +#error No MAD Interface selected! +#error Choose an interface in osm_config.h +#endif diff --git a/include/vendor/osm_vendor_al.h b/include/vendor/osm_vendor_al.h new file mode 100644 index 0000000..e7371c9 --- /dev/null +++ b/include/vendor/osm_vendor_al.h @@ -0,0 +1,348 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_mad_wrapper_t. + * This object represents the context wrapper for OpenSM MAD processing. + * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_VENDOR_AL_H_ +#define _OSM_VENDOR_AL_H_ + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/Vendor AL +* NAME +* Vendor AL +* +* DESCRIPTION +* +* The Vendor AL object is thread safe. +* +* This object should be treated as opaque and should be +* manipulated only through the provided functions. 
+* +* Enable various hacks to compensate for bugs in external code... +* +* +* AUTHOR +* +* +*********/ +/****h* OpenSM/Vendor Access Layer (AL) +* NAME +* Vendor AL +* +* DESCRIPTION +* This file is the vendor specific file for the AL Infiniband API. +* +* AUTHOR +* Steve King, Intel +* +*********/ +#define OSM_AL_SQ_SGE 256 +#define OSM_AL_RQ_SGE 256 +#define OSM_DEFAULT_RETRY_COUNT 3 +/* AL supports RMPP */ +#define VENDOR_RMPP_SUPPORT 1 +/****s* OpenSM: Vendor AL/osm_ca_info_t +* NAME +* osm_ca_info_t +* +* DESCRIPTION +* Structure containing information about local Channel Adapters. +* +* SYNOPSIS +*/ +typedef struct _osm_ca_info { + ib_net64_t guid; + size_t attr_size; + ib_ca_attr_t *p_attr; + +} osm_ca_info_t; +/* +* FIELDS +* guid +* Node GUID of the local CA. +* +* attr_size +* Size of the CA attributes for this CA. +* +* p_attr +* Pointer to dynamically allocated CA Attribute structure. +* +* SEE ALSO +*********/ + +/****f* OpenSM: CA Info/osm_ca_info_get_num_ports +* NAME +* osm_ca_info_get_num_ports +* +* DESCRIPTION +* Returns the number of ports owned by this CA. +* +* SYNOPSIS +*/ +static inline uint8_t +osm_ca_info_get_num_ports(IN const osm_ca_info_t * const p_ca_info) +{ + return (p_ca_info->p_attr->num_ports); +} + +/* +* PARAMETERS +* p_ca_info +* [in] Pointer to a CA Info object. +* +* RETURN VALUE +* Returns the number of ports owned by this CA. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: CA Info/osm_ca_info_get_port_guid +* NAME +* osm_ca_info_get_port_guid +* +* DESCRIPTION +* Returns the port GUID of the specified port owned by this CA. +* +* SYNOPSIS +*/ +static inline ib_net64_t +osm_ca_info_get_port_guid(IN const osm_ca_info_t * const p_ca_info, + IN const uint8_t index) +{ + return (p_ca_info->p_attr->p_port_attr[index].port_guid); +} + +/* +* PARAMETERS +* p_ca_info +* [in] Pointer to a CA Info object. +* +* index +* [in] Port "index" for which to retrieve the port GUID.
+* The index is the offset into the ca's internal array +* of port attributes. +* +* RETURN VALUE +* Returns the port GUID of the specified port owned by this CA. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: CA Info/osm_ca_info_get_port_num +* NAME +* osm_ca_info_get_port_num +* +* DESCRIPTION +* Returns the port number of the specified port owned by this CA. +* Port numbers start with 1 for HCA's. +* +* SYNOPSIS +*/ +static inline uint8_t +osm_ca_info_get_port_num(IN const osm_ca_info_t * const p_ca_info, + IN const uint8_t index) +{ + return (p_ca_info->p_attr->p_port_attr[index].port_num); +} + +/* +* PARAMETERS +* p_ca_info +* [in] Pointer to a CA Info object. +* +* index +* [in] Port "index" for which to retrieve the port number. +* The index is the offset into the ca's internal array +* of port attributes. +* +* RETURN VALUE +* Returns the port number of the specified port owned by this CA. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: CA Info/osm_ca_info_get_ca_guid +* NAME +* osm_ca_info_get_ca_guid +* +* DESCRIPTION +* Returns the GUID of the specified CA. +* +* SYNOPSIS +*/ +static inline ib_net64_t +osm_ca_info_get_ca_guid(IN const osm_ca_info_t * const p_ca_info) +{ + return (p_ca_info->p_attr->ca_guid); +} + +/* +* PARAMETERS +* p_ca_info +* [in] Pointer to a CA Info object. +* +* RETURN VALUE +* Returns the GUID of the specified CA. +* +* NOTES +* +* SEE ALSO +*********/ + +/****s* OpenSM: Vendor AL/osm_vendor_t +* NAME +* osm_vendor_t +* +* DESCRIPTION +* Structure holding the state of the AL vendor transport object. +* +* SYNOPSIS +*/ +typedef struct _osm_vendor { + ib_al_handle_t h_al; + osm_log_t *p_log; + uint32_t ca_count; + osm_ca_info_t *p_ca_info; + uint32_t timeout; + ib_ca_handle_t h_ca; + ib_pd_handle_t h_pd; + +} osm_vendor_t; +/* +* FIELDS +* h_al +* Handle returned by AL open call (ib_open_al). +* +* p_log +* Pointer to the log object. +* +* ca_count +* Number of CA's in the array pointed to by p_ca_info.
+* +* p_ca_info +* Pointer to dynamically allocated array of CA info objects. +* +* h_pool +* MAD Pool handle returned by ib_create_mad_pool at init time. +* +* timeout +* Transaction timeout time in milliseconds. +* +* SEE ALSO +*********/ + +#define OSM_BIND_INVALID_HANDLE 0 + +/****s* OpenSM: Vendor AL/osm_bind_handle_t +* NAME +* osm_bind_handle_t +* +* DESCRIPTION +* handle returned by the vendor transport bind call. +* +* SYNOPSIS +*/ +typedef void *osm_bind_handle_t; +/***********/ + +/****s* OpenSM/osm_vend_wrap_t +* NAME +* AL Vendor MAD Wrapper +* +* DESCRIPTION +* AL specific MAD wrapper. AL transport layer uses this for +* housekeeping. +* +* SYNOPSIS +*********/ +typedef struct _osm_vend_wrap_t { + uint32_t size; + osm_bind_handle_t h_bind; + ib_mad_element_t *p_elem; + ib_av_handle_t h_av; + void *p_resp_madw; + +} osm_vend_wrap_t; +/* +* FIELDS +* size +* Size of the allocated MAD +* +* h_bind +* Bind handle used on this transaction +* +* p_elem +* Pointer to the mad element structure associated with +* this mad. +* +* h_av +* Address vector handle used for this transaction. +* +* p_resp_madw +* Pointer to the mad wrapper structure used to hold the pending +* response to the mad, if any. If a response is expected, the +* wrapper for the response is allocated during the send call. +* +* SEE ALSO +*********/ + +END_C_DECLS +#endif /* _OSM_VENDOR_AL_H_ */ diff --git a/include/vendor/osm_vendor_api.h b/include/vendor/osm_vendor_api.h new file mode 100644 index 0000000..4973417 --- /dev/null +++ b/include/vendor/osm_vendor_api.h @@ -0,0 +1,487 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Specification of the OpenSM transport API. This API is OpenSM's view + * of the Infiniband transport. + */ + +#ifndef _OSM_VENDOR_API_H_ +#define _OSM_VENDOR_API_H_ + +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****s* OpenSM Vendor API/osm_vend_mad_recv_callback_t +* NAME +* osm_vend_mad_recv_callback_t +* +* DESCRIPTION +* Function prototype for the vendor MAD receive callback. +* The vendor layer calls this function for MAD receives. 
+* +* SYNOPSIS +*/ +typedef void (*osm_vend_mad_recv_callback_t) (IN osm_madw_t * p_madw, + IN void *bind_context, + IN osm_madw_t * p_req_madw); +/* +* PARAMETERS +* p_madw +* [in] The received MAD wrapper. +* +* bind_context +* [in] User context supplied during the bind call. +* +* p_req_madw +* [in] Pointer to the request mad wrapper that generated this response. +* If the inbound MAD is not a response, this field is NULL. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****s* OpenSM Vendor API/osm_vend_mad_send_err_callback_t +* NAME +* osm_vend_mad_send_err_callback_t +* +* DESCRIPTION +* Function prototype for the vendor send failure callback. +* The vendor layer calls this function when MADs expecting +* a response are completed in error, most likely due to a +* timeout. +* +* SYNOPSIS +*/ +typedef void (*osm_vend_mad_send_err_callback_t) (IN void *bind_context, + IN osm_madw_t * p_madw); +/* +* PARAMETERS +* bind_context +* [in] User context supplied during the bind call. +* +* p_madw +* [in] Pointer to the request mad that failed. +* +* RETURN VALUES +* None. +* +* NOTES +* The vendor layer does not call this function (or any other) +* for MADs that were not expecting a response. +* +* SEE ALSO +*********/ + +/****f* OpenSM Vendor API/osm_vendor_new +* NAME +* osm_vendor_new +* +* DESCRIPTION +* Allocates and initializes a new osm_vendor_t object. +* OpenSM calls this function before any other in the vendor API. +* This object is passed as a parameter to all other vendor functions. +* +* SYNOPSIS +*/ +osm_vendor_t *osm_vendor_new(IN osm_log_t * const p_log, + IN const uint32_t timeout); +/* +* PARAMETERS +* p_log +* [in] Pointer to the log object to use. +* +* timeout +* [in] transaction timeout +* +* RETURN VALUES +* Returns a pointer to the vendor object. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM Vendor API/osm_vendor_delete +* NAME +* osm_vendor_delete +* +* DESCRIPTION +* Deallocate the vendor object.
+* +* SYNOPSIS +*/ +void osm_vendor_delete(IN osm_vendor_t ** const pp_vend); +/* +* PARAMETERS +* pp_vend +* [in/out] pointer to pointer to vendor object to be deleted +* +* RETURN VALUES +* None +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM Vendor API/osm_vendor_get_all_port_attr +* NAME +* osm_vendor_get_all_port_attr +* +* DESCRIPTION +* Returns an array of available port attribute structures. +* +* SYNOPSIS +*/ +ib_api_status_t +osm_vendor_get_all_port_attr(IN osm_vendor_t * const p_vend, + IN ib_port_attr_t * const p_attr_array, + IN uint32_t * const p_num_ports); +/* +* PARAMETERS +* p_vend +* [in] Pointer to the vendor object. +* +* p_attr_array +* [in/out] Pointer to pre-allocated array of port attributes. +* If it is NULL - then the command only updates the p_num_ports, +* and returns IB_INSUFFICIENT_MEMORY. +* +* p_num_ports +* [in/out] Pointer to a variable to hold the total number of ports +* available on the local machine. +* +* RETURN VALUES +* IB_SUCCESS on success. +* IB_INSUFFICIENT_MEMORY if the attribute array was not large enough. +* The number of attributes needed is returned in p_num_ports. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM Vendor API/osm_vendor_init +* NAME +* osm_vendor_init +* +* DESCRIPTION +* The osm_vendor_init function initializes the vendor transport layer. +* +* SYNOPSIS +*/ +ib_api_status_t +osm_vendor_init(IN osm_vendor_t * const p_vend, IN osm_log_t * const p_log, + IN const uint32_t timeout); +/* +* PARAMETERS +* p_vend +* [in] Pointer to the vendor object to initialize. +* +* p_log +* [in] Pointer to OpenSM's log object. Vendor code may +* use the log object to send messages to OpenSM's log. +* +* timeout +* [in] Transaction timeout value in milliseconds. +* A value of 0 disables timeouts.
+* +* RETURN VALUE +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM Vendor API/osm_vendor_bind +* NAME +* osm_vendor_bind +* +* DESCRIPTION +* The osm_vendor_bind function registers with the vendor transport layer +* per Mad Class per PortGuid for mad transport capability. +* +* SYNOPSIS +*/ +osm_bind_handle_t +osm_vendor_bind(IN osm_vendor_t * const p_vend, + IN osm_bind_info_t * const p_bind_info, + IN osm_mad_pool_t * const p_mad_pool, + IN osm_vend_mad_recv_callback_t mad_recv_callback, + IN osm_vend_mad_send_err_callback_t send_err_callback, + IN void *context); +/* +* PARAMETERS +* p_vend +* [in] pointer to the vendor object +* +* p_bind_info +* [in] pointer to a struct defining the type of bind to perform. +* +* p_mad_pool +* [in] pointer to a mad wrappers pool to be used for allocating +* mad wrappers on send and receive. +* +* mad_recv_callback +* [in] the callback function to be invoked on mad receive. +* +* send_err_callback +* [in] the callback function to be invoked on mad transaction errors. +* +* context +* [in] the context to be provided to the callbacks as bind_context. +* +* RETURN VALUE +* On success, a valid bind handle. +* OSM_BIND_INVALID_HANDLE otherwise. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM Vendor API/osm_vendor_unbind +* NAME +* osm_vendor_unbind +* +* DESCRIPTION +* Unbind the given bind handle (obtained by osm_vendor_bind). +* +* SYNOPSIS +*/ +void osm_vendor_unbind(IN osm_bind_handle_t h_bind); +/* +* PARAMETERS +* h_bind +* [in] the bind handle to release. +* +* RETURN VALUE +* NONE. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM Vendor API/osm_vendor_get +* NAME +* osm_vendor_get +* +* DESCRIPTION +* Obtain a mad wrapper holding actual mad buffer to be sent via +* the transport.
+* +* SYNOPSIS +*/ +ib_mad_t *osm_vendor_get(IN osm_bind_handle_t h_bind, + IN const uint32_t mad_size, + IN osm_vend_wrap_t * const p_vend_wrap); +/* +* PARAMETERS +* h_bind +* [in] the bind handle obtained by calling osm_vendor_bind +* +* mad_size +* [in] the actual mad size required +* +* p_vend_wrap +* [out] the returned mad vendor wrapper +* +* RETURN VALUE +* A pointer to the MAD buffer (ib_mad_t). +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM Vendor API/osm_vendor_send +* NAME +* osm_vendor_send +* +* DESCRIPTION +* +* SYNOPSIS +*/ +ib_api_status_t +osm_vendor_send(IN osm_bind_handle_t h_bind, + IN osm_madw_t * const p_madw, IN boolean_t const resp_expected); +/* +* PARAMETERS +* h_bind +* [in] the bind handle obtained by calling osm_vendor_bind +* +* p_madw +* [in] pointer to the Mad Wrapper structure for the MAD to be sent. +* +* resp_expected +* [in] boolean value declaring the mad as a request (expecting a response). +* +* RETURN VALUE +* IB_SUCCESS on successful completion. +* +* NOTES +* 1. Only mads that expect a response are tracked for transaction completion. +* 2. A mad that does not expect a response is being put back immediately +* after being sent. +* +* SEE ALSO +*********/ + +/****f* OpenSM Vendor API/osm_vendor_put +* NAME +* osm_vendor_put +* +* DESCRIPTION +* Return a mad vendor wrapper to the mad pool. It also means that the +* mad buffer is returned to the transport. +* +* SYNOPSIS +*/ +void +osm_vendor_put(IN osm_bind_handle_t h_bind, + IN osm_vend_wrap_t * const p_vend_wrap); +/* +* PARAMETERS +* h_bind +* [in] the bind handle obtained by calling osm_vendor_bind +* +* p_vend_wrap +* [in] pointer to the mad vendor wrapper to put back into the pool. +* +* RETURN VALUE +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****i* OpenSM Vendor API/osm_vendor_local_lid_change +* NAME +* osm_vendor_local_lid_change +* +* DESCRIPTION +* Notifies the vendor transport layer that the local address +* has changed.
This allows the vendor layer to perform housekeeping +* functions such as address vector updates. +* +* SYNOPSIS +*/ +ib_api_status_t osm_vendor_local_lid_change(IN osm_bind_handle_t h_bind); +/* +* PARAMETERS +* h_bind +* [in] the bind handle obtained by calling osm_vendor_bind +* +* RETURN VALUE +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM Vendor API/osm_vendor_set_sm +* NAME +* osm_vendor_set_sm +* +* DESCRIPTION +* Modifies the port info for the bound port to set the "IS_SM" bit +* according to the value given (TRUE or FALSE). +* +* SYNOPSIS +*/ +void osm_vendor_set_sm(IN osm_bind_handle_t h_bind, IN boolean_t is_sm_val); +/* +* PARAMETERS +* h_bind +* [in] bind handle for this port. +* +* is_sm_val +* [in] If TRUE - will set the is_sm to TRUE, if FALSE - will set the +* the is_sm to FALSE. +* +* RETURN VALUE +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM Vendor API/osm_vendor_set_debug +* NAME +* osm_vendor_set_debug +* +* DESCRIPTION +* Modifies the vendor specific debug level. +* +* SYNOPSIS +*/ +void osm_vendor_set_debug(IN osm_vendor_t * const p_vend, IN int32_t level); +/* +* PARAMETERS +* p_vend +* [in] vendor handle. +* +* level +* [in] vendor specific debug level. +* +* RETURN VALUE +* None. +* +* NOTES +* +* SEE ALSO +*********/ + +END_C_DECLS +#endif /* _OSM_VENDOR_API_H_ */ diff --git a/include/vendor/osm_vendor_ibumad.h b/include/vendor/osm_vendor_ibumad.h new file mode 100644 index 0000000..e26afd3 --- /dev/null +++ b/include/vendor/osm_vendor_ibumad.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _OSM_VENDOR_UMAD_H_ +#define _OSM_VENDOR_UMAD_H_ + +#include +#include +#include +#include + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/Vendor Access Layer (UMAD) +* NAME +* Vendor UMAD +* +* DESCRIPTION +* This file is the vendor specific file for the UMAD Infiniband API. 
+* +* AUTHOR +* +* +*********/ +#define OSM_DEFAULT_RETRY_COUNT 3 +#define OSM_UMAD_MAX_CAS UMAD_MAX_DEVICES +#define OSM_UMAD_MAX_PORTS_PER_CA 2 +#define OSM_UMAD_MAX_AGENTS 32 + +/****s* OpenSM: Vendor UMAD/osm_ca_info_t +* NAME +* osm_ca_info_t +* +* DESCRIPTION +* Structure containing information about local Channel Adapters. +* +* SYNOPSIS +*/ +typedef struct _osm_ca_info { + ib_net64_t guid; + size_t attr_size; + ib_ca_attr_t *p_attr; +} osm_ca_info_t; +/* +* FIELDS +* guid +* Node GUID of the local CA. +* +* attr_size +* Size of the CA attributes for this CA. +* +* p_attr +* Pointer to dynamically allocated CA Attribute structure. +* +* SEE ALSO +*********/ + +/****f* OpenSM: CA Info/osm_ca_info_get_num_ports +* NAME +* osm_ca_info_get_num_ports +* +* DESCRIPTION +* Returns the number of ports owned by this CA. +* +* SYNOPSIS +*/ +static inline uint8_t +osm_ca_info_get_num_ports(IN const osm_ca_info_t * const p_ca_info) +{ + return (p_ca_info->p_attr->num_ports); +} + +/* +* PARAMETERS +* p_ca_info +* [in] Pointer to a CA Info object. +* +* RETURN VALUE +* Returns the number of ports owned by this CA. +* +* NOTES +* +* SEE ALSO +*********/ + +/****s* OpenSM: Vendor UMAD/osm_bind_handle_t +* NAME +* osm_bind_handle_t +* +* DESCRIPTION +* handle returned by the vendor transport bind call.
+* +* SYNOPSIS +*/ +typedef void *osm_bind_handle_t; +/***********/ + +typedef struct _umad_match { + ib_net64_t tid; + void *v; + uint32_t version; + uint8_t mgmt_class; +} umad_match_t; + +#define DEFAULT_OSM_UMAD_MAX_PENDING 1000 + +typedef struct vendor_match_tbl { + uint32_t last_version; + int max; + umad_match_t *tbl; +} vendor_match_tbl_t; + +typedef struct _osm_vendor { + osm_log_t *p_log; + uint32_t ca_count; + osm_ca_info_t *p_ca_info; + uint32_t timeout; + int max_retries; + osm_bind_handle_t agents[OSM_UMAD_MAX_AGENTS]; + char ca_names[OSM_UMAD_MAX_CAS][UMAD_CA_NAME_LEN]; + vendor_match_tbl_t mtbl; + umad_port_t umad_port; + pthread_mutex_t cb_mutex; + pthread_mutex_t match_tbl_mutex; + int umad_port_id; + void *receiver; + int issmfd; + char issm_path[256]; +} osm_vendor_t; + +#define OSM_BIND_INVALID_HANDLE NULL + +typedef struct _osm_vend_wrap { + int agent; + int size; + int retries; + void *umad; + osm_bind_handle_t h_bind; +} osm_vend_wrap_t; + +END_C_DECLS +#endif /* _OSM_VENDOR_UMAD_H_ */ diff --git a/include/vendor/osm_vendor_mlx.h b/include/vendor/osm_vendor_mlx.h new file mode 100644 index 0000000..bc0847b --- /dev/null +++ b/include/vendor/osm_vendor_mlx.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2007,2009 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef _OSMV_H_ +#define _OSMV_H_ + +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/* + Forward reference +*/ +struct _osm_pkt_randomizer; + +/* The structure behind the OSM Vendor handle */ + +typedef struct _osm_vendor { + + /* for holding common transport info - useful at ibmgt transport */ + void *p_transport_info; + + osm_log_t *p_log; + + /* Uniform timeout for every ACK/single MAD */ + uint32_t resp_timeout; + + /* Uniform timeout for every rmpp transaction */ + uint32_t ttime_timeout; + + /* All the bind handles associated with the vendor */ + cl_qlist_t bind_handles; + + /* run randomizer flag */ + boolean_t run_randomizer; + + /* Packet Randomizer object */ + struct _osm_pkt_randomizer *p_pkt_randomizer; + +} osm_vendor_t; + +/* Repeating the definitions in osm_vendor_api.h */ + +typedef void *osm_bind_handle_t; + +typedef struct _osm_vend_wrap { + ib_mad_t *p_mad; +} osm_vend_wrap_t; + +#ifndef OSM_BIND_INVALID_HANDLE +#define OSM_BIND_INVALID_HANDLE NULL +#endif + +/* The maximum number of retransmissions of the same MAD */ +#define OSM_DEFAULT_RETRY_COUNT 3 + +END_C_DECLS +#endif diff --git a/include/vendor/osm_vendor_mlx_defs.h b/include/vendor/osm_vendor_mlx_defs.h new file mode 100644 index 0000000..c4ee053 --- /dev/null +++ b/include/vendor/osm_vendor_mlx_defs.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005,2009 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _OSMV_DEFS_H_ +#define _OSMV_DEFS_H_ + +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/* The maximum number of outstanding MADs an RMPP sender can transmit */ +#define OSMV_RMPP_RECV_WIN 16 +/* Transaction Timeout = OSMV_TXN_TIMEOUT_FACTOR * Response Timeout */ +#define OSMV_TXN_TIMEOUT_FACTOR 128 +/************/ +/****s* OSM Vendor: Types/osmv_bind_obj_t +* NAME +* osmv_bind_obj_t +* +* DESCRIPTION +* The object managing a single bind context. +* The bind handle is a direct pointer to it. 
+* +* SYNOPSIS +*/ +typedef struct _osmv_bind_obj { + /* Used to signal when the struct is being destroyed */ + struct _osmv_bind_obj *magic_ptr; + + osm_vendor_t /*const */ * p_vendor; + + uint32_t hca_hndl; + uint32_t port_num; + + /* Atomic access protector */ + cl_spinlock_t lock; + + /* is_closing == TRUE --> the handle is being unbound */ + boolean_t is_closing; + + /* Event callbacks */ + osm_vend_mad_recv_callback_t recv_cb; + osm_vend_mad_send_err_callback_t send_err_cb; + /* ... and their context */ + void *cb_context; + + /* A pool to manage MAD wrappers */ + osm_mad_pool_t *p_osm_pool; + + /* each subvendor implements its own transport mgr */ + void *p_transp_mgr; + + /* The transaction DB */ + osmv_txn_mgr_t txn_mgr; + +} osmv_bind_obj_t; + +END_C_DECLS +#endif /* _OSMV_DEFS_H_ */ diff --git a/include/vendor/osm_vendor_mlx_dispatcher.h b/include/vendor/osm_vendor_mlx_dispatcher.h new file mode 100644 index 0000000..ba83f30 --- /dev/null +++ b/include/vendor/osm_vendor_mlx_dispatcher.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _OSMV_MAD_DISPATCHER_H_ +#define _OSMV_MAD_DISPATCHER_H_ + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/* + * NAME + * osmv_dispatch_mad + * + * DESCRIPTION + * Lower-level MAD dispatcher. + * Implements a switch between the following MAD consumers: + * (1) Non-RMPP consumer (DATA) + * (2) RMPP receiver (DATA/ABORT/STOP) + * (3) RMPP sender (ACK/ABORT/STOP) + * + * PARAMETERS + * h_bind The bind handle + * p_mad_buf The 256 byte buffer of individual MAD + * p_mad_addr The MAD originator's address + */ +ib_api_status_t +osmv_dispatch_mad(IN osm_bind_handle_t h_bind, + IN const void *p_mad_buf, + IN const osm_mad_addr_t * p_mad_addr); + +END_C_DECLS +#endif diff --git a/include/vendor/osm_vendor_mlx_hca.h b/include/vendor/osm_vendor_mlx_hca.h new file mode 100644 index 0000000..9b56943 --- /dev/null +++ b/include/vendor/osm_vendor_mlx_hca.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef _OSMV_HCA_H_ +#define _OSMV_HCA_H_ + +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +#if defined( OSM_VENDOR_INTF_TS_NO_VAPI ) || defined( OSM_VENDOR_INTF_SIM ) +#define VAPI_hca_hndl_t uint32_t +#define VAPI_hca_id_t char* +#endif +ib_api_status_t +osm_vendor_get_guid_ca_and_port(IN osm_vendor_t const *p_vend, + IN ib_net64_t const guid, + OUT uint32_t * p_hca_hndl, + OUT char *p_hca_id, + OUT uint8_t * p_hca_idx, + OUT uint32_t * p_port_num); + +END_C_DECLS +#endif /* _OSMV_HCA_H_ */ diff --git a/include/vendor/osm_vendor_mlx_inout.h b/include/vendor/osm_vendor_mlx_inout.h new file mode 100644 index 0000000..868639b --- /dev/null +++ b/include/vendor/osm_vendor_mlx_inout.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _OSMV_INOUT_H_ +#define _OSMV_INOUT_H_ + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +#ifdef IN +#undef IN +#endif +#ifdef OUT +#undef OUT +#endif +#ifndef OSM_VENDOR_INTF_ANAFA +#ifndef OSM_VENDOR_INTF_TS_NO_VAPI +#ifndef OSM_VENDOR_INTF_SIM +#include +#endif +#endif +#endif +#ifndef IN +#define IN +#endif +#ifndef OUT +#define OUT +#endif +#ifndef OSM_VENDOR_INTF_TS_NO_VAPI +#ifndef OSM_VENDOR_INTF_ANAFA +#ifndef OSM_VENDOR_INTF_SIM +#include +#include +#endif +#endif +#endif +END_C_DECLS +#endif /* _OSMV_INOUT_H_ */ diff --git a/include/vendor/osm_vendor_mlx_rmpp_ctx.h b/include/vendor/osm_vendor_mlx_rmpp_ctx.h new file mode 100644 index 0000000..dac1f13 --- /dev/null +++ b/include/vendor/osm_vendor_mlx_rmpp_ctx.h @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef _OSMV_RMPP_CTX_H +#define _OSMV_RMPP_CTX_H + +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS + +typedef struct _osmv_rmpp_send_ctx { + + uint8_t status; + + uint32_t window_first; + uint32_t window_last; + + uint32_t mad_sz; + boolean_t is_sa_mad; + + cl_event_t event; + + /* Segmentation engine */ + osmv_rmpp_sar_t sar; + osm_log_t *p_log; + +} osmv_rmpp_send_ctx_t; + +typedef struct _osmv_rmpp_recv_ctx { + + boolean_t is_sa_mad; + + uint32_t expected_seg; + + /* Reassembly buffer */ + cl_qlist_t *p_rbuf; + + /* Reassembly engine */ + osmv_rmpp_sar_t sar; + osm_log_t *p_log; + +} osmv_rmpp_recv_ctx_t; + +/* + * NAME + * osmv_rmpp_send_ctx_init + * + * DESCRIPTION + * c'tor for rmpp_send_ctx obj + * + * SEE ALSO + * + */ +ib_api_status_t +osmv_rmpp_send_ctx_init(osmv_rmpp_send_ctx_t * p_ctx, void *arbt_mad, + uint32_t mad_sz, osm_log_t * p_log); + +/* + * NAME + * osmv_rmpp_send_ctx_done + * + * DESCRIPTION + * d'tor for rmpp_send_ctx obj + * + * SEE ALSO + * + */ +void osmv_rmpp_send_ctx_done(IN osmv_rmpp_send_ctx_t * ctx); + +/* + * NAME + * osmv_rmpp_send_ctx_get_wf + * + * DESCRIPTION + * returns number of first segment in current window + * SEE ALSO + * + */ +static inline uint32_t +osmv_rmpp_send_ctx_get_wf(IN const osmv_rmpp_send_ctx_t * p_ctx) +{ + CL_ASSERT(p_ctx); + return p_ctx->window_first; +} + +/* + * NAME + * osmv_rmpp_send_ctx_set_wf + * + * DESCRIPTION + * sets number of first segment in current window + * SEE ALSO + * + */ +static inline void +osmv_rmpp_send_ctx_set_wf(IN osmv_rmpp_send_ctx_t * p_ctx, IN uint32_t val) +{ + CL_ASSERT(p_ctx); + p_ctx->window_first = val; +} + +/* + * NAME + * osmv_rmpp_send_ctx_get_wl + * + * DESCRIPTION + * returns number of last segment in current window + * SEE ALSO + * + */ +static inline uint32_t
+osmv_rmpp_send_ctx_get_wl(IN const osmv_rmpp_send_ctx_t * p_send_ctx) +{ + CL_ASSERT(p_send_ctx); + return p_send_ctx->window_last; +} + +/* + * NAME + * osmv_rmpp_send_ctx_set_wl + * + * DESCRIPTION + * sets number of last segment in current window + * SEE ALSO + * + */ +static inline void +osmv_rmpp_send_ctx_set_wl(IN osmv_rmpp_send_ctx_t * p_ctx, IN uint32_t val) +{ + CL_ASSERT(p_ctx); + p_ctx->window_last = val; +} + +/* + * NAME + * osmv_rmpp_send_ctx_get_num_segs + * + * DESCRIPTION + * returns the total number of mad segments to send + * SEE ALSO + * + */ +uint32_t osmv_rmpp_send_ctx_get_num_segs(IN osmv_rmpp_send_ctx_t * p_send_ctx); + +/* + * NAME + * osmv_rmpp_send_ctx_get_seg + * + * DESCRIPTION + * Retrieves the mad segment by seg number (including setting the mad relevant bits & hdrs) + * SEE ALSO + * + */ +ib_api_status_t +osmv_rmpp_send_ctx_get_seg(IN osmv_rmpp_send_ctx_t * p_send_ctx, + IN uint32_t seg_idx, IN uint32_t resp_timeout, + OUT void *p_mad); + +/* + * NAME + * osmv_rmpp_recv_ctx_init + * + * DESCRIPTION + * c'tor for rmpp_recv_ctx obj + * SEE ALSO + * + */ +ib_api_status_t +osmv_rmpp_recv_ctx_init(osmv_rmpp_recv_ctx_t * p_ctx, osm_log_t * p_log); + +/* + * NAME + * osmv_rmpp_recv_ctx_done + * + * DESCRIPTION + * d'tor for rmpp_recv_ctx obj + * SEE ALSO + * + */ +void osmv_rmpp_recv_ctx_done(IN osmv_rmpp_recv_ctx_t * p_ctx); + +/* + * NAME + * osmv_rmpp_recv_ctx_get_es + * + * DESCRIPTION + * returns index of expected segment in the current window + * + */ +static inline uint32_t +osmv_rmpp_recv_ctx_get_es(IN const osmv_rmpp_recv_ctx_t * p_recv_ctx) +{ + CL_ASSERT(p_recv_ctx); + return p_recv_ctx->expected_seg; +} + +/* + * NAME + * osmv_rmpp_recv_ctx_set_es + * + * DESCRIPTION + * sets index of expected segment in the current window + * + */ +static inline void +osmv_rmpp_recv_ctx_set_es(IN osmv_rmpp_recv_ctx_t * p_recv_ctx, IN uint32_t val) +{ + CL_ASSERT(p_recv_ctx); + p_recv_ctx->expected_seg = val; +} + +/* + * NAME + *
osmv_rmpp_recv_ctx_store_mad_seg + * + * DESCRIPTION + * stores rmpp mad in the list + * + */ +ib_api_status_t +osmv_rmpp_recv_ctx_store_mad_seg(IN osmv_rmpp_recv_ctx_t * p_recv_ctx, + IN void *p_mad); + +uint32_t +osmv_rmpp_recv_ctx_get_cur_byte_num(IN osmv_rmpp_recv_ctx_t * p_recv_ctx); + +uint32_t +osmv_rmpp_recv_ctx_get_byte_num_from_first(IN osmv_rmpp_recv_ctx_t * + p_recv_ctx); + +uint32_t +osmv_rmpp_recv_ctx_get_byte_num_from_last(IN osmv_rmpp_recv_ctx_t * p_recv_ctx); + +/* + * NAME + * osmv_rmpp_recv_ctx_reassemble_arbt_mad + * + * DESCRIPTION + * reassembles all rmpp buffs to one big arbitrary mad + */ +ib_api_status_t +osmv_rmpp_recv_ctx_reassemble_arbt_mad(IN osmv_rmpp_recv_ctx_t * p_recv_ctx, + IN uint32_t size, IN void *p_arbt_mad); + +END_C_DECLS +#endif diff --git a/include/vendor/osm_vendor_mlx_sar.h b/include/vendor/osm_vendor_mlx_sar.h new file mode 100644 index 0000000..a65b4f2 --- /dev/null +++ b/include/vendor/osm_vendor_mlx_sar.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer.
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _OSMV_SAR_H_ +#define _OSMV_SAR_H_ + +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS + +typedef struct _osmv_rmpp_sar { + void *p_arbt_mad; + uint32_t data_len; /* total data len in all the mads */ + /* these data members contain only constants */ + uint32_t hdr_sz; + uint32_t data_sz; /* typical data sz for this kind of mad (sa or regular) */ + +} osmv_rmpp_sar_t; + +/* + * NAME + * osmv_rmpp_sar_init + * + * DESCRIPTION + * c'tor for rmpp_sar object + * + * SEE ALSO + * + */ +ib_api_status_t +osmv_rmpp_sar_init(osmv_rmpp_sar_t * p_sar, void *p_arbt_mad, + uint32_t mad_size, boolean_t is_sa_mad); + +/* + * NAME + * osmv_rmpp_sar_done + * + * DESCRIPTION + * d'tor for rmpp_sar object + * + * SEE ALSO + * + */ +void osmv_rmpp_sar_done(osmv_rmpp_sar_t * p_sar); + +/* + * NAME + * osmv_rmpp_sar_get_mad_seg + * + * DESCRIPTION + * segments the original mad buffer.
returns a mad with the data of the i-th segment + * + * SEE ALSO + * + */ +ib_api_status_t +osmv_rmpp_sar_get_mad_seg(osmv_rmpp_sar_t * p_sar, uint32_t seg_idx, + void *p_buf); + +/* + * NAME + * osmv_rmpp_sar_reassemble_arbt_mad + * + * DESCRIPTION + * gets a qlist of mads and reassembles to one big mad buffer + * ALSO - deallocates the mad list + * + * SEE ALSO + * + */ +ib_api_status_t +osmv_rmpp_sar_reassemble_arbt_mad(osmv_rmpp_sar_t * p_sar, cl_qlist_t * p_bufs); + +END_C_DECLS +#endif /* _OSMV_SAR_H_ */ diff --git a/include/vendor/osm_vendor_mlx_sender.h b/include/vendor/osm_vendor_mlx_sender.h new file mode 100644 index 0000000..e849744 --- /dev/null +++ b/include/vendor/osm_vendor_mlx_sender.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _OSMV_RMPP_SENDER_H_ +#define _OSMV_RMPP_SENDER_H_ + +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****d* OSM Vendor/osmv_simple_send_madw + * NAME + * osmv_simple_send_madw + * + * DESCRIPTION + * Send a single MAD (256 bytes). + * + * If this MAD requires a response, set the timeout event. + * The function call returns when the MAD's send completion is received. + * + */ +ib_api_status_t +osmv_simple_send_madw(IN osm_bind_handle_t h_bind, + IN osm_madw_t * const p_madw, + IN osmv_txn_ctx_t * p_txn, IN boolean_t is_retry); + +/****d* OSM Vendor/osmv_rmpp_send_madw + * NAME + * osmv_rmpp_send_madw + * + * DESCRIPTION + * Send a single MAD wrapper (of arbitrary length). + * Follow the RMPP semantics + * (segmentation, send window, timeouts etc). + * + * The function call returns either when the whole MAD + * has been acknowledged, or upon error. 
+ */ +ib_api_status_t +osmv_rmpp_send_madw(IN osm_bind_handle_t h_bind, + IN osm_madw_t * const p_madw, + IN osmv_txn_ctx_t * p_txn, IN boolean_t is_rmpp_ds); + +/* + * NAME osmv_rmpp_send_ack + * + * DESCRIPTION Presumably sends the RMPP ACK packet (compare + * osmv_rmpp_send_nak) - TODO confirm against the implementation + */ + +ib_api_status_t +osmv_rmpp_send_ack(IN osm_bind_handle_t h_bind, + IN const ib_mad_t * p_req_mad, + IN uint32_t seg_num, + IN uint32_t nwl, IN const osm_mad_addr_t * p_mad_addr); + +/* + * NAME osmv_rmpp_send_nak + * + * DESCRIPTION Send the RMPP ABORT or STOP packet + */ + +ib_api_status_t +osmv_rmpp_send_nak(IN osm_bind_handle_t h_bind, + IN const ib_mad_t * p_req_mad, + IN const osm_mad_addr_t * p_mad_addr, + IN uint8_t nak_type, IN uint8_t status); + +/* + * NAME osmv_rmpp_snd_error + * + * DESCRIPTION Mark an error status and signal the sender thread to handle it + */ + +static inline void +osmv_rmpp_snd_error(IN osmv_rmpp_send_ctx_t * p_send_ctx, + IN ib_api_status_t status) +{ + p_send_ctx->status = status; /* NOTE(review): the status field of + * osmv_rmpp_send_ctx_t is declared uint8_t, so this narrows the + * ib_api_status_t value - confirm every status code fits */ + + /* Release the thread waiting on send() + * It will release the transaction's context + */ + cl_event_signal(&p_send_ctx->event); +} + +END_C_DECLS +#endif /* _OSMV_RMPP_SENDER_H_ */ diff --git a/include/vendor/osm_vendor_mlx_svc.h b/include/vendor/osm_vendor_mlx_svc.h new file mode 100644 index 0000000..43a83e3 --- /dev/null +++ b/include/vendor/osm_vendor_mlx_svc.h @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _OSMV_SVC_H_ +#define _OSMV_SVC_H_ + +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/* + * Map a MAD method to its request/response counterpart. + * The mapping is not 1-1: GET and SET both map to GET_RESP, and GETMULTI + * and GETTRACETABLE both map to GETMULTI_RESP; SEND maps to itself. + * An unknown method hits CL_ASSERT(FALSE); if that does not abort, + * 0 is returned. + */ +inline static uint8_t osmv_invert_method(IN uint8_t req_method) +{ + switch (req_method) { + case IB_MAD_METHOD_GET_RESP: + /* Not a 1-1 mapping!
*/ + return IB_MAD_METHOD_GET; + + case IB_MAD_METHOD_GET: + return IB_MAD_METHOD_GET_RESP; + + case IB_MAD_METHOD_SET: + return IB_MAD_METHOD_GET_RESP; + + case IB_MAD_METHOD_GETTABLE_RESP: + return IB_MAD_METHOD_GETTABLE; + + case IB_MAD_METHOD_GETTABLE: + return IB_MAD_METHOD_GETTABLE_RESP; + + case IB_MAD_METHOD_GETMULTI_RESP: + /* Not a 1-1 mapping! */ + return IB_MAD_METHOD_GETMULTI; + + case IB_MAD_METHOD_GETTRACETABLE: + case IB_MAD_METHOD_GETMULTI: + return IB_MAD_METHOD_GETMULTI_RESP; + + case IB_MAD_METHOD_TRAP: + return IB_MAD_METHOD_TRAP_REPRESS; + + case IB_MAD_METHOD_TRAP_REPRESS: + return IB_MAD_METHOD_TRAP; + + case IB_MAD_METHOD_REPORT: + return IB_MAD_METHOD_REPORT_RESP; + + case IB_MAD_METHOD_REPORT_RESP: + return IB_MAD_METHOD_REPORT; + + /* IB_MAD_METHOD_SEND does not have a response */ + case IB_MAD_METHOD_SEND: + return IB_MAD_METHOD_SEND; + + default: + CL_ASSERT(FALSE); + } + + return 0; /* Just make the compiler happy */ +} + +/* + * TRUE when p_mad is of the SA or DevMgt class and has IB_RMPP_FLAG_ACTIVE + * set in its RMPP flags; MADs of any other class are reported as non-RMPP. + */ +inline static boolean_t osmv_mad_is_rmpp(IN const ib_mad_t * p_mad) +{ + uint8_t rmpp_flags; + CL_ASSERT(NULL != p_mad); + + rmpp_flags = ((const ib_rmpp_mad_t *) p_mad)->rmpp_flags; + /* HACK - JUST SA and DevMgt for now - need to add BIS and DevAdm */ + if ((p_mad->mgmt_class != IB_MCLASS_SUBN_ADM) && + (p_mad->mgmt_class != IB_MCLASS_DEV_MGMT)) + return FALSE; + return (0 != (rmpp_flags & IB_RMPP_FLAG_ACTIVE)); +} + +/* TRUE when the MAD method is GETMULTI or GETTRACETABLE */ +inline static boolean_t osmv_mad_is_multi_resp(IN const ib_mad_t * p_mad) +{ + CL_ASSERT(NULL != p_mad); + return (IB_MAD_METHOD_GETMULTI == p_mad->method + || IB_MAD_METHOD_GETTRACETABLE == p_mad->method); +} + +/* TRUE when the MAD's management class is IB_MCLASS_SUBN_ADM */ +inline static boolean_t osmv_mad_is_sa(IN const ib_mad_t * p_mad) +{ + CL_ASSERT(NULL != p_mad); + return (IB_MCLASS_SUBN_ADM == p_mad->mgmt_class); +} + +/* TRUE when the RMPP type of the MAD is STOP or ABORT */ +inline static boolean_t osmv_rmpp_is_abort_stop(IN const ib_mad_t * p_mad) +{ + uint8_t rmpp_type; + CL_ASSERT(p_mad); + + rmpp_type = ((const ib_rmpp_mad_t *) p_mad)->rmpp_type; + return (IB_RMPP_TYPE_STOP == rmpp_type + || IB_RMPP_TYPE_ABORT ==
rmpp_type); +} + +/* TRUE when the RMPP type of the MAD is DATA */ +inline static boolean_t osmv_rmpp_is_data(IN const ib_mad_t * p_mad) +{ + CL_ASSERT(p_mad); + return (IB_RMPP_TYPE_DATA == ((const ib_rmpp_mad_t *) p_mad)->rmpp_type); +} + +/* TRUE when the RMPP type of the MAD is ACK */ +inline static boolean_t osmv_rmpp_is_ack(IN const ib_mad_t * p_mad) +{ + CL_ASSERT(p_mad); + return (IB_RMPP_TYPE_ACK == ((const ib_rmpp_mad_t *) p_mad)->rmpp_type); +} + +/* TRUE when IB_RMPP_FLAG_FIRST is set in the MAD's RMPP flags */ +inline static boolean_t osmv_rmpp_is_first(IN const ib_mad_t * p_mad) +{ + uint8_t rmpp_flags; + CL_ASSERT(NULL != p_mad); + + rmpp_flags = ((const ib_rmpp_mad_t *) p_mad)->rmpp_flags; + return (0 != (IB_RMPP_FLAG_FIRST & rmpp_flags)); +} + +/* TRUE when IB_RMPP_FLAG_LAST is set in the MAD's RMPP flags */ +inline static boolean_t osmv_rmpp_is_last(IN const ib_mad_t * p_mad) +{ + uint8_t rmpp_flags; + CL_ASSERT(NULL != p_mad); + + rmpp_flags = ((const ib_rmpp_mad_t *) p_mad)->rmpp_flags; + return (0 != (IB_RMPP_FLAG_LAST & rmpp_flags)); +} + +/* + * Duplicate MAD_BLOCK_SIZE bytes of p_mad into a freshly malloc'ed buffer. + * Returns NULL when malloc fails; nothing in this function frees the copy. + */ +inline static uint8_t *osmv_mad_copy(IN const ib_mad_t * p_mad) +{ + uint8_t *p_copy; + + CL_ASSERT(p_mad); + p_copy = malloc(MAD_BLOCK_SIZE); + + if (NULL != p_copy) { + /* memcpy fills all MAD_BLOCK_SIZE bytes; no pre-zeroing needed */ + memcpy(p_copy, p_mad, MAD_BLOCK_SIZE); + } + + return p_copy; +} + +/* Should be passed externally from the Makefile */ +/* #define OSMV_RANDOM_DROP 1 */ +#define OSMV_DROP_RATE 0.3 + +/* + * Returns TRUE for roughly OSMV_DROP_RATE of the calls, based on rand(). + */ +inline static boolean_t osmv_random_drop(void) +{ + /* + * Do not call srand() here: reseeding with a fixed value on every + * call restarts the sequence, so rand() would return the same number + * each time and the decision would never vary. + */ + return (rand() / (double)RAND_MAX < OSMV_DROP_RATE); +} + +END_C_DECLS +#endif /* _OSMV_SVC_H_ */ diff --git a/include/vendor/osm_vendor_mlx_transport.h b/include/vendor/osm_vendor_mlx_transport.h new file mode 100644 index 0000000..2840e49 --- /dev/null +++ b/include/vendor/osm_vendor_mlx_transport.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/** + * FILE osmv_transport.h + * AUTHOR Edward Bortnikov + * + * DESCRIPTION + * The lower-level MAD transport interface implementation + * that allows sending a single MAD/receiving a callback + * when a single MAD is received. +*/ + +#ifndef _OSMV_TRANSPORT_H_ +#define _OSMV_TRANSPORT_H_ + +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/* + * NAME + * osmv_transport_init + * + * DESCRIPTION + * Setup the MAD transport infrastructure (filters, callbacks etc). 
+ */ +#define VENDOR_HCA_MAXNAMES 32 +ib_api_status_t +osmv_transport_init(IN osm_bind_info_t * p_info, + IN char hca_id[VENDOR_HCA_MAXNAMES], + IN uint8_t hca_idx, IN osmv_bind_obj_t * p_bo); + +/* + * NAME + * osmv_transport_mad_send + * + * DESCRIPTION + * Send a single MAD (256 bytes) + */ +ib_api_status_t +osmv_transport_mad_send(IN const osm_bind_handle_t h_bind, + IN void *p_mad, IN const osm_mad_addr_t * p_mad_addr); + +/* + * NAME + * osmv_transport_done + * + * DESCRIPTION + * deallocator of the transport infrastructure + */ +void osmv_transport_done(IN const osm_bind_handle_t h_bind); + +END_C_DECLS +#endif /* _OSMV_TRANSPORT_H_ */ diff --git a/include/vendor/osm_vendor_mlx_transport_anafa.h b/include/vendor/osm_vendor_mlx_transport_anafa.h new file mode 100644 index 0000000..dac26ed --- /dev/null +++ b/include/vendor/osm_vendor_mlx_transport_anafa.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/** + * FILE osmv_transport.h + * AUTHOR Edward Bortnikov + * + * DESCRIPTION + * The lower-level MAD transport interface implementation + * that allows sending a single MAD/receiving a callback + * when a single MAD is received. +*/ + +#ifndef _OSMV_TRANSPORT_ANAFA_H_ +#define _OSMV_TRANSPORT_ANAFA_H_ + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +#define OSMV_ANAFA_ID 0 +typedef struct _osmv_TOPSPIN_ANAFA_transport_mgr_ { + int device_fd; + cl_thread_t receiver; +} osmv_TOPSPIN_ANAFA_transport_mgr_t; + +typedef struct _osmv_TOPSPIN_ANAFA_transport_info_ { + int device_fd; +} osmv_TOPSPIN_ANAFA_transport_info_t; + +END_C_DECLS +#endif /* _OSMV_TRANSPORT_ANAFA_H_ */ diff --git a/include/vendor/osm_vendor_mlx_txn.h b/include/vendor/osm_vendor_mlx_txn.h new file mode 100644 index 0000000..ce59191 --- /dev/null +++ b/include/vendor/osm_vendor_mlx_txn.h @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef _OSMV_TXN_H_ +#define _OSMV_TXN_H_ + +#include +#include + +#include +#include +#include + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS + +typedef enum _osmv_txn_rmpp_state { + + OSMV_TXN_RMPP_NONE = 0, /* Not part of RMPP transaction */ + + OSMV_TXN_RMPP_SENDER, + OSMV_TXN_RMPP_RECEIVER +} osmv_txn_rmpp_state_t; + +typedef struct _osmv_rmpp_txfr { + + osmv_txn_rmpp_state_t rmpp_state; + boolean_t is_rmpp_init_by_peer; + osmv_rmpp_send_ctx_t *p_rmpp_send_ctx; + osmv_rmpp_recv_ctx_t *p_rmpp_recv_ctx; + +} osmv_rmpp_txfr_t; + +typedef struct _osmv_txn_ctx { + + /* The original Transaction ID */ + uint64_t tid; + /* The key by which the Transaction is stored */ + uint64_t key; + + /* RMPP Send/Receive contexts, if applicable */ + osmv_rmpp_txfr_t rmpp_txfr; + + /* A MAD that was sent during the transaction (request or response) */ + osm_madw_t *p_madw; + + /* Reference to a log to enable tracing */ + osm_log_t *p_log; + +} osmv_txn_ctx_t; + +typedef struct _osmv_txn_mgr { + + /* Container of all the transactions */ + cl_qmap_t *p_txn_map; + + /* The timeouts DB */ + cl_event_wheel_t *p_event_wheel; + + /* Reference to a log to enable tracing */ + osm_log_t *p_log; + +} osmv_txn_mgr_t; + +/* * * * * * * osmv_txn_ctx_t functions * * * * * * * * */ + +/* + * NAME + * osmv_txn_init + * + * DESCRIPTION + * allocs & inits the osmv_txn_ctx obj and insert it into the db + * SEE ALSO + * + */ +ib_api_status_t +osmv_txn_init(IN osm_bind_handle_t h_bind, + IN uint64_t tid, IN uint64_t key, OUT osmv_txn_ctx_t ** pp_txn); + +/* + * NAME + * osmv_rmpp_txfr_init_sender + * + * DESCRIPTION + * init the rmpp send ctx in the transaction + * + * SEE ALSO + * + */ +ib_api_status_t +osmv_txn_init_rmpp_sender(IN osm_bind_handle_t h_bind, + IN osmv_txn_ctx_t * p_txn, IN osm_madw_t * p_madw); + +/* + * NAME + * 
osmv_txn_init_rmpp_receiver + * + * DESCRIPTION + * init the rmpp recv ctx in the transaction + * + * SEE ALSO + * + */ +ib_api_status_t +osmv_txn_init_rmpp_receiver(IN osm_bind_handle_t h_bind, + IN osmv_txn_ctx_t * p_txn, + IN boolean_t is_init_by_peer); + +/* + * NAME + * osmv_txn_done + * + * DESCRIPTION + * destroys txn object and removes it from the db + * + * SEE ALSO + * + */ +void +osmv_txn_done(IN osm_bind_handle_t h_bind, + IN uint64_t key, IN boolean_t is_in_cb); +/* + * NAME + * osmv_txn_get_tid + * + * DESCRIPTION + * returns tid of the transaction + * SEE ALSO + * + */ +static inline uint64_t osmv_txn_get_tid(IN osmv_txn_ctx_t * p_txn) +{ + CL_ASSERT(NULL != p_txn); + return p_txn->tid; +} + +/* + * NAME + * osmv_txn_get_key + * + * DESCRIPTION + * returns key of the transaction + * SEE ALSO + * + */ + +static inline uint64_t osmv_txn_get_key(IN osmv_txn_ctx_t * p_txn) +{ + CL_ASSERT(NULL != p_txn); + return p_txn->key; +} + +/* + * NAME + * osmv_txn_is_rmpp_init_by_peer + * + * DESCRIPTION + * returns whether the rmpp txfr was initiated by the peer + * + * SEE ALSO + * + */ +static inline boolean_t osmv_txn_is_rmpp_init_by_peer(IN osmv_txn_ctx_t * p_txn) +{ + CL_ASSERT(NULL != p_txn); + return p_txn->rmpp_txfr.is_rmpp_init_by_peer; +} + +/* + * NAME + * osmv_txn_get_rmpp_send_ctx + * + * DESCRIPTION + * returns osmv_rmpp_send_ctx obj + * SEE ALSO + * + */ +static inline osmv_rmpp_send_ctx_t *osmv_txn_get_rmpp_send_ctx(IN osmv_txn_ctx_t + * p_txn) +{ + CL_ASSERT(NULL != p_txn); + return p_txn->rmpp_txfr.p_rmpp_send_ctx; +} + +/* + * NAME + * osmv_txn_get_rmpp_recv_ctx + * + * DESCRIPTION + * returns osmv_rmpp_recv_ctx obj + * SEE ALSO + * + */ +static inline osmv_rmpp_recv_ctx_t *osmv_txn_get_rmpp_recv_ctx(IN osmv_txn_ctx_t + * p_txn) +{ + CL_ASSERT(NULL != p_txn); + return p_txn->rmpp_txfr.p_rmpp_recv_ctx; +} + +/* + * NAME + * osmv_txn_get_rmpp_state + * + * DESCRIPTION + * returns the rmpp role of the transaction (send/recv) + * SEE ALSO + * + */
+static inline osmv_txn_rmpp_state_t +osmv_txn_get_rmpp_state(IN osmv_txn_ctx_t * p_txn) +{ + CL_ASSERT(NULL != p_txn); + return p_txn->rmpp_txfr.rmpp_state; +} + +/* + * NAME + * osmv_txn_set_rmpp_state + * + * DESCRIPTION + * sets the rmpp role of the transaction (send/recv) + * SEE ALSO + * + */ +static inline void +osmv_txn_set_rmpp_state(IN osmv_txn_ctx_t * p_txn, + IN osmv_txn_rmpp_state_t state) +{ + CL_ASSERT(NULL != p_txn); + p_txn->rmpp_txfr.rmpp_state = state; +} + +/* + * NAME + * osmv_txn_get_madw + * + * DESCRIPTION + * returns the requester madw + * SEE ALSO + * + */ +static inline osm_madw_t *osmv_txn_get_madw(IN osmv_txn_ctx_t * p_txn) +{ + CL_ASSERT(NULL != p_txn); + return p_txn->p_madw; +} + +/* + * NAME + * osmv_txn_set_madw + * + * DESCRIPTION + * sets the requester madw + * SEE ALSO + * + */ +static inline void +osmv_txn_set_madw(IN osmv_txn_ctx_t * p_txn, IN osm_madw_t * p_madw) +{ + CL_ASSERT(NULL != p_txn); + p_txn->p_madw = p_madw; +} + +/* + * NAME + * osmv_txn_set_timeout_ev + * + * DESCRIPTION + * + * SEE ALSO + * + */ +ib_api_status_t +osmv_txn_set_timeout_ev(IN osm_bind_handle_t h_bind, + IN uint64_t key, IN uint64_t msec); +/* + * NAME + * osmv_txn_remove_timeout_ev + * + * DESCRIPTION + + * SEE ALSO + * + */ +void osmv_txn_remove_timeout_ev(IN osm_bind_handle_t h_bind, IN uint64_t key); +/* + * NAME + * osmv_txn_lookup + * + * DESCRIPTION + * get a transaction by its key + * + * SEE ALSO + * + */ +ib_api_status_t +osmv_txn_lookup(IN osm_bind_handle_t h_bind, + IN uint64_t key, OUT osmv_txn_ctx_t ** pp_txn); + +void osmv_txn_abort_rmpp_txns(IN osm_bind_handle_t h_bind); + +/* * * * * * * * * * * * */ +/* + * NAME + * osmv_txnmgr_init + * + * DESCRIPTION + * c'tor for txn mgr obj + * SEE ALSO + * + */ +ib_api_status_t +osmv_txnmgr_init(IN osmv_txn_mgr_t * p_tx_mgr, + IN osm_log_t * p_log, IN cl_spinlock_t * p_lock); + +/* + * NAME + * osmv_txnmgr_done + * + * DESCRIPTION + * d'tor for txn mgr obj + * SEE ALSO + * + */ +void
osmv_txnmgr_done(IN osm_bind_handle_t h_bind); + +void osmv_txn_lock(IN osm_bind_handle_t h_bind); +void osmv_txn_unlock(IN osm_bind_handle_t h_bind); + +inline static uint64_t osmv_txn_uniq_key(IN uint64_t tid) +{ + uint64_t pid = getpid(); + + return ((pid << 32) | (tid & 0xFFFFFFFF)); +} + +END_C_DECLS +#endif /* _OSMV_TXN_H_ */ diff --git a/include/vendor/osm_vendor_mtl.h b/include/vendor/osm_vendor_mtl.h new file mode 100644 index 0000000..df48260 --- /dev/null +++ b/include/vendor/osm_vendor_mtl.h @@ -0,0 +1,348 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Definition of interface for the MTL Vendor + * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_VENDOR_MTL_H_ +#define _OSM_VENDOR_MTL_H_ + +#undef IN +#undef OUT +#include +#include +#include +#define IN +#define OUT +#include "iba/ib_types.h" +#include "iba/ib_al.h" +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/Vendor MTL +* NAME +* Vendor MTL +* +* DESCRIPTION +* +* The Vendor MTL object is thread safe. +* +* This object should be treated as opaque and should be +* manipulated only through the provided functions. +* +* +* AUTHOR +* +* +*********/ +/****s* OpenSM: Vendor MTL/osm_ca_info_t +* NAME +* osm_ca_info_t +* +* DESCRIPTION +* Structure containing information about local Channle Adapters. +* +* SYNOPSIS +*/ +typedef struct _osm_ca_info { + ib_net64_t guid; + size_t attr_size; + ib_ca_attr_t *p_attr; + +} osm_ca_info_t; + +/* +* FIELDS +* guid +* Node GUID of the local CA. +* +* attr_size +* Size of the CA attributes for this CA. +* +* p_attr +* Pointer to dynamicly allocated CA Attribute structure. 
+* +* SEE ALSO +*********/ + +#define OSM_DEFAULT_RETRY_COUNT 3 + +/***** OpenSM: Vendor MTL/osm_vendor_t +* NAME +* osm_vendor_t +* +* DESCRIPTION +* The structure defining a vendor +* +* SYNOPSIS +*/ +typedef struct _osm_vendor { + ib_al_handle_t h_al; + osm_log_t *p_log; + uint32_t ca_count; + osm_ca_info_t *p_ca_info; + uint32_t timeout; + struct osm_transaction_mgr_t *p_transaction_mgr; +} osm_vendor_t; + +/* +* FIELDS +* h_al +* Handle returned by MTL open call (ib_open_al). +* +* p_log +* Pointer to the log object. +* +* ca_count +* Number of CA's in the array pointed to by p_ca_info. +* +* p_ca_info +* Pointer to dynamically allocated array of CA info objects. +* +* timeout +* Transaction timeout time in milliseconds. +* +* p_transaction_mgr +* Pointer to Transaction Manager. +* +* SEE ALSO +*********/ + +/****f* OpenSM: Vendor MTL/CA Info/osm_ca_info_get_port_guid +* NAME +* osm_ca_info_get_port_guid +* +* DESCRIPTION +* Returns the port GUID of the specified port owned by this CA. +* +* SYNOPSIS +*/ +static inline ib_net64_t +osm_ca_info_get_port_guid(IN const osm_ca_info_t * const p_ca_info, + IN const uint8_t index) +{ + return (p_ca_info->p_attr->p_port_attr[index].port_guid); +} + +/* +* PARAMETERS +* p_ca_info +* [in] Pointer to a CA Info object. +* +* index +* [in] Port "index" for which to retrieve the port GUID. +* The index is the offset into the ca's internal array +* of port attributes. +* +* RETURN VALUE +* Returns the port GUID of the specified port owned by this CA. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Vendor MTL/CA Info/osm_ca_info_get_num_ports +* NAME +* osm_ca_info_get_num_ports +* +* DESCRIPTION +* Returns the number of ports of the given ca_info +* +* SYNOPSIS +*/ +static inline uint8_t +osm_ca_info_get_num_ports(IN const osm_ca_info_t * const p_ca_info) +{ + return (p_ca_info->p_attr->num_ports); +} + +/* +* PARAMETERS +* p_ca_info +* [in] Pointer to a CA Info object. 
+* +* RETURN VALUE +* Returns the number of CA ports +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: SM Vendor/osm_vendor_get_guid_ca_and_port + * NAME + * osm_vendor_get_guid_ca_and_port + * + * DESCRIPTION + * Given the vendor obj and a guid + * return the ca id and port number that have that guid + * + * SYNOPSIS + */ +ib_api_status_t +osm_vendor_get_guid_ca_and_port(IN osm_vendor_t * const p_vend, + IN ib_net64_t const guid, + OUT VAPI_hca_hndl_t * p_hca_hndl, + OUT VAPI_hca_id_t * p_hca_id, + OUT uint32_t * p_port_num); + +/* +* PARAMETERS +* p_vend +* [in] Pointer to an osm_vendor_t object. +* +* guid +* [in] The guid to search for. +* +* p_hca_id +* [out] The HCA Id (VAPI_hca_id_t *) that the port is found on. +* +* p_port_num +* [out] Pointer to a port number arg to be filled with the port number with the given guid. +* +* RETURN VALUES +* IB_SUCCESS on SUCCESS +* IB_INVALID_GUID if the guid is not found on any Local HCA Port +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: Vendor MTL/osm_vendor_get_all_port_attr + * NAME + * osm_vendor_get_all_port_attr + * + * DESCRIPTION + * Fill in the array of port_attr with all available ports on ALL the + * available CAs on this machine. + * ALSO - + * UPDATE THE VENDOR OBJECT LIST OF CA_INFO STRUCTS + * + * SYNOPSIS + */ +ib_api_status_t osm_vendor_get_all_port_attr(IN osm_vendor_t * const p_vend, + IN ib_port_attr_t * + const p_attr_array, + IN uint32_t * const p_num_ports); + +/* +* PARAMETERS +* p_vend +* [in] Pointer to an osm_vendor_t object. +* +* p_attr_array +* [out] Pre-allocated array of port attributes to be filled in +* +* p_num_ports +* [in/out] The size of the given array. Filled in by the actual number of ports found. +* +* RETURN VALUES +* IB_SUCCESS if OK +* IB_INSUFFICIENT_MEMORY if not enough space for all ports was provided.
+* +* NOTES +* +* SEE ALSO +*********/ + +#define OSM_BIND_INVALID_HANDLE 0 + +/****s* OpenSM: Vendor MTL/osm_bind_handle_t +* NAME +* osm_bind_handle_t +* +* DESCRIPTION +* handle returned by the vendor transport bind call. +* +* SYNOPSIS +*/ +typedef void *osm_bind_handle_t; + +/***********/ + +/****s* OpenSM: Vendor MTL/osm_vend_wrap_t +* NAME +* MTL Vendor MAD Wrapper +* +* DESCRIPTION +* MTL specific MAD wrapper. MTL transport layer uses this for +* housekeeping. +* +* SYNOPSIS +*********/ +typedef struct _osm_vend_wrap_t { + uint32_t size; + osm_bind_handle_t h_bind; + // ib_av_handle_t h_av; + ib_mad_t *mad_buf_p; + void *p_resp_madw; +} osm_vend_wrap_t; + +/* +* FIELDS +* size +* Size of the allocated MAD +* +* h_bind +* Bind handle used on this transaction +* +* h_av +* Address vector handle used for this transaction (member is currently commented out). +* +* p_resp_madw +* Pointer to the mad wrapper structure used to hold the pending +* response to the mad, if any. If a response is expected, the +* wrapper for the response is allocated during the send call. +* +* SEE ALSO +*********/ + +END_C_DECLS +#endif /* _OSM_VENDOR_MTL_H_ */ diff --git a/include/vendor/osm_vendor_mtl_hca_guid.h b/include/vendor/osm_vendor_mtl_hca_guid.h new file mode 100644 index 0000000..1b3da88 --- /dev/null +++ b/include/vendor/osm_vendor_mtl_hca_guid.h @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Provides interface over VAPI for obtaining the local ports guids or from guid + * obtaining the HCA and port number. + */ + +#ifndef _OSM_VENDOR_HCA_GUID_H_ +#define _OSM_VENDOR_HCA_GUID_H_ + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****s* OpenSM: Vendor AL/osm_ca_info_t +* NAME +* osm_ca_info_t +* +* DESCRIPTION +* Structure containing information about local Channle Adapters. 
+* +* SYNOPSIS +*/ +typedef struct _osm_ca_info { + ib_net64_t guid; + size_t attr_size; + ib_ca_attr_t *p_attr; + +} osm_ca_info_t; + +/* +* FIELDS +* guid +* Node GUID of the local CA. +* +* attr_size +* Size of the CA attributes for this CA. +* +* p_attr +* Pointer to dynamicly allocated CA Attribute structure. +* +* SEE ALSO +*********/ + +/****f* OpenSM: CA Info/osm_ca_info_get_port_guid +* NAME +* osm_ca_info_get_port_guid +* +* DESCRIPTION +* Returns the port GUID of the specified port owned by this CA. +* +* SYNOPSIS +*/ +static inline ib_net64_t +osm_ca_info_get_port_guid(IN const osm_ca_info_t * const p_ca_info, + IN const uint8_t index) +{ + return (p_ca_info->p_attr->p_port_attr[index].port_guid); +} + +/* +* PARAMETERS +* p_ca_info +* [in] Pointer to a CA Info object. +* +* index +* [in] Port "index" for which to retrieve the port GUID. +* The index is the offset into the ca's internal array +* of port attributes. +* +* RETURN VALUE +* Returns the port GUID of the specified port owned by this CA. +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: SM Vendor/osm_vendor_get_guid_ca_and_port + * NAME + * osm_vendor_get_guid_ca_and_port + * + * DESCRIPTION + * Given the vendor obj and a guid + * return the ca id and port number that have that guid + * + * SYNOPSIS + */ +ib_api_status_t +osm_vendor_get_guid_ca_and_port(IN osm_vendor_t * const p_vend, + IN ib_net64_t const guid, + OUT VAPI_hca_id_t * p_hca_id, + OUT uint32_t * p_port_num); + +/* +* PARAMETERS +* p_vend +* [in] Pointer to an osm_vendor_t object. +* +* guid +* [in] The guid to search for. +* +* p_hca_id +* [out] The HCA Id (VAPI_hca_id_t *) that the port is found on. +* +* p_port_num +* [out] Pointer to a port number arg to be filled with the port number with the given guid. 
+* +* RETURN VALUES +* IB_SUCCESS on SUCCESS +* IB_INVALID_GUID if the guid is not found on any Local HCA Port +* +* NOTES +* +* SEE ALSO +*********/ + +/****f* OpenSM: SM Vendor/osm_vendor_get_all_port_attr + * NAME + * osm_vendor_get_all_port_attr + * + * DESCRIPTION + * Fill in the array of port_attr with all available ports on ALL the + * available CAs on this machine. + * ALSO - + * UPDATE THE VENDOR OBJECT LIST OF CA_INFO STRUCTS + * + * SYNOPSIS + */ +ib_api_status_t osm_vendor_get_all_port_attr(IN osm_vendor_t * const p_vend, + IN ib_port_attr_t * + const p_attr_array, + IN uint32_t * const p_num_ports); + +/* +* PARAMETERS +* p_vend +* [in] Pointer to an osm_vendor_t object. +* +* p_attr_array +* [out] Pre-allocated array of port attributes to be filled in +* +* p_num_ports +* [in/out] The size of the given array. Filled in by the actual number of ports found. +* +* RETURN VALUES +* IB_SUCCESS if OK +* IB_INSUFFICIENT_MEMORY if not enough space for all ports was provided. +* +* NOTES +* +* SEE ALSO +*********/ + +END_C_DECLS +#endif /* _OSM_VENDOR_HCA_GUID_H_ */ diff --git a/include/vendor/osm_vendor_mtl_transaction_mgr.h b/include/vendor/osm_vendor_mtl_transaction_mgr.h new file mode 100644 index 0000000..6ec5b86 --- /dev/null +++ b/include/vendor/osm_vendor_mtl_transaction_mgr.h @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Definition of interface for the MTL Vendor + * This object is part of the OpenSM family of objects. 
+ */ + +#ifndef _OSM_TRANSACTION_MGR_H_ +#define _OSM_TRANSACTION_MGR_H_ + + /* + #include + #include + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef OSM_VENDOR_INTF_MTL +#include +#include +#endif + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****s* OpenSM: Transaction Manager/osm_madw_req_t +* NAME +* osm_madw_req_t +* +* DESCRIPTION +* The structure defining each object in the transaction_mgr. +* For every request mad sent, we will save such an object for it. +* +* SYNOPSIS +*/ +typedef struct _osm_madw_req { + cl_list_item_t list_item; + cl_map_item_t map_item; + osm_madw_t *p_madw; + uint64_t waking_time; + uint8_t retry_cnt; + osm_bind_handle_t *p_bind; +} osm_madw_req_t; + +/* +* FIELDS +* list_item +* List item for qlist linkage. Must be first element!! +* +* map_item +* Map item for qmap linkage. +* +* p_madw +* pointer to mad wrapper that is expecting to get a response. +* +* waking_time +* Time stamp (in microseconds) when the p_madw needs to wake up. +* This value is +* cl_get_time_stamp() + timeout during the sending of the mad. +* where timeout should be given in microseconds. +* +* retry_cnt +* The number of outstanding retries to be called. +*********/ + +/****s* OpenSM: Transaction Manager/osm_transaction_mgr_t +* NAME +* osm_transaction_mgr_t +* +* DESCRIPTION +* This structure defines the transaction manager. +* It holds a qlist and a qmap, a lock on the transaction manager, and +* a timer used for the list. +* The manager is responsible for keeping track of every request mad that was +* sent. It is used for finding mads according to their transaction id, and for +* acting as an event wheel - reporting as error each packet was supposed to get +* a response and didn't get one by the timeout time expected. 
+* +* Both the list and the map hold the osm_madw_req_t objects - one for every madw. +* +* Managing of the list: +* The timer wakes on the timeout of the first madw. If the waking_time is greater than +* the current time - then the mad received a response. If not - the mad didn't get +* its response. +* +* SYNOPSIS +*/ +typedef struct _osm_transaction_mgr { + cl_qmap_t *madw_by_tid_map_p; + cl_qlist_t *madw_reqs_list_p; + cl_spinlock_t transaction_mgr_lock; + cl_timer_t madw_list_timer; +} osm_transaction_mgr_t; + +/* +* FIELDS +* madw_by_tid_map_p +* A qmap with key = transaction id. and value of osm_madw_req_t. +* +* madw_reqs_list_p +* A qlist of all the madw with their waking time. +* +* transaction_mgr_lock +* Lock used on the transaction manager - make sure changes on it are serial. +* +* madw_list_timer +* Timer on the list. +*********/ + +/****f* OpenSM: Transaction Manager/osm_transaction_mgr_init +* NAME +* osm_transaction_mgr_init +* +* DESCRIPTION +* Initialize the transaction manager. +* Will update the p_transaction_mgr in the vendor object with +* the new Transaction Manager created.* +* +* SYNOPSIS +*/ +void osm_transaction_mgr_init(IN osm_vendor_t * const p_vend); + +/* +* PARAMETERS +* p_vend +* [in] Pointer to a Osm Vendor object. +* +*********/ + +/****f* OpenSM: Transaction Manager/osm_transaction_mgr_destroy +* NAME +* osm_transaction_mgr_destroy +* +* DESCRIPTION +* Destroy the transaction manager. +* Will de-allocate all memory allocated by the Transaction +* Manager up to now. +* +* SYNOPSIS +*/ +void osm_transaction_mgr_destroy(IN osm_vendor_t * const p_vend); + +/* +* PARAMETERS +* p_vend +* [in] Pointer to a Osm Vendor object. +* +*********/ + +/****f* OpenSM: Transaction Manager/osm_transaction_mgr_insert_madw +* NAME +* osm_transaction_mgr_insert_madw +* +* DESCRIPTION +* Insert a new madw to the manager. The madw is added with a waking_time, +* Which is equal to the current_time + timeout. 
This is the maximum time +* that the madw can live without being handled (e.g. get a response). +* If there are no madw saved in the manager - start the timer for vendor +* timeout period. +* +* SYNOPSIS +*/ +ib_api_status_t +osm_transaction_mgr_insert_madw(IN osm_bind_handle_t * p_bind, + IN osm_madw_t * p_madw); +/* +* PARAMETERS +* p_bind +* [in] Pointer to a mtl bind object. +* +* p_madw +* [in] Pointer to the Mad Wrapper to be added. +* +*********/ + +/****f* OpenSM: Transaction Manager/osm_transaction_mgr_erase_madw +* NAME +* osm_transaction_mgr_erase_madw +* +* DESCRIPTION +* Erase a madw object from the manager. +* The removal is done using the transaction id of the mad - using +* it the madw_p is located (in the qmap) and removed from the +* qmap and qlist. +* +* SYNOPSIS +*/ +ib_api_status_t +osm_transaction_mgr_erase_madw(IN osm_vendor_t * const p_vend, + IN ib_mad_t * p_mad); +/* +* PARAMETERS +* p_vend +* [in] Pointer to an Osm Vendor object. +* +* p_mad +* [in] Pointer to the Mad to be removed. +* +*********/ + +/****f* OpenSM: Transaction Manager/osm_transaction_mgr_get_madw_for_tid +* NAME +* osm_transaction_mgr_get_madw_for_tid +* +* DESCRIPTION +* Return the mad wrapper, given the p_mad (and in it the transaction id) +* +* SYNOPSIS +*/ +ib_api_status_t +osm_transaction_mgr_get_madw_for_tid(IN osm_vendor_t * const p_vend, + IN ib_mad_t * const p_mad, + OUT osm_madw_t ** req_madw_p); +/* +* PARAMETERS +* p_vend +* [in] Pointer to an Osm Vendor object. +* +* p_mad +* [in] Pointer to the Mad to be located. +* +* req_madw_p +* [out] Pointer to the mad Wrapper to be found. +* +*********/ + +/****f* OpenSM: Transaction Manager/osm_transaction_mgr_callback +* NAME +* osm_transaction_mgr_callback +* +* DESCRIPTION +* This callback is called on timeout of the timer. +* It checks the time of the head madw in the qlist, and compares it to +* the current time.
+* Will send an error callback if the time of the madw is less than the +* current time - this means that the madw wasn't removed in the timeout +* it was supposed to be handled. +* +* SYNOPSIS +*/ +void osm_transaction_mgr_callback(IN void *context); +/* +* PARAMETERS +* context +* [in] void* context +* +*********/ + +END_C_DECLS +#endif /* _OSM_TRANSACTION_MGR_H_ */ diff --git a/include/vendor/osm_vendor_sa_api.h b/include/vendor/osm_vendor_sa_api.h new file mode 100644 index 0000000..d5f9376 --- /dev/null +++ b/include/vendor/osm_vendor_sa_api.h @@ -0,0 +1,874 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Specification of the OpenSM SA Client API. This API uses the basic osm + * vendor API to provide SA Client interface. + */ + +#ifndef _OSM_VENDOR_SA_API_H_ +#define _OSM_VENDOR_SA_API_H_ + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****d* OpenSM Vendor SA Client/osmv_flags_t +* NAME +* osmv_flags_t +* +* DESCRIPTION +* Access layer flags used to direct the operation of various calls. +* +* SYNOPSIS +*/ +typedef uint32_t osmv_flags_t; +#define OSM_SA_FLAGS_SYNC 0x00000001 +/* +* VALUES +* OSM_SA_FLAGS_SYNC +* Indicates that the given operation should be performed synchronously. +* The call will block until it completes. Callbacks will still be +* invoked. +* +* SEE ALSO +* osmv_query_sa +*****/ + +/****d* OpenSM Vendor SA Client/osmv_query_type_t +* NAME +* osmv_query_type_t +* +* DESCRIPTION +* Abstracted queries supported by the access layer. 
+* +* SYNOPSIS +*/ +typedef enum _osmv_query_type { + OSMV_QUERY_USER_DEFINED, + + OSMV_QUERY_ALL_SVC_RECS, + OSMV_QUERY_SVC_REC_BY_NAME, + OSMV_QUERY_SVC_REC_BY_ID, + + OSMV_QUERY_CLASS_PORT_INFO, + + OSMV_QUERY_NODE_REC_BY_NODE_GUID, + OSMV_QUERY_PORT_REC_BY_LID, + OSMV_QUERY_PORT_REC_BY_LID_AND_NUM, + + OSMV_QUERY_VLARB_BY_LID_PORT_BLOCK, + OSMV_QUERY_SLVL_BY_LID_AND_PORTS, + + OSMV_QUERY_PATH_REC_BY_PORT_GUIDS, + OSMV_QUERY_PATH_REC_BY_GIDS, + OSMV_QUERY_PATH_REC_BY_LIDS, + + OSMV_QUERY_UD_MULTICAST_SET, + OSMV_QUERY_UD_MULTICAST_DELETE, + + OSMV_QUERY_MULTIPATH_REC, + +} osmv_query_type_t; +/* +* VALUES +* OSMV_QUERY_USER_DEFINED +* Query the SA based on user-defined input. Queries of this type +* should reference an osmv_user_query_t structure as input to the +* query. +* +* OSMV_QUERY_SVC_REC_BY_NAME +* Query for service records based on the service name. Queries of +* this type should reference an ib_svc_name_t structure as input +* to the query. +* +* OSMV_QUERY_SVC_REC_BY_ID +* Query for service records based on the service ID. Queries of +* this type should reference an ib_net64_t value that indicates +* the ID of the service being requested. +* +* OSMV_QUERY_NODE_REC_BY_NODE_GUID +* Query for node information based on the node's GUID. Queries of +* this type should reference an ib_net64_t value that indicates +* the GUID of the node being requested. +* +* OSMV_QUERY_PORT_REC_BY_LID +* Query for port information based on the port's base LID. Queries +* of this type should reference an ib_net16_t value that indicates +* the base LID of the port being requested. +* +* OSMV_QUERY_PORT_REC_BY_LID_AND_NUM +* Query for port information based on the port's LID and port num. +* Queries of this type should reference an osmv_user_query_t +* structure as input to the query. The port num and lid should +* be provided by it. +* +* OSMV_QUERY_PATH_REC_BY_PORT_GUIDS +* Query for path records between the specified pair of port GUIDs. 
+* Queries of this type should reference an osmv_guid_pair_t +* structure that indicates the GUIDs of the path being requested. +* +* OSMV_QUERY_PATH_REC_BY_GIDS +* Query for path records between the specified pair of port GIDs. +* Queries of this type should reference an osmv_gid_pair_t +* structure that indicates the GIDs of the path being requested. +* +* OSMV_QUERY_PATH_REC_BY_LIDS +* Query for path records between the specified pair of port LIDs. +* Queries of this type should reference an osmv_lid_pair_t +* structure that indicates the LIDs of the path being requested. +* +* NOTES +* This enum is used to define abstracted queries provided by the access +* layer. Users may issue queries not listed here by sending MADs directly +* to subnet administration or a class manager. These queries are +* intended to represent those most often used by clients. +* +* SEE ALSO +* osmv_query, osmv_query_req_t, osmv_user_query_t, osmv_gid_pair_t, +* osmv_lid_pair_t osmv_guid_pair_t +*****/ + +/****s* OpenSM Vendor SA Client/osmv_user_query_t +* NAME +* osmv_user_query_t +* +* DESCRIPTION +* User-defined query information. +* +* SYNOPSIS +*/ +typedef struct _osmv_user_query { + uint8_t method; + ib_net16_t attr_id; + ib_net16_t attr_offset; + ib_net32_t attr_mod; + ib_net64_t comp_mask; + void *p_attr; +} osmv_user_query_t; +/* +* FIELDS +* +* method +* Method to be used +* +* attr_id +* Attribute identifier of query data. +* +* attr_offset +* Size of the query attribute, in 8-byte words. Users can set +* this value by passing in the sizeof( attribute ) into the +* ib_get_attr_offset() routine. +* +* attr_mod +* Attribute modifier for query request. +* +* comp_mask +* Indicates the attribute components that are specified for the +* query. +* +* p_attr +* References the attribute structure used as input into the query. +* This field is ignored if comp_mask is set to 0. +* +* NOTES +* This structure is used to describe a user-defined query. 
The attribute +* ID, attribute offset, component mask, and attribute structure must match +* those defined by the IBA specification. Users should refer to chapter +* 15 of the IBA specification for additional details. +* +* SEE ALSO +* osmv_query_type_t, ib_get_attr_offset, ib_get_attr_size, osmv_query_sa +*****/ + +/****s* OpenSM Vendor SA Client/osmv_gid_pair_t +* NAME +* osmv_gid_pair_t +* +* DESCRIPTION +* Source and destination GIDs. +* +* SYNOPSIS +*/ +typedef struct _osmv_gid_pair { + ib_gid_t src_gid; + ib_gid_t dest_gid; +} osmv_gid_pair_t; +/* +* FIELDS +* src_gid +* Source GID of a path. +* +* dest_gid +* Destination GID of a path. +* +* NOTES +* This structure is used to describe the endpoints of a path. +* +* SEE ALSO +* ib_gid_t +*****/ + +/****s* OpenSM Vendor SA Client/osmv_lid_pair_t +* NAME +* osmv_lid_pair_t +* +* DESCRIPTION +* Source and destination LIDs. +* +* SYNOPSIS +*/ +typedef struct _osmv_lid_pair { + ib_net16_t src_lid; + ib_net16_t dest_lid; +} osmv_lid_pair_t; +/* +* FIELDS +* src_lid +* Source LID of a path. +* +* dest_lid +* Destination LID of a path. +* +* NOTES +* This structure is used to describe the endpoints of a path. +*****/ + +/****s* OpenSM Vendor SA Client/osmv_guid_pair_t +* NAME +* osmv_guid_pair_t +* +* DESCRIPTION +* Source and destination GUIDs. These may be port or channel adapter +* GUIDs, depending on the context in which this structure is used. +* +* SYNOPSIS +*/ +typedef struct _osmv_guid_pair { + ib_net64_t src_guid; + ib_net64_t dest_guid; +} osmv_guid_pair_t; +/* +* FIELDS +* src_guid +* Source GUID of a path. +* +* dest_guid +* Destination GUID of a path. +* +* NOTES +* This structure is used to describe the endpoints of a path. The given +* GUID pair may belong to either ports or channel adapters. +* +* SEE ALSO +* ib_guid_t +*****/ + +/****s* OpenSM Vendor SA Client/osmv_multipath_req_t +* NAME +* osmv_multipath_req_t +* +* DESCRIPTION +* Fields from which to generate a MultiPathRecord request. 
+* +* SYNOPSIS +*/ +typedef struct _osmv_multipath_req_t { + ib_net64_t comp_mask; + uint16_t pkey; + boolean_t reversible; + uint8_t num_path; + uint8_t sl; + uint8_t independence; + uint8_t sgid_count; + uint8_t dgid_count; + ib_gid_t gids[IB_MULTIPATH_MAX_GIDS]; +} osmv_multipath_req_t; +/* +* FIELDS +* +* NOTES +* This structure is used to describe a multipath request. +* +* SEE ALSO +*****/ + +/****s* OpenSM Vendor SA Client/osmv_query_res_t +* NAME +* osmv_query_res_t +* +* DESCRIPTION +* Contains the results of a subnet administration query. +* +* SYNOPSIS +*/ +typedef struct _osmv_query_res { + const void *query_context; + ib_api_status_t status; + osmv_query_type_t query_type; + uint32_t result_cnt; + osm_madw_t *p_result_madw; +} osmv_query_res_t; +/* +* FIELDS +* query_context +* User-defined context information associated with the query +* through the osm_vendor_query_sa call. +* +* status +* Indicates the success of the query operation. +* +* query_type +* Indicates the type of query for which the results are being +* returned. This matches the query_type specified through the +* osm_vendor_query_sa call. +* +* result_cnt +* The number of result structures that were returned by the query. +* +* p_result_madw +* For queries returning IB_SUCCESS or IB_REMOTE_ERROR, this +* references the MAD wrapper returned by subnet administration +* containing the list of results or the returned error code. +* +* NOTES +* A query result structure is returned to a client through their +* osmv_pfn_query_cb_t routine to notify them of the results of a subnet +* administration query. If the query was successful or received an error +* from subnet administration, p_result_madw will reference a MAD wrapper +* containing the results. The MAD referenced by p_result_madw is owned by +* the user and remains available even after their callback returns. 
Users +* must call osm_mad_pool_put() to return the MAD wrapper back to the +* mad pool when they are done accessing the results. +* +* To retrieve individual result structures from the p_result_madw, users +* may call osmv_get_query_result(). +* +* SEE ALSO +* osmv_query_sa, osmv_pfn_query_cb_t, ib_api_status_t, +* osmv_query_status_t, osmv_query_type_t, +* osmv_get_query_result +*****/ + +/****f* OpenSM Vendor SA Client/osmv_get_query_result +* NAME +* osmv_get_query_result +* +* DESCRIPTION +* Retrieves a result structure from a MADW returned by a call to +* osmv_query_sa(). +* +* SYNOPSIS +*/ +static inline void *osmv_get_query_result(IN osm_madw_t * p_result_madw, + IN uint32_t result_index) +{ + ib_sa_mad_t *p_sa_mad; + + CL_ASSERT(p_result_madw); + p_sa_mad = (ib_sa_mad_t *) osm_madw_get_mad_ptr(p_result_madw); + CL_ASSERT(p_sa_mad); + CL_ASSERT(ib_get_attr_size(p_sa_mad->attr_offset) * (result_index + 1) + + IB_SA_MAD_HDR_SIZE <= p_result_madw->mad_size); + + return (p_sa_mad->data + + (ib_get_attr_size(p_sa_mad->attr_offset) * result_index)); +} + +/* +* PARAMETERS +* p_result_madw +* [in] This is a reference to the MAD returned as a result of the +* query. +* +* result_index +* [in] A zero-based index indicating which result to return. +* +* NOTES +* This call returns a pointer to the start of a result structure from a +* call to osmv_query_sa(). The type of result structure must be known to +* the user either through the user's context or the query_type returned as +* part of the osmv_query_res_t structure. +* +* SEE ALSO +* osmv_query_res_t, osm_madw_t +*****/ + +/****f* OpenSM Vendor SA Client/osmv_get_query_path_rec +* NAME +* osmv_get_query_path_rec +* +* DESCRIPTION +* Retrieves a path record result from a MAD returned by a call to +* osmv_query_sa(). 
+* +* SYNOPSIS +*/ +static inline ib_path_rec_t *osmv_get_query_path_rec(IN osm_madw_t * + p_result_madw, + IN uint32_t result_index) +{ + ib_sa_mad_t __attribute__((__unused__)) *p_sa_mad; + + CL_ASSERT(p_result_madw); + p_sa_mad = (ib_sa_mad_t *) osm_madw_get_mad_ptr(p_result_madw); + CL_ASSERT(p_sa_mad && p_sa_mad->attr_id == IB_MAD_ATTR_PATH_RECORD); + + return ((ib_path_rec_t *) + osmv_get_query_result(p_result_madw, result_index)); +} + +/* +* PARAMETERS +* p_result_madw +* [in] This is a reference to the MAD returned as a result of the +* query. +* +* result_index +* [in] A zero-based index indicating which result to return. +* +* NOTES +* This call returns a pointer to the start of a path record result from +* a call to osmv_query_sa(). +* +* SEE ALSO +* osmv_query_res_t, osm_madw_t, osmv_get_query_result, ib_path_rec_t +*****/ + +/****f* OpenSM Vendor SA Client/osmv_get_query_portinfo_rec +* NAME +* osmv_get_query_portinfo_rec +* +* DESCRIPTION +* Retrieves a port info record result from a MAD returned by a call to +* osmv_query_sa(). +* +* SYNOPSIS +*/ +static inline ib_portinfo_record_t *osmv_get_query_portinfo_rec(IN osm_madw_t * + p_result_madw, + IN uint32_t + result_index) +{ + ib_sa_mad_t __attribute__((__unused__)) *p_sa_mad; + + CL_ASSERT(p_result_madw); + p_sa_mad = (ib_sa_mad_t *) osm_madw_get_mad_ptr(p_result_madw); + CL_ASSERT(p_sa_mad && p_sa_mad->attr_id == IB_MAD_ATTR_PORTINFO_RECORD); + + return ((ib_portinfo_record_t *) osmv_get_query_result(p_result_madw, + result_index)); +} + +/* +* PARAMETERS +* p_result_madw +* [in] This is a reference to the MAD returned as a result of the +* query. +* +* result_index +* [in] A zero-based index indicating which result to return. +* +* NOTES +* This call returns a pointer to the start of a port info record result +* from a call to osmv_query_sa(). 
+* +* SEE ALSO +* osmv_query_res_t, osm_madw_t, osmv_get_query_result, ib_portinfo_record_t +*****/ + +/****f* OpenSM Vendor SA Client/osmv_get_query_node_rec +* NAME +* osmv_get_query_node_rec +* +* DESCRIPTION +* Retrieves a node record result from a MAD returned by a call to +* osmv_query_sa(). +* +* SYNOPSIS +*/ +static inline ib_node_record_t *osmv_get_query_node_rec(IN osm_madw_t * + p_result_madw, + IN uint32_t + result_index) +{ + ib_sa_mad_t __attribute__((__unused__)) *p_sa_mad; + + CL_ASSERT(p_result_madw); + p_sa_mad = (ib_sa_mad_t *) osm_madw_get_mad_ptr(p_result_madw); + CL_ASSERT(p_sa_mad && p_sa_mad->attr_id == IB_MAD_ATTR_NODE_RECORD); + + return ((ib_node_record_t *) osmv_get_query_result(p_result_madw, + result_index)); +} + +/* +* PARAMETERS +* p_result_madw +* [in] This is a reference to the MAD returned as a result of the +* query. +* +* result_index +* [in] A zero-based index indicating which result to return. +* +* NOTES +* This call returns a pointer to the start of a node record result from +* a call to osmv_query_sa(). +* +* SEE ALSO +* osmv_query_res_t, osm_madw_t, osmv_get_query_result, ib_node_record_t +*****/ + +/****f* OpenSM Vendor SA Client/osmv_get_query_svc_rec +* NAME +* osmv_get_query_svc_rec +* +* DESCRIPTION +* Retrieves a service record result from a MAD returned by a call to +* osmv_query_sa(). +* +* SYNOPSIS +*/ +static inline ib_service_record_t *osmv_get_query_svc_rec(IN osm_madw_t * + p_result_madw, + IN uint32_t + result_index) +{ + ib_sa_mad_t __attribute__((__unused__)) *p_sa_mad; + + CL_ASSERT(p_result_madw); + p_sa_mad = (ib_sa_mad_t *) osm_madw_get_mad_ptr(p_result_madw); + CL_ASSERT(p_sa_mad && p_sa_mad->attr_id == IB_MAD_ATTR_SERVICE_RECORD); + + return ((ib_service_record_t *) osmv_get_query_result(p_result_madw, + result_index)); +} + +/* +* PARAMETERS +* p_result_madw +* [in] This is a reference to the MAD returned as a result of the +* query. 
+* +* result_index +* [in] A zero-based index indicating which result to return. +* +* NOTES +* This call returns a pointer to the start of a service record result from +* a call to osmv_query_sa(). +* +* SEE ALSO +* osmv_query_res_t, osm_madw_t, osmv_get_query_result, ib_service_record_t +*****/ + +/****f* OpenSM Vendor SA Client/osmv_get_query_mc_rec +* NAME +* osmv_get_query_mc_rec +* +* DESCRIPTION +* Retrieves a multicast record result from a MAD returned by a call to +* osmv_query_sa(). +* +* SYNOPSIS +*/ +static inline ib_member_rec_t *osmv_get_query_mc_rec(IN osm_madw_t * + p_result_madw, + IN uint32_t result_index) +{ + ib_sa_mad_t __attribute__((__unused__)) *p_sa_mad; + + CL_ASSERT(p_result_madw); + p_sa_mad = (ib_sa_mad_t *) osm_madw_get_mad_ptr(p_result_madw); + CL_ASSERT(p_sa_mad && p_sa_mad->attr_id == IB_MAD_ATTR_MCMEMBER_RECORD); + + return ((ib_member_rec_t *) osmv_get_query_result(p_result_madw, + result_index)); +} + +/* +* PARAMETERS +* p_result_madw +* [in] This is a reference to the MAD returned as a result of the +* query. +* +* result_index +* [in] A zero-based index indicating which result to return. +* +* NOTES +* This call returns a pointer to the start of a multicast record result from +* a call to osmv_query_sa(). +* +* SEE ALSO +* osmv_query_res_t, osm_madw_t, osmv_get_query_result, ib_member_rec_t +*****/ + +/****f* OpenSM Vendor SA Client/osmv_get_query_inform_info_rec +* NAME +* osmv_get_query_inform_info_rec +* +* DESCRIPTION +* Retrieves an InformInfo record result from a MAD returned by +* a call to osmv_query_sa().
+* +* SYNOPSIS +*/ +static inline ib_inform_info_record_t *osmv_get_query_inform_info_rec(IN + osm_madw_t + * + p_result_madw, + IN + uint32_t + result_index) +{ + ib_sa_mad_t __attribute__((__unused__)) *p_sa_mad; + + CL_ASSERT(p_result_madw); + p_sa_mad = (ib_sa_mad_t *) osm_madw_get_mad_ptr(p_result_madw); + CL_ASSERT(p_sa_mad + && p_sa_mad->attr_id == IB_MAD_ATTR_INFORM_INFO_RECORD); + + return ((ib_inform_info_record_t *) osmv_get_query_result(p_result_madw, + result_index)); +} + +/* +* PARAMETERS +* p_result_madw +* [in] This is a reference to the MAD returned as a result of the +* query. +* +* result_index +* [in] A zero-based index indicating which result to return. +* +* NOTES +* This call returns a pointer to the start of an InformInfo record result from +* a call to osmv_query_sa(). +* +* SEE ALSO +* osmv_query_res_t, osm_madw_t, osmv_get_query_result, ib_inform_info_record_t +*****/ + +/****f* OpenSM Vendor SA Client/osmv_pfn_query_cb_t +* NAME +* osmv_pfn_query_cb_t +* +* DESCRIPTION +* User-defined callback invoked on completion of subnet administration +* query. +* +* SYNOPSIS +*/ +typedef void + (*osmv_pfn_query_cb_t) (IN osmv_query_res_t * p_query_res); +/* +* PARAMETERS +* p_query_res +* [in] This is a reference to a structure containing the result of +* the query. +* +* NOTES +* This routine is invoked to notify a client of the result of a subnet +* administration query. The p_query_res parameter references the result +* of the query and, in the case of a successful query, any information +* returned by subnet administration. +* +* In the kernel, this callback is usually invoked using a tasklet, +* dependent on the implementation of the underlying verbs provider driver. +* +* SEE ALSO +* osmv_query_res_t +*****/ + +/****s* OpenSM Vendor SA Client/osmv_query_req_t +* NAME +* osmv_query_req_t +* +* DESCRIPTION +* Information used to request an access layer provided query of subnet +* administration.
+* +* SYNOPSIS +*/ +typedef struct _osmv_query_req { + osmv_query_type_t query_type; + const void *p_query_input; + ib_net64_t sm_key; + + uint32_t timeout_ms; + uint32_t retry_cnt; + osmv_flags_t flags; + + const void *query_context; + osmv_pfn_query_cb_t pfn_query_cb; + int with_grh; + ib_gid_t gid; +} osmv_query_req_t; +/* +* FIELDS +* query_type +* Indicates the type of query that the access layer should +* perform. +* +* p_query_input +* A pointer to the input for the query. The data referenced by +* this structure is dependent on the type of query being requested +* and is determined by the specified query_type. +* +* sm_key +* The SM_Key to be provided with the SA MAD for authentication. +* Normally 0 is used. +* +* timeout_ms +* Specifies the number of milliseconds to wait for a response for +* this query until retrying or timing out the request. +* +* retry_cnt +* Specifies the number of times that the query will be retried +* before failing the request. +* +* flags +* Used to describe the mode of operation. Set to OSM_SA_FLAGS_SYNC to +* process the called routine synchronously. +* +* query_context +* User-defined context information associated with this query. +* The context data is returned to the user as a part of their +* query callback. +* +* pfn_query_cb +* A user-defined callback that is invoked upon completion of the +* query. +* +* with_grh +* Indicates that SA queries should be sent with GRH. +* +* gid +* Used to store the SM/SA GID. +* +* NOTES +* This structure is used when requesting an osm vendor provided query +* of subnet administration. Clients specify the type of query through +* the query_type field. Based on the type of query, the p_query_input +* field is set to reference the appropriate data structure.
+* +* The information referenced by the p_query_input field is one of the +* following: +* +* -- a NULL terminated service name +* -- a service id +* -- a single GUID +* -- a pair of GUIDs specified through an osmv_guid_pair_t structure +* -- a pair of GIDs specified through an osmv_gid_pair_t structure +* +* SEE ALSO +* osmv_query_type_t, osmv_pfn_query_cb_t, osmv_guid_pair_t, +* osmv_gid_pair_t +*****/ + +/****f* OpenSM Vendor SA Client/osmv_bind_sa +* NAME +* osmv_bind_sa +* +* DESCRIPTION +* Bind to the SA service and return a handle to be used for later +* queries. +* +* +* SYNOPSIS +*/ +osm_bind_handle_t +osmv_bind_sa(IN osm_vendor_t * const p_vend, + IN osm_mad_pool_t * const p_mad_pool, IN ib_net64_t port_guid); +/* +* PARAMETERS +* p_vend +* [in] an osm_vendor object to work with +* +* p_mad_pool +* [in] mad pool to obtain madw from +* +* port_guid +* [in] the port guid to attach to. +* +* RETURN VALUE +* Bind handle to be used for later SA queries or OSM_BIND_INVALID_HANDLE +* +* NOTES +* +* SEE ALSO +* osmv_query_sa +*********/ + +/****f* OpenSM Vendor SA Client/osmv_query_sa +* NAME +* osmv_query_sa +* +* DESCRIPTION +* Query the SA given an SA query request (similar to IBAL ib_query). +* +* SYNOPSIS +*/ +ib_api_status_t +osmv_query_sa(IN osm_bind_handle_t h_bind, + IN const osmv_query_req_t * const p_query_req); +/* +* PARAMETERS +* h_bind +* [in] bind handle for this port. Should be previously +* obtained by calling osmv_bind_sa +* +* p_query_req +* [in] an SA query request structure. +* +* RETURN VALUE +* IB_SUCCESS if completed successfully (or in ASYNC mode +* if the request was sent). +* +* NOTES +* +* SEE ALSO +* osmv_bind_sa +*********/ + +END_C_DECLS +#endif /* _OSM_VENDOR_SA_API_H_ */ diff --git a/include/vendor/osm_vendor_test.h b/include/vendor/osm_vendor_test.h new file mode 100644 index 0000000..a1ae1eb --- /dev/null +++ b/include/vendor/osm_vendor_test.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _OSM_VENDOR_TEST_H_ +#define _OSM_VENDOR_TEST_H_ + +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/* This value must be zero for the TEST transport. */ +#define OSM_BIND_INVALID_HANDLE 0 +/* + * Abstract: + * Declaration of vendor specific transport interface. 
+ * This is the "Test" vendor which allows compilation and some + * testing without a real vendor interface. + * These objects are part of the OpenSM family of objects. + */ +/****h* OpenSM/Vendor Test +* NAME +* Vendor Test +* +* DESCRIPTION +* The Vendor Test structure encapsulates an artificial transport layer +* interface for testing. +* +* AUTHOR +* Steve King, Intel +* +*********/ +/****s* OpenSM: Vendor Test/osm_vend_wrap_t +* NAME +* osm_vend_wrap_t +* +* DESCRIPTION +* Vendor specific MAD wrapper context. +* +* This structure allows direct access to member variables. +* +* SYNOPSIS +*/ +typedef struct _osm_vend_wrap { + uint32_t dummy; + +} osm_vend_wrap_t; +/*********/ + +/****s* OpenSM: Vendor Test/osm_vendor_t +* NAME +* osm_vendor_t +* +* DESCRIPTION +* Vendor specific MAD interface. +* +* This interface defines access to the vendor specific MAD +* transport layer. +* +* SYNOPSIS +*/ +typedef struct _osm_vendor { + osm_log_t *p_log; + uint32_t timeout; + +} osm_vendor_t; +/*********/ + +typedef struct _osm_bind_handle { + osm_vendor_t *p_vend; + ib_net64_t port_guid; + uint8_t mad_class; + uint8_t class_version; + boolean_t is_responder; + boolean_t is_trap_processor; + boolean_t is_report_processor; + uint32_t send_q_size; + uint32_t recv_q_size; + +} *osm_bind_handle_t; + +END_C_DECLS +#endif /* _OSM_VENDOR_TEST_H_ */ diff --git a/include/vendor/osm_vendor_ts.h b/include/vendor/osm_vendor_ts.h new file mode 100644 index 0000000..a43f4e4 --- /dev/null +++ b/include/vendor/osm_vendor_ts.h @@ -0,0 +1,410 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Definition of interface for the TS Vendor + * This object is part of the OpenSM family of objects. + */ + +#ifndef _OSM_VENDOR_TS_H_ +#define _OSM_VENDOR_TS_H_ + +#undef IN +#undef OUT +#include +#include +#include +#define IN +#define OUT +#include "iba/ib_types.h" +#include "iba/ib_al.h" +#include +#include +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****s* OpenSM: Vendor TS/osm_bind_handle_t + * NAME + * osm_bind_handle_t + * + * DESCRIPTION + * handle returned by the vendor transport bind call. 
+ * + * SYNOPSIS + */ +typedef void *osm_bind_handle_t; +/* +**********/ +#define OSM_DEFAULT_RETRY_COUNT 3 + +/****s* OpenSM: Vendor osm_ts_bind_info_t + * NAME + * osm_ts_bind_info_t + * + * DESCRIPTION + * Handle to the result of binding class callbacks. + * + * SYNOPSIS + */ +typedef struct _osm_ts_bind_info { + int ul_dev_fd; + VAPI_hca_hndl_t hca_hndl; + struct _osm_vendor *p_vend; + void *client_context; + uint8_t port_num; + void *rcv_callback; + void *send_err_callback; + struct _osm_mad_pool *p_osm_pool; + cl_thread_t poller; +} osm_ts_bind_info_t; +/* + * FIELDS + * ul_dev_fd + * the file handle to be used for sending the MADs + * + * hca_hndl + * Handle to the HCA provided by the underlying VAPI + * + * p_vend + * Pointer to the vendor object. + * + * client_context + * User's context passed during osm_bind + * + * hca_id + * HCA Id we bind to. + * + * port_num + * Port number (within the HCA) of the bound port. + * + * rcv_callback + * OSM Callback function to be called on receive of MAD. + * + * send_err_callback + * OSM Callback to be called on send error. + * + * p_osm_pool + * Points to the MAD pool used by OSM + * + * poller + * A thread reading from the device file handle + * + * SEE ALSO + *********/ + +/****h* OpenSM/Vendor TS + * NAME + * Vendor TS + * + * DESCRIPTION + * + * The Vendor TS object is thread safe. + * + * This object should be treated as opaque and should be + * manipulated only through the provided functions. + * + * + * AUTHOR + * + * + *********/ + +/****s* OpenSM: Vendor TS/osm_ca_info_t + * NAME + * osm_ca_info_t + * + * DESCRIPTION + * Structure containing information about local Channel Adapters. + * + * SYNOPSIS + */ +typedef struct _osm_ca_info { + ib_net64_t guid; + size_t attr_size; + ib_ca_attr_t *p_attr; + +} osm_ca_info_t; + +/* + * FIELDS + * guid + * Node GUID of the local CA. + * + * attr_size + * Size of the CA attributes for this CA.
+ * + * p_attr + * Pointer to dynamically allocated CA Attribute structure. + * + * SEE ALSO + *********/ + +/***** OpenSM: Vendor TS/osm_vendor_t + * NAME + * osm_vendor_t + * + * DESCRIPTION + * The structure defining a TS vendor + * + * SYNOPSIS + */ +typedef struct _osm_vendor { + osm_log_t *p_log; + uint32_t ca_count; + osm_ca_info_t *p_ca_info; + uint32_t timeout; + struct _osm_transaction_mgr *p_transaction_mgr; + osm_ts_bind_info_t smi_bind; + osm_ts_bind_info_t gsi_bind; +} osm_vendor_t; + +/* + * FIELDS + * h_al + * Handle returned by TS open call. + * + * p_log + * Pointer to the log object. + * + * ca_count + * Number of CA's in the array pointed to by p_ca_info. + * + * p_ca_info + * Pointer to dynamically allocated array of CA info objects. + * + * timeout + * Transaction timeout time in milliseconds. + * + * p_transaction_mgr + * Pointer to Transaction Manager. + * + * smi_bind + * Bind information for handling SMI MADs + * + * gsi_bind + * Bind information for GSI MADs + * + * SEE ALSO + *********/ + +/****f* OpenSM: Vendor TS/CA Info/osm_ca_info_get_port_guid + * NAME + * osm_ca_info_get_port_guid + * + * DESCRIPTION + * Returns the port GUID of the specified port owned by this CA. + * + * SYNOPSIS + */ +static inline ib_net64_t +osm_ca_info_get_port_guid(IN const osm_ca_info_t * const p_ca_info, + IN const uint8_t index) +{ + return (p_ca_info->p_attr->p_port_attr[index].port_guid); +} + +/* + * PARAMETERS + * p_ca_info + * [in] Pointer to a CA Info object. + * + * index + * [in] Port "index" for which to retrieve the port GUID. + * The index is the offset into the ca's internal array + * of port attributes. + * + * RETURN VALUE + * Returns the port GUID of the specified port owned by this CA.
+ * + * NOTES + * + * SEE ALSO + *********/ + +/****f* OpenSM: Vendor TS/CA Info/osm_ca_info_get_num_ports + * NAME + * osm_ca_info_get_num_ports + * + * DESCRIPTION + * Returns the number of ports of the given ca_info. + * + * SYNOPSIS + */ +static inline uint8_t +osm_ca_info_get_num_ports(IN const osm_ca_info_t * const p_ca_info) +{ + return (p_ca_info->p_attr->num_ports); +} + +/* + * PARAMETERS + * p_ca_info + * [in] Pointer to a CA Info object. Dereferenced without a NULL check. + * + * RETURN VALUE + * Returns the number of CA ports (the num_ports field of the CA attributes). + * + * NOTES + * + * SEE ALSO + *********/ + +/****f* OpenSM: SM Vendor/osm_vendor_get_guid_ca_and_port + * NAME + * osm_vendor_get_guid_ca_and_port + * + * DESCRIPTION + * Given the vendor object and a GUID, + * return the CA id and port number that have that GUID. + * + * SYNOPSIS + */ +ib_api_status_t +osm_vendor_get_guid_ca_and_port(IN osm_vendor_t * const p_vend, + IN ib_net64_t const guid, + OUT VAPI_hca_hndl_t * p_hca_hndl, + OUT VAPI_hca_id_t * p_hca_id, + OUT uint32_t * p_port_num); + +/* + * PARAMETERS + * p_vend + * [in] Pointer to an osm_vendor_t object. + * + * guid + * [in] The GUID to search for. + * + * p_hca_id + * [out] The HCA Id (VAPI_hca_id_t *) that the port is found on. NOTE(review): the p_hca_hndl [out] parameter of the prototype is not documented here; presumably it receives the matching HCA handle - confirm against the implementation. + * + * p_port_num + * [out] Pointer to a port number arg to be filled with the port number with the given GUID. + * + * RETURN VALUES + * IB_SUCCESS on success + * IB_INVALID_GUID if the GUID is not found on any local HCA port + * + * NOTES + * + * SEE ALSO + *********/ + +/****f* OpenSM: Vendor TS/osm_vendor_get_all_port_attr + * NAME + * osm_vendor_get_all_port_attr + * + * DESCRIPTION + * Fill in the array of port_attr with all available ports on ALL the + * available CAs on this machine.
+ * ALSO - + * UPDATE THE VENDOR OBJECT LIST OF CA_INFO STRUCTS + * + * SYNOPSIS + */ +ib_api_status_t osm_vendor_get_all_port_attr(IN osm_vendor_t * const p_vend, + IN ib_port_attr_t * + const p_attr_array, + IN uint32_t * const p_num_ports); + +/* + * PARAMETERS + * p_vend + * [in] Pointer to an osm_vendor_t object. + * + * p_attr_array + * [out] Pre-allocated array of port attributes to be filled in. + * + * p_num_ports + * [in/out] On input, the size of the given array. On output, the actual number of ports found. + * + * RETURN VALUES + * IB_SUCCESS if OK + * IB_INSUFFICIENT_MEMORY if the given array is too small to hold all ports. + * + * NOTES + * + * SEE ALSO + *********/ + +#define OSM_BIND_INVALID_HANDLE 0 + +/****s* OpenSM: Vendor TS/osm_vend_wrap_t + * NAME + * TS Vendor MAD Wrapper + * + * DESCRIPTION + * TS specific MAD wrapper. TS transport layer uses this for + * housekeeping. + * + * SYNOPSIS + *********/ +typedef struct _osm_vend_wrap_t { + uint32_t size; + osm_bind_handle_t h_bind; + ib_mad_t *p_mad_buf; + void *p_resp_madw; +} osm_vend_wrap_t; + +/* + * FIELDS + * size + * Size of the allocated MAD. + * + * h_bind + * Bind handle used on this transaction. + * + * h_av + * Address vector handle used for this transaction. NOTE(review): no h_av member is declared in the struct above, while the declared p_mad_buf member is not documented here. + * + * p_resp_madw + * Pointer to the mad wrapper structure used to hold the pending + * response to the mad, if any. If a response is expected, the + * wrapper for the response is allocated during the send call. + * + * SEE ALSO + *********/ + +END_C_DECLS +#endif /* _OSM_VENDOR_TS_H_ */ diff --git a/include/vendor/osm_vendor_umadt.h b/include/vendor/osm_vendor_umadt.h new file mode 100644 index 0000000..8cdb631 --- /dev/null +++ b/include/vendor/osm_vendor_umadt.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_mad_wrapper_t. + * This object represents the context wrapper for OpenSM MAD processing. + * This object is part of the OpenSM family of objects. 
+ */ + +#ifndef _OSM_VENDOR_UMADT_h_ +#define _OSM_VENDOR_UMADT_h_ + +#include "iba/ib_types.h" +#include "complib/cl_qlist.h" +#include "complib/cl_thread.h" +#include +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +/****h* OpenSM/ Vendor Umadt +* NAME +* MAD Wrapper +* +* DESCRIPTION +* +* +* AUTHOR +* Ranjit Pandit, Intel +* +*********/ +typedef void *osm_vendor_t; +#define OSM_BIND_INVALID_HANDLE 0 + +/****s* OpenSM: Vendor Umadt /osm_bind_handle_t +* NAME +* osm_bind_handle_t +* +* DESCRIPTION +* handle returned by the vendor transport bind call. +* +* SYNOPSIS +*/ + +typedef void *osm_bind_handle_t; + +/****s* OpenSM: Vendor Umadt /mad_direction_t +* NAME +* mad_direction_t +* +* DESCRIPTION +* Tags for mad wrapper to indicate the direction of mads. +* Umadt vendor transport layer uses this tag to call the appropriate +* Umadt APIs. +* +* SYNOPSIS +*/ +typedef enum _mad_direction_t { + SEND = 0, + RECEIVE, +} mad_direction_t; + +/****s* OpenSM/ osm_vend_wrap_t +* NAME +* Umadt Vendor MAD Wrapper +* +* DESCRIPTION +* Umadt specific MAD wrapper. Umadt transport layer sets this for +* housekeeping. +* +* SYNOPSIS +*********/ +typedef struct _osm_vend_wrap_t { + MadtStruct *p_madt_struct; + mad_direction_t direction; // send or receive + uint32_t size; +} osm_vend_wrap_t; +/* +* FIELDS +* p_madt_struct +* Umadt mad structure to identify a mad. +* +* direction +* Used to identify a mad with it's direction. 
+* +* SEE ALSO +*********/ + +END_C_DECLS +#endif /* _OSM_VENDOR_UMADT_h_ */ diff --git a/libopensm/Makefile.am b/libopensm/Makefile.am new file mode 100644 index 0000000..ded0cad --- /dev/null +++ b/libopensm/Makefile.am @@ -0,0 +1,29 @@ + +AM_CPPFLAGS = -I$(srcdir)/../include + +lib_LTLIBRARIES = libopensm.la + +if DEBUG +DBGFLAGS = -ggdb -D_DEBUG_ +else +DBGFLAGS = -g +endif + +libopensm_la_CFLAGS = -Wall -Wwrite-strings $(DBGFLAGS) -D_XOPEN_SOURCE=600 -D_GNU_SOURCE=1 + +if HAVE_LD_VERSION_SCRIPT + libopensm_version_script = -Wl,--version-script=$(srcdir)/libopensm.map +else + libopensm_version_script = +endif + +opensm_api_version=$(shell grep LIBVERSION= $(srcdir)/libopensm.ver | sed 's/LIBVERSION=//') + +libopensm_la_SOURCES = osm_log.c osm_helper.c + +libopensm_la_LIBADD = -L../complib -losmcomp +libopensm_la_LDFLAGS = -version-info $(opensm_api_version) \ + -export-dynamic $(libopensm_version_script) +libopensm_la_DEPENDENCIES = $(srcdir)/libopensm.map + +EXTRA_DIST = $(srcdir)/libopensm.map $(srcdir)/libopensm.ver diff --git a/libopensm/libopensm.map b/libopensm/libopensm.map new file mode 100644 index 0000000..28a3e0a --- /dev/null +++ b/libopensm/libopensm.map @@ -0,0 +1,95 @@ +OPENSM_1.5 { + global: + osm_log; + osm_log_msg_box; + osm_is_debug; + osm_log_init; + osm_log_init_v2; + osm_log_reopen_file; + ib_get_sa_method_str; + ib_get_sm_method_str; + ib_get_sm_attr_str; + ib_get_sa_attr_str; + ib_get_trap_str; + ib_zero_gid; + osm_dump_port_info; + osm_dump_port_info_v2; + osm_dump_portinfo_record; + osm_dump_portinfo_record_v2; + osm_dump_guid_info; + osm_dump_guid_info_v2; + osm_dump_guidinfo_record; + osm_dump_guidinfo_record_v2; + osm_dump_node_info; + osm_dump_node_info_v2; + osm_dump_node_record; + osm_dump_node_record_v2; + osm_dump_path_record; + osm_dump_path_record_v2; + osm_dump_multipath_record; + osm_dump_multipath_record_v2; + osm_dump_mc_record; + osm_dump_mc_record_v2; + osm_dump_service_record; + osm_dump_service_record_v2; + 
osm_dump_inform_info; + osm_dump_inform_info_v2; + osm_dump_inform_info_record; + osm_dump_inform_info_record_v2; + osm_dump_link_record; + osm_dump_link_record_v2; + osm_dump_switch_info; + osm_dump_switch_info_v2; + osm_dump_switch_info_record; + osm_dump_switch_info_record_v2; + osm_dump_slvl_map_table; + osm_dump_slvl_map_table_v2; + osm_dump_vl_arb_table; + osm_dump_vl_arb_table_v2; + osm_dump_sm_info; + osm_dump_sm_info_v2; + osm_dump_sm_info_record; + osm_dump_sm_info_record_v2; + osm_dump_notice; + osm_dump_notice_v2; + osm_dump_dr_smp; + osm_dump_dr_smp_v2; + osm_dump_sa_mad; + osm_dump_sa_mad_v2; + osm_dump_dr_path; + osm_dump_dr_path_v2; + osm_dump_dr_path_as_buf; + osm_dump_smp_dr_path; + osm_dump_smp_dr_path_v2; + osm_dump_pkey_block; + osm_dump_pkey_block_v2; + osm_log_raw; + osm_get_sm_state_str; + osm_get_sm_signal_str; + osm_get_disp_msg_str; + osm_get_port_state_str_fixed_width; + osm_get_node_type_str_fixed_width; + osm_get_manufacturer_str; + osm_get_mtu_str; + osm_get_lwa_str; + osm_get_lsa_str; + osm_get_sm_mgr_signal_str; + osm_get_sm_mgr_state_str; + ib_mtu_is_valid; + ib_rate_is_valid; + ib_path_compare_rates; + ib_path_rate_get_prev; + ib_path_rate_get_next; + osm_dump_mlnx_ext_port_info; + osm_dump_mlnx_ext_port_info_v2; + osm_log_v2; + osm_log_msg_box_v2; + osm_log_is_active_v2; + osm_get_log_per_module; + osm_set_log_per_module; + osm_reset_log_per_module; + sprint_uint8_arr; + ib_path_rate_max_12xedr; + ib_path_rate_2x_hdr_fixups; + local: *; +}; diff --git a/libopensm/libopensm.ver b/libopensm/libopensm.ver new file mode 100644 index 0000000..5034906 --- /dev/null +++ b/libopensm/libopensm.ver @@ -0,0 +1,9 @@ +# In this file we track the current API version +# of the opensm common interface (and libraries) +# The version is built of the following +# tree numbers: +# API_REV:RUNNING_REV:AGE +# API_REV - advance on any added API +# RUNNING_REV - advance any change to the vendor files +# AGE - number of backward versions the API still 
supports +LIBVERSION=10:0:1 diff --git a/libopensm/osm_helper.c b/libopensm/osm_helper.c new file mode 100644 index 0000000..9791172 --- /dev/null +++ b/libopensm/osm_helper.c @@ -0,0 +1,3409 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of opensm helper functions. 
+ */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_HELPER_C +#include +#include + +#define LINE_LENGTH 256 + +#define ARR_SIZE(a) (sizeof(a)/sizeof((a)[0])) + +/* we use two tables - one for queries and one for responses */ +static const char *ib_sa_method_str[] = { + "RESERVED", /* 0 */ + "SubnAdmGet", /* 1 */ + "SubnAdmSet", /* 2 */ + "RESERVED", /* 3 */ + "RESERVED", /* 4 */ + "RESERVED", /* 5 */ + "SubnAdmReport", /* 6 */ + "RESERVED", /* 7 */ + "RESERVED", /* 8 */ + "RESERVED", /* 9 */ + "RESERVED", /* A */ + "RESERVED", /* B */ + "RESERVED", /* C */ + "RESERVED", /* D */ + "RESERVED", /* E */ + "RESERVED", /* F */ + "RESERVED", /* 10 */ + "RESERVED", /* 11 */ + "SubnAdmGetTable", /* 12 */ + "SubnAdmGetTraceTable", /* 13 */ + "SubnAdmGetMulti", /* 14 */ + "SubnAdmDelete", /* 15 */ + "UNKNOWN" /* 16 */ +}; + +#define OSM_SA_METHOD_STR_UNKNOWN_VAL (ARR_SIZE(ib_sa_method_str) - 1) + +static const char *ib_sa_resp_method_str[] = { + "RESERVED", /* 80 */ + "SubnAdmGetResp", /* 81 */ + "RESERVED (SetResp?)", /* 82 */ + "RESERVED", /* 83 */ + "RESERVED", /* 84 */ + "RESERVED", /* 85 */ + "SubnAdmReportResp", /* 86 */ + "RESERVED", /* 87 */ + "RESERVED", /* 88 */ + "RESERVED", /* 89 */ + "RESERVED", /* 8A */ + "RESERVED", /* 8B */ + "RESERVED", /* 8C */ + "RESERVED", /* 8D */ + "RESERVED", /* 8E */ + "RESERVED", /* 8F */ + "RESERVED", /* 90 */ + "RESERVED", /* 91 */ + "SubnAdmGetTableResp", /* 92 */ + "RESERVED", /* 93 */ + "SubnAdmGetMultiResp", /* 94 */ + "SubnAdmDeleteResp", /* 95 */ + "UNKNOWN" +}; + +static const char *ib_sm_method_str[] = { + "RESERVED0", /* 0 */ + "SubnGet", /* 1 */ + "SubnSet", /* 2 */ + "RESERVED3", /* 3 */ + "RESERVED4", /* 4 */ + "SubnTrap", /* 5 */ + "RESERVED6", /* 6 */ + "SubnTrapRepress", /* 7 */ + "RESERVED8", /* 8 */ + "RESERVED9", /* 9 */ + "RESERVEDA", /* A */ + "RESERVEDB", /* B */ + "RESERVEDC", /* C */ + "RESERVEDD", /* 
D */ + "RESERVEDE", /* E */ + "RESERVEDF", /* F */ + "RESERVED10", /* 10 */ + "SubnGetResp", /* 11 */ + "RESERVED12", /* 12 */ + "RESERVED13", /* 13 */ + "RESERVED14", /* 14 */ + "RESERVED15", /* 15 */ + "RESERVED16", /* 16 */ + "RESERVED17", /* 17 */ + "RESERVED18", /* 18 */ + "RESERVED19", /* 19 */ + "RESERVED1A", /* 1A */ + "RESERVED1B", /* 1B */ + "RESERVED1C", /* 1C */ + "RESERVED1D", /* 1D */ + "RESERVED1E", /* 1E */ + "RESERVED1F", /* 1F */ + "UNKNOWN" /* 20 */ +}; + +#define OSM_SM_METHOD_STR_UNKNOWN_VAL (ARR_SIZE(ib_sm_method_str) - 1) + +static const char *ib_sm_attr_str[] = { + "RESERVED", /* 0 */ + "ClassPortInfo", /* 1 */ + "Notice", /* 2 */ + "InformInfo", /* 3 */ + "RESERVED", /* 4 */ + "RESERVED", /* 5 */ + "RESERVED", /* 6 */ + "RESERVED", /* 7 */ + "RESERVED", /* 8 */ + "RESERVED", /* 9 */ + "RESERVED", /* A */ + "RESERVED", /* B */ + "RESERVED", /* C */ + "RESERVED", /* D */ + "RESERVED", /* E */ + "RESERVED", /* F */ + "NodeDescription", /* 10 */ + "NodeInfo", /* 11 */ + "SwitchInfo", /* 12 */ + "UNKNOWN", /* 13 */ + "GUIDInfo", /* 14 */ + "PortInfo", /* 15 */ + "P_KeyTable", /* 16 */ + "SLtoVLMappingTable", /* 17 */ + "VLArbitrationTable", /* 18 */ + "LinearForwardingTable", /* 19 */ + "RandomForwardingTable", /* 1A */ + "MulticastForwardingTable", /* 1B */ + "UNKNOWN", /* 1C */ + "UNKNOWN", /* 1D */ + "UNKNOWN", /* 1E */ + "UNKNOWN", /* 1F */ + "SMInfo", /* 20 */ + "UNKNOWN" /* 21 - always highest value */ +}; + +#define OSM_SM_ATTR_STR_UNKNOWN_VAL (ARR_SIZE(ib_sm_attr_str) - 1) + +static const char *ib_sa_attr_str[] = { + "RESERVED", /* 0 */ + "ClassPortInfo", /* 1 */ + "Notice", /* 2 */ + "InformInfo", /* 3 */ + "RESERVED", /* 4 */ + "RESERVED", /* 5 */ + "RESERVED", /* 6 */ + "RESERVED", /* 7 */ + "RESERVED", /* 8 */ + "RESERVED", /* 9 */ + "RESERVED", /* A */ + "RESERVED", /* B */ + "RESERVED", /* C */ + "RESERVED", /* D */ + "RESERVED", /* E */ + "RESERVED", /* F */ + "RESERVED", /* 10 */ + "NodeRecord", /* 11 */ + "PortInfoRecord", /* 12 
*/ + "SLtoVLMappingTableRecord", /* 13 */ + "SwitchInfoRecord", /* 14 */ + "LinearForwardingTableRecord", /* 15 */ + "RandomForwardingTableRecord", /* 16 */ + "MulticastForwardingTableRecord", /* 17 */ + "SMInfoRecord", /* 18 */ + "RESERVED", /* 19 */ + "RandomForwardingTable", /* 1A */ + "MulticastForwardingTable", /* 1B */ + "UNKNOWN", /* 1C */ + "UNKNOWN", /* 1D */ + "UNKNOWN", /* 1E */ + "UNKNOWN", /* 1F */ + "LinkRecord", /* 20 */ + "UNKNOWN", /* 21 */ + "UNKNOWN", /* 22 */ + "UNKNOWN", /* 23 */ + "UNKNOWN", /* 24 */ + "UNKNOWN", /* 25 */ + "UNKNOWN", /* 26 */ + "UNKNOWN", /* 27 */ + "UNKNOWN", /* 28 */ + "UNKNOWN", /* 29 */ + "UNKNOWN", /* 2A */ + "UNKNOWN", /* 2B */ + "UNKNOWN", /* 2C */ + "UNKNOWN", /* 2D */ + "UNKNOWN", /* 2E */ + "UNKNOWN", /* 2F */ + "GuidInfoRecord", /* 30 */ + "ServiceRecord", /* 31 */ + "UNKNOWN", /* 32 */ + "P_KeyTableRecord", /* 33 */ + "UNKNOWN", /* 34 */ + "PathRecord", /* 35 */ + "VLArbitrationTableRecord", /* 36 */ + "UNKNOWN", /* 37 */ + "MCMemberRecord", /* 38 */ + "TraceRecord", /* 39 */ + "MultiPathRecord", /* 3A */ + "ServiceAssociationRecord", /* 3B */ + "UNKNOWN", /* 3C */ + "UNKNOWN", /* 3D */ + "UNKNOWN", /* 3E */ + "UNKNOWN", /* 3F */ + "UNKNOWN", /* 40 */ + "UNKNOWN", /* 41 */ + "UNKNOWN", /* 42 */ + "UNKNOWN", /* 43 */ + "UNKNOWN", /* 44 */ + "UNKNOWN", /* 45 */ + "UNKNOWN", /* 46 */ + "UNKNOWN", /* 47 */ + "UNKNOWN", /* 48 */ + "UNKNOWN", /* 49 */ + "UNKNOWN", /* 4A */ + "UNKNOWN", /* 4B */ + "UNKNOWN", /* 4C */ + "UNKNOWN", /* 4D */ + "UNKNOWN", /* 4E */ + "UNKNOWN", /* 4F */ + "UNKNOWN", /* 50 */ + "UNKNOWN", /* 51 */ + "UNKNOWN", /* 52 */ + "UNKNOWN", /* 53 */ + "UNKNOWN", /* 54 */ + "UNKNOWN", /* 55 */ + "UNKNOWN", /* 56 */ + "UNKNOWN", /* 57 */ + "UNKNOWN", /* 58 */ + "UNKNOWN", /* 59 */ + "UNKNOWN", /* 5A */ + "UNKNOWN", /* 5B */ + "UNKNOWN", /* 5C */ + "UNKNOWN", /* 5D */ + "UNKNOWN", /* 5E */ + "UNKNOWN", /* 5F */ + "UNKNOWN", /* 60 */ + "UNKNOWN", /* 61 */ + "UNKNOWN", /* 62 */ + "UNKNOWN", /* 63 */ + 
"UNKNOWN", /* 64 */ + "UNKNOWN", /* 65 */ + "UNKNOWN", /* 66 */ + "UNKNOWN", /* 67 */ + "UNKNOWN", /* 68 */ + "UNKNOWN", /* 69 */ + "UNKNOWN", /* 6A */ + "UNKNOWN", /* 6B */ + "UNKNOWN", /* 6C */ + "UNKNOWN", /* 6D */ + "UNKNOWN", /* 6E */ + "UNKNOWN", /* 6F */ + "UNKNOWN", /* 70 */ + "UNKNOWN", /* 71 */ + "UNKNOWN", /* 72 */ + "UNKNOWN", /* 73 */ + "UNKNOWN", /* 74 */ + "UNKNOWN", /* 75 */ + "UNKNOWN", /* 76 */ + "UNKNOWN", /* 77 */ + "UNKNOWN", /* 78 */ + "UNKNOWN", /* 79 */ + "UNKNOWN", /* 7A */ + "UNKNOWN", /* 7B */ + "UNKNOWN", /* 7C */ + "UNKNOWN", /* 7D */ + "UNKNOWN", /* 7E */ + "UNKNOWN", /* 7F */ + "UNKNOWN", /* 80 */ + "UNKNOWN", /* 81 */ + "UNKNOWN", /* 82 */ + "UNKNOWN", /* 83 */ + "UNKNOWN", /* 84 */ + "UNKNOWN", /* 85 */ + "UNKNOWN", /* 86 */ + "UNKNOWN", /* 87 */ + "UNKNOWN", /* 88 */ + "UNKNOWN", /* 89 */ + "UNKNOWN", /* 8A */ + "UNKNOWN", /* 8B */ + "UNKNOWN", /* 8C */ + "UNKNOWN", /* 8D */ + "UNKNOWN", /* 8E */ + "UNKNOWN", /* 8F */ + "UNKNOWN", /* 90 */ + "UNKNOWN", /* 91 */ + "UNKNOWN", /* 92 */ + "UNKNOWN", /* 93 */ + "UNKNOWN", /* 94 */ + "UNKNOWN", /* 95 */ + "UNKNOWN", /* 96 */ + "UNKNOWN", /* 97 */ + "UNKNOWN", /* 98 */ + "UNKNOWN", /* 99 */ + "UNKNOWN", /* 9A */ + "UNKNOWN", /* 9B */ + "UNKNOWN", /* 9C */ + "UNKNOWN", /* 9D */ + "UNKNOWN", /* 9E */ + "UNKNOWN", /* 9F */ + "UNKNOWN", /* A0 */ + "UNKNOWN", /* A1 */ + "UNKNOWN", /* A2 */ + "UNKNOWN", /* A3 */ + "UNKNOWN", /* A4 */ + "UNKNOWN", /* A5 */ + "UNKNOWN", /* A6 */ + "UNKNOWN", /* A7 */ + "UNKNOWN", /* A8 */ + "UNKNOWN", /* A9 */ + "UNKNOWN", /* AA */ + "UNKNOWN", /* AB */ + "UNKNOWN", /* AC */ + "UNKNOWN", /* AD */ + "UNKNOWN", /* AE */ + "UNKNOWN", /* AF */ + "UNKNOWN", /* B0 */ + "UNKNOWN", /* B1 */ + "UNKNOWN", /* B2 */ + "UNKNOWN", /* B3 */ + "UNKNOWN", /* B4 */ + "UNKNOWN", /* B5 */ + "UNKNOWN", /* B6 */ + "UNKNOWN", /* B7 */ + "UNKNOWN", /* B8 */ + "UNKNOWN", /* B9 */ + "UNKNOWN", /* BA */ + "UNKNOWN", /* BB */ + "UNKNOWN", /* BC */ + "UNKNOWN", /* BD */ + "UNKNOWN", /* BE */ 
+ "UNKNOWN", /* BF */ + "UNKNOWN", /* C0 */ + "UNKNOWN", /* C1 */ + "UNKNOWN", /* C2 */ + "UNKNOWN", /* C3 */ + "UNKNOWN", /* C4 */ + "UNKNOWN", /* C5 */ + "UNKNOWN", /* C6 */ + "UNKNOWN", /* C7 */ + "UNKNOWN", /* C8 */ + "UNKNOWN", /* C9 */ + "UNKNOWN", /* CA */ + "UNKNOWN", /* CB */ + "UNKNOWN", /* CC */ + "UNKNOWN", /* CD */ + "UNKNOWN", /* CE */ + "UNKNOWN", /* CF */ + "UNKNOWN", /* D0 */ + "UNKNOWN", /* D1 */ + "UNKNOWN", /* D2 */ + "UNKNOWN", /* D3 */ + "UNKNOWN", /* D4 */ + "UNKNOWN", /* D5 */ + "UNKNOWN", /* D6 */ + "UNKNOWN", /* D7 */ + "UNKNOWN", /* D8 */ + "UNKNOWN", /* D9 */ + "UNKNOWN", /* DA */ + "UNKNOWN", /* DB */ + "UNKNOWN", /* DC */ + "UNKNOWN", /* DD */ + "UNKNOWN", /* DE */ + "UNKNOWN", /* DF */ + "UNKNOWN", /* E0 */ + "UNKNOWN", /* E1 */ + "UNKNOWN", /* E2 */ + "UNKNOWN", /* E3 */ + "UNKNOWN", /* E4 */ + "UNKNOWN", /* E5 */ + "UNKNOWN", /* E6 */ + "UNKNOWN", /* E7 */ + "UNKNOWN", /* E8 */ + "UNKNOWN", /* E9 */ + "UNKNOWN", /* EA */ + "UNKNOWN", /* EB */ + "UNKNOWN", /* EC */ + "UNKNOWN", /* ED */ + "UNKNOWN", /* EE */ + "UNKNOWN", /* EF */ + "UNKNOWN", /* F0 */ + "UNKNOWN", /* F1 */ + "UNKNOWN", /* F2 */ + "InformInfoRecord", /* F3 */ + "UNKNOWN" /* F4 - always highest value */ +}; + +#define OSM_SA_ATTR_STR_UNKNOWN_VAL (ARR_SIZE(ib_sa_attr_str) - 1) + +static int ordered_rates[] = { + 0, 0, /* 0, 1 - reserved */ + 1, /* 2 - 2.5 Gbps */ + 3, /* 3 - 10 Gbps */ + 6, /* 4 - 30 Gbps */ + 2, /* 5 - 5 Gbps */ + 5, /* 6 - 20 Gbps */ + 9, /* 7 - 40 Gbps */ + 10, /* 8 - 60 Gbps */ + 13, /* 9 - 80 Gbps */ + 14, /* 10 - 120 Gbps */ + 4, /* 11 - 14 Gbps (17 Gbps equiv) */ + 12, /* 12 - 56 Gbps (68 Gbps equiv) */ + 16, /* 13 - 112 Gbps (136 Gbps equiv) */ + 17, /* 14 - 168 Gbps (204 Gbps equiv) */ + 7, /* 15 - 25 Gbps (31.25 Gbps equiv) */ + 15, /* 16 - 100 Gbps (125 Gbps equiv) */ + 18, /* 17 - 200 Gbps (250 Gbps equiv) */ + 19, /* 18 - 300 Gbps (375 Gbps equiv) */ + 8, /* 19 - 28 Gbps (35 Gbps equiv) */ + 11, /* 20 - 50 Gbps (62.5 Gbps equiv) */ + 20, /* 
21 - 400 Gbps (500 Gbps equiv) */ + 21, /* 22 - 600 Gbps (750 Gbps equiv) */ +}; + +/* + * Format the len bytes of arr into buf as comma-separated unsigned decimals. + * Each element is appended with snprintf, bounded by what is left of the + * size bytes of buf; the loop stops after the first element that does not + * fit. Returns the sum of the snprintf return values, so a result >= size + * means the output was truncated. When len is 0 nothing is written to buf. + */ +int sprint_uint8_arr(char *buf, size_t size, + const uint8_t * arr, size_t len) +{ + int n; + unsigned int i; + for (i = 0, n = 0; i < len; i++) { + n += snprintf(buf + n, size - n, "%s%u", i == 0 ? "" : ",", + arr[i]); + if (n >= size) + break; + } + return n; +} + +/* + * Return the name of an SA method. Methods with bit 0x80 set are looked up + * (after clearing that bit) in the response table, all others in the request + * table; out-of-range values map to the trailing "UNKNOWN" entry. + */ +const char *ib_get_sa_method_str(IN uint8_t method) +{ + if (method & 0x80) { + method = method & 0x7f; + /* NOTE: the clamp uses the request table's UNKNOWN index; this is only valid while both tables have the same number of entries */ + if (method > OSM_SA_METHOD_STR_UNKNOWN_VAL) + method = OSM_SA_METHOD_STR_UNKNOWN_VAL; + /* it is a response - use the response table */ + return ib_sa_resp_method_str[method]; + } else { + if (method > OSM_SA_METHOD_STR_UNKNOWN_VAL) + method = OSM_SA_METHOD_STR_UNKNOWN_VAL; + return ib_sa_method_str[method]; + } +} + +/* + * Return the name of an SM method. Methods with bit 0x80 set are folded + * into table slots 0x10-0x1F (e.g. 0x81 becomes 0x11, "SubnGetResp"); + * out-of-range values map to the trailing "UNKNOWN" entry. + */ +const char *ib_get_sm_method_str(IN uint8_t method) +{ + if (method & 0x80) + method = (method & 0x0F) | 0x10; + if (method > OSM_SM_METHOD_STR_UNKNOWN_VAL) + method = OSM_SM_METHOD_STR_UNKNOWN_VAL; + return ib_sm_method_str[method]; +} + +/* + * Return the name of an SM attribute given its ID in network byte order. + * IDs beyond the table map to the trailing "UNKNOWN" entry. + */ +const char *ib_get_sm_attr_str(IN ib_net16_t attr) +{ + uint16_t host_attr = cl_ntoh16(attr); + + /* compared on the raw (network order) value, not host_attr - presumably the constant is defined in network order; confirm against ib_types.h */ + if (attr == IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO) + return "MLNXExtendedPortInfo"; + + if (host_attr > OSM_SM_ATTR_STR_UNKNOWN_VAL) + host_attr = OSM_SM_ATTR_STR_UNKNOWN_VAL; + + return ib_sm_attr_str[host_attr]; +} + +/* + * Return the name of an SA attribute given its ID in network byte order. + * IDs beyond the table map to the trailing "UNKNOWN" entry. + */ +const char *ib_get_sa_attr_str(IN ib_net16_t attr) +{ + uint16_t host_attr = cl_ntoh16(attr); + + if (host_attr > OSM_SA_ATTR_STR_UNKNOWN_VAL) + host_attr = OSM_SA_ATTR_STR_UNKNOWN_VAL; + + return ib_sa_attr_str[host_attr]; +} + +/* + * Return a human-readable description of a trap number given in network + * byte order, or "Unknown" when no case matches. + */ +const char *ib_get_trap_str(ib_net16_t trap_num) +{ + switch (cl_ntoh16(trap_num)) { + case SM_GID_IN_SERVICE_TRAP: /* 64 */ + return "GID in service"; + case SM_GID_OUT_OF_SERVICE_TRAP: /* 65 */ + return "GID out of service"; + case SM_MGID_CREATED_TRAP: /* 66 */ + return "New mcast group created"; + case SM_MGID_DESTROYED_TRAP: /* 67 */ + return "Mcast group deleted"; + case SM_UNPATH_TRAP: /* 68 */ + return "UnPath, Path no
longer valid"; + case SM_REPATH_TRAP: /* 69 */ + return "RePath, Path recomputed"; + case SM_LINK_STATE_CHANGED_TRAP: /* 128 */ + return "Link state change"; + case SM_LINK_INTEGRITY_THRESHOLD_TRAP: /* 129 */ + return "Local Link integrity threshold reached"; + case SM_BUFFER_OVERRUN_THRESHOLD_TRAP: /* 130 */ + return "Excessive Buffer Overrun Threshold reached"; + case SM_WATCHDOG_TIMER_EXPIRED_TRAP: /* 131 */ + return "Flow Control Update watchdog timer expired"; + case SM_LOCAL_CHANGES_TRAP: /* 144 */ + return + "CapabilityMask, NodeDescription, Link [Width|Speed] Enabled, SM priority changed"; + case SM_SYS_IMG_GUID_CHANGED_TRAP: /* 145 */ + return "System Image GUID changed"; + case SM_BAD_MKEY_TRAP: /* 256 */ + return "Bad M_Key"; + case SM_BAD_PKEY_TRAP: /* 257 */ + return "Bad P_Key"; + case SM_BAD_QKEY_TRAP: /* 258 */ + return "Bad Q_Key"; + case SM_BAD_SWITCH_PKEY_TRAP: /* 259 */ + return "Bad P_Key (switch external port)"; + default: + break; + } + return "Unknown"; +} + +const ib_gid_t ib_zero_gid = { {0} }; + +static ib_api_status_t dbg_do_line(IN char **pp_local, IN uint32_t buf_size, + IN const char *p_prefix_str, + IN const char *p_new_str, + IN uint32_t * p_total_len) +{ + char line[LINE_LENGTH]; + uint32_t len; + + sprintf(line, "%s%s", p_prefix_str, p_new_str); + len = (uint32_t) strlen(line); + *p_total_len += len; + if (*p_total_len + sizeof('\0') > buf_size) + return IB_INSUFFICIENT_MEMORY; + + strcpy(*pp_local, line); + *pp_local += len; + return IB_SUCCESS; +} + +static void dbg_get_capabilities_str(IN char *p_buf, IN uint32_t buf_size, + IN const char *p_prefix_str, + IN const ib_port_info_t * p_pi) +{ + uint32_t total_len = 0; + char *p_local = p_buf; + + strcpy(p_local, "Capability Mask:\n"); + p_local += strlen(p_local); + + if (p_pi->capability_mask & IB_PORT_CAP_RESV0) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_RESV0\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & 
IB_PORT_CAP_IS_SM) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_IS_SM\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_NOTICE) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_NOTICE\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_TRAP) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_TRAP\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_IPD) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_IPD\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_AUTO_MIG) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_AUTO_MIG\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_SL_MAP) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_SL_MAP\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_NV_MKEY) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_NV_MKEY\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_NV_PKEY) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_NV_PKEY\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_LED_INFO) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_LED_INFO\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_SM_DISAB) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_SM_DISAB\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_SYS_IMG_GUID) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_SYS_IMG_GUID\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & 
IB_PORT_CAP_HAS_PKEY_SW_EXT_PORT_TRAP) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_PKEY_SW_EXT_PORT_TRAP\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_CABLE_INFO) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_CABLE_INFO\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_EXT_SPEEDS\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_CAP_MASK2) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_CAP_MASK2\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_COM_MGT) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_COM_MGT\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_SNMP) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_SNMP\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_REINIT) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_REINIT\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_DEV_MGT) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_DEV_MGT\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_VEND_CLS) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_VEND_CLS\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_DR_NTC) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_DR_NTC\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_CAP_NTC) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_CAP_NTC\n", + &total_len) != IB_SUCCESS) + 
return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_BM) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_BM\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_LINK_RT_LATENCY) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_LINK_RT_LATENCY\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_CLIENT_REREG) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_CLIENT_REREG\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_OTHER_LOCAL_CHANGES_NTC) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_OTHER_LOCAL_CHANGES_NTC\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_LINK_SPEED_WIDTH_PAIRS_TBL) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_LINK_SPEED_WIDTH_PAIRS_TBL\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_VEND_MADS) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_VEND_MADS\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_MCAST_PKEY_TRAP_SUPPRESS) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_MCAST_PKEY_TRAP_SUPPRESS\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_MCAST_FDB_TOP) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_MCAST_FDB_TOP\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask & IB_PORT_CAP_HAS_HIER_INFO) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP_HAS_HIER_INFO\n", + &total_len) != IB_SUCCESS) + return; + } +} + +static void dbg_get_capabilities2_str(IN char *p_buf, IN uint32_t buf_size, + IN const char *p_prefix_str, + IN const ib_port_info_t * p_pi) +{ + uint32_t total_len = 0; + char *p_local = p_buf; 
+ + strcpy(p_local, "Capability Mask2:\n"); + p_local += strlen(p_local); + + if (p_pi->capability_mask2 & IB_PORT_CAP2_IS_SET_NODE_DESC_SUPPORTED) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP2_IS_SET_NODE_DESC_SUPPORTED\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask2 & IB_PORT_CAP2_IS_PORT_INFO_EXT_SUPPORTED) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP2_IS_PORT_INFO_EXT_SUPPORTED\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask2 & IB_PORT_CAP2_IS_VIRT_SUPPORTED) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP2_IS_VIRT_SUPPORTED\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask2 & IB_PORT_CAP2_IS_SWITCH_PORT_STATE_TBL_SUPP) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP2_IS_SWITCH_PORT_STATE_TBL_SUPP\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask2 & IB_PORT_CAP2_IS_LINK_WIDTH_2X_SUPPORTED) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP2_IS_LINK_WIDTH_2X_SUPPORTED\n", + &total_len) != IB_SUCCESS) + return; + } + if (p_pi->capability_mask2 & IB_PORT_CAP2_IS_LINK_SPEED_HDR_SUPPORTED) { + if (dbg_do_line(&p_local, buf_size, p_prefix_str, + "IB_PORT_CAP2_IS_LINK_SPEED_HDR_SUPPORTED\n", + &total_len) != IB_SUCCESS) + return; + } +} + +static void osm_dump_port_info_to_buf(IN ib_net64_t node_guid, + IN ib_net64_t port_guid, + IN uint8_t port_num, + IN const ib_port_info_t * p_pi, + OUT char * buf) +{ + if (!buf || !p_pi) + return; + else { + sprintf(buf, + "PortInfo dump:\n" + "\t\t\t\tport number..............%u\n" + "\t\t\t\tnode_guid................0x%016" PRIx64 "\n" + "\t\t\t\tport_guid................0x%016" PRIx64 "\n" + "\t\t\t\tm_key....................0x%016" PRIx64 "\n" + "\t\t\t\tsubnet_prefix............0x%016" PRIx64 "\n" + "\t\t\t\tbase_lid.................%u\n" + "\t\t\t\tmaster_sm_base_lid.......%u\n" + 
"\t\t\t\tcapability_mask..........0x%X\n" + "\t\t\t\tdiag_code................0x%X\n" + "\t\t\t\tm_key_lease_period.......0x%X\n" + "\t\t\t\tlocal_port_num...........%u\n" + "\t\t\t\tlink_width_enabled.......0x%X\n" + "\t\t\t\tlink_width_supported.....0x%X\n" + "\t\t\t\tlink_width_active........0x%X\n" + "\t\t\t\tlink_speed_supported.....0x%X\n" + "\t\t\t\tport_state...............%s\n" + "\t\t\t\tstate_info2..............0x%X\n" + "\t\t\t\tm_key_protect_bits.......0x%X\n" + "\t\t\t\tlmc......................0x%X\n" + "\t\t\t\tlink_speed...............0x%X\n" + "\t\t\t\tmtu_smsl.................0x%X\n" + "\t\t\t\tvl_cap_init_type.........0x%X\n" + "\t\t\t\tvl_high_limit............0x%X\n" + "\t\t\t\tvl_arb_high_cap..........0x%X\n" + "\t\t\t\tvl_arb_low_cap...........0x%X\n" + "\t\t\t\tinit_rep_mtu_cap.........0x%X\n" + "\t\t\t\tvl_stall_life............0x%X\n" + "\t\t\t\tvl_enforce...............0x%X\n" + "\t\t\t\tm_key_violations.........0x%X\n" + "\t\t\t\tp_key_violations.........0x%X\n" + "\t\t\t\tq_key_violations.........0x%X\n" + "\t\t\t\tguid_cap.................0x%X\n" + "\t\t\t\tclient_reregister........0x%X\n" + "\t\t\t\tmcast_pkey_trap_suppr....0x%X\n" + "\t\t\t\tsubnet_timeout...........0x%X\n" + "\t\t\t\tresp_time_value..........0x%X\n" + "\t\t\t\terror_threshold..........0x%X\n" + "\t\t\t\tmax_credit_hint..........0x%X\n" + "\t\t\t\tlink_round_trip_latency..0x%X\n" + "\t\t\t\tcapability_mask2.........0x%X\n" + "\t\t\t\tlink_speed_ext_active....0x%X\n" + "\t\t\t\tlink_speed_ext_supported.0x%X\n" + "\t\t\t\tlink_speed_ext_enabled...0x%X\n", + port_num, cl_ntoh64(node_guid), cl_ntoh64(port_guid), + cl_ntoh64(p_pi->m_key), cl_ntoh64(p_pi->subnet_prefix), + cl_ntoh16(p_pi->base_lid), + cl_ntoh16(p_pi->master_sm_base_lid), + cl_ntoh32(p_pi->capability_mask), + cl_ntoh16(p_pi->diag_code), + cl_ntoh16(p_pi->m_key_lease_period), + p_pi->local_port_num, p_pi->link_width_enabled, + p_pi->link_width_supported, p_pi->link_width_active, + 
ib_port_info_get_link_speed_sup(p_pi),
+			ib_get_port_state_str(ib_port_info_get_port_state
+					      (p_pi)), p_pi->state_info2,
+			ib_port_info_get_mpb(p_pi), ib_port_info_get_lmc(p_pi),
+			p_pi->link_speed, p_pi->mtu_smsl, p_pi->vl_cap,
+			p_pi->vl_high_limit, p_pi->vl_arb_high_cap,
+			p_pi->vl_arb_low_cap, p_pi->mtu_cap,
+			p_pi->vl_stall_life, p_pi->vl_enforce,
+			cl_ntoh16(p_pi->m_key_violations),
+			cl_ntoh16(p_pi->p_key_violations),
+			cl_ntoh16(p_pi->q_key_violations), p_pi->guid_cap,
+			ib_port_info_get_client_rereg(p_pi),
+			ib_port_info_get_mcast_pkey_trap_suppress(p_pi),
+			ib_port_info_get_timeout(p_pi),
+			ib_port_info_get_resp_time_value(p_pi),
+			p_pi->error_threshold, cl_ntoh16(p_pi->max_credit_hint),
+			cl_ntoh32(p_pi->link_rt_latency),
+			cl_ntoh16(p_pi->capability_mask2),
+			ib_port_info_get_link_speed_ext_active(p_pi),
+			ib_port_info_get_link_speed_ext_sup(p_pi),
+			p_pi->link_speed_ext_enabled);
+	}
+}
+
+void osm_dump_port_info(IN osm_log_t * p_log, IN ib_net64_t node_guid,
+			IN ib_net64_t port_guid, IN uint8_t port_num,
+			IN const ib_port_info_t * p_pi,
+			IN osm_log_level_t log_level)
+{
+	if (osm_log_is_active(p_log, log_level)) {
+		char buf[BUF_SIZE];
+
+		osm_dump_port_info_to_buf(node_guid, port_guid,
+					  port_num, p_pi, buf);
+
+		osm_log(p_log, log_level, "%s", buf);
+
+		/* show the capabilities masks */
+		if (p_pi->capability_mask) {
+			dbg_get_capabilities_str(buf, BUF_SIZE, "\t\t\t\t",
+						 p_pi);
+			osm_log(p_log, log_level, "%s", buf);
+		}
+		if ((p_pi->capability_mask & IB_PORT_CAP_HAS_CAP_MASK2) &&
+		    p_pi->capability_mask2) {
+			dbg_get_capabilities2_str(buf, BUF_SIZE, "\t\t\t\t",
+						  p_pi);
+			osm_log(p_log, log_level, "%s", buf);
+		}
+	}
+}
+
+/*
+ * Per-file variant of osm_dump_port_info(): nothing is formatted unless
+ * osm_log_is_active_v2() accepts (log_level, file_id), and every message
+ * is emitted through osm_log_v2() with that same file_id.
+ *
+ * Up to three messages are logged: the PortInfo dump itself, the decoded
+ * capability_mask (when non-zero), and the decoded capability_mask2 (when
+ * IB_PORT_CAP_HAS_CAP_MASK2 is set and capability_mask2 is non-zero).
+ */
+void osm_dump_port_info_v2(IN osm_log_t * p_log, IN ib_net64_t node_guid,
+			   IN ib_net64_t port_guid, IN uint8_t port_num,
+			   IN const ib_port_info_t * p_pi, IN const int file_id,
+			   IN osm_log_level_t log_level)
+{
+	if (osm_log_is_active_v2(p_log, log_level, file_id)) {
+		char buf[BUF_SIZE];
+
+		osm_dump_port_info_to_buf(node_guid, port_guid,
+					  port_num, p_pi, buf);
+
+		osm_log_v2(p_log, log_level, file_id, "%s", buf);
+
+		/* show the capabilities masks */
+		if (p_pi->capability_mask) {
+			dbg_get_capabilities_str(buf, BUF_SIZE, "\t\t\t\t",
+						 p_pi);
+			osm_log_v2(p_log, log_level, file_id, "%s", buf);
+		}
+		if ((p_pi->capability_mask & IB_PORT_CAP_HAS_CAP_MASK2) &&
+		    p_pi->capability_mask2) {
+			dbg_get_capabilities2_str(buf, BUF_SIZE, "\t\t\t\t",
+						  p_pi);
+			/* pass file_id here too, like the two writes above */
+			osm_log_v2(p_log, log_level, file_id, "%s", buf);
+		}
+	}
+}
+
+static void osm_dump_mlnx_ext_port_info_to_buf(IN ib_net64_t node_guid,
+					       IN ib_net64_t port_guid, IN uint8_t port_num,
+					       IN const ib_mlnx_ext_port_info_t * p_pi,
+					       OUT char * buf)
+{
+	if (!buf || !p_pi)
+		return;
+	else {
+		sprintf(buf,
+			"MLNX ExtendedPortInfo dump:\n"
+			"\t\t\t\tport number..............%u\n"
+			"\t\t\t\tnode_guid................0x%016" PRIx64 "\n"
+			"\t\t\t\tport_guid................0x%016" PRIx64 "\n"
+			"\t\t\t\tStateChangeEnable........0x%X\n"
+			"\t\t\t\tLinkSpeedSupported.......0x%X\n"
+			"\t\t\t\tLinkSpeedEnabled.........0x%X\n"
+			"\t\t\t\tLinkSpeedActive..........0x%X\n",
+			port_num, cl_ntoh64(node_guid), cl_ntoh64(port_guid),
+			p_pi->state_change_enable, p_pi->link_speed_supported,
+			p_pi->link_speed_enabled, p_pi->link_speed_active);
+	}
+}
+
+void osm_dump_mlnx_ext_port_info(IN osm_log_t * p_log, IN ib_net64_t node_guid,
+				 IN ib_net64_t port_guid, IN uint8_t port_num,
+				 IN const ib_mlnx_ext_port_info_t * p_pi,
+				 IN osm_log_level_t log_level)
+{
+	if (osm_log_is_active(p_log, log_level)) {
+		char buf[BUF_SIZE];
+
+		osm_dump_mlnx_ext_port_info_to_buf(node_guid, port_guid,
+						   port_num, p_pi, buf);
+
+		osm_log(p_log, log_level, "%s", buf);
+	}
+}
+
+void osm_dump_mlnx_ext_port_info_v2(IN osm_log_t * p_log, IN ib_net64_t node_guid,
+				    IN ib_net64_t port_guid, IN uint8_t port_num,
+				    IN const ib_mlnx_ext_port_info_t * p_pi,
+				    IN const int file_id, IN osm_log_level_t log_level)
+{
+	if (osm_log_is_active_v2(p_log, log_level, file_id)) {
+ char buf[BUF_SIZE]; + + osm_dump_mlnx_ext_port_info_to_buf(node_guid, port_guid, + port_num, p_pi, buf); + + osm_log_v2(p_log, log_level, file_id, "%s", buf); + } +} + +static void osm_dump_portinfo_record_to_buf(IN const ib_portinfo_record_t * p_pir, + OUT char * buf) +{ + if (!buf || !p_pir) + return; + else { + const ib_port_info_t *p_pi = &p_pir->port_info; + + sprintf(buf, + "PortInfo Record dump:\n" + "\t\t\t\tRID\n" + "\t\t\t\tEndPortLid...............%u\n" + "\t\t\t\tPortNum..................%u\n" + "\t\t\t\tOptions..................0x%X\n" + "\t\t\t\tPortInfo dump:\n" + "\t\t\t\tm_key....................0x%016" PRIx64 "\n" + "\t\t\t\tsubnet_prefix............0x%016" PRIx64 "\n" + "\t\t\t\tbase_lid.................%u\n" + "\t\t\t\tmaster_sm_base_lid.......%u\n" + "\t\t\t\tcapability_mask..........0x%X\n" + "\t\t\t\tdiag_code................0x%X\n" + "\t\t\t\tm_key_lease_period.......0x%X\n" + "\t\t\t\tlocal_port_num...........%u\n" + "\t\t\t\tlink_width_enabled.......0x%X\n" + "\t\t\t\tlink_width_supported.....0x%X\n" + "\t\t\t\tlink_width_active........0x%X\n" + "\t\t\t\tlink_speed_supported.....0x%X\n" + "\t\t\t\tport_state...............%s\n" + "\t\t\t\tstate_info2..............0x%X\n" + "\t\t\t\tm_key_protect_bits.......0x%X\n" + "\t\t\t\tlmc......................0x%X\n" + "\t\t\t\tlink_speed...............0x%X\n" + "\t\t\t\tmtu_smsl.................0x%X\n" + "\t\t\t\tvl_cap_init_type.........0x%X\n" + "\t\t\t\tvl_high_limit............0x%X\n" + "\t\t\t\tvl_arb_high_cap..........0x%X\n" + "\t\t\t\tvl_arb_low_cap...........0x%X\n" + "\t\t\t\tinit_rep_mtu_cap.........0x%X\n" + "\t\t\t\tvl_stall_life............0x%X\n" + "\t\t\t\tvl_enforce...............0x%X\n" + "\t\t\t\tm_key_violations.........0x%X\n" + "\t\t\t\tp_key_violations.........0x%X\n" + "\t\t\t\tq_key_violations.........0x%X\n" + "\t\t\t\tguid_cap.................0x%X\n" + "\t\t\t\tclient_reregister........0x%X\n" + "\t\t\t\tmcast_pkey_trap_suppr....0x%X\n" + 
"\t\t\t\tsubnet_timeout...........0x%X\n" + "\t\t\t\tresp_time_value..........0x%X\n" + "\t\t\t\terror_threshold..........0x%X\n" + "\t\t\t\tmax_credit_hint..........0x%X\n" + "\t\t\t\tlink_round_trip_latency..0x%X\n" + "\t\t\t\tcapability_mask2.........0x%X\n" + "\t\t\t\tlink_speed_ext_active....0x%X\n" + "\t\t\t\tlink_speed_ext_supported.0x%X\n" + "\t\t\t\tlink_speed_ext_enabled...0x%X\n", + cl_ntoh16(p_pir->lid), p_pir->port_num, p_pir->options, + cl_ntoh64(p_pi->m_key), cl_ntoh64(p_pi->subnet_prefix), + cl_ntoh16(p_pi->base_lid), + cl_ntoh16(p_pi->master_sm_base_lid), + cl_ntoh32(p_pi->capability_mask), + cl_ntoh16(p_pi->diag_code), + cl_ntoh16(p_pi->m_key_lease_period), + p_pi->local_port_num, p_pi->link_width_enabled, + p_pi->link_width_supported, p_pi->link_width_active, + ib_port_info_get_link_speed_sup(p_pi), + ib_get_port_state_str(ib_port_info_get_port_state + (p_pi)), p_pi->state_info2, + ib_port_info_get_mpb(p_pi), ib_port_info_get_lmc(p_pi), + p_pi->link_speed, p_pi->mtu_smsl, p_pi->vl_cap, + p_pi->vl_high_limit, p_pi->vl_arb_high_cap, + p_pi->vl_arb_low_cap, p_pi->mtu_cap, + p_pi->vl_stall_life, p_pi->vl_enforce, + cl_ntoh16(p_pi->m_key_violations), + cl_ntoh16(p_pi->p_key_violations), + cl_ntoh16(p_pi->q_key_violations), p_pi->guid_cap, + ib_port_info_get_client_rereg(p_pi), + ib_port_info_get_mcast_pkey_trap_suppress(p_pi), + ib_port_info_get_timeout(p_pi), + ib_port_info_get_resp_time_value(p_pi), + p_pi->error_threshold, cl_ntoh16(p_pi->max_credit_hint), + cl_ntoh32(p_pi->link_rt_latency), + cl_ntoh16(p_pi->capability_mask2), + ib_port_info_get_link_speed_ext_active(p_pi), + ib_port_info_get_link_speed_ext_sup(p_pi), + p_pi->link_speed_ext_enabled); + } +} + +void osm_dump_portinfo_record(IN osm_log_t * p_log, + IN const ib_portinfo_record_t * p_pir, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active(p_log, log_level)) { + char buf[BUF_SIZE]; + const ib_port_info_t *p_pi = &p_pir->port_info; + + osm_dump_portinfo_record_to_buf(p_pir, 
buf);
+
+		osm_log(p_log, log_level, "%s", buf);
+
+		/* show the capabilities masks */
+		if (p_pi->capability_mask) {
+			dbg_get_capabilities_str(buf, BUF_SIZE, "\t\t\t\t",
+						 p_pi);
+			osm_log(p_log, log_level, "%s", buf);
+		}
+		if ((p_pi->capability_mask & IB_PORT_CAP_HAS_CAP_MASK2) &&
+		    p_pi->capability_mask2) {
+			dbg_get_capabilities2_str(buf, BUF_SIZE, "\t\t\t\t",
+						  p_pi);
+			osm_log(p_log, log_level, "%s", buf);
+		}
+	}
+}
+
+/*
+ * Per-file variant of osm_dump_portinfo_record(): nothing is formatted
+ * unless osm_log_is_active_v2() accepts (log_level, file_id), and every
+ * message is emitted through osm_log_v2() with that same file_id.
+ *
+ * Logs the PortInfo record dump, then the decoded capability_mask (when
+ * non-zero) and capability_mask2 (when IB_PORT_CAP_HAS_CAP_MASK2 is set
+ * and capability_mask2 is non-zero) of the embedded port_info.
+ */
+void osm_dump_portinfo_record_v2(IN osm_log_t * p_log,
+				 IN const ib_portinfo_record_t * p_pir,
+				 IN const int file_id,
+				 IN osm_log_level_t log_level)
+{
+	if (osm_log_is_active_v2(p_log, log_level, file_id)) {
+		char buf[BUF_SIZE];
+		const ib_port_info_t *p_pi = &p_pir->port_info;
+
+		osm_dump_portinfo_record_to_buf(p_pir, buf);
+
+		osm_log_v2(p_log, log_level, file_id, "%s", buf);
+
+		/* show the capabilities masks */
+		if (p_pi->capability_mask) {
+			dbg_get_capabilities_str(buf, BUF_SIZE, "\t\t\t\t",
+						 p_pi);
+			osm_log_v2(p_log, log_level, file_id, "%s", buf);
+		}
+		if ((p_pi->capability_mask & IB_PORT_CAP_HAS_CAP_MASK2) &&
+		    p_pi->capability_mask2) {
+			dbg_get_capabilities2_str(buf, BUF_SIZE, "\t\t\t\t",
+						  p_pi);
+			/* pass file_id here too, like the two writes above */
+			osm_log_v2(p_log, log_level, file_id, "%s", buf);
+		}
+	}
+}
+
+static void osm_dump_guid_info_to_buf(IN ib_net64_t node_guid,
+				      IN ib_net64_t port_guid,
+				      IN uint8_t block_num,
+				      IN const ib_guid_info_t * p_gi,
+				      OUT char * buf)
+{
+	if (!buf || !p_gi)
+		return;
+	else {
+		sprintf(buf,
+			"GUIDInfo dump:\n"
+			"\t\t\t\tblock number............%u\n"
+			"\t\t\t\tnode_guid...............0x%016" PRIx64 "\n"
+			"\t\t\t\tport_guid...............0x%016" PRIx64 "\n"
+			"\t\t\t\tGUID 0..................0x%016" PRIx64 "\n"
+			"\t\t\t\tGUID 1..................0x%016" PRIx64 "\n"
+			"\t\t\t\tGUID 2..................0x%016" PRIx64 "\n"
+			"\t\t\t\tGUID 3..................0x%016" PRIx64 "\n"
+			"\t\t\t\tGUID 4..................0x%016" PRIx64 "\n"
+			"\t\t\t\tGUID 5..................0x%016" PRIx64 "\n"
+			"\t\t\t\tGUID 6..................0x%016" PRIx64 "\n"
+
"\t\t\t\tGUID 7..................0x%016" PRIx64 "\n", + block_num, cl_ntoh64(node_guid), cl_ntoh64(port_guid), + cl_ntoh64(p_gi->guid[0]), cl_ntoh64(p_gi->guid[1]), + cl_ntoh64(p_gi->guid[2]), cl_ntoh64(p_gi->guid[3]), + cl_ntoh64(p_gi->guid[4]), cl_ntoh64(p_gi->guid[5]), + cl_ntoh64(p_gi->guid[6]), cl_ntoh64(p_gi->guid[7])); + } +} + +void osm_dump_guid_info(IN osm_log_t * p_log, IN ib_net64_t node_guid, + IN ib_net64_t port_guid, IN uint8_t block_num, + IN const ib_guid_info_t * p_gi, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active(p_log, log_level)) { + char buf[BUF_SIZE]; + + osm_dump_guid_info_to_buf(node_guid, port_guid, + block_num, p_gi, buf); + + osm_log(p_log, log_level, "%s", buf); + } +} + +void osm_dump_guid_info_v2(IN osm_log_t * p_log, IN ib_net64_t node_guid, + IN ib_net64_t port_guid, IN uint8_t block_num, + IN const ib_guid_info_t * p_gi, + IN const int file_id, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active_v2(p_log, log_level, file_id)) { + char buf[BUF_SIZE]; + + osm_dump_guid_info_to_buf(node_guid, port_guid, + block_num, p_gi, buf); + + osm_log_v2(p_log, log_level, file_id, "%s", buf); + } +} + +static void osm_dump_guidinfo_record_to_buf(IN const ib_guidinfo_record_t * p_gir, + OUT char * buf) +{ + if (!buf || !p_gir) + return; + else { + const ib_guid_info_t *p_gi = &p_gir->guid_info; + + sprintf(buf, + "GUIDInfo Record dump:\n" + "\t\t\t\tRID\n" + "\t\t\t\tLid.....................%u\n" + "\t\t\t\tBlockNum................0x%X\n" + "\t\t\t\tReserved................0x%X\n" + "\t\t\t\tGUIDInfo dump:\n" + "\t\t\t\tReserved................0x%X\n" + "\t\t\t\tGUID 0..................0x%016" PRIx64 "\n" + "\t\t\t\tGUID 1..................0x%016" PRIx64 "\n" + "\t\t\t\tGUID 2..................0x%016" PRIx64 "\n" + "\t\t\t\tGUID 3..................0x%016" PRIx64 "\n" + "\t\t\t\tGUID 4..................0x%016" PRIx64 "\n" + "\t\t\t\tGUID 5..................0x%016" PRIx64 "\n" + "\t\t\t\tGUID 6..................0x%016" PRIx64 
"\n" + "\t\t\t\tGUID 7..................0x%016" PRIx64 "\n", + cl_ntoh16(p_gir->lid), p_gir->block_num, p_gir->resv, + cl_ntoh32(p_gir->reserved), + cl_ntoh64(p_gi->guid[0]), cl_ntoh64(p_gi->guid[1]), + cl_ntoh64(p_gi->guid[2]), cl_ntoh64(p_gi->guid[3]), + cl_ntoh64(p_gi->guid[4]), cl_ntoh64(p_gi->guid[5]), + cl_ntoh64(p_gi->guid[6]), cl_ntoh64(p_gi->guid[7])); + } +} +void osm_dump_guidinfo_record(IN osm_log_t * p_log, + IN const ib_guidinfo_record_t * p_gir, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active(p_log, log_level)) { + char buf[BUF_SIZE]; + + osm_dump_guidinfo_record_to_buf(p_gir, buf); + + osm_log(p_log, log_level, "%s", buf); + } +} + +void osm_dump_guidinfo_record_v2(IN osm_log_t * p_log, + IN const ib_guidinfo_record_t * p_gir, + IN const int file_id, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active_v2(p_log, log_level, file_id)) { + char buf[BUF_SIZE]; + + osm_dump_guidinfo_record_to_buf(p_gir, buf); + + osm_log_v2(p_log, log_level, file_id, "%s", buf); + } +} + +static void osm_dump_node_info_to_buf(IN const ib_node_info_t * p_ni, + OUT char * buf) +{ + if (!buf || !p_ni) + return; + else { + sprintf(buf, + "NodeInfo dump:\n" + "\t\t\t\tbase_version............0x%X\n" + "\t\t\t\tclass_version...........0x%X\n" + "\t\t\t\tnode_type...............%s\n" + "\t\t\t\tnum_ports...............%u\n" + "\t\t\t\tsys_guid................0x%016" PRIx64 "\n" + "\t\t\t\tnode_guid...............0x%016" PRIx64 "\n" + "\t\t\t\tport_guid...............0x%016" PRIx64 "\n" + "\t\t\t\tpartition_cap...........0x%X\n" + "\t\t\t\tdevice_id...............0x%X\n" + "\t\t\t\trevision................0x%X\n" + "\t\t\t\tport_num................%u\n" + "\t\t\t\tvendor_id...............0x%X\n", + p_ni->base_version, p_ni->class_version, + ib_get_node_type_str(p_ni->node_type), p_ni->num_ports, + cl_ntoh64(p_ni->sys_guid), cl_ntoh64(p_ni->node_guid), + cl_ntoh64(p_ni->port_guid), + cl_ntoh16(p_ni->partition_cap), + cl_ntoh16(p_ni->device_id), 
cl_ntoh32(p_ni->revision), + ib_node_info_get_local_port_num(p_ni), + cl_ntoh32(ib_node_info_get_vendor_id(p_ni))); + } +} + +void osm_dump_node_info(IN osm_log_t * p_log, IN const ib_node_info_t * p_ni, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active(p_log, log_level)) { + char buf[BUF_SIZE]; + + osm_dump_node_info_to_buf(p_ni, buf); + + osm_log(p_log, log_level, "%s", buf); + } +} + +void osm_dump_node_info_v2(IN osm_log_t * p_log, IN const ib_node_info_t * p_ni, + IN const int file_id, IN osm_log_level_t log_level) +{ + if (osm_log_is_active_v2(p_log, log_level, file_id)) { + char buf[BUF_SIZE]; + + osm_dump_node_info_to_buf(p_ni, buf); + + osm_log_v2(p_log, log_level, file_id, "%s", buf); + } +} + +static void osm_dump_node_record_to_buf(IN const ib_node_record_t * p_nr, + OUT char * buf) +{ + if (!buf || !p_nr) + return; + else { + char desc[sizeof(p_nr->node_desc.description) + 1]; + const ib_node_info_t *p_ni = &p_nr->node_info; + + memcpy(desc, p_nr->node_desc.description, + sizeof(p_nr->node_desc.description)); + desc[sizeof(desc) - 1] = '\0'; + sprintf(buf, + "Node Record dump:\n" + "\t\t\t\tRID\n" + "\t\t\t\tLid.....................%u\n" + "\t\t\t\tReserved................0x%X\n" + "\t\t\t\tNodeInfo dump:\n" + "\t\t\t\tbase_version............0x%X\n" + "\t\t\t\tclass_version...........0x%X\n" + "\t\t\t\tnode_type...............%s\n" + "\t\t\t\tnum_ports...............%u\n" + "\t\t\t\tsys_guid................0x%016" PRIx64 "\n" + "\t\t\t\tnode_guid...............0x%016" PRIx64 "\n" + "\t\t\t\tport_guid...............0x%016" PRIx64 "\n" + "\t\t\t\tpartition_cap...........0x%X\n" + "\t\t\t\tdevice_id...............0x%X\n" + "\t\t\t\trevision................0x%X\n" + "\t\t\t\tport_num................%u\n" + "\t\t\t\tvendor_id...............0x%X\n" + "\t\t\t\tNodeDescription\n" + "\t\t\t\t%s\n", + cl_ntoh16(p_nr->lid), cl_ntoh16(p_nr->resv), + p_ni->base_version, p_ni->class_version, + ib_get_node_type_str(p_ni->node_type), p_ni->num_ports, + 
cl_ntoh64(p_ni->sys_guid), cl_ntoh64(p_ni->node_guid), + cl_ntoh64(p_ni->port_guid), + cl_ntoh16(p_ni->partition_cap), + cl_ntoh16(p_ni->device_id), cl_ntoh32(p_ni->revision), + ib_node_info_get_local_port_num(p_ni), + cl_ntoh32(ib_node_info_get_vendor_id(p_ni)), desc); + } +} + +void osm_dump_node_record(IN osm_log_t * p_log, + IN const ib_node_record_t * p_nr, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active(p_log, log_level)) { + char buf[BUF_SIZE]; + + osm_dump_node_record_to_buf(p_nr, buf); + + osm_log(p_log, log_level, "%s", buf); + } +} + +void osm_dump_node_record_v2(IN osm_log_t * p_log, + IN const ib_node_record_t * p_nr, + IN const int file_id, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active_v2(p_log, log_level, file_id)) { + char buf[BUF_SIZE]; + + osm_dump_node_record_to_buf(p_nr, buf); + + osm_log_v2(p_log, log_level, file_id, "%s", buf); + } +} + +static void osm_dump_path_record_to_buf(IN const ib_path_rec_t * p_pr, + OUT char * buf) +{ + if (!buf || !p_pr) + return; + else { + char gid_str[INET6_ADDRSTRLEN]; + char gid_str2[INET6_ADDRSTRLEN]; + + sprintf(buf, + "PathRecord dump:\n" + "\t\t\t\tservice_id..............0x%016" PRIx64 "\n" + "\t\t\t\tdgid....................%s\n" + "\t\t\t\tsgid....................%s\n" + "\t\t\t\tdlid....................%u\n" + "\t\t\t\tslid....................%u\n" + "\t\t\t\thop_flow_raw............0x%X\n" + "\t\t\t\ttclass..................0x%X\n" + "\t\t\t\tnum_path_revers.........0x%X\n" + "\t\t\t\tpkey....................0x%X\n" + "\t\t\t\tqos_class...............0x%X\n" + "\t\t\t\tsl......................0x%X\n" + "\t\t\t\tmtu.....................0x%X\n" + "\t\t\t\trate....................0x%X\n" + "\t\t\t\tpkt_life................0x%X\n" + "\t\t\t\tpreference..............0x%X\n" + "\t\t\t\tresv2...................0x%02X%02X%02X%02X%02X%02X\n", + cl_ntoh64(p_pr->service_id), + inet_ntop(AF_INET6, p_pr->dgid.raw, gid_str, + sizeof gid_str), + inet_ntop(AF_INET6, p_pr->sgid.raw, gid_str2, + 
sizeof gid_str2),
+			cl_ntoh16(p_pr->dlid), cl_ntoh16(p_pr->slid),
+			cl_ntoh32(p_pr->hop_flow_raw), p_pr->tclass,
+			p_pr->num_path, cl_ntoh16(p_pr->pkey),
+			ib_path_rec_qos_class(p_pr), ib_path_rec_sl(p_pr),
+			p_pr->mtu, p_pr->rate, p_pr->pkt_life, p_pr->preference,
+			p_pr->resv2[0], p_pr->resv2[1], p_pr->resv2[2],
+			p_pr->resv2[3], p_pr->resv2[4], p_pr->resv2[5]);
+	}
+}
+
+void osm_dump_path_record(IN osm_log_t * p_log, IN const ib_path_rec_t * p_pr,
+			  IN osm_log_level_t log_level)
+{
+	if (osm_log_is_active(p_log, log_level)) {
+		char buf[BUF_SIZE];
+
+		osm_dump_path_record_to_buf(p_pr, buf);
+
+		osm_log(p_log, log_level, "%s", buf);
+	}
+}
+
+void osm_dump_path_record_v2(IN osm_log_t * p_log, IN const ib_path_rec_t * p_pr,
+			     IN const int file_id, IN osm_log_level_t log_level)
+{
+	if (osm_log_is_active_v2(p_log, log_level, file_id)) {
+		char buf[BUF_SIZE];
+
+		osm_dump_path_record_to_buf(p_pr, buf);
+
+		osm_log_v2(p_log, log_level, file_id, "%s", buf);
+	}
+}
+
+/*
+ * Format a MultiPath record into buf: the fixed fields first, followed by
+ * one "sgidNN"/"dgidNN" line per GID. The GID lines are built in a local
+ * scratch buffer by walking p_mpr->gids, source GIDs first and destination
+ * GIDs after them.
+ *
+ * Does nothing when buf or p_mpr is NULL.
+ */
+static void osm_dump_multipath_record_to_buf(IN const ib_multipath_rec_t * p_mpr,
+					     OUT char * buf)
+{
+	if (!buf || !p_mpr)
+		return;
+	else {
+		char gid_str[INET6_ADDRSTRLEN];
+		char buf_line[1024];
+		ib_gid_t const *p_gid = p_mpr->gids;
+		int i, n = 0;
+
+		/*
+		 * buf_line feeds the trailing "%s" below even when both
+		 * counts are zero, so it must always hold a valid string.
+		 */
+		buf_line[0] = '\0';
+
+		/*
+		 * snprintf() returns the length it wanted to write, so once
+		 * a line is truncated n reaches sizeof(buf_line) and both
+		 * loops stop instead of writing past the end of buf_line.
+		 */
+		if (p_mpr->sgid_count) {
+			for (i = 0; i < p_mpr->sgid_count &&
+			     n < (int)sizeof(buf_line); i++) {
+				n += snprintf(buf_line + n,
+					      sizeof(buf_line) - n,
+					      "\t\t\t\tsgid%02d.................."
+					      "%s\n", i + 1,
+					      inet_ntop(AF_INET6, p_gid->raw,
+							gid_str,
+							sizeof gid_str));
+				p_gid++;
+			}
+		}
+		if (p_mpr->dgid_count) {
+			for (i = 0; i < p_mpr->dgid_count &&
+			     n < (int)sizeof(buf_line); i++) {
+				n += snprintf(buf_line + n,
+					      sizeof(buf_line) - n,
+					      "\t\t\t\tdgid%02d.................."
+					      "%s\n", i + 1,
+					      inet_ntop(AF_INET6, p_gid->raw,
+							gid_str,
+							sizeof gid_str));
+				p_gid++;
+			}
+		}
+		sprintf(buf,
+			"MultiPath Record dump:\n"
+			"\t\t\t\thop_flow_raw............0x%X\n"
+			"\t\t\t\ttclass..................0x%X\n"
+			"\t\t\t\tnum_path_revers.........0x%X\n"
+			"\t\t\t\tpkey....................0x%X\n"
+			"\t\t\t\tqos_class...............0x%X\n"
+			"\t\t\t\tsl......................0x%X\n"
+			"\t\t\t\tmtu.....................0x%X\n"
+			"\t\t\t\trate....................0x%X\n"
+			"\t\t\t\tpkt_life................0x%X\n"
+			"\t\t\t\tindependence............0x%X\n"
+			"\t\t\t\tsgid_count..............0x%X\n"
+			"\t\t\t\tdgid_count..............0x%X\n"
+			"\t\t\t\tservice_id..............0x%016" PRIx64 "\n"
+			"%s\n",
+			cl_ntoh32(p_mpr->hop_flow_raw), p_mpr->tclass,
+			p_mpr->num_path, cl_ntoh16(p_mpr->pkey),
+			ib_multipath_rec_qos_class(p_mpr),
+			ib_multipath_rec_sl(p_mpr), p_mpr->mtu, p_mpr->rate,
+			p_mpr->pkt_life, p_mpr->independence,
+			p_mpr->sgid_count, p_mpr->dgid_count,
+			cl_ntoh64(ib_multipath_rec_service_id(p_mpr)),
+			buf_line);
+	}
+}
+
+void osm_dump_multipath_record(IN osm_log_t * p_log,
+			       IN const ib_multipath_rec_t * p_mpr,
+			       IN osm_log_level_t log_level)
+{
+	if (osm_log_is_active(p_log, log_level)) {
+		char buf[BUF_SIZE];
+
+		osm_dump_multipath_record_to_buf(p_mpr, buf);
+
+		osm_log(p_log, log_level, "%s", buf);
+	}
+}
+
+void osm_dump_multipath_record_v2(IN osm_log_t * p_log,
+				  IN const ib_multipath_rec_t * p_mpr,
+				  IN const int file_id,
+				  IN osm_log_level_t log_level)
+{
+	if (osm_log_is_active_v2(p_log, log_level, file_id)) {
+		char buf[BUF_SIZE];
+
+		osm_dump_multipath_record_to_buf(p_mpr, buf);
+
+		osm_log_v2(p_log, log_level, file_id, "%s", buf);
+	}
+}
+
+static void osm_dump_mc_record_to_buf(IN const ib_member_rec_t * p_mcmr,
+				      OUT char * buf)
+{
+	if(!buf || !p_mcmr)
+		return;
+	else {
+		char gid_str[INET6_ADDRSTRLEN];
+		char gid_str2[INET6_ADDRSTRLEN];
+
+		sprintf(buf,
+			"MCMember Record dump:\n"
+			"\t\t\t\tMGID....................%s\n"
+
"\t\t\t\tPortGid.................%s\n" + "\t\t\t\tqkey....................0x%X\n" + "\t\t\t\tmlid....................0x%X\n" + "\t\t\t\tmtu.....................0x%X\n" + "\t\t\t\tTClass..................0x%X\n" + "\t\t\t\tpkey....................0x%X\n" + "\t\t\t\trate....................0x%X\n" + "\t\t\t\tpkt_life................0x%X\n" + "\t\t\t\tSLFlowLabelHopLimit.....0x%X\n" + "\t\t\t\tScopeState..............0x%X\n" + "\t\t\t\tProxyJoin...............0x%X\n", + inet_ntop(AF_INET6, p_mcmr->mgid.raw, gid_str, + sizeof gid_str), + inet_ntop(AF_INET6, p_mcmr->port_gid.raw, gid_str2, + sizeof gid_str2), + cl_ntoh32(p_mcmr->qkey), cl_ntoh16(p_mcmr->mlid), + p_mcmr->mtu, p_mcmr->tclass, cl_ntoh16(p_mcmr->pkey), + p_mcmr->rate, p_mcmr->pkt_life, + cl_ntoh32(p_mcmr->sl_flow_hop), + p_mcmr->scope_state, p_mcmr->proxy_join); + } +} + +void osm_dump_mc_record(IN osm_log_t * p_log, IN const ib_member_rec_t * p_mcmr, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active(p_log, log_level)) { + char buf[BUF_SIZE]; + + osm_dump_mc_record_to_buf(p_mcmr, buf); + + osm_log(p_log, log_level, "%s", buf); + } +} + +void osm_dump_mc_record_v2(IN osm_log_t * p_log, IN const ib_member_rec_t * p_mcmr, + IN const int file_id, IN osm_log_level_t log_level) +{ + if (osm_log_is_active_v2(p_log, log_level, file_id)) { + char buf[BUF_SIZE]; + + osm_dump_mc_record_to_buf(p_mcmr, buf); + + osm_log_v2(p_log, log_level, file_id, "%s", buf); + } +} + +static void osm_dump_service_record_to_buf(IN const ib_service_record_t * p_sr, + OUT char * buf) +{ + if (!buf || !p_sr) + return; + else { + char gid_str[INET6_ADDRSTRLEN]; + char buf_service_key[35]; + char buf_service_name[65]; + + sprintf(buf_service_key, + "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x", + p_sr->service_key[0], p_sr->service_key[1], + p_sr->service_key[2], p_sr->service_key[3], + p_sr->service_key[4], p_sr->service_key[5], + p_sr->service_key[6], p_sr->service_key[7], + p_sr->service_key[8], 
p_sr->service_key[9], + p_sr->service_key[10], p_sr->service_key[11], + p_sr->service_key[12], p_sr->service_key[13], + p_sr->service_key[14], p_sr->service_key[15]); + strncpy(buf_service_name, (char *)p_sr->service_name, 64); + buf_service_name[64] = '\0'; + + sprintf(buf, + "Service Record dump:\n" + "\t\t\t\tServiceID...............0x%016" PRIx64 "\n" + "\t\t\t\tServiceGID..............%s\n" + "\t\t\t\tServiceP_Key............0x%X\n" + "\t\t\t\tServiceLease............0x%X\n" + "\t\t\t\tServiceKey..............%s\n" + "\t\t\t\tServiceName.............%s\n" + "\t\t\t\tServiceData8.1..........0x%X\n" + "\t\t\t\tServiceData8.2..........0x%X\n" + "\t\t\t\tServiceData8.3..........0x%X\n" + "\t\t\t\tServiceData8.4..........0x%X\n" + "\t\t\t\tServiceData8.5..........0x%X\n" + "\t\t\t\tServiceData8.6..........0x%X\n" + "\t\t\t\tServiceData8.7..........0x%X\n" + "\t\t\t\tServiceData8.8..........0x%X\n" + "\t\t\t\tServiceData8.9..........0x%X\n" + "\t\t\t\tServiceData8.10.........0x%X\n" + "\t\t\t\tServiceData8.11.........0x%X\n" + "\t\t\t\tServiceData8.12.........0x%X\n" + "\t\t\t\tServiceData8.13.........0x%X\n" + "\t\t\t\tServiceData8.14.........0x%X\n" + "\t\t\t\tServiceData8.15.........0x%X\n" + "\t\t\t\tServiceData8.16.........0x%X\n" + "\t\t\t\tServiceData16.1.........0x%X\n" + "\t\t\t\tServiceData16.2.........0x%X\n" + "\t\t\t\tServiceData16.3.........0x%X\n" + "\t\t\t\tServiceData16.4.........0x%X\n" + "\t\t\t\tServiceData16.5.........0x%X\n" + "\t\t\t\tServiceData16.6.........0x%X\n" + "\t\t\t\tServiceData16.7.........0x%X\n" + "\t\t\t\tServiceData16.8.........0x%X\n" + "\t\t\t\tServiceData32.1.........0x%X\n" + "\t\t\t\tServiceData32.2.........0x%X\n" + "\t\t\t\tServiceData32.3.........0x%X\n" + "\t\t\t\tServiceData32.4.........0x%X\n" + "\t\t\t\tServiceData64.1.........0x%016" PRIx64 "\n" + "\t\t\t\tServiceData64.2.........0x%016" PRIx64 "\n", + cl_ntoh64(p_sr->service_id), + inet_ntop(AF_INET6, p_sr->service_gid.raw, gid_str, + sizeof gid_str), + 
cl_ntoh16(p_sr->service_pkey), + cl_ntoh32(p_sr->service_lease), + buf_service_key, buf_service_name, + p_sr->service_data8[0], p_sr->service_data8[1], + p_sr->service_data8[2], p_sr->service_data8[3], + p_sr->service_data8[4], p_sr->service_data8[5], + p_sr->service_data8[6], p_sr->service_data8[7], + p_sr->service_data8[8], p_sr->service_data8[9], + p_sr->service_data8[10], p_sr->service_data8[11], + p_sr->service_data8[12], p_sr->service_data8[13], + p_sr->service_data8[14], p_sr->service_data8[15], + cl_ntoh16(p_sr->service_data16[0]), + cl_ntoh16(p_sr->service_data16[1]), + cl_ntoh16(p_sr->service_data16[2]), + cl_ntoh16(p_sr->service_data16[3]), + cl_ntoh16(p_sr->service_data16[4]), + cl_ntoh16(p_sr->service_data16[5]), + cl_ntoh16(p_sr->service_data16[6]), + cl_ntoh16(p_sr->service_data16[7]), + cl_ntoh32(p_sr->service_data32[0]), + cl_ntoh32(p_sr->service_data32[1]), + cl_ntoh32(p_sr->service_data32[2]), + cl_ntoh32(p_sr->service_data32[3]), + cl_ntoh64(p_sr->service_data64[0]), + cl_ntoh64(p_sr->service_data64[1])); + } +} + +void osm_dump_service_record(IN osm_log_t * p_log, + IN const ib_service_record_t * p_sr, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active(p_log, log_level)) { + char buf[BUF_SIZE]; + + osm_dump_service_record_to_buf(p_sr, buf); + + osm_log(p_log, log_level, "%s", buf); + } +} + +void osm_dump_service_record_v2(IN osm_log_t * p_log, + IN const ib_service_record_t * p_sr, + IN const int file_id, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active_v2(p_log, log_level, file_id)) { + char buf[BUF_SIZE]; + + osm_dump_service_record_to_buf(p_sr, buf); + + osm_log_v2(p_log, log_level, file_id, "%s", buf); + } +} + +static void osm_dump_inform_info_to_buf_generic(IN const ib_inform_info_t * p_ii, + OUT char * buf) +{ + if (!buf || !p_ii) + return; + else { + uint32_t qpn; + uint8_t resp_time_val; + char gid_str[INET6_ADDRSTRLEN]; + + ib_inform_info_get_qpn_resp_time(p_ii->g_or_v.generic. 
+ qpn_resp_time_val, &qpn, + &resp_time_val); + sprintf(buf, + "InformInfo dump:\n" + "\t\t\t\tgid.....................%s\n" + "\t\t\t\tlid_range_begin.........%u\n" + "\t\t\t\tlid_range_end...........%u\n" + "\t\t\t\tis_generic..............0x%X\n" + "\t\t\t\tsubscribe...............0x%X\n" + "\t\t\t\ttrap_type...............0x%X\n" + "\t\t\t\ttrap_num................%u\n" + "\t\t\t\tqpn.....................0x%06X\n" + "\t\t\t\tresp_time_val...........0x%X\n" + "\t\t\t\tnode_type...............0x%06X\n" "", + inet_ntop(AF_INET6, p_ii->gid.raw, gid_str, + sizeof gid_str), + cl_ntoh16(p_ii->lid_range_begin), + cl_ntoh16(p_ii->lid_range_end), + p_ii->is_generic, p_ii->subscribe, + cl_ntoh16(p_ii->trap_type), + cl_ntoh16(p_ii->g_or_v.generic.trap_num), + cl_ntoh32(qpn), resp_time_val, + cl_ntoh32(ib_inform_info_get_prod_type(p_ii))); + } +} + +static void osm_dump_inform_info_to_buf(IN const ib_inform_info_t * p_ii, + OUT char * buf) +{ + if (!buf || !p_ii) + return; + else { + uint32_t qpn; + uint8_t resp_time_val; + char gid_str[INET6_ADDRSTRLEN]; + + ib_inform_info_get_qpn_resp_time(p_ii->g_or_v.generic. 
+						 qpn_resp_time_val, &qpn,
+						 &resp_time_val);
+		sprintf(buf,
+			"InformInfo dump:\n"
+			"\t\t\t\tgid.....................%s\n"
+			"\t\t\t\tlid_range_begin.........%u\n"
+			"\t\t\t\tlid_range_end...........%u\n"
+			"\t\t\t\tis_generic..............0x%X\n"
+			"\t\t\t\tsubscribe...............0x%X\n"
+			"\t\t\t\ttrap_type...............0x%X\n"
+			"\t\t\t\tdev_id..................0x%X\n"
+			"\t\t\t\tqpn.....................0x%06X\n"
+			"\t\t\t\tresp_time_val...........0x%X\n"
+			"\t\t\t\tvendor_id...............0x%06X\n" "",
+			inet_ntop(AF_INET6, p_ii->gid.raw, gid_str,
+				  sizeof gid_str),
+			cl_ntoh16(p_ii->lid_range_begin),
+			cl_ntoh16(p_ii->lid_range_end),
+			p_ii->is_generic, p_ii->subscribe,
+			cl_ntoh16(p_ii->trap_type),
+			cl_ntoh16(p_ii->g_or_v.vend.dev_id),
+			cl_ntoh32(qpn), resp_time_val,
+			cl_ntoh32(ib_inform_info_get_prod_type(p_ii)));
+	}
+}
+
+/*
+ * Log a text dump of an InformInfo attribute at log_level.
+ *
+ * Nothing is formatted unless osm_log_is_active() reports log_level as
+ * enabled.  p_ii->is_generic selects the generic or the vendor formatter.
+ * p_ii is dereferenced here without a NULL check.
+ */
+void osm_dump_inform_info(IN osm_log_t * p_log,
+			  IN const ib_inform_info_t * p_ii,
+			  IN osm_log_level_t log_level)
+{
+	if (osm_log_is_active(p_log, log_level)) {
+		char buf[BUF_SIZE];
+
+		if (p_ii->is_generic)
+			osm_dump_inform_info_to_buf_generic(p_ii, buf);
+		else
+			osm_dump_inform_info_to_buf(p_ii, buf);
+
+		osm_log(p_log, log_level, "%s", buf);
+	}
+}
+
+/*
+ * Same as osm_dump_inform_info(), but the activity check and the log call
+ * both take file_id (osm_log_is_active_v2() / osm_log_v2()).
+ */
+void osm_dump_inform_info_v2(IN osm_log_t * p_log,
+			     IN const ib_inform_info_t * p_ii,
+			     IN const int file_id,
+			     IN osm_log_level_t log_level)
+{
+	if (osm_log_is_active_v2(p_log, log_level, file_id)) {
+		char buf[BUF_SIZE];
+
+		if (p_ii->is_generic)
+			osm_dump_inform_info_to_buf_generic(p_ii, buf);
+		else
+			osm_dump_inform_info_to_buf(p_ii, buf);
+
+		osm_log_v2(p_log, log_level, file_id, "%s", buf);
+	}
+}
+
+/*
+ * Format an InformInfo record whose InformInfo is generic (trap_num /
+ * node_type fields) into buf as a multi-line text dump.
+ *
+ * Does nothing when buf or p_iir is NULL.  The text is written with an
+ * unbounded sprintf(), so buf must be large enough to hold it.
+ */
+static void osm_dump_inform_info_record_to_buf_generic(IN const ib_inform_info_record_t * p_iir,
+						       OUT char * buf)
+{
+	/*
+	 * Bail out only when an argument is missing.  Testing "p_iir"
+	 * instead of "!p_iir" here would return for every valid record and
+	 * leave buf unwritten, and would dereference a NULL record.
+	 */
+	if (!buf || !p_iir)
+		return;
+	else {
+		char gid_str[INET6_ADDRSTRLEN];
+		char gid_str2[INET6_ADDRSTRLEN];
+		uint32_t qpn;
+		uint8_t resp_time_val;
+
+		ib_inform_info_get_qpn_resp_time(p_iir->inform_info.g_or_v.
+						 generic.qpn_resp_time_val,
+						 &qpn, &resp_time_val);
+		sprintf(buf,
+			"InformInfo Record dump:\n"
+			"\t\t\t\tRID\n"
+			"\t\t\t\tSubscriberGID...........%s\n"
+			"\t\t\t\tSubscriberEnum..........0x%X\n"
+			"\t\t\t\tInformInfo dump:\n"
+			"\t\t\t\tgid.....................%s\n"
+			"\t\t\t\tlid_range_begin.........%u\n"
+			"\t\t\t\tlid_range_end...........%u\n"
+			"\t\t\t\tis_generic..............0x%X\n"
+			"\t\t\t\tsubscribe...............0x%X\n"
+			"\t\t\t\ttrap_type...............0x%X\n"
+			"\t\t\t\ttrap_num................%u\n"
+			"\t\t\t\tqpn.....................0x%06X\n"
+			"\t\t\t\tresp_time_val...........0x%X\n"
+			"\t\t\t\tnode_type...............0x%06X\n" "",
+			inet_ntop(AF_INET6, p_iir->subscriber_gid.raw,
+				  gid_str, sizeof gid_str),
+			cl_ntoh16(p_iir->subscriber_enum),
+			inet_ntop(AF_INET6, p_iir->inform_info.gid.raw,
+				  gid_str2, sizeof gid_str2),
+			cl_ntoh16(p_iir->inform_info.lid_range_begin),
+			cl_ntoh16(p_iir->inform_info.lid_range_end),
+			p_iir->inform_info.is_generic,
+			p_iir->inform_info.subscribe,
+			cl_ntoh16(p_iir->inform_info.trap_type),
+			cl_ntoh16(p_iir->inform_info.g_or_v.generic.
+				  trap_num), cl_ntoh32(qpn),
+			resp_time_val,
+			cl_ntoh32(ib_inform_info_get_prod_type
+				  (&p_iir->inform_info)));
+	}
+}
+
+/*
+ * Format an InformInfo record whose InformInfo is vendor-specific (dev_id /
+ * vendor_id fields) into buf as a multi-line text dump.
+ *
+ * Does nothing when buf or p_iir is NULL.  The text is written with an
+ * unbounded sprintf(), so buf must be large enough to hold it.
+ */
+static void osm_dump_inform_info_record_to_buf(IN const ib_inform_info_record_t * p_iir,
+					       OUT char * buf)
+{
+	/*
+	 * Bail out only when an argument is missing.  Testing "p_iir"
+	 * instead of "!p_iir" here would return for every valid record and
+	 * leave buf unwritten, and would dereference a NULL record.
+	 */
+	if (!buf || !p_iir)
+		return;
+	else {
+		char gid_str[INET6_ADDRSTRLEN];
+		char gid_str2[INET6_ADDRSTRLEN];
+		uint32_t qpn;
+		uint8_t resp_time_val;
+
+		ib_inform_info_get_qpn_resp_time(p_iir->inform_info.g_or_v.
+ generic.qpn_resp_time_val, + &qpn, &resp_time_val); + sprintf(buf, + "InformInfo Record dump:\n" + "\t\t\t\tRID\n" + "\t\t\t\tSubscriberGID...........%s\n" + "\t\t\t\tSubscriberEnum..........0x%X\n" + "\t\t\t\tInformInfo dump:\n" + "\t\t\t\tgid.....................%s\n" + "\t\t\t\tlid_range_begin.........%u\n" + "\t\t\t\tlid_range_end...........%u\n" + "\t\t\t\tis_generic..............0x%X\n" + "\t\t\t\tsubscribe...............0x%X\n" + "\t\t\t\ttrap_type...............0x%X\n" + "\t\t\t\tdev_id..................0x%X\n" + "\t\t\t\tqpn.....................0x%06X\n" + "\t\t\t\tresp_time_val...........0x%X\n" + "\t\t\t\tvendor_id...............0x%06X\n" "", + inet_ntop(AF_INET6, p_iir->subscriber_gid.raw, + gid_str, sizeof gid_str), + cl_ntoh16(p_iir->subscriber_enum), + inet_ntop(AF_INET6, p_iir->inform_info.gid.raw, + gid_str2, sizeof gid_str2), + cl_ntoh16(p_iir->inform_info.lid_range_begin), + cl_ntoh16(p_iir->inform_info.lid_range_end), + p_iir->inform_info.is_generic, + p_iir->inform_info.subscribe, + cl_ntoh16(p_iir->inform_info.trap_type), + cl_ntoh16(p_iir->inform_info.g_or_v.vend. 
+ dev_id), cl_ntoh32(qpn), + resp_time_val, + cl_ntoh32(ib_inform_info_get_prod_type + (&p_iir->inform_info))); + } +} + +void osm_dump_inform_info_record(IN osm_log_t * p_log, + IN const ib_inform_info_record_t * p_iir, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active(p_log, log_level)) { + char buf[BUF_SIZE]; + + if (p_iir->inform_info.is_generic) + osm_dump_inform_info_record_to_buf_generic(p_iir, buf); + else + osm_dump_inform_info_record_to_buf(p_iir, buf); + + osm_log(p_log, log_level, "%s", buf); + } +} + +void osm_dump_inform_info_record_v2(IN osm_log_t * p_log, + IN const ib_inform_info_record_t * p_iir, + IN const int file_id, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active_v2(p_log, log_level, file_id)) { + char buf[BUF_SIZE]; + + if (p_iir->inform_info.is_generic) + osm_dump_inform_info_record_to_buf_generic(p_iir, buf); + else + osm_dump_inform_info_record_to_buf(p_iir, buf); + + osm_log_v2(p_log, log_level, file_id, "%s", buf); + } +} + +static void osm_dump_link_record_to_buf(IN const ib_link_record_t * p_lr, + OUT char * buf) +{ + if (!buf || !p_lr) + return; + else { + sprintf(buf, + "Link Record dump:\n" + "\t\t\t\tfrom_lid................%u\n" + "\t\t\t\tfrom_port_num...........%u\n" + "\t\t\t\tto_port_num.............%u\n" + "\t\t\t\tto_lid..................%u\n", + cl_ntoh16(p_lr->from_lid), + p_lr->from_port_num, + p_lr->to_port_num, cl_ntoh16(p_lr->to_lid)); + } +} + +void osm_dump_link_record(IN osm_log_t * p_log, + IN const ib_link_record_t * p_lr, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active(p_log, log_level)) { + char buf[BUF_SIZE]; + + osm_dump_link_record_to_buf(p_lr, buf); + + osm_log(p_log, log_level, "%s", buf); + } +} + +void osm_dump_link_record_v2(IN osm_log_t * p_log, + IN const ib_link_record_t * p_lr, + IN const int file_id, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active_v2(p_log, log_level, file_id)) { + char buf[BUF_SIZE]; + + osm_dump_link_record_to_buf(p_lr, buf); + + 
osm_log_v2(p_log, log_level, file_id, "%s", buf);
+	}
+}
+
+/*
+ * Format a SwitchInfo attribute into buf as a multi-line text dump.
+ *
+ * Does nothing when buf or p_si is NULL.  The text is written with an
+ * unbounded sprintf(), so buf must be large enough to hold it.
+ */
+static void osm_dump_switch_info_to_buf(IN const ib_switch_info_t * p_si,
+					OUT char * buf)
+{
+	if (!buf || !p_si)
+		return;
+	else {
+		sprintf(buf,
+			"SwitchInfo dump:\n"
+			"\t\t\t\tlin_cap.................0x%X\n"
+			"\t\t\t\trand_cap................0x%X\n"
+			"\t\t\t\tmcast_cap...............0x%X\n"
+			"\t\t\t\tlin_top.................0x%X\n"
+			"\t\t\t\tdef_port................%u\n"
+			"\t\t\t\tdef_mcast_pri_port......%u\n"
+			"\t\t\t\tdef_mcast_not_port......%u\n"
+			"\t\t\t\tlife_state..............0x%X\n"
+			"\t\t\t\tlids_per_port...........%u\n"
+			"\t\t\t\tpartition_enf_cap.......0x%X\n"
+			"\t\t\t\tflags...................0x%X\n"
+			"\t\t\t\tmcast_top...............0x%X\n",
+			cl_ntoh16(p_si->lin_cap), cl_ntoh16(p_si->rand_cap),
+			cl_ntoh16(p_si->mcast_cap), cl_ntoh16(p_si->lin_top),
+			p_si->def_port, p_si->def_mcast_pri_port,
+			p_si->def_mcast_not_port, p_si->life_state,
+			cl_ntoh16(p_si->lids_per_port),
+			cl_ntoh16(p_si->enforce_cap), p_si->flags,
+			cl_ntoh16(p_si->mcast_top));
+	}
+}
+
+/*
+ * Log a text dump of a SwitchInfo attribute at log_level.
+ *
+ * Nothing is formatted unless osm_log_is_active() reports log_level as
+ * enabled.
+ */
+void osm_dump_switch_info(IN osm_log_t * p_log,
+			  IN const ib_switch_info_t * p_si,
+			  IN osm_log_level_t log_level)
+{
+	if (osm_log_is_active(p_log, log_level)) {
+		char buf[BUF_SIZE];
+
+		osm_dump_switch_info_to_buf(p_si, buf);
+
+		/*
+		 * Emit at the level that was just checked, not at a
+		 * hard-coded OSM_LOG_VERBOSE, so the gate and the message
+		 * agree.
+		 */
+		osm_log(p_log, log_level, "%s", buf);
+	}
+}
+
+/*
+ * Same as osm_dump_switch_info(), but the activity check and the log call
+ * both take file_id (osm_log_is_active_v2() / osm_log_v2()).
+ */
+void osm_dump_switch_info_v2(IN osm_log_t * p_log,
+			     IN const ib_switch_info_t * p_si,
+			     IN const int file_id,
+			     IN osm_log_level_t log_level)
+{
+	if (osm_log_is_active_v2(p_log, log_level, file_id)) {
+		char buf[BUF_SIZE];
+
+		osm_dump_switch_info_to_buf(p_si, buf);
+
+		/* Emit at the checked level, not a fixed OSM_LOG_VERBOSE. */
+		osm_log_v2(p_log, log_level, file_id, "%s", buf);
+	}
+}
+
+/*
+ * Format a SwitchInfo record (LID plus the embedded SwitchInfo) into buf as
+ * a multi-line text dump.  Does nothing when buf or p_sir is NULL.
+ */
+static void osm_dump_switch_info_record_to_buf(IN const ib_switch_info_record_t * p_sir,
+					       OUT char * buf)
+{
+	if (!buf || !p_sir)
+		return;
+	else {
+		sprintf(buf,
+			"SwitchInfo Record dump:\n"
+			"\t\t\t\tRID\n"
+			"\t\t\t\tlid.....................%u\n"
+			"\t\t\t\tSwitchInfo dump:\n"
+
"\t\t\t\tlin_cap.................0x%X\n" + "\t\t\t\trand_cap................0x%X\n" + "\t\t\t\tmcast_cap...............0x%X\n" + "\t\t\t\tlin_top.................0x%X\n" + "\t\t\t\tdef_port................%u\n" + "\t\t\t\tdef_mcast_pri_port......%u\n" + "\t\t\t\tdef_mcast_not_port......%u\n" + "\t\t\t\tlife_state..............0x%X\n" + "\t\t\t\tlids_per_port...........%u\n" + "\t\t\t\tpartition_enf_cap.......0x%X\n" + "\t\t\t\tflags...................0x%X\n", + cl_ntoh16(p_sir->lid), + cl_ntoh16(p_sir->switch_info.lin_cap), + cl_ntoh16(p_sir->switch_info.rand_cap), + cl_ntoh16(p_sir->switch_info.mcast_cap), + cl_ntoh16(p_sir->switch_info.lin_top), + p_sir->switch_info.def_port, + p_sir->switch_info.def_mcast_pri_port, + p_sir->switch_info.def_mcast_not_port, + p_sir->switch_info.life_state, + cl_ntoh16(p_sir->switch_info.lids_per_port), + cl_ntoh16(p_sir->switch_info.enforce_cap), + p_sir->switch_info.flags); + } +} + +void osm_dump_switch_info_record(IN osm_log_t * p_log, + IN const ib_switch_info_record_t * p_sir, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active(p_log, log_level)) { + char buf[BUF_SIZE]; + + osm_dump_switch_info_record_to_buf(p_sir, buf); + + osm_log(p_log, log_level, "%s", buf); + } +} + +void osm_dump_switch_info_record_v2(IN osm_log_t * p_log, + IN const ib_switch_info_record_t * p_sir, + IN const int file_id, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active_v2(p_log, log_level, file_id)) { + char buf[BUF_SIZE]; + + osm_dump_switch_info_record_to_buf(p_sir, buf); + + osm_log_v2(p_log, log_level, file_id, "%s", buf); + } +} + +static void osm_dump_pkey_block_to_buf(IN uint64_t port_guid, + IN uint16_t block_num, + IN uint8_t port_num, + IN const ib_pkey_table_t * p_pkey_tbl, + OUT char * buf) +{ + if (!buf || !p_pkey_tbl) + return; + else { + char buf_line[1024]; + int i, n; + + for (i = 0, n = 0; i < 32; i++) + n += sprintf(buf_line + n, " 0x%04x |", + cl_ntoh16(p_pkey_tbl->pkey_entry[i])); + + sprintf(buf, + "P_Key table 
dump:\n" + "\t\t\tport_guid...........0x%016" PRIx64 "\n" + "\t\t\tblock_num...........0x%X\n" + "\t\t\tport_num............%u\n\tP_Key Table: %s\n", + cl_ntoh64(port_guid), block_num, port_num, buf_line); + } +} + +void osm_dump_pkey_block(IN osm_log_t * p_log, IN uint64_t port_guid, + IN uint16_t block_num, IN uint8_t port_num, + IN const ib_pkey_table_t * p_pkey_tbl, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active(p_log, log_level)) { + char buf[BUF_SIZE]; + + osm_dump_pkey_block_to_buf(port_guid, block_num, port_num, + p_pkey_tbl, buf); + + osm_log(p_log, log_level, "%s", buf); + } +} + +void osm_dump_pkey_block_v2(IN osm_log_t * p_log, IN uint64_t port_guid, + IN uint16_t block_num, IN uint8_t port_num, + IN const ib_pkey_table_t * p_pkey_tbl, + IN const int file_id, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active_v2(p_log, log_level, file_id)) { + char buf[BUF_SIZE]; + + osm_dump_pkey_block_to_buf(port_guid, block_num, + port_num, p_pkey_tbl, buf); + + osm_log_v2(p_log, log_level, file_id, "%s", buf); + } +} + +static void osm_dump_slvl_map_table_to_buf(IN uint64_t port_guid, + IN uint8_t in_port_num, + IN uint8_t out_port_num, + IN const ib_slvl_table_t * p_slvl_tbl, + OUT char * buf) +{ + if (!buf || !p_slvl_tbl) + return; + else { + char buf_line1[1024], buf_line2[1024]; + int n; + uint8_t i; + + for (i = 0, n = 0; i < 16; i++) + n += sprintf(buf_line1 + n, " %-2u |", i); + for (i = 0, n = 0; i < 16; i++) + n += sprintf(buf_line2 + n, "0x%01X |", + ib_slvl_table_get(p_slvl_tbl, i)); + sprintf(buf, + "SLtoVL dump:\n" + "\t\t\tport_guid............0x%016" PRIx64 "\n" + "\t\t\tin_port_num..........%u\n" + "\t\t\tout_port_num.........%u\n\tSL: | %s\n\tVL: | %s\n", + cl_ntoh64(port_guid), in_port_num, out_port_num, + buf_line1, buf_line2); + } +} + +void osm_dump_slvl_map_table(IN osm_log_t * p_log, IN uint64_t port_guid, + IN uint8_t in_port_num, IN uint8_t out_port_num, + IN const ib_slvl_table_t * p_slvl_tbl, + IN osm_log_level_t 
log_level) +{ + if (osm_log_is_active(p_log, log_level)) { + char buf[BUF_SIZE]; + + osm_dump_slvl_map_table_to_buf(port_guid, in_port_num, + out_port_num, p_slvl_tbl, buf); + + osm_log(p_log, log_level, "%s", buf); + } +} + +void osm_dump_slvl_map_table_v2(IN osm_log_t * p_log, IN uint64_t port_guid, + IN uint8_t in_port_num, IN uint8_t out_port_num, + IN const ib_slvl_table_t * p_slvl_tbl, + IN const int file_id, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active_v2(p_log, log_level, file_id)) { + char buf[BUF_SIZE]; + + osm_dump_slvl_map_table_to_buf(port_guid, in_port_num, + out_port_num, p_slvl_tbl, buf); + + osm_log_v2(p_log, log_level, file_id, "%s", buf); + } +} + +static void osm_dump_vl_arb_table_to_buf(IN uint64_t port_guid, + IN uint8_t block_num, + IN uint8_t port_num, + IN const ib_vl_arb_table_t * p_vla_tbl, + OUT char * buf) +{ + if (!buf || !p_vla_tbl) + return; + else { + char buf_line1[1024], buf_line2[1024]; + int i, n; + + for (i = 0, n = 0; i < 32; i++) + n += sprintf(buf_line1 + n, " 0x%01X |", + p_vla_tbl->vl_entry[i].vl); + for (i = 0, n = 0; i < 32; i++) + n += sprintf(buf_line2 + n, " 0x%01X |", + p_vla_tbl->vl_entry[i].weight); + sprintf(buf, + "VLArb dump:\n" "\t\t\tport_guid...........0x%016" + PRIx64 "\n" "\t\t\tblock_num...........0x%X\n" + "\t\t\tport_num............%u\n\tVL : | %s\n\tWEIGHT:| %s\n", + cl_ntoh64(port_guid), block_num, port_num, buf_line1, + buf_line2); + } +} + +void osm_dump_vl_arb_table(IN osm_log_t * p_log, IN uint64_t port_guid, + IN uint8_t block_num, IN uint8_t port_num, + IN const ib_vl_arb_table_t * p_vla_tbl, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active(p_log, log_level)) { + char buf[BUF_SIZE]; + + osm_dump_vl_arb_table_to_buf(port_guid, block_num, + port_num, p_vla_tbl, buf); + + osm_log(p_log, log_level, "%s", buf); + } +} + +void osm_dump_vl_arb_table_v2(IN osm_log_t * p_log, IN uint64_t port_guid, + IN uint8_t block_num, IN uint8_t port_num, + IN const ib_vl_arb_table_t * 
p_vla_tbl,
+			      IN const int file_id,
+			      IN osm_log_level_t log_level)
+{
+	if (osm_log_is_active_v2(p_log, log_level, file_id)) {
+		char buf[BUF_SIZE];
+
+		osm_dump_vl_arb_table_to_buf(port_guid, block_num,
+					     port_num, p_vla_tbl, buf);
+
+		osm_log_v2(p_log, log_level, file_id, "%s", buf);
+	}
+}
+
+/*
+ * Format an SMInfo attribute into buf as a multi-line text dump.
+ *
+ * Does nothing when buf or p_smi is NULL.  The text is written with an
+ * unbounded sprintf(), so buf must be large enough to hold it.
+ */
+static void osm_dump_sm_info_to_buf(IN const ib_sm_info_t * p_smi,
+				    OUT char * buf)
+{
+	if (!buf || !p_smi)
+		return;
+	else {
+		sprintf(buf,
+			"SMInfo dump:\n"
+			"\t\t\t\tguid....................0x%016" PRIx64 "\n"
+			"\t\t\t\tsm_key..................0x%016" PRIx64 "\n"
+			"\t\t\t\tact_count...............%u\n"
+			"\t\t\t\tpriority................%u\n"
+			"\t\t\t\tsm_state................%u\n",
+			cl_ntoh64(p_smi->guid), cl_ntoh64(p_smi->sm_key),
+			cl_ntoh32(p_smi->act_count),
+			ib_sminfo_get_priority(p_smi),
+			ib_sminfo_get_state(p_smi));
+	}
+}
+
+/*
+ * Log a text dump of an SMInfo attribute at log_level.
+ *
+ * Nothing is formatted unless osm_log_is_active() reports log_level as
+ * enabled.
+ */
+void osm_dump_sm_info(IN osm_log_t * p_log, IN const ib_sm_info_t * p_smi,
+		      IN osm_log_level_t log_level)
+{
+	if (osm_log_is_active(p_log, log_level)) {
+		char buf[BUF_SIZE];
+
+		osm_dump_sm_info_to_buf(p_smi, buf);
+
+		/*
+		 * Emit at the level that was just checked, not at a
+		 * hard-coded OSM_LOG_DEBUG, so the gate and the message
+		 * agree.
+		 */
+		osm_log(p_log, log_level, "%s", buf);
+	}
+}
+
+/*
+ * Same as osm_dump_sm_info(), but the activity check and the log call both
+ * take file_id (osm_log_is_active_v2() / osm_log_v2()).
+ */
+void osm_dump_sm_info_v2(IN osm_log_t * p_log, IN const ib_sm_info_t * p_smi,
+			 IN const int file_id, IN osm_log_level_t log_level)
+{
+	if (osm_log_is_active_v2(p_log, log_level, file_id)) {
+		char buf[BUF_SIZE];
+
+		osm_dump_sm_info_to_buf(p_smi, buf);
+
+		/* Emit at the checked level, not a fixed OSM_LOG_DEBUG. */
+		osm_log_v2(p_log, log_level, file_id, "%s", buf);
+	}
+}
+
+/*
+ * Format an SMInfo record (LID, reserved field and the embedded SMInfo)
+ * into buf as a multi-line text dump.  Does nothing when buf or p_smir is
+ * NULL.
+ */
+static void osm_dump_sm_info_record_to_buf(IN const ib_sminfo_record_t * p_smir,
+					   OUT char * buf)
+{
+	if (!buf || !p_smir)
+		return;
+	else {
+		sprintf(buf,
+			"SMInfo Record dump:\n"
+			"\t\t\t\tRID\n"
+			"\t\t\t\tLid.....................%u\n"
+			"\t\t\t\tReserved................0x%X\n"
+			"\t\t\t\tSMInfo dump:\n"
+			"\t\t\t\tguid....................0x%016" PRIx64 "\n"
+			"\t\t\t\tsm_key..................0x%016" PRIx64 "\n"
+			"\t\t\t\tact_count...............%u\n"
+			"\t\t\t\tpriority................%u\n"
+			"\t\t\t\tsm_state................%u\n",
+
cl_ntoh16(p_smir->lid), cl_ntoh16(p_smir->resv0),
+			cl_ntoh64(p_smir->sm_info.guid),
+			cl_ntoh64(p_smir->sm_info.sm_key),
+			cl_ntoh32(p_smir->sm_info.act_count),
+			ib_sminfo_get_priority(&p_smir->sm_info),
+			ib_sminfo_get_state(&p_smir->sm_info));
+	}
+}
+
+/*
+ * Log a text dump of an SMInfo record at log_level.
+ *
+ * Nothing is formatted unless osm_log_is_active() reports log_level as
+ * enabled.
+ */
+void osm_dump_sm_info_record(IN osm_log_t * p_log,
+			     IN const ib_sminfo_record_t * p_smir,
+			     IN osm_log_level_t log_level)
+{
+	if (osm_log_is_active(p_log, log_level)) {
+		char buf[BUF_SIZE];
+
+		osm_dump_sm_info_record_to_buf(p_smir, buf);
+
+		/*
+		 * Emit at the level that was just checked, not at a
+		 * hard-coded OSM_LOG_DEBUG, so the gate and the message
+		 * agree.
+		 */
+		osm_log(p_log, log_level, "%s", buf);
+	}
+}
+
+/*
+ * Same as osm_dump_sm_info_record(), but the activity check and the log
+ * call both take file_id (osm_log_is_active_v2() / osm_log_v2()).
+ */
+void osm_dump_sm_info_record_v2(IN osm_log_t * p_log,
+				IN const ib_sminfo_record_t * p_smir,
+				IN const int file_id,
+				IN osm_log_level_t log_level)
+{
+	if (osm_log_is_active_v2(p_log, log_level, file_id)) {
+		char buf[BUF_SIZE];
+
+		osm_dump_sm_info_record_to_buf(p_smir, buf);
+
+		/* Emit at the checked level, not a fixed OSM_LOG_DEBUG. */
+		osm_log_v2(p_log, log_level, file_id, "%s", buf);
+	}
+}
+
+/*
+ * Format a generic Notice attribute into log_buf.  The trap-specific
+ * details are first built in a local 1024-byte buffer, selected by the
+ * notice's trap number.  Does nothing when log_buf or p_ntci is NULL.
+ */
+static void osm_dump_notice_to_buf_generic(IN const ib_mad_notice_attr_t * p_ntci,
+					   OUT char * log_buf)
+{
+	if (!log_buf || !p_ntci)
+		return;
+	else {
+		char gid_str[INET6_ADDRSTRLEN];
+		char gid_str2[INET6_ADDRSTRLEN];
+		char buff[1024];
+		int n;
+
+		buff[0] = '\0';
+
+		/* immediate data based on the trap */
+		switch (cl_ntoh16(p_ntci->g_or_v.generic.trap_num)) {
+		case SM_GID_IN_SERVICE_TRAP:	/* 64 */
+		case SM_GID_OUT_OF_SERVICE_TRAP:	/* 65 */
+		case SM_MGID_CREATED_TRAP:	/* 66 */
+		case SM_MGID_DESTROYED_TRAP:	/* 67 */
+			sprintf(buff,
+				"\t\t\t\tsrc_gid..................%s\n",
+				inet_ntop(AF_INET6, p_ntci->data_details.
+ ntc_64_67.gid.raw, gid_str, + sizeof gid_str)); + break; + case SM_LINK_STATE_CHANGED_TRAP: /* 128 */ + sprintf(buff, + "\t\t\t\tsw_lid...................%u\n", + cl_ntoh16(p_ntci->data_details.ntc_128.sw_lid)); + break; + case SM_LINK_INTEGRITY_THRESHOLD_TRAP: /* 129 */ + case SM_BUFFER_OVERRUN_THRESHOLD_TRAP: /* 130 */ + case SM_WATCHDOG_TIMER_EXPIRED_TRAP: /* 131 */ + sprintf(buff, + "\t\t\t\tlid......................%u\n" + "\t\t\t\tport_num.................%u\n", + cl_ntoh16(p_ntci->data_details. + ntc_129_131.lid), + p_ntci->data_details.ntc_129_131.port_num); + break; + case SM_LOCAL_CHANGES_TRAP: /* 144 */ + sprintf(buff, + "\t\t\t\tlid......................%u\n" + "\t\t\t\tlocal_changes............%u\n" + "\t\t\t\tnew_cap_mask.............0x%08x\n" + "\t\t\t\tchange_flags.............0x%x\n" + "\t\t\t\tcap_mask2................0x%x\n", + cl_ntoh16(p_ntci->data_details.ntc_144.lid), + p_ntci->data_details.ntc_144.local_changes, + cl_ntoh32(p_ntci->data_details.ntc_144. + new_cap_mask), + cl_ntoh16(p_ntci->data_details.ntc_144. + change_flgs), + cl_ntoh16(p_ntci->data_details.ntc_144. + cap_mask2)); + break; + case SM_SYS_IMG_GUID_CHANGED_TRAP: /* 145 */ + sprintf(buff, + "\t\t\t\tlid......................%u\n" + "\t\t\t\tnew_sys_guid.............0x%016" + PRIx64 "\n", + cl_ntoh16(p_ntci->data_details.ntc_145. + lid), + cl_ntoh64(p_ntci->data_details.ntc_145. + new_sys_guid)); + break; + case SM_BAD_MKEY_TRAP: /* 256 */ + n = sprintf(buff, + "\t\t\t\tlid......................%u\n" + "\t\t\t\tdrslid...................%u\n" + "\t\t\t\tmethod...................0x%x\n" + "\t\t\t\tattr_id..................0x%x\n" + "\t\t\t\tattr_mod.................0x%x\n" + "\t\t\t\tm_key....................0x%016" + PRIx64 "\n" + "\t\t\t\tdr_notice................%d\n" + "\t\t\t\tdr_path_truncated........%d\n" + "\t\t\t\tdr_hop_count.............%u\n", + cl_ntoh16(p_ntci->data_details.ntc_256.lid), + cl_ntoh16(p_ntci->data_details.ntc_256. 
+					  dr_slid),
+				p_ntci->data_details.ntc_256.method,
+				cl_ntoh16(p_ntci->data_details.ntc_256.
+					  attr_id),
+				cl_ntoh32(p_ntci->data_details.ntc_256.
+					  attr_mod),
+				cl_ntoh64(p_ntci->data_details.ntc_256.
+					  mkey),
+				p_ntci->data_details.ntc_256.
+				dr_trunc_hop >> 7,
+				/*
+				 * dr_path_truncated is a single flag: mask
+				 * after shifting so the bit printed above as
+				 * dr_notice (>> 7) does not leak into it.
+				 */
+				(p_ntci->data_details.ntc_256.
+				 dr_trunc_hop >> 6) & 0x1,
+				p_ntci->data_details.ntc_256.
+				dr_trunc_hop & 0x3f);
+			n += snprintf(buff + n, sizeof(buff) - n,
+				      "Directed Path Dump of %u hop path:"
+				      "\n\t\t\t\tPath = ",
+				      p_ntci->data_details.ntc_256.
+				      dr_trunc_hop & 0x3f);
+			n += sprint_uint8_arr(buff + n, sizeof(buff) - n,
+					      p_ntci->data_details.ntc_256.
+					      dr_rtn_path,
+					      (p_ntci->data_details.ntc_256.
+					       dr_trunc_hop & 0x3f) + 1);
+			if (n >= sizeof(buff)) {
+				n = sizeof(buff) - 2;
+				break;
+			}
+			snprintf(buff + n, sizeof(buff) - n, "\n");
+			break;
+		case SM_BAD_PKEY_TRAP:	/* 257 */
+		case SM_BAD_QKEY_TRAP:	/* 258 */
+			sprintf(buff,
+				"\t\t\t\tlid1.....................%u\n"
+				"\t\t\t\tlid2.....................%u\n"
+				"\t\t\t\tkey......................0x%x\n"
+				"\t\t\t\tsl.......................%d\n"
+				"\t\t\t\tqp1......................0x%x\n"
+				"\t\t\t\tqp2......................0x%x\n"
+				"\t\t\t\tgid1.....................%s\n"
+				"\t\t\t\tgid2.....................%s\n",
+				cl_ntoh16(p_ntci->data_details.ntc_257_258.
+					  lid1),
+				cl_ntoh16(p_ntci->data_details.ntc_257_258.
+					  lid2),
+				cl_ntoh32(p_ntci->data_details.ntc_257_258.key),
+				cl_ntoh32(p_ntci->data_details.ntc_257_258.
+					  qp1) >> 28,
+				cl_ntoh32(p_ntci->data_details.ntc_257_258.
+					  qp1) & 0xffffff,
+				cl_ntoh32(p_ntci->data_details.ntc_257_258.
+					  qp2) & 0xffffff,
+				inet_ntop(AF_INET6, p_ntci->data_details.
+					  ntc_257_258.gid1.raw, gid_str,
+					  sizeof gid_str),
+				inet_ntop(AF_INET6, p_ntci->data_details.
+ ntc_257_258.gid2.raw, gid_str2, + sizeof gid_str2)); + break; + case SM_BAD_SWITCH_PKEY_TRAP: /* 259 */ + sprintf(buff, + "\t\t\t\tdata_valid...............0x%x\n" + "\t\t\t\tlid1.....................%u\n" + "\t\t\t\tlid2.....................%u\n" + "\t\t\t\tpkey.....................0x%x\n" + "\t\t\t\tsl.......................%d\n" + "\t\t\t\tqp1......................0x%x\n" + "\t\t\t\tqp2......................0x%x\n" + "\t\t\t\tgid1.....................%s\n" + "\t\t\t\tgid2.....................%s\n" + "\t\t\t\tsw_lid...................%u\n" + "\t\t\t\tport_no..................%u\n", + cl_ntoh16(p_ntci->data_details.ntc_259. + data_valid), + cl_ntoh16(p_ntci->data_details.ntc_259.lid1), + cl_ntoh16(p_ntci->data_details.ntc_259.lid2), + cl_ntoh16(p_ntci->data_details.ntc_259.pkey), + cl_ntoh32(p_ntci->data_details.ntc_259. + sl_qp1) >> 24, + cl_ntoh32(p_ntci->data_details.ntc_259. + sl_qp1) & 0xffffff, + cl_ntoh32(p_ntci->data_details.ntc_259.qp2), + inet_ntop(AF_INET6, p_ntci->data_details. + ntc_259.gid1.raw, gid_str, + sizeof gid_str), + inet_ntop(AF_INET6, p_ntci->data_details. 
+ ntc_259.gid2.raw, gid_str2, + sizeof gid_str2), + cl_ntoh16(p_ntci->data_details.ntc_259.sw_lid), + p_ntci->data_details.ntc_259.port_no); + break; + } + + sprintf(log_buf, + "Generic Notice dump:\n" + "\t\t\t\ttype.....................%u\n" + "\t\t\t\tprod_type................%u (%s)\n" + "\t\t\t\ttrap_num.................%u\n%s", + ib_notice_get_type(p_ntci), + cl_ntoh32(ib_notice_get_prod_type(p_ntci)), + ib_get_producer_type_str(ib_notice_get_prod_type + (p_ntci)), + cl_ntoh16(p_ntci->g_or_v.generic.trap_num), buff); + } +} + +static void osm_dump_notice_to_buf(IN const ib_mad_notice_attr_t * p_ntci, + OUT char * buf) +{ + if (!buf || !p_ntci) + return; + else { + sprintf(buf, + "Vendor Notice dump:\n" + "\t\t\t\ttype.....................%u\n" + "\t\t\t\tvendor...................%u\n" + "\t\t\t\tdevice_id................%u\n", + cl_ntoh16(ib_notice_get_type(p_ntci)), + cl_ntoh32(ib_notice_get_vend_id(p_ntci)), + cl_ntoh16(p_ntci->g_or_v.vend.dev_id)); + } +} + +void osm_dump_notice(IN osm_log_t * p_log, + IN const ib_mad_notice_attr_t * p_ntci, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active(p_log, log_level)) { + char buf[BUF_SIZE]; + + if (ib_notice_is_generic(p_ntci)) + osm_dump_notice_to_buf_generic(p_ntci, buf); + else + osm_dump_notice_to_buf(p_ntci, buf); + + osm_log(p_log, log_level, "%s", buf); + } +} + +void osm_dump_notice_v2(IN osm_log_t * p_log, + IN const ib_mad_notice_attr_t * p_ntci, + IN const int file_id, IN osm_log_level_t log_level) +{ + if (osm_log_is_active_v2(p_log, log_level, file_id)) { + char buf[BUF_SIZE]; + + if (ib_notice_is_generic(p_ntci)) + osm_dump_notice_to_buf_generic(p_ntci, buf); + else + osm_dump_notice_to_buf(p_ntci, buf); + + osm_log_v2(p_log, log_level, file_id, "%s", buf); + } +} + +static void osm_dump_dr_smp_to_buf(IN const ib_smp_t * p_smp, OUT char * buf, + IN size_t buf_size) +{ + if (!buf || !p_smp) + return; + else { + unsigned n; + + n = sprintf(buf, + "SMP dump:\n" + 
"\t\t\t\tbase_ver................0x%X\n" + "\t\t\t\tmgmt_class..............0x%X\n" + "\t\t\t\tclass_ver...............0x%X\n" + "\t\t\t\tmethod..................0x%X (%s)\n", + p_smp->base_ver, p_smp->mgmt_class, + p_smp->class_ver, p_smp->method, + ib_get_sm_method_str(p_smp->method)); + + if (p_smp->mgmt_class == IB_MCLASS_SUBN_DIR) { + n += snprintf(buf + n, buf_size - n, + "\t\t\t\tD bit...................0x%X\n" + "\t\t\t\tstatus..................0x%X\n", + ib_smp_is_d(p_smp), + cl_ntoh16(ib_smp_get_status(p_smp))); + } else { + n += snprintf(buf + n, buf_size - n, + "\t\t\t\tstatus..................0x%X\n", + cl_ntoh16(p_smp->status)); + } + + n += snprintf(buf + n, buf_size - n, + "\t\t\t\thop_ptr.................0x%X\n" + "\t\t\t\thop_count...............0x%X\n" + "\t\t\t\ttrans_id................0x%" PRIx64 "\n" + "\t\t\t\tattr_id.................0x%X (%s)\n" + "\t\t\t\tresv....................0x%X\n" + "\t\t\t\tattr_mod................0x%X\n" + "\t\t\t\tm_key...................0x%016" PRIx64 + "\n", p_smp->hop_ptr, p_smp->hop_count, + cl_ntoh64(p_smp->trans_id), + cl_ntoh16(p_smp->attr_id), + ib_get_sm_attr_str(p_smp->attr_id), + cl_ntoh16(p_smp->resv), + cl_ntoh32(p_smp->attr_mod), + cl_ntoh64(p_smp->m_key)); + + if (p_smp->mgmt_class == IB_MCLASS_SUBN_DIR) { + uint32_t i; + n += snprintf(buf + n, buf_size - n, + "\t\t\t\tdr_slid.................%u\n" + "\t\t\t\tdr_dlid.................%u\n", + cl_ntoh16(p_smp->dr_slid), + cl_ntoh16(p_smp->dr_dlid)); + + n += snprintf(buf + n, buf_size - n, + "\n\t\t\t\tInitial path: "); + n += sprint_uint8_arr(buf + n, buf_size - n, + p_smp->initial_path, + p_smp->hop_count + 1); + + n += snprintf(buf + n, buf_size - n, + "\n\t\t\t\tReturn path: "); + n += sprint_uint8_arr(buf + n, buf_size - n, + p_smp->return_path, + p_smp->hop_count + 1); + + n += snprintf(buf + n, buf_size - n, + "\n\t\t\t\tReserved: "); + for (i = 0; i < 7; i++) { + n += snprintf(buf + n, buf_size - n, + "[%0X]", p_smp->resv1[i]); + } + n += 
snprintf(buf + n, buf_size - n, "\n"); + + for (i = 0; i < 64; i += 16) { + n += snprintf(buf + n, buf_size - n, + "\n\t\t\t\t%02X %02X %02X %02X " + "%02X %02X %02X %02X" + " %02X %02X %02X %02X %02X %02X %02X %02X\n", + p_smp->data[i], + p_smp->data[i + 1], + p_smp->data[i + 2], + p_smp->data[i + 3], + p_smp->data[i + 4], + p_smp->data[i + 5], + p_smp->data[i + 6], + p_smp->data[i + 7], + p_smp->data[i + 8], + p_smp->data[i + 9], + p_smp->data[i + 10], + p_smp->data[i + 11], + p_smp->data[i + 12], + p_smp->data[i + 13], + p_smp->data[i + 14], + p_smp->data[i + 15]); + } + } else { + /* not a Direct Route so provide source and destination lids */ + n += snprintf(buf + n, buf_size - n, + "\t\t\t\tMAD IS LID ROUTED\n"); + } + } +} + +void osm_dump_dr_smp(IN osm_log_t * p_log, IN const ib_smp_t * p_smp, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active(p_log, log_level)) { + char buf[BUF_SIZE]; + + osm_dump_dr_smp_to_buf(p_smp, buf, BUF_SIZE); + + osm_log(p_log, log_level, "%s", buf); + } +} + +void osm_dump_dr_smp_v2(IN osm_log_t * p_log, IN const ib_smp_t * p_smp, + IN const int file_id, IN osm_log_level_t log_level) +{ + if (osm_log_is_active_v2(p_log, log_level, file_id)) { + char buf[BUF_SIZE]; + + osm_dump_dr_smp_to_buf(p_smp, buf, BUF_SIZE); + + osm_log_v2(p_log, log_level, file_id, "%s", buf); + } +} + +static void osm_dump_sa_mad_to_buf(IN const ib_sa_mad_t * p_mad, OUT char * buf) +{ + if (!buf || !p_mad) + return; + else { + /* make sure the mad is valid */ + if (p_mad == NULL) { + sprintf(buf, "NULL MAD POINTER\n"); + return; + } + + sprintf(buf, + "SA MAD dump:\n" + "\t\t\t\tbase_ver................0x%X\n" + "\t\t\t\tmgmt_class..............0x%X\n" + "\t\t\t\tclass_ver...............0x%X\n" + "\t\t\t\tmethod..................0x%X (%s)\n" + "\t\t\t\tstatus..................0x%X\n" + "\t\t\t\tresv....................0x%X\n" + "\t\t\t\ttrans_id................0x%" PRIx64 "\n" + "\t\t\t\tattr_id.................0x%X (%s)\n" + 
"\t\t\t\tresv1...................0x%X\n" + "\t\t\t\tattr_mod................0x%X\n" + "\t\t\t\trmpp_version............0x%X\n" + "\t\t\t\trmpp_type...............0x%X\n" + "\t\t\t\trmpp_flags..............0x%X\n" + "\t\t\t\trmpp_status.............0x%X\n" + "\t\t\t\tseg_num.................0x%X\n" + "\t\t\t\tpayload_len/new_win.....0x%X\n" + "\t\t\t\tsm_key..................0x%016" PRIx64 "\n" + "\t\t\t\tattr_offset.............0x%X\n" + "\t\t\t\tresv2...................0x%X\n" + "\t\t\t\tcomp_mask...............0x%016" PRIx64 "\n", + p_mad->base_ver, p_mad->mgmt_class, p_mad->class_ver, + p_mad->method, ib_get_sa_method_str(p_mad->method), + cl_ntoh16(p_mad->status), cl_ntoh16(p_mad->resv), + cl_ntoh64(p_mad->trans_id), cl_ntoh16(p_mad->attr_id), + ib_get_sa_attr_str(p_mad->attr_id), + cl_ntoh16(p_mad->resv1), cl_ntoh32(p_mad->attr_mod), + p_mad->rmpp_version, p_mad->rmpp_type, + p_mad->rmpp_flags, p_mad->rmpp_status, + cl_ntoh32(p_mad->seg_num), + cl_ntoh32(p_mad->paylen_newwin), + cl_ntoh64(p_mad->sm_key), cl_ntoh16(p_mad->attr_offset), + cl_ntoh16(p_mad->resv3), cl_ntoh64(p_mad->comp_mask)); + + strcat(buf, "\n"); + } +} + +void osm_dump_sa_mad(IN osm_log_t * p_log, IN const ib_sa_mad_t * p_mad, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active(p_log, log_level)) { + char buf[BUF_SIZE]; + + osm_dump_sa_mad_to_buf(p_mad, buf); + + osm_log(p_log, log_level, "%s\n", buf); + } +} + +void osm_dump_sa_mad_v2(IN osm_log_t * p_log, IN const ib_sa_mad_t * p_mad, + IN const int file_id, IN osm_log_level_t log_level) +{ + if (osm_log_is_active_v2(p_log, log_level, file_id)) { + char buf[BUF_SIZE]; + + osm_dump_sa_mad_to_buf(p_mad, buf); + + osm_log_v2(p_log, log_level, file_id, "%s", buf); + } +} + +static void osm_dump_dr_path_to_buf(IN const osm_dr_path_t * p_path, + OUT char * buf, IN size_t buf_size) +{ + if (!buf || !p_path) + return; + else { + unsigned n = 0; + + n = sprintf(buf, "Directed Path Dump of %u hop path: " + "Path = ", p_path->hop_count); + + 
sprint_uint8_arr(buf + n, buf_size - n, p_path->path, + p_path->hop_count + 1); + } +} + +void osm_dump_dr_path(IN osm_log_t * p_log, IN const osm_dr_path_t * p_path, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active(p_log, log_level)) { + char buf[BUF_SIZE]; + + osm_dump_dr_path_to_buf(p_path, buf, BUF_SIZE); + + osm_log(p_log, log_level, "%s\n", buf); + } +} + +void osm_dump_dr_path_v2(IN osm_log_t * p_log, IN const osm_dr_path_t * p_path, + IN const int file_id, IN osm_log_level_t log_level) +{ + if (osm_log_is_active_v2(p_log, log_level, file_id)) { + char buf[BUF_SIZE]; + + osm_dump_dr_path_to_buf(p_path, buf, BUF_SIZE); + + osm_log_v2(p_log, log_level, file_id, "%s\n", buf); + } +} + +static void osm_dump_smp_dr_path_to_buf(IN const ib_smp_t * p_smp, + OUT char * buf, IN size_t buf_size) +{ + if (!buf || !p_smp) + return; + else { + unsigned n; + + n = sprintf(buf, "Received SMP on a %u hop path: " + "Initial path = ", p_smp->hop_count); + n += sprint_uint8_arr(buf + n, buf_size - n, + p_smp->initial_path, + p_smp->hop_count + 1); + + n += snprintf(buf + n, buf_size - n, ", Return path = "); + n += sprint_uint8_arr(buf + n, buf_size - n, + p_smp->return_path, p_smp->hop_count + 1); + } +} + +void osm_dump_smp_dr_path(IN osm_log_t * p_log, IN const ib_smp_t * p_smp, + IN osm_log_level_t log_level) +{ + if (osm_log_is_active(p_log, log_level)) { + char buf[BUF_SIZE]; + + osm_dump_smp_dr_path_to_buf(p_smp, buf, BUF_SIZE); + + osm_log(p_log, log_level, "%s\n", buf); + } +} + +void osm_dump_smp_dr_path_v2(IN osm_log_t * p_log, IN const ib_smp_t * p_smp, + IN const int file_id, IN osm_log_level_t log_level) +{ + if (osm_log_is_active_v2(p_log, log_level, file_id)) { + char buf[BUF_SIZE]; + + osm_dump_smp_dr_path_to_buf(p_smp, buf, BUF_SIZE); + + osm_log_v2(p_log, log_level, file_id, "%s\n", buf); + } +} + +void osm_dump_dr_path_as_buf(IN size_t max_len, + IN const osm_dr_path_t * p_path, + OUT char* buf) +{ + sprint_uint8_arr(buf, max_len, p_path->path, 
p_path->hop_count + 1); +} + +static const char *sm_signal_str[] = { + "OSM_SIGNAL_NONE", /* 0 */ + "OSM_SIGNAL_SWEEP", /* 1 */ + "OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST", /* 2 */ + "OSM_SIGNAL_PERFMGR_SWEEP", /* 3 */ + "OSM_SIGNAL_GUID_PROCESS_REQUEST", /* 4 */ + "UNKNOWN SIGNAL!!" /* 5 */ +}; + +const char *osm_get_sm_signal_str(IN osm_signal_t signal) +{ + if (signal > OSM_SIGNAL_MAX) + signal = OSM_SIGNAL_MAX; + return sm_signal_str[signal]; +} + +static const char *disp_msg_str[] = { + "OSM_MSG_NONE", + "OSM_MSG_MAD_NODE_INFO", + "OSM_MSG_MAD_PORT_INFO", + "OSM_MSG_MAD_SWITCH_INFO", + "OSM_MSG_MAD_GUID_INFO", + "OSM_MSG_MAD_NODE_DESC", + "OSM_MSG_MAD_NODE_RECORD", + "OSM_MSG_MAD_PORTINFO_RECORD", + "OSM_MSG_MAD_SERVICE_RECORD", + "OSM_MSG_MAD_PATH_RECORD", + "OSM_MSG_MAD_MCMEMBER_RECORD", + "OSM_MSG_MAD_LINK_RECORD", + "OSM_MSG_MAD_SMINFO_RECORD", + "OSM_MSG_MAD_CLASS_PORT_INFO", + "OSM_MSG_MAD_INFORM_INFO", + "OSM_MSG_MAD_LFT_RECORD", + "OSM_MSG_MAD_LFT", + "OSM_MSG_MAD_SM_INFO", + "OSM_MSG_MAD_NOTICE", + "OSM_MSG_LIGHT_SWEEP_FAIL", + "OSM_MSG_MAD_MFT", + "OSM_MSG_MAD_PKEY_TBL_RECORD", + "OSM_MSG_MAD_VL_ARB_RECORD", + "OSM_MSG_MAD_SLVL_TBL_RECORD", + "OSM_MSG_MAD_PKEY", + "OSM_MSG_MAD_VL_ARB", + "OSM_MSG_MAD_SLVL", + "OSM_MSG_MAD_GUIDINFO_RECORD", + "OSM_MSG_MAD_INFORM_INFO_RECORD", + "OSM_MSG_MAD_SWITCH_INFO_RECORD", + "OSM_MSG_MAD_MFT_RECORD", +#if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) + "OSM_MSG_MAD_MULTIPATH_RECORD", +#endif + "OSM_MSG_MAD_PORT_COUNTERS", + "OSM_MSG_MAD_MLNX_EXT_PORT_INFO", + "UNKNOWN!!" +}; + +const char *osm_get_disp_msg_str(IN cl_disp_msgid_t msg) +{ + if (msg >= OSM_MSG_MAX) + msg = OSM_MSG_MAX-1; + return disp_msg_str[msg]; +} + +static const char *port_state_str_fixed_width[] = { + "NOC", + "DWN", + "INI", + "ARM", + "ACT", + "???" 
+}; + +const char *osm_get_port_state_str_fixed_width(IN uint8_t port_state) +{ + if (port_state > IB_LINK_ACTIVE) + port_state = IB_LINK_ACTIVE + 1; + return port_state_str_fixed_width[port_state]; +} + +static const char *node_type_str_fixed_width[] = { + "??", + "CA", + "SW", + "RT", +}; + +const char *osm_get_node_type_str_fixed_width(IN uint8_t node_type) +{ + if (node_type > IB_NODE_TYPE_ROUTER) + node_type = 0; + return node_type_str_fixed_width[node_type]; +} + +const char *osm_get_manufacturer_str(IN uint64_t guid_ho) +{ + /* note that the max vendor string length is 11 */ + static const char *intel_str = "Intel"; + static const char *mellanox_str = "Mellanox"; + static const char *redswitch_str = "Redswitch"; + static const char *silverstorm_str = "SilverStorm"; + static const char *topspin_str = "Topspin"; + static const char *fujitsu_str = "Fujitsu"; + static const char *voltaire_str = "Voltaire"; + static const char *yotta_str = "YottaYotta"; + static const char *pathscale_str = "PathScale"; + static const char *ibm_str = "IBM"; + static const char *divergenet_str = "DivergeNet"; + static const char *flextronics_str = "Flextronics"; + static const char *agilent_str = "Agilent"; + static const char *obsidian_str = "Obsidian"; + static const char *baymicro_str = "BayMicro"; + static const char *lsilogic_str = "LSILogic"; + static const char *ddn_str = "DataDirect"; + static const char *panta_str = "Panta"; + static const char *hp_str = "HP"; + static const char *rioworks_str = "Rioworks"; + static const char *sun_str = "Sun"; + static const char *leafntwks_str = "3LeafNtwks"; + static const char *xsigo_str = "Xsigo"; + static const char *dell_str = "Dell"; + static const char *supermicro_str = "SuperMicro"; + static const char *openib_str = "OpenIB"; + static const char *unknown_str = "Unknown"; + static const char *bull_str = "Bull"; + + switch ((uint32_t) (guid_ho >> (5 * 8))) { + case OSM_VENDOR_ID_INTEL: + return intel_str; + case 
OSM_VENDOR_ID_MELLANOX: + case OSM_VENDOR_ID_MELLANOX2: + case OSM_VENDOR_ID_MELLANOX3: + case OSM_VENDOR_ID_MELLANOX4: + case OSM_VENDOR_ID_MELLANOX5: + case OSM_VENDOR_ID_MELLANOX6: + case OSM_VENDOR_ID_MELLANOX7: + case OSM_VENDOR_ID_MELLANOX8: + case OSM_VENDOR_ID_MELLANOX9: + return mellanox_str; + case OSM_VENDOR_ID_REDSWITCH: + return redswitch_str; + case OSM_VENDOR_ID_SILVERSTORM: + return silverstorm_str; + case OSM_VENDOR_ID_TOPSPIN: + return topspin_str; + case OSM_VENDOR_ID_FUJITSU: + case OSM_VENDOR_ID_FUJITSU2: + return fujitsu_str; + case OSM_VENDOR_ID_VOLTAIRE: + return voltaire_str; + case OSM_VENDOR_ID_YOTTAYOTTA: + return yotta_str; + case OSM_VENDOR_ID_PATHSCALE: + return pathscale_str; + case OSM_VENDOR_ID_IBM: + case OSM_VENDOR_ID_IBM2: + return ibm_str; + case OSM_VENDOR_ID_DIVERGENET: + return divergenet_str; + case OSM_VENDOR_ID_FLEXTRONICS: + return flextronics_str; + case OSM_VENDOR_ID_AGILENT: + return agilent_str; + case OSM_VENDOR_ID_OBSIDIAN: + return obsidian_str; + case OSM_VENDOR_ID_BAYMICRO: + return baymicro_str; + case OSM_VENDOR_ID_LSILOGIC: + return lsilogic_str; + case OSM_VENDOR_ID_DDN: + return ddn_str; + case OSM_VENDOR_ID_PANTA: + return panta_str; + case OSM_VENDOR_ID_HP: + case OSM_VENDOR_ID_HP2: + case OSM_VENDOR_ID_HP3: + case OSM_VENDOR_ID_HP4: + return hp_str; + case OSM_VENDOR_ID_RIOWORKS: + return rioworks_str; + case OSM_VENDOR_ID_SUN: + case OSM_VENDOR_ID_SUN2: + return sun_str; + case OSM_VENDOR_ID_3LEAFNTWKS: + return leafntwks_str; + case OSM_VENDOR_ID_XSIGO: + return xsigo_str; + case OSM_VENDOR_ID_DELL: + return dell_str; + case OSM_VENDOR_ID_SUPERMICRO: + return supermicro_str; + case OSM_VENDOR_ID_OPENIB: + return openib_str; + case OSM_VENDOR_ID_BULL: + return bull_str; + default: + return unknown_str; + } +} + +static const char *mtu_str_fixed_width[] = { + "??? 
", + "256 ", + "512 ", + "1024", + "2048", + "4096" +}; + +const char *osm_get_mtu_str(IN uint8_t mtu) +{ + if (mtu > IB_MTU_LEN_4096) + return mtu_str_fixed_width[0]; + else + return mtu_str_fixed_width[mtu]; +} + +static const char *lwa_str_fixed_width[] = { + "???", + "1x ", + "4x ", + "???", + "8x ", + "???", + "???", + "???", + "12x", + "???", + "???", + "???", + "???", + "???", + "???", + "???", + "2x " +}; + +const char *osm_get_lwa_str(IN uint8_t lwa) +{ + if (lwa > 16) + return lwa_str_fixed_width[0]; + else + return lwa_str_fixed_width[lwa]; +} + +static const char *lsa_str_fixed_width[] = { + "Ext ", + "2.5 ", + "5 ", + "????", + "10 " +}; + +static const char *lsea_str_fixed_width[] = { + "Std ", + "14 ", + "25 ", + "????", + "50 " +}; + +const char *osm_get_lsa_str(IN uint8_t lsa, IN uint8_t lsea, IN uint8_t state, + IN uint8_t fdr10) +{ + if (lsa > IB_LINK_SPEED_ACTIVE_10 || state == IB_LINK_DOWN) + return lsa_str_fixed_width[3]; + if (lsea == IB_LINK_SPEED_EXT_ACTIVE_NONE) { + if (fdr10) + return "FDR10"; + else + return lsa_str_fixed_width[lsa]; + } + if (lsea > IB_LINK_SPEED_EXT_ACTIVE_50) + return lsa_str_fixed_width[3]; + return lsea_str_fixed_width[lsea]; +} + +static const char *sm_mgr_signal_str[] = { + "OSM_SM_SIGNAL_NONE", /* 0 */ + "OSM_SM_SIGNAL_DISCOVERY_COMPLETED", /* 1 */ + "OSM_SM_SIGNAL_POLLING_TIMEOUT", /* 2 */ + "OSM_SM_SIGNAL_DISCOVER", /* 3 */ + "OSM_SM_SIGNAL_DISABLE", /* 4 */ + "OSM_SM_SIGNAL_HANDOVER", /* 5 */ + "OSM_SM_SIGNAL_HANDOVER_SENT", /* 6 */ + "OSM_SM_SIGNAL_ACKNOWLEDGE", /* 7 */ + "OSM_SM_SIGNAL_STANDBY", /* 8 */ + "OSM_SM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED", /* 9 */ + "OSM_SM_SIGNAL_WAIT_FOR_HANDOVER", /* 10 */ + "UNKNOWN STATE!!" 
/* 11 */ +}; + +const char *osm_get_sm_mgr_signal_str(IN osm_sm_signal_t signal) +{ + if (signal > OSM_SM_SIGNAL_MAX) + signal = OSM_SM_SIGNAL_MAX; + return sm_mgr_signal_str[signal]; +} + +static const char *sm_mgr_state_str[] = { + "NOTACTIVE", /* 0 */ + "DISCOVERING", /* 1 */ + "STANDBY", /* 2 */ + "MASTER", /* 3 */ + "UNKNOWN STATE!!" /* 4 */ +}; + +const char *osm_get_sm_mgr_state_str(IN uint16_t state) +{ + return state < ARR_SIZE(sm_mgr_state_str) ? + sm_mgr_state_str[state] : + sm_mgr_state_str[ARR_SIZE(sm_mgr_state_str) - 1]; +} + +int ib_mtu_is_valid(IN const int mtu) +{ + if (mtu < IB_MIN_MTU || mtu > IB_MAX_MTU) + return 0; + return 1; +} + +int ib_rate_is_valid(IN const int rate) +{ + if (rate < IB_MIN_RATE || rate > IB_MAX_RATE) + return 0; + return 1; +} + +int ib_path_compare_rates(IN const int rate1, IN const int rate2) +{ + int orate1 = 0, orate2 = 0; + + CL_ASSERT(rate1 >= IB_MIN_RATE && rate1 <= IB_MAX_RATE); + CL_ASSERT(rate2 >= IB_MIN_RATE && rate2 <= IB_MAX_RATE); + + if (rate1 <= IB_MAX_RATE) + orate1 = ordered_rates[rate1]; + if (rate2 <= IB_MAX_RATE) + orate2 = ordered_rates[rate2]; + if (orate1 < orate2) + return -1; + if (orate1 == orate2) + return 0; + return 1; +} + +static int find_ordered_rate(IN const int rate) +{ + int i; + + for (i = IB_MIN_RATE; i <= IB_MAX_RATE; i++) { + if (ordered_rates[i] == rate) + return i; + } + return 0; +} + +int ib_path_rate_get_prev(IN const int rate) +{ + int orate; + + CL_ASSERT(rate >= IB_MIN_RATE && rate <= IB_MAX_RATE); + + if (rate <= IB_MIN_RATE) + return 0; + if (rate > IB_MAX_RATE) + return 0; + orate = ordered_rates[rate]; + orate--; + return find_ordered_rate(orate); +} + +int ib_path_rate_get_next(IN const int rate) +{ + int orate; + + CL_ASSERT(rate >= IB_MIN_RATE && rate <= IB_MAX_RATE); + + if (rate < IB_MIN_RATE) + return 0; + if (rate >= IB_MAX_RATE) + return 0; + orate = ordered_rates[rate]; + orate++; + return find_ordered_rate(orate); +} + +int ib_path_rate_max_12xedr(IN const int 
rate) +{ + CL_ASSERT(rate >= IB_MIN_RATE && rate <= IB_MAX_RATE); + + if (rate <= IB_PATH_RECORD_RATE_300_GBS) + return rate; + + switch (rate) { + case IB_PATH_RECORD_RATE_28_GBS: + return IB_PATH_RECORD_RATE_25_GBS; + case IB_PATH_RECORD_RATE_50_GBS: + return IB_PATH_RECORD_RATE_40_GBS; + case IB_PATH_RECORD_RATE_400_GBS: + case IB_PATH_RECORD_RATE_600_GBS: + return IB_PATH_RECORD_RATE_300_GBS; + default: + break; + } + + return 0; +} + +int ib_path_rate_2x_hdr_fixups(IN const ib_port_info_t * p_pi, + IN const int rate) +{ + int new_rate = rate; + + CL_ASSERT(rate >= IB_MIN_RATE && rate <= IB_MAX_RATE); + + switch (rate) { + case IB_PATH_RECORD_RATE_28_GBS: + /* 2x not supported but 2x only rate */ + if (!(p_pi->capability_mask & IB_PORT_CAP_HAS_CAP_MASK2) || + (p_pi->capability_mask & IB_PORT_CAP_HAS_CAP_MASK2 && + !(p_pi->capability_mask2 & IB_PORT_CAP2_IS_LINK_WIDTH_2X_SUPPORTED))) { + if (p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS) + new_rate = IB_PATH_RECORD_RATE_25_GBS; + else + new_rate = IB_PATH_RECORD_RATE_20_GBS; + } + break; + case IB_PATH_RECORD_RATE_50_GBS: + /* neither 2x or HDR supported */ + if (!(p_pi->capability_mask & IB_PORT_CAP_HAS_CAP_MASK2) || + (p_pi->capability_mask & IB_PORT_CAP_HAS_CAP_MASK2 && + !(p_pi->capability_mask2 & IB_PORT_CAP2_IS_LINK_WIDTH_2X_SUPPORTED) && + !(p_pi->capability_mask2 & IB_PORT_CAP2_IS_LINK_SPEED_HDR_SUPPORTED))) + new_rate = IB_PATH_RECORD_RATE_40_GBS; + break; + case IB_PATH_RECORD_RATE_400_GBS: + case IB_PATH_RECORD_RATE_600_GBS: + /* HDR not supported but HDR only rate */ + if (!(p_pi->capability_mask & IB_PORT_CAP_HAS_CAP_MASK2) || + (p_pi->capability_mask & IB_PORT_CAP_HAS_CAP_MASK2 && + !(p_pi->capability_mask2 & IB_PORT_CAP2_IS_LINK_SPEED_HDR_SUPPORTED))) + new_rate = IB_PATH_RECORD_RATE_300_GBS; + break; + default: + break; + } + + return new_rate; +} diff --git a/libopensm/osm_log.c b/libopensm/osm_log.c new file mode 100644 index 0000000..29e80e4 --- /dev/null +++ b/libopensm/osm_log.c @@ -0,0 
+1,517 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2007 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_log_t. + * This object represents the log file. + * This object is part of the opensm family of objects. 
+ */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#define FILE_ID OSM_FILE_LOG_C +#include +#include +#include +#include +#include +#include +#include +#include + +static int log_exit_count = 0; + +#ifndef __WIN__ +#include +#include +#include + +static const char *month_str[] = { + "Jan", + "Feb", + "Mar", + "Apr", + "May", + "Jun", + "Jul", + "Aug", + "Sep", + "Oct", + "Nov", + "Dec" +}; +#else +void OsmReportState(IN const char *p_str); +#endif /* ndef __WIN__ */ + +#ifndef __WIN__ + +static void truncate_log_file(osm_log_t * p_log) +{ + int fd = fileno(p_log->out_port); + if (ftruncate(fd, 0) < 0) + fprintf(stderr, "truncate_log_file: cannot truncate: %s\n", + strerror(errno)); + if (lseek(fd, 0, SEEK_SET) < 0) + fprintf(stderr, "truncate_log_file: cannot rewind: %s\n", + strerror(errno)); + p_log->count = 0; +} + +#else /* Windows */ + +static void truncate_log_file(osm_log_t * p_log) +{ + int fd = _fileno(p_log->out_port); + HANDLE hFile = (HANDLE) _get_osfhandle(fd); + + if (_lseek(fd, 0, SEEK_SET) < 0) + fprintf(stderr, "truncate_log_file: cannot rewind: %s\n", + strerror(errno)); + SetEndOfFile(hFile); + p_log->count = 0; +} +#endif /* ndef __WIN__ */ + +void osm_log(IN osm_log_t * p_log, IN osm_log_level_t verbosity, + IN const char *p_str, ...) 
+{ + char buffer[LOG_ENTRY_SIZE_MAX]; + va_list args; + int ret; +#ifdef __WIN__ + SYSTEMTIME st; + uint32_t pid = GetCurrentThreadId(); +#else + pid_t pid; + time_t tim; + struct tm result; + uint64_t time_usecs; + uint32_t usecs; +#endif /* __WIN__ */ + + /* If this is a call to syslog - always print it */ + if (!(verbosity & p_log->level)) + return; + + va_start(args, p_str); +#ifndef __WIN__ + if (p_log->log_prefix == NULL) + vsprintf(buffer, p_str, args); + else { + int n = snprintf(buffer, sizeof(buffer), "%s: ", p_log->log_prefix); + vsprintf(buffer + n, p_str, args); + } +#else + if (p_log->log_prefix == NULL) + _vsnprintf(buffer, 1024, (LPSTR)p_str, args); + else { + int n = snprintf(buffer, sizeof(buffer), "%s: ", p_log->log_prefix); + _vsnprintf(buffer + n, (1024 - n), (LPSTR)p_str, args); + } +#endif + va_end(args); + + /* this is a call to the syslog */ + if (verbosity & OSM_LOG_SYS) { + syslog(LOG_INFO, "%s\n", buffer); + + /* SYSLOG should go to stdout too */ + if (p_log->out_port != stdout) { + printf("%s\n", buffer); + fflush(stdout); + } +#ifdef __WIN__ + OsmReportState(buffer); +#endif /* __WIN__ */ + } + + /* regular log to default out_port */ + cl_spinlock_acquire(&p_log->lock); + + if (p_log->max_size && p_log->count > p_log->max_size) { + /* truncate here */ + fprintf(stderr, + "osm_log: log file exceeds the limit %lu. Truncating.\n", + p_log->max_size); + truncate_log_file(p_log); + } +#ifdef __WIN__ + GetLocalTime(&st); +_retry: + ret = + fprintf(p_log->out_port, + "[%02d:%02d:%02d:%03d][%04X] 0x%02x -> %s", + st.wHour, st.wMinute, st.wSecond, st.wMilliseconds, + pid, verbosity, buffer); +#else + time_usecs = cl_get_time_stamp(); + tim = time_usecs / 1000000; + usecs = time_usecs % 1000000; + localtime_r(&tim, &result); + pid = pthread_self(); +_retry: + ret = + fprintf(p_log->out_port, + "%s %02d %02d:%02d:%02d %06d [%04X] 0x%02x -> %s", + (result.tm_mon < + 12 ? 
month_str[result.tm_mon] : "???"), + result.tm_mday, result.tm_hour, result.tm_min, + result.tm_sec, usecs, pid, verbosity, buffer); +#endif + + /* flush log */ + if (ret > 0 && + (p_log->flush || (verbosity & (OSM_LOG_ERROR | OSM_LOG_SYS))) + && fflush(p_log->out_port) < 0) + ret = -1; + + if (ret >= 0) { + log_exit_count = 0; + p_log->count += ret; + } else if (log_exit_count < 3) { + log_exit_count++; + if (errno == ENOSPC && p_log->max_size) { + fprintf(stderr, + "osm_log: write failed: %s. Truncating log file.\n", + strerror(errno)); + truncate_log_file(p_log); + goto _retry; + } + fprintf(stderr, "osm_log: write failed: %s\n", strerror(errno)); + } + + cl_spinlock_release(&p_log->lock); +} + +void osm_log_v2(IN osm_log_t * p_log, IN osm_log_level_t verbosity, + IN const int file_id, IN const char *p_str, ...) +{ + char buffer[LOG_ENTRY_SIZE_MAX]; + va_list args; + int ret; +#ifdef __WIN__ + SYSTEMTIME st; + uint32_t pid = GetCurrentThreadId(); +#else + struct timeval tv; + pid_t pid = 0; + time_t tim; + struct tm result; + uint64_t time_usecs; + uint32_t usecs; +#endif /* __WIN__ */ + + /* If this is a call to syslog - always print it */ + if (!(verbosity & p_log->level)) { + if (!(verbosity & p_log->per_mod_log_tbl[file_id])) + return; + } + + va_start(args, p_str); +#ifndef __WIN__ + if (p_log->log_prefix == NULL) + vsprintf(buffer, p_str, args); + else { + int n = snprintf(buffer, sizeof(buffer), "%s: ", p_log->log_prefix); + vsprintf(buffer + n, p_str, args); + } +#else + if (p_log->log_prefix == NULL) + _vsnprintf(buffer, 1024, (LPSTR)p_str, args); + else { + int n = snprintf(buffer, sizeof(buffer), "%s: ", p_log->log_prefix); + _vsnprintf(buffer + n, (1024 - n), (LPSTR)p_str, args); + } +#endif + va_end(args); + + /* this is a call to the syslog */ + if (verbosity & OSM_LOG_SYS) { + syslog(LOG_INFO, "%s\n", buffer); + + /* SYSLOG should go to stdout too */ + if (p_log->out_port != stdout) { + printf("%s\n", buffer); + fflush(stdout); + } +#ifdef __WIN__ 
+ OsmReportState(buffer); +#endif /* __WIN__ */ + } + + /* regular log to default out_port */ + cl_spinlock_acquire(&p_log->lock); + + if (p_log->max_size && p_log->count > p_log->max_size) { + /* truncate here */ + fprintf(stderr, + "osm_log: log file exceeds the limit %lu. Truncating.\n", + p_log->max_size); + truncate_log_file(p_log); + } +#ifdef __WIN__ + GetLocalTime(&st); +_retry: + ret = + fprintf(p_log->out_port, + "[%02d:%02d:%02d:%03d][%04X] 0x%02x -> %s", + st.wHour, st.wMinute, st.wSecond, st.wMilliseconds, + pid, verbosity, buffer); +#else + gettimeofday(&tv, NULL); + /* Convert the time of day into a microsecond timestamp */ + time_usecs = ((uint64_t) tv.tv_sec * 1000000) + (uint64_t) tv.tv_usec; + tim = time_usecs / 1000000; + usecs = time_usecs % 1000000; + localtime_r(&tim, &result); + pid = pthread_self(); +_retry: + ret = + fprintf(p_log->out_port, + "%s %02d %02d:%02d:%02d %06d [%04X] 0x%02x -> %s", + (result.tm_mon < + 12 ? month_str[result.tm_mon] : "???"), + result.tm_mday, result.tm_hour, result.tm_min, + result.tm_sec, usecs, pid, verbosity, buffer); +#endif + + /* flush log */ + if (ret > 0 && + (p_log->flush || (verbosity & (OSM_LOG_ERROR | OSM_LOG_SYS))) + && fflush(p_log->out_port) < 0) + ret = -1; + + if (ret >= 0) { + log_exit_count = 0; + p_log->count += ret; + } else if (log_exit_count < 3) { + log_exit_count++; + if (errno == ENOSPC && p_log->max_size) { + fprintf(stderr, + "osm_log: write failed: %s. Truncating log file.\n", + strerror(errno)); + truncate_log_file(p_log); + goto _retry; + } + fprintf(stderr, "osm_log: write failed: %s\n", strerror(errno)); + } + + cl_spinlock_release(&p_log->lock); +} + +void osm_log_raw(IN osm_log_t * p_log, IN osm_log_level_t verbosity, + IN const char *p_buf) +{ + if (p_log->level & verbosity) { + cl_spinlock_acquire(&p_log->lock); + printf("%s", p_buf); + cl_spinlock_release(&p_log->lock); + + /* + Flush log on errors too. 
+ */ + if (p_log->flush || (verbosity & OSM_LOG_ERROR)) + fflush(stdout); + } +} + +void osm_log_msg_box(IN osm_log_t * log, osm_log_level_t level, + const char *func_name, const char *msg) +{ +#define MSG_BOX_LENGTH 66 + char buf[MSG_BOX_LENGTH + 1]; + int i, n; + + if (!osm_log_is_active(log, level)) + return; + + n = (MSG_BOX_LENGTH - strlen(msg)) / 2 - 1; + if (n < 0) + n = 0; + for (i = 0; i < n; i++) + sprintf(buf + i, "*"); + n += snprintf(buf + n, sizeof(buf) - n, " %s ", msg); + for (i = n; i < MSG_BOX_LENGTH; i++) + buf[i] = '*'; + buf[i] = '\0'; + + osm_log(log, level, "%s:\n\n\n" + "*********************************************" + "*********************\n%s\n" + "*********************************************" + "*********************\n\n\n", func_name, buf); +} + +void osm_log_msg_box_v2(IN osm_log_t * log, osm_log_level_t level, + const int file_id, const char *func_name, + const char *msg) +{ +#define MSG_BOX_LENGTH 66 + char buf[MSG_BOX_LENGTH + 1]; + int i, n; + + if (!osm_log_is_active_v2(log, level, file_id)) + return; + + n = (MSG_BOX_LENGTH - strlen(msg)) / 2 - 1; + if (n < 0) + n = 0; + for (i = 0; i < n; i++) + sprintf(buf + i, "*"); + n += snprintf(buf + n, sizeof(buf) - n, " %s ", msg); + for (i = n; i < MSG_BOX_LENGTH; i++) + buf[i] = '*'; + buf[i] = '\0'; + + osm_log_v2(log, level, file_id, "%s:\n\n\n" + "*********************************************" + "*********************\n%s\n" + "*********************************************" + "*********************\n\n\n", func_name, buf); +} + +boolean_t osm_is_debug(void) +{ +#if defined( _DEBUG_ ) + return TRUE; +#else + return FALSE; +#endif /* defined( _DEBUG_ ) */ +} + +static int open_out_port(IN osm_log_t * p_log) +{ + struct stat st; + + if (p_log->accum_log_file) + p_log->out_port = fopen(p_log->log_file_name, "a+"); + else + p_log->out_port = fopen(p_log->log_file_name, "w+"); + + if (!p_log->out_port) { + syslog(LOG_CRIT, "Cannot open file \'%s\' for %s: %s\n", + p_log->log_file_name, + 
p_log->accum_log_file ? "appending" : "writing", + strerror(errno)); + fprintf(stderr, "Cannot open file \'%s\': %s\n", + p_log->log_file_name, strerror(errno)); + return -1; + } + + if (fstat(fileno(p_log->out_port), &st) == 0) + p_log->count = st.st_size; + + syslog(LOG_NOTICE, "%s log file opened\n", p_log->log_file_name); + + if (p_log->daemon) { + dup2(fileno(p_log->out_port), 0); + dup2(fileno(p_log->out_port), 1); + dup2(fileno(p_log->out_port), 2); + } + + return 0; +} + +int osm_log_reopen_file(osm_log_t * p_log) +{ + int ret; + + if (p_log->out_port == stdout || p_log->out_port == stderr) + return 0; + cl_spinlock_acquire(&p_log->lock); + fclose(p_log->out_port); + ret = open_out_port(p_log); + cl_spinlock_release(&p_log->lock); + return ret; +} + +ib_api_status_t osm_log_init_v2(IN osm_log_t * p_log, IN boolean_t flush, + IN uint8_t log_flags, IN const char *log_file, + IN unsigned long max_size, + IN boolean_t accum_log_file) +{ + p_log->level = log_flags | OSM_LOG_SYS; + p_log->flush = flush; + p_log->count = 0; + p_log->max_size = max_size << 20; /* convert size in MB to bytes */ + p_log->accum_log_file = accum_log_file; + p_log->log_file_name = (char *)log_file; + memset(p_log->per_mod_log_tbl, 0, sizeof(p_log->per_mod_log_tbl)); + + openlog("OpenSM", LOG_CONS | LOG_PID, LOG_USER); + + if (log_file == NULL || !strcmp(log_file, "-") || + !strcmp(log_file, "stdout")) + p_log->out_port = stdout; + else if (!strcmp(log_file, "stderr")) + p_log->out_port = stderr; + else if (open_out_port(p_log)) + return IB_ERROR; + + if (cl_spinlock_init(&p_log->lock) == CL_SUCCESS) + return IB_SUCCESS; + else + return IB_ERROR; +} + +ib_api_status_t osm_log_init(IN osm_log_t * p_log, IN boolean_t flush, + IN uint8_t log_flags, IN const char *log_file, + IN boolean_t accum_log_file) +{ + return osm_log_init_v2(p_log, flush, log_flags, log_file, 0, + accum_log_file); +} + +osm_log_level_t osm_get_log_per_module(IN osm_log_t * p_log, + IN const int file_id) +{ + return 
p_log->per_mod_log_tbl[file_id]; +} + +void osm_set_log_per_module(IN osm_log_t * p_log, IN const int file_id, + IN osm_log_level_t level) +{ + p_log->per_mod_log_tbl[file_id] = level; +} + +void osm_reset_log_per_module(IN osm_log_t * p_log) +{ + memset(p_log->per_mod_log_tbl, 0, sizeof(p_log->per_mod_log_tbl)); +} diff --git a/libvendor/ChangeLog b/libvendor/ChangeLog new file mode 100644 index 0000000..c9648ec --- /dev/null +++ b/libvendor/ChangeLog @@ -0,0 +1,64 @@ +2007-07-11 Hal Rosenstock + + * configure.in: Bump version to 2.2.1 + +2007-07-10 Sean Hefty + + * osm_vendor_ibumad.c: Use pkey index, rather than pkey + on umad_set_pkey call. Using index 0 for now. + +2007-05-07 Hal Rosenstock + + * osm_vendor_ibumad.(h c): Remove support for issmdisabled + +2007-03-29 Hal Rosenstock + + * configure.in: Bump version to 2.2.0 + +2007-03-27 Hal Rosenstock + + * osm_vendor_ibumad.(h c): Add support for issmdisabled + +2007-03-13 Hal Rosenstock + + * osm_vendor_ibumad.c: In osm_vendor_set_sm, set issmfd to + -1 on open error + +2007-03-12 Hal Rosenstock + + * osm_vendor_ibumad.c: In umad_receiver, display DR path of + sent MAD when it times out. In osm_vendor_send, simplify redundant + code. Cosmetic change to osm_log message in osm_vendor_bind. + +2007-02-20 Hal Rosenstock + + * configure.in: Bump version to 2.1.1 + +2007-02-20 Sasha Khapyorsky + + * osm_vendor_ibumad.(h c): Fix termination crash associated + with umad_receiver thread termination. + + * osm_vendor_mlx_sa.c, osm_vendor_mlx_sim.c: Changes for + compilation failures detected during ibutils/ibmgtsim building + +2007=01-10 Sasha Khapyorsky + + * osm_vendor_ibumad.c: Close umad port in + osm_vendor_delete so same process can reinitialize + and resuse vendor layer. + +2006-10-12 Hal Rosenstock + + * osm_vendor_ibumad.c (umad_receiver): Fix endian of LID + displayed in send timeout error message. + +2006-10-10 Hal Rosenstock + + * osm_vendor_ibumad.c: Print errors to stderr rather than + stdout. 
+ +2006-09-28 Eitan Zahavi + + * osm_vendor_mlx_sa.c: Missing status on timeout SA query. + diff --git a/libvendor/Makefile.am b/libvendor/Makefile.am new file mode 100644 index 0000000..f39545e --- /dev/null +++ b/libvendor/Makefile.am @@ -0,0 +1,90 @@ + +SUBDIRS = . + +if DEBUG +DBGFLAGS = -ggdb -D_DEBUG_ +else +DBGFLAGS = -g +endif + +AM_CPPFLAGS = $(OSMV_INCLUDES) + +lib_LTLIBRARIES = libosmvendor.la + +libosmvendor_la_CFLAGS = -Wall -Wwrite-strings $(DBGFLAGS) + +if HAVE_LD_VERSION_SCRIPT + libosmvendor_version_script = -Wl,--version-script=$(srcdir)/libosmvendor.map +else + libosmvendor_version_script = +endif + +osmvendor_api_version=$(shell grep LIBVERSION= $(srcdir)/libosmvendor.ver | sed 's/LIBVERSION=//') + +COMM_HDRS= $(srcdir)/../include/vendor/osm_vendor_api.h \ + $(srcdir)/../include/vendor/osm_vendor.h \ + $(srcdir)/../include/vendor/osm_vendor_sa_api.h + +if OSMV_OPENIB +libosmvendor_la_SOURCES = osm_vendor_ibumad.c \ + osm_vendor_ibumad_sa.c \ + osm_mad_pool.c +HDRS =$(COMM_HDRS) $(srcdir)/../include/vendor/osm_vendor_ibumad.h +endif +if OSMV_SIM +libosmvendor_la_SOURCES = osm_vendor_mlx.c \ + osm_vendor_mlx_sim.c \ + osm_vendor_mlx_hca_sim.c \ + osm_vendor_mlx_dispatcher.c \ + osm_vendor_mlx_rmpp_ctx.c \ + osm_vendor_mlx_sar.c \ + osm_vendor_mlx_sender.c \ + osm_vendor_mlx_txn.c \ + osm_vendor_mlx_sa.c \ + osm_pkt_randomizer.c \ + osm_mad_pool.c +HDRS =$(COMM_HDRS) $(srcdir)/../include/vendor/osm_vendor_mlx.h \ + $(srcdir)/../include/vendor/osm_pkt_randomizer.h +endif +if OSMV_GEN1 +libosmvendor_la_SOURCES = osm_vendor_mlx.c \ + osm_pkt_randomizer.c \ + osm_vendor_mlx_hca.c \ + osm_vendor_mlx_dispatcher.c \ + osm_vendor_mlx_rmpp_ctx.c \ + osm_vendor_mlx_sar.c \ + osm_vendor_mlx_sender.c \ + osm_vendor_mlx_ts.c \ + osm_vendor_mlx_txn.c \ + osm_vendor_mlx_sa.c \ + osm_mad_pool.c +HDRS =$(COMM_HDRS) $(srcdir)/../include/vendor/osm_vendor_mlx.h \ + $(srcdir)/../include/vendor/osm_pkt_randomizer.h +endif +if OSMV_VAPI +libosmvendor_la_SOURCES = 
osm_vendor_mlx.c \ + osm_pkt_randomizer.c \ + osm_vendor_mlx_hca.c \ + osm_vendor_mlx_dispatcher.c \ + osm_vendor_mlx_rmpp_ctx.c \ + osm_vendor_mlx_sar.c \ + osm_vendor_mlx_sender.c \ + osm_vendor_mlx_ibmgt.c \ + osm_vendor_mlx_txn.c \ + osm_vendor_mlx_sa.c \ + osm_mad_pool.c +HDRS =$(COMM_HDRS) $(srcdir)/../include/vendor/osm_vendor_mlx.h \ + $(srcdir)/../include/vendor/osm_pkt_randomizer.h +endif + +libosmvendor_la_LIBADD = -L../complib -losmcomp -L../libopensm -lopensm +libosmvendor_la_LDFLAGS = -version-info $(osmvendor_api_version) \ + -export-dynamic $(libosmvendor_version_script) +libosmvendor_la_DEPENDENCIES = $(srcdir)/libosmvendor.map + +libosmvendorincludedir = $(includedir)/infiniband/vendor + +libosmvendorinclude_HEADERS = $(HDRS) + +# headers are distributed as part of the include dir +EXTRA_DIST = $(srcdir)/libosmvendor.map $(srcdir)/libosmvendor.ver diff --git a/libvendor/libosmvendor.map b/libvendor/libosmvendor.map new file mode 100644 index 0000000..f8b70af --- /dev/null +++ b/libvendor/libosmvendor.map @@ -0,0 +1,27 @@ +OSMVENDOR_2.0 { + global: + umad_receiver; + osm_vendor_init; + osm_vendor_new; + osm_vendor_delete; + osm_vendor_get_all_port_attr; + osm_vendor_bind; + osm_vendor_unbind; + osm_vendor_get; + osm_vendor_put; + osm_vendor_send; + osm_vendor_local_lid_change; + osm_vendor_set_sm; + osm_vendor_set_debug; + osmv_bind_sa; + osmv_query_sa; + osm_vendor_get_guid_ca_and_port; + osm_mad_pool_construct; + osm_mad_pool_destroy; + osm_mad_pool_init; + osm_mad_pool_get; + osm_mad_pool_put; + osm_mad_pool_get_wrapper; + osm_mad_pool_get_wrapper_raw; + local: *; +}; diff --git a/libvendor/libosmvendor.ver b/libvendor/libosmvendor.ver new file mode 100644 index 0000000..0de127b --- /dev/null +++ b/libvendor/libosmvendor.ver @@ -0,0 +1,9 @@ +# In this file we track the current API version +# of the vendor interface (and libraries) +# The version is built of the following +# tree numbers: +# API_REV:RUNNING_REV:AGE +# API_REV - advance on any 
added API +# RUNNING_REV - advance any change to the vendor files +# AGE - number of backward versions the API still supports +LIBVERSION=5:0:0 diff --git a/libvendor/osm_mad_pool.c b/libvendor/osm_mad_pool.c new file mode 100644 index 0000000..5e15944 --- /dev/null +++ b/libvendor/osm_mad_pool.c @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_mad_pool_t. + * This object represents a pool of management datagram (MAD) objects. 
 *    This object is part of the opensm family of objects.
 */

/*
 * NOTE(review): the header names of the #include directives below are
 * missing from this copy of the file; restore them from the upstream
 * source before building.
 */
#if HAVE_CONFIG_H
#  include
#endif				/* HAVE_CONFIG_H */

#include
#include
#include
#define FILE_ID OSM_FILE_MAD_POOL_C
#include
#include
#include

/*
 * Puts the pool object into a known state by zero-filling it.
 */
void osm_mad_pool_construct(IN osm_mad_pool_t * p_pool)
{
	CL_ASSERT(p_pool);

	memset(p_pool, 0, sizeof(*p_pool));
}

/*
 * Nothing to release: this implementation keeps no storage inside the
 * pool object, it only validates the pointer.
 */
void osm_mad_pool_destroy(IN osm_mad_pool_t * p_pool)
{
	CL_ASSERT(p_pool);
}

/*
 * Resets the count of outstanding MADs. Always returns IB_SUCCESS.
 */
ib_api_status_t osm_mad_pool_init(IN osm_mad_pool_t * p_pool)
{
	p_pool->mads_out = 0;

	return IB_SUCCESS;
}

/*
 * Allocates a MAD wrapper together with a wire MAD of total_size bytes
 * obtained from the vendor layer through h_bind.
 *
 * Returns the initialized wrapper, or NULL when either the wrapper or
 * the wire MAD cannot be obtained. The pool's mads_out counter is only
 * incremented on success.
 */
osm_madw_t *osm_mad_pool_get(IN osm_mad_pool_t * p_pool,
			     IN osm_bind_handle_t h_bind,
			     IN uint32_t total_size,
			     IN const osm_mad_addr_t * p_mad_addr)
{
	osm_madw_t *p_madw;
	ib_mad_t *p_mad;

	CL_ASSERT(h_bind != OSM_BIND_INVALID_HANDLE);
	CL_ASSERT(total_size);

	/*
	   First, allocate a mad wrapper (plain heap allocation).
	 */
	p_madw = malloc(sizeof(*p_madw));
	if (p_madw == NULL)
		goto Exit;

	osm_madw_init(p_madw, h_bind, total_size, p_mad_addr);

	/*
	   Next, acquire a wire mad of the specified size.
	 */
	p_mad = osm_vendor_get(h_bind, total_size, &p_madw->vend_wrap);
	if (p_mad == NULL) {
		/* Don't leak wrappers! */
		free(p_madw);
		p_madw = NULL;
		goto Exit;
	}

	cl_atomic_inc(&p_pool->mads_out);
	/*
	   Finally, attach the wire MAD to this wrapper.
	 */
	osm_madw_set_mad(p_madw, p_mad);

Exit:
	return p_madw;
}

/*
 * Allocates a MAD wrapper around a wire MAD that the caller already
 * owns (p_mad). No wire MAD is requested from the vendor layer.
 *
 * Returns the wrapper, or NULL if the allocation fails.
 */
osm_madw_t *osm_mad_pool_get_wrapper(IN osm_mad_pool_t * p_pool,
				     IN osm_bind_handle_t h_bind,
				     IN uint32_t total_size,
				     IN const ib_mad_t * p_mad,
				     IN const osm_mad_addr_t * p_mad_addr)
{
	osm_madw_t *p_madw;

	CL_ASSERT(h_bind != OSM_BIND_INVALID_HANDLE);
	CL_ASSERT(total_size);
	CL_ASSERT(p_mad);

	/*
	   First, allocate a mad wrapper (plain heap allocation).
	 */
	p_madw = malloc(sizeof(*p_madw));
	if (p_madw == NULL)
		goto Exit;

	/*
	   Finally, initialize the wrapper object.
	 */
	cl_atomic_inc(&p_pool->mads_out);
	osm_madw_init(p_madw, h_bind, total_size, p_mad_addr);
	osm_madw_set_mad(p_madw, p_mad);

Exit:
	return p_madw;
}

/*
 * Allocates a "raw" wrapper: no bind handle, zero size, no address and
 * no wire MAD attached. Returns NULL if the allocation fails.
 */
osm_madw_t *osm_mad_pool_get_wrapper_raw(IN osm_mad_pool_t * p_pool)
{
	osm_madw_t *p_madw;

	p_madw = malloc(sizeof(*p_madw));
	if (!p_madw)
		return NULL;

	osm_madw_init(p_madw, NULL, 0, NULL);
	osm_madw_set_mad(p_madw, NULL);
	cl_atomic_inc(&p_pool->mads_out);

	return p_madw;
}

/*
 * Releases a wrapper obtained from one of the osm_mad_pool_get* calls.
 * If a wire MAD is attached it is handed back to the vendor layer
 * first; then the wrapper is freed and mads_out is decremented.
 */
void osm_mad_pool_put(IN osm_mad_pool_t * p_pool, IN osm_madw_t * p_madw)
{
	CL_ASSERT(p_madw);

	/*
	   First, return the wire mad to the vendor layer
	 */
	if (p_madw->p_mad)
		osm_vendor_put(p_madw->h_bind, &p_madw->vend_wrap);

	/*
	   Release the mad wrapper itself
	 */
	free(p_madw);
	cl_atomic_dec(&p_pool->mads_out);
}
diff --git a/libvendor/osm_pkt_randomizer.c b/libvendor/osm_pkt_randomizer.c
new file mode 100644
index 0000000..5ea1f4a
--- /dev/null
+++ b/libvendor/osm_pkt_randomizer.c
@@ -0,0 +1,322 @@
/*
 * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_pkt_randomizer_t. + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include + +#ifndef __WIN__ +#include +#include +#endif + +/********************************************************************** + * Return TRUE if the path is in a fault path, and FALSE otherwise. + * By in a fault path the meaning is that there is a path in the fault + * paths that the given path includes it. + * E.g: if there is a fault path: 0,1,4 + * For the given path: 0,1,4,7 the return value will be TRUE, also for + * the given path: 0,1,4 the return value will be TRUE, but for + * the given paths: 0,1 or 0,3,1,4 - the return value will be FALSE. 
+ **********************************************************************/ +boolean_t +__osm_pkt_randomizer_is_path_in_fault_paths(IN osm_log_t * p_log, + IN osm_dr_path_t * p_dr_path, + IN osm_pkt_randomizer_t * + p_pkt_rand) +{ + boolean_t res = FALSE, found_path; + osm_dr_path_t *p_found_dr_path; + uint8_t ind1, ind2; + + OSM_LOG_ENTER(p_log); + + for (ind1 = 0; ind1 < p_pkt_rand->num_paths_initialized; ind1++) { + found_path = TRUE; + p_found_dr_path = &(p_pkt_rand->fault_dr_paths[ind1]); + /* if the hop count of the found path is greater than the + hop count of the input path - then it is not part of it. + Check the next path. */ + if (p_found_dr_path->hop_count > p_dr_path->hop_count) + continue; + + /* go over all the ports in the found path and see if they match + the ports in the input path */ + for (ind2 = 0; ind2 <= p_found_dr_path->hop_count; ind2++) + if (p_found_dr_path->path[ind2] != + p_dr_path->path[ind2]) + found_path = FALSE; + + /* If found_path is TRUE then there is a full match of the path */ + if (found_path == TRUE) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Given path is in a fault path\n"); + res = TRUE; + break; + } + } + + OSM_LOG_EXIT(p_log); + return res; +} + +/********************************************************************** + * For a given dr_path - return TRUE if the path should be dropped, + * return FALSE otherwise. + * The check uses random criteria in order to determine whether or not + * the path should be dropped. + * First - if not all paths are initialized, it randomally chooses if + * to use this path as a fault path or not. + * Second - if the path is in the fault paths (meaning - it is equal + * to or includes one of the fault paths) - then it randomally chooses + * if to drop it or not. 
+ **********************************************************************/ +boolean_t +__osm_pkt_randomizer_process_path(IN osm_log_t * p_log, + IN osm_pkt_randomizer_t * p_pkt_rand, + IN osm_dr_path_t * p_dr_path) +{ + boolean_t res = FALSE; + static boolean_t rand_value_init = FALSE; + static int rand_value; + boolean_t in_fault_paths; + uint8_t i; + char buf[BUF_SIZE]; + char line[BUF_SIZE]; + + OSM_LOG_ENTER(p_log); + + if (rand_value_init == FALSE) { + int seed; +#ifdef __WIN__ + SYSTEMTIME st; +#else + struct timeval tv; + struct timezone tz; +#endif /* __WIN__ */ + + /* initiate the rand_value according to timeofday */ + rand_value_init = TRUE; + +#ifdef __WIN__ + GetLocalTime(&st); + seed = st.wMilliseconds; +#else + gettimeofday(&tv, &tz); + seed = tv.tv_usec; +#endif /* __WIN__ */ + + srand(seed); + } + + /* If the hop_count is 1 - then this is a mad down to our local port - don't drop it */ + if (p_dr_path->hop_count <= 1) + goto Exit; + + rand_value = rand(); + + sprintf(buf, "Path: "); + /* update the dr_path into the buf */ + for (i = 0; i <= p_dr_path->hop_count; i++) { + sprintf(line, "[%X]", p_dr_path->path[i]); + strcat(buf, line); + } + + /* Check if the path given is in one of the fault paths */ + in_fault_paths = + __osm_pkt_randomizer_is_path_in_fault_paths(p_log, p_dr_path, + p_pkt_rand); + + /* Check if all paths are initialized */ + if (p_pkt_rand->num_paths_initialized < + p_pkt_rand->osm_pkt_num_unstable_links) { + /* Not all packets are initialized. */ + if (in_fault_paths == FALSE) { + /* the path is not in the false paths. Check using the rand value + if to update it there or not. 
*/ + if (rand_value % + (p_pkt_rand->osm_pkt_unstable_link_rate) == 0) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "%s added to the fault_dr_paths list\n" + "\t\t\t rand_value:%u, unstable_link_rate:%u \n", + buf, rand_value, + p_pkt_rand->osm_pkt_unstable_link_rate); + + /* update the path in the fault paths */ + memcpy(& + (p_pkt_rand-> + fault_dr_paths[p_pkt_rand-> + num_paths_initialized]), + p_dr_path, sizeof(osm_dr_path_t)); + p_pkt_rand->num_paths_initialized++; + in_fault_paths = TRUE; + } + } + } + + if (in_fault_paths == FALSE) { + /* If in_fault_paths is FALSE - just ignore the path */ + OSM_LOG(p_log, OSM_LOG_VERBOSE, "%s not in fault paths\n", buf); + goto Exit; + } + + /* The path is in the fault paths. Need to choose (randomally if to drop it + or not. */ + rand_value = rand(); + + if (rand_value % (p_pkt_rand->osm_pkt_drop_rate) == 0) { + /* drop the current packet */ + res = TRUE; + OSM_LOG(p_log, OSM_LOG_VERBOSE, "Dropping path:%s\n", buf); + } + +Exit: + OSM_LOG_EXIT(p_log); + return res; +} + +boolean_t +osm_pkt_randomizer_mad_drop(IN osm_log_t * p_log, + IN osm_pkt_randomizer_t * p_pkt_randomizer, + IN const ib_mad_t * p_mad) +{ + const ib_smp_t *p_smp; + boolean_t res = FALSE; + osm_dr_path_t dr_path; + + OSM_LOG_ENTER(p_log); + + p_smp = (ib_smp_t *) p_mad; + + if (p_smp->mgmt_class != IB_MCLASS_SUBN_DIR) + /* This is a lid route mad. 
Don't drop it */ + goto Exit; + + osm_dr_path_init(&dr_path, p_smp->hop_count, p_smp->initial_path); + + if (__osm_pkt_randomizer_process_path + (p_log, p_pkt_randomizer, &dr_path)) { + /* the mad should be dropped o */ + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "mad TID: 0x%" PRIx64 " is being dropped\n", + cl_ntoh64(p_smp->trans_id)); + res = TRUE; + } + +Exit: + OSM_LOG_EXIT(p_log); + return res; +} + +ib_api_status_t +osm_pkt_randomizer_init(IN OUT osm_pkt_randomizer_t ** pp_pkt_randomizer, + IN osm_log_t * p_log) +{ + uint8_t tmp; + ib_api_status_t res = IB_SUCCESS; + + OSM_LOG_ENTER(p_log); + + *pp_pkt_randomizer = malloc(sizeof(osm_pkt_randomizer_t)); + if (*pp_pkt_randomizer == NULL) { + res = IB_INSUFFICIENT_MEMORY; + goto Exit; + } + memset(*pp_pkt_randomizer, 0, sizeof(osm_pkt_randomizer_t)); + (*pp_pkt_randomizer)->num_paths_initialized = 0; + + tmp = atol(getenv("OSM_PKT_DROP_RATE")); + (*pp_pkt_randomizer)->osm_pkt_drop_rate = tmp; + + if (getenv("OSM_PKT_NUM_UNSTABLE_LINKS") != NULL + && (tmp = atol(getenv("OSM_PKT_NUM_UNSTABLE_LINKS"))) > 0) + (*pp_pkt_randomizer)->osm_pkt_num_unstable_links = tmp; + else + (*pp_pkt_randomizer)->osm_pkt_num_unstable_links = 1; + + if (getenv("OSM_PKT_UNSTABLE_LINK_RATE") != NULL + && (tmp = atol(getenv("OSM_PKT_UNSTABLE_LINK_RATE"))) > 0) + (*pp_pkt_randomizer)->osm_pkt_unstable_link_rate = tmp; + else + (*pp_pkt_randomizer)->osm_pkt_unstable_link_rate = 20; + + OSM_LOG(p_log, OSM_LOG_VERBOSE, "Using OSM_PKT_DROP_RATE=%u \n" + "\t\t\t\t OSM_PKT_NUM_UNSTABLE_LINKS=%u \n" + "\t\t\t\t OSM_PKT_UNSTABLE_LINK_RATE=%u \n", + (*pp_pkt_randomizer)->osm_pkt_drop_rate, + (*pp_pkt_randomizer)->osm_pkt_num_unstable_links, + (*pp_pkt_randomizer)->osm_pkt_unstable_link_rate); + + /* allocate the fault_dr_paths variable */ + /* It is the number of the paths that will be saved as fault = osm_pkt_num_unstable_links */ + (*pp_pkt_randomizer)->fault_dr_paths = malloc(sizeof(osm_dr_path_t) * + (*pp_pkt_randomizer)-> + 
osm_pkt_num_unstable_links); + if ((*pp_pkt_randomizer)->fault_dr_paths == NULL) { + res = IB_INSUFFICIENT_MEMORY; + goto Exit; + } + + memset((*pp_pkt_randomizer)->fault_dr_paths, 0, + sizeof(osm_dr_path_t) * + (*pp_pkt_randomizer)->osm_pkt_num_unstable_links); + +Exit: + OSM_LOG_EXIT(p_log); + return (res); +} + +void +osm_pkt_randomizer_destroy(IN OUT osm_pkt_randomizer_t ** pp_pkt_randomizer, + IN osm_log_t * p_log) +{ + OSM_LOG_ENTER(p_log); + + if (*pp_pkt_randomizer != NULL) { + free((*pp_pkt_randomizer)->fault_dr_paths); + free(*pp_pkt_randomizer); + } + OSM_LOG_EXIT(p_log); +} diff --git a/libvendor/osm_vendor_al.c b/libvendor/osm_vendor_al.c new file mode 100644 index 0000000..a0c24df --- /dev/null +++ b/libvendor/osm_vendor_al.c @@ -0,0 +1,1270 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

/*
 * Abstract:
 *    Implementation of osm_req_t.
 *    This object represents the generic attribute requester.
 *    This object is part of the opensm family of objects.
 *
 * NOTE(review): this abstract looks copied from another file; the code
 * below implements the vendor layer on top of the AL interface - confirm
 * and reword.
 */

/*
  Next available error code: 0x300
*/

/*
 * NOTE(review): the header names of the #include directives below are
 * missing from this copy of the file; restore them from the upstream
 * source before building.
 */
#if HAVE_CONFIG_H
#  include
#endif				/* HAVE_CONFIG_H */

#ifdef OSM_VENDOR_INTF_AL

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/****s* OpenSM: Vendor AL/osm_al_bind_info_t
 * NAME
 *   osm_al_bind_info_t
 *
 * DESCRIPTION
 *    Structure containing bind information.
 *
 * SYNOPSIS
 */
typedef struct _osm_al_bind_info {
	osm_vendor_t *p_vend;
	void *client_context;
	ib_qp_handle_t h_qp;
	ib_mad_svc_handle_t h_svc;
	uint8_t port_num;
	ib_pool_key_t pool_key;
	osm_vend_mad_recv_callback_t rcv_callback;
	osm_vend_mad_send_err_callback_t send_err_callback;
	osm_mad_pool_t *p_osm_pool;
	ib_av_handle_t h_dr_av;

} osm_al_bind_info_t;
/*
 * FIELDS
 *	p_vend
 *		Pointer to the vendor object.
 *
 *	client_context
 *		User's context passed during osm_bind
 *
 *	h_qp
 *		Handle the QP for this bind.
 *
 *	h_svc
 *		Handle the QP mad service for this bind.
 *
 *	port_num
 *		Port number (within the HCA) of the bound port.
 *
 *	pool_key
 *		Pool key returned by AL for this QP.
 *
 *	rcv_callback
 *		User callback invoked for every received MAD.
 *
 *	send_err_callback
 *		User callback invoked when a send that expected a
 *		response completes with an error.
 *
 *	p_osm_pool
 *		MAD pool used to obtain and release MAD wrappers.
 *
 *	h_dr_av
 *		Address vector handle used for all directed route SMPs.
+ * + * SEE ALSO + *********/ + +inline static ib_api_status_t +__osm_al_convert_wcs(IN ib_wc_status_t const wc_status) +{ + switch (wc_status) { + case IB_WCS_SUCCESS: + return (IB_SUCCESS); + + case IB_WCS_TIMEOUT_RETRY_ERR: + return (IB_TIMEOUT); + + default: + return (IB_ERROR); + } +} + +static void __osm_al_ca_err_callback(IN ib_async_event_rec_t * p_async_rec) +{ + osm_vendor_t *p_vend = (osm_vendor_t *) p_async_rec->context; + OSM_LOG_ENTER(p_vend->p_log); + + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_al_ca_err_callback: ERR 3B01: " + "Event on channel adapter (%s).\n", + ib_get_async_event_str(p_async_rec->code)); + + OSM_LOG_EXIT(p_vend->p_log); +} + +static void __osm_al_ca_destroy_callback(IN void *context) +{ + osm_al_bind_info_t *p_bind = (osm_al_bind_info_t *) context; + osm_vendor_t *p_vend = p_bind->p_vend; + OSM_LOG_ENTER(p_vend->p_log); + + osm_log(p_vend->p_log, OSM_LOG_INFO, + "__osm_al_ca_destroy_callback: " + "Closing local channel adapter.\n"); + + OSM_LOG_EXIT(p_vend->p_log); +} + +static void __osm_al_err_callback(IN ib_async_event_rec_t * p_async_rec) +{ + osm_al_bind_info_t *p_bind = + (osm_al_bind_info_t *) p_async_rec->context; + osm_vendor_t *p_vend = p_bind->p_vend; + OSM_LOG_ENTER(p_vend->p_log); + + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_al_err_callback: ERR 3B02: " + "Error on QP (%s).\n", + ib_get_async_event_str(p_async_rec->code)); + + OSM_LOG_EXIT(p_vend->p_log); +} + +static void +__osm_al_send_callback(IN void *mad_svc_context, IN ib_mad_element_t * p_elem) +{ + osm_al_bind_info_t *const p_bind = + (osm_al_bind_info_t *) mad_svc_context; + osm_vendor_t *const p_vend = p_bind->p_vend; + osm_madw_t *const p_madw = (osm_madw_t *) p_elem->context1; + osm_vend_wrap_t *const p_vw = osm_madw_get_vend_ptr(p_madw); + ib_mad_t *p_mad; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vw); + CL_ASSERT(p_vw->h_av); + + /* + Destroy the address vector as necessary. 
+ */ + if (p_vw->h_av != p_bind->h_dr_av) { + if (osm_log_is_active(p_vend->p_log, OSM_LOG_DEBUG)) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__osm_al_send_callback: " + "Destroying av handle %p.\n", p_vw->h_av); + } + + ib_destroy_av(p_vw->h_av); + } + + p_mad = ib_get_mad_buf(p_elem); + + if (p_elem->resp_expected) { + /* + If the send was unsuccessful, notify the user + for MADs that were expecting a response. + A NULL mad wrapper parameter is the user's clue + that the transaction turned sour. + + Otherwise, do nothing for successful sends when a + reponse is expected. The mad will be returned to the + pool later. + */ + p_madw->status = __osm_al_convert_wcs(p_elem->status); + if (p_elem->status != IB_WCS_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__osm_al_send_callback: " + "MAD completed with work queue error: %s.\n", + ib_get_wc_status_str(p_elem->status)); + /* + Return any wrappers to the pool that may have been + pre-emptively allocated to handle a receive. + */ + if (p_vw->p_resp_madw) { + osm_mad_pool_put(p_bind->p_osm_pool, + p_vw->p_resp_madw); + p_vw->p_resp_madw = NULL; + } + + p_bind->send_err_callback(p_bind->client_context, + p_madw); + } + } else { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__osm_al_send_callback: " + "Returning MAD to pool, TID = 0x%" PRIx64 ".\n", + cl_ntoh64(p_mad->trans_id)); + osm_mad_pool_put(p_bind->p_osm_pool, p_madw); + goto Exit; + } + +Exit: + OSM_LOG_EXIT(p_vend->p_log); +} + +static void +__osm_al_rcv_callback(IN void *mad_svc_context, IN ib_mad_element_t * p_elem) +{ + osm_al_bind_info_t *const p_bind = + (osm_al_bind_info_t *) mad_svc_context; + osm_vendor_t *const p_vend = p_bind->p_vend; + osm_madw_t *p_old_madw; + osm_madw_t *p_new_madw; + osm_vend_wrap_t *p_old_vw; + osm_vend_wrap_t *p_new_vw; + ib_mad_t *p_new_mad; + osm_mad_addr_t mad_addr; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_elem->context1 == NULL); + CL_ASSERT(p_elem->context2 == NULL); + + p_new_mad = ib_get_mad_buf(p_elem); + + 
/* + In preperation for initializing the new mad wrapper, + Initialize the mad_addr structure for the received wire MAD. + */ + mad_addr.dest_lid = p_elem->remote_lid; + mad_addr.path_bits = p_elem->path_bits; + + /* TO DO - figure out which #define to use for the 2.5 Gb rate... */ + mad_addr.static_rate = 0; + + if (p_new_mad->mgmt_class == IB_MCLASS_SUBN_LID || + p_new_mad->mgmt_class == IB_MCLASS_SUBN_DIR) { + mad_addr.addr_type.smi.source_lid = p_elem->remote_lid; + } else { + mad_addr.addr_type.gsi.remote_qp = p_elem->remote_qp; + mad_addr.addr_type.gsi.remote_qkey = p_elem->remote_qkey; + mad_addr.addr_type.gsi.pkey_ix = p_elem->pkey_index; + mad_addr.addr_type.gsi.service_level = p_elem->remote_sl; + mad_addr.addr_type.gsi.global_route = FALSE; + } + + /* + If this MAD is a response to a previous request, + then grab our pre-allocated MAD wrapper. + Otherwise, allocate a new MAD wrapper. + */ + if (ib_mad_is_response(p_new_mad)) { + CL_ASSERT(p_elem->send_context1 != NULL); + CL_ASSERT(p_elem->send_context2 == NULL); + + p_old_madw = (osm_madw_t *) p_elem->send_context1; + p_old_vw = osm_madw_get_vend_ptr(p_old_madw); + p_new_madw = p_old_vw->p_resp_madw; + + CL_ASSERT(p_new_madw); + + osm_madw_init(p_new_madw, p_bind, p_elem->size, &mad_addr); + osm_madw_set_mad(p_new_madw, p_new_mad); + } else { + CL_ASSERT(p_elem->send_context1 == NULL); + CL_ASSERT(p_elem->send_context2 == NULL); + + p_new_madw = osm_mad_pool_get_wrapper(p_bind->p_osm_pool, + p_bind, p_elem->size, + p_new_mad, &mad_addr); + } + + CL_ASSERT(p_new_madw); + p_new_vw = osm_madw_get_vend_ptr(p_new_madw); + + p_new_vw->h_bind = p_bind; + p_new_vw->size = p_elem->size; + p_new_vw->p_elem = p_elem; + p_new_vw->h_av = 0; + p_new_vw->p_resp_madw = NULL; + + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__osm_al_rcv_callback: " + "Calling receive callback function %p.\n", + p_bind->rcv_callback); + + p_bind->rcv_callback(p_new_madw, p_bind->client_context, + p_elem->send_context1); + + 
OSM_LOG_EXIT(p_vend->p_log); +} + +ib_api_status_t +osm_vendor_init(IN osm_vendor_t * const p_vend, + IN osm_log_t * const p_log, IN const uint32_t timeout) +{ + ib_api_status_t status; + OSM_LOG_ENTER(p_log); + + p_vend->p_log = p_log; + + /* + Open our instance of AL. + */ + status = ib_open_al(&p_vend->h_al); + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_init: ERR 3B03: " + "Error opening AL (%s).\n", ib_get_err_str(status)); + + goto Exit; + } + + p_vend->timeout = timeout; + +Exit: + OSM_LOG_EXIT(p_log); + return (status); +} + +osm_vendor_t *osm_vendor_new(IN osm_log_t * const p_log, + IN const uint32_t timeout) +{ + ib_api_status_t status; + osm_vendor_t *p_vend; + + OSM_LOG_ENTER(p_log); + + p_vend = malloc(sizeof(*p_vend)); + if (p_vend == NULL) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_new: ERR 3B04: " + "Unable to allocate vendor object.\n"); + goto Exit; + } + + memset(p_vend, 0, sizeof(*p_vend)); + + status = osm_vendor_init(p_vend, p_log, timeout); + if (status != IB_SUCCESS) { + free(p_vend); + p_vend = NULL; + } + +Exit: + OSM_LOG_EXIT(p_log); + return (p_vend); +} + +void osm_vendor_delete(IN osm_vendor_t ** const pp_vend) +{ + /* TO DO - fill this in */ + ib_close_al((*pp_vend)->h_al); + free(*pp_vend); + *pp_vend = NULL; +} + +static ib_api_status_t +__osm_ca_info_init(IN osm_vendor_t * const p_vend, + IN osm_ca_info_t * const p_ca_info, + IN const ib_net64_t ca_guid) +{ + ib_api_status_t status; + + OSM_LOG_ENTER(p_vend->p_log); + + p_ca_info->guid = ca_guid; + + if (osm_log_is_active(p_vend->p_log, OSM_LOG_VERBOSE)) { + osm_log(p_vend->p_log, OSM_LOG_VERBOSE, + "__osm_ca_info_init: " + "Querying CA 0x%" PRIx64 ".\n", cl_ntoh64(ca_guid)); + } + + status = ib_query_ca_by_guid(p_vend->h_al, ca_guid, NULL, + &p_ca_info->attr_size); + if ((status != IB_INSUFFICIENT_MEMORY) && (status != IB_SUCCESS)) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: ERR 3B05: " + "Unexpected status 
getting CA attributes (%s).\n", + ib_get_err_str(status)); + goto Exit; + } + + CL_ASSERT(p_ca_info->attr_size); + + p_ca_info->p_attr = malloc(p_ca_info->attr_size); + if (p_ca_info->p_attr == NULL) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: ERR 3B06: " + "Unable to allocate attribute storage.\n"); + goto Exit; + } + + status = ib_query_ca_by_guid(p_vend->h_al, ca_guid, p_ca_info->p_attr, + &p_ca_info->attr_size); + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: ERR 3B07: " + "Unexpected status getting CA attributes (%s).\n", + ib_get_err_str(status)); + goto Exit; + } + +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +void +osm_ca_info_destroy(IN osm_vendor_t * const p_vend, + IN osm_ca_info_t * const p_ca_info) +{ + OSM_LOG_ENTER(p_vend->p_log); + + if (p_ca_info->p_attr) + free(p_ca_info->p_attr); + + free(p_ca_info); + + OSM_LOG_EXIT(p_vend->p_log); +} + +osm_ca_info_t *osm_ca_info_new(IN osm_vendor_t * const p_vend, + IN const ib_net64_t ca_guid) +{ + ib_api_status_t status; + osm_ca_info_t *p_ca_info; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(ca_guid); + + p_ca_info = malloc(sizeof(*p_ca_info)); + if (p_ca_info == NULL) + goto Exit; + + memset(p_ca_info, 0, sizeof(*p_ca_info)); + + status = __osm_ca_info_init(p_vend, p_ca_info, ca_guid); + if (status != IB_SUCCESS) { + osm_ca_info_destroy(p_vend, p_ca_info); + p_ca_info = NULL; + goto Exit; + } + +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return (p_ca_info); +} + +static ib_api_status_t +__osm_vendor_get_ca_guids(IN osm_vendor_t * const p_vend, + IN ib_net64_t ** const p_guids, + IN unsigned * const p_num_guids) +{ + ib_api_status_t status; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_guids); + CL_ASSERT(p_num_guids); + + status = ib_get_ca_guids(p_vend->h_al, NULL, p_num_guids); + if ((status != IB_INSUFFICIENT_MEMORY) && (status != IB_SUCCESS)) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_vendor_get_ca_guids: ERR 
3B08: " + "Unexpected status getting CA GUID array (%s).\n", + ib_get_err_str(status)); + goto Exit; + } + + if (*p_num_guids == 0) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_vendor_get_ca_guids: ERR 3B09: " + "No available channel adapters.\n"); + status = IB_INSUFFICIENT_RESOURCES; + goto Exit; + } + + *p_guids = malloc(*p_num_guids * sizeof(**p_guids)); + if (*p_guids == NULL) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_vendor_get_ca_guids: ERR 3B10: " + "Unable to allocate CA GUID array.\n"); + goto Exit; + } + + status = ib_get_ca_guids(p_vend->h_al, *p_guids, p_num_guids); + CL_ASSERT(*p_num_guids); + + if (osm_log_is_active(p_vend->p_log, OSM_LOG_VERBOSE)) { + osm_log(p_vend->p_log, OSM_LOG_VERBOSE, + "__osm_vendor_get_ca_guids: " + "Detected %u local channel adapters.\n", *p_num_guids); + } + +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +/****f* OpenSM: CA Info/osm_ca_info_get_pi_ptr + * NAME + * osm_ca_info_get_pi_ptr + * + * DESCRIPTION + * Returns a pointer to the port attribute of the specified port + * owned by this CA. + * + * SYNOPSIS + */ +static ib_port_attr_t *__osm_ca_info_get_port_attr_ptr(IN const osm_ca_info_t * + const p_ca_info, + IN const uint8_t index) +{ + return (&p_ca_info->p_attr->p_port_attr[index]); +} + +/* + * PARAMETERS + * p_ca_info + * [in] Pointer to a CA Info object. + * + * index + * [in] Port "index" for which to retrieve the port attribute. + * The index is the offset into the ca's internal array + * of port attributes. + * + * RETURN VALUE + * Returns a pointer to the port attribute of the specified port + * owned by this CA. 
+ * + * NOTES + * + * SEE ALSO + *********/ + +ib_api_status_t +osm_vendor_get_all_port_attr(IN osm_vendor_t * const p_vend, + IN ib_port_attr_t * const p_attr_array, + IN uint32_t * const p_num_ports) +{ + ib_api_status_t status; + + uint32_t ca; + unsigned ca_count; + uint32_t port_count = 0; + uint8_t port_num; + uint32_t total_ports = 0; + ib_net64_t *p_ca_guid = NULL; + osm_ca_info_t *p_ca_info; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vend); + CL_ASSERT(p_vend->p_ca_info == NULL); + + /* + 1) Determine the number of CA's + 2) Allocate an array big enough to hold the ca info objects. + 3) Call again to retrieve the guids. + */ + status = __osm_vendor_get_ca_guids(p_vend, &p_ca_guid, &ca_count); + + p_vend->p_ca_info = malloc(ca_count * sizeof(*p_vend->p_ca_info)); + if (p_vend->p_ca_info == NULL) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_all_port_attr: ERR 3B11: " + "Unable to allocate CA information array.\n"); + goto Exit; + } + + memset(p_vend->p_ca_info, 0, ca_count * sizeof(*p_vend->p_ca_info)); + p_vend->ca_count = ca_count; + + /* + For each CA, retrieve the port info attributes + */ + for (ca = 0; ca < ca_count; ca++) { + p_ca_info = &p_vend->p_ca_info[ca]; + + status = __osm_ca_info_init(p_vend, p_ca_info, p_ca_guid[ca]); + + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_all_port_attr: ERR 3B12: " + "Unable to initialize CA Info object (%s).\n", + ib_get_err_str(status)); + } + + total_ports += osm_ca_info_get_num_ports(p_ca_info); + } + + /* + If the user supplied enough storage, return the port guids, + otherwise, return the appropriate error. 
+ */ + if (*p_num_ports >= total_ports) { + for (ca = 0; ca < ca_count; ca++) { + uint32_t num_ports; + + p_ca_info = &p_vend->p_ca_info[ca]; + + num_ports = osm_ca_info_get_num_ports(p_ca_info); + + for (port_num = 0; port_num < num_ports; port_num++) { + p_attr_array[port_count] = + *__osm_ca_info_get_port_attr_ptr(p_ca_info, + port_num); + port_count++; + } + } + } else { + status = IB_INSUFFICIENT_MEMORY; + } + + *p_num_ports = total_ports; + +Exit: + if (p_ca_guid) + free(p_ca_guid); + + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +ib_net64_t +osm_vendor_get_ca_guid(IN osm_vendor_t * const p_vend, + IN const ib_net64_t port_guid) +{ + uint8_t index; + uint8_t num_ports; + uint32_t num_guids = 0; + osm_ca_info_t *p_ca_info; + uint32_t ca; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(port_guid); + /* + First, locate the HCA that owns this port. + */ + if (p_vend->p_ca_info == NULL) { + /* + Initialize the osm_ca_info_t array which allows + us to match port GUID to CA. + */ + osm_vendor_get_all_port_attr(p_vend, NULL, &num_guids); + } + + CL_ASSERT(p_vend->p_ca_info); + CL_ASSERT(p_vend->ca_count); + + for (ca = 0; ca < p_vend->ca_count; ca++) { + p_ca_info = &p_vend->p_ca_info[ca]; + + num_ports = osm_ca_info_get_num_ports(p_ca_info); + CL_ASSERT(num_ports); + + for (index = 0; index < num_ports; index++) { + if (port_guid == + osm_ca_info_get_port_guid(p_ca_info, index)) { + OSM_LOG_EXIT(p_vend->p_log); + return (osm_ca_info_get_ca_guid(p_ca_info)); + } + } + } + + /* + No local CA owns this guid! 
+ */ + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_ca_guid: ERR 3B13: " + "Unable to determine CA guid.\n"); + + OSM_LOG_EXIT(p_vend->p_log); + return (0); +} + +uint8_t +osm_vendor_get_port_num(IN osm_vendor_t * const p_vend, + IN const ib_net64_t port_guid) +{ + uint8_t index; + uint8_t num_ports; + uint32_t num_guids = 0; + osm_ca_info_t *p_ca_info; + uint32_t ca; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(port_guid); + /* + First, locate the HCA that owns this port. + */ + if (p_vend->p_ca_info == NULL) { + /* + Initialize the osm_ca_info_t array which allows + us to match port GUID to CA. + */ + osm_vendor_get_all_port_attr(p_vend, NULL, &num_guids); + } + + CL_ASSERT(p_vend->p_ca_info); + CL_ASSERT(p_vend->ca_count); + + for (ca = 0; ca < p_vend->ca_count; ca++) { + p_ca_info = &p_vend->p_ca_info[ca]; + + num_ports = osm_ca_info_get_num_ports(p_ca_info); + CL_ASSERT(num_ports); + + for (index = 0; index < num_ports; index++) { + if (port_guid == + osm_ca_info_get_port_guid(p_ca_info, index)) { + OSM_LOG_EXIT(p_vend->p_log); + return (osm_ca_info_get_port_num + (p_ca_info, index)); + } + } + } + + /* + No local CA owns this guid! 
+ */ + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_port_num: ERR 3B30: " + "Unable to determine CA guid.\n"); + + OSM_LOG_EXIT(p_vend->p_log); + return (0); +} + +static ib_api_status_t +__osm_vendor_open_ca(IN osm_vendor_t * const p_vend, + IN const ib_net64_t port_guid) +{ + ib_net64_t ca_guid; + ib_api_status_t status; + + OSM_LOG_ENTER(p_vend->p_log); + + ca_guid = osm_vendor_get_ca_guid(p_vend, port_guid); + if (ca_guid == 0) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_vendor_open_ca: ERR 3B31: " + "Bad port GUID value 0x%" PRIx64 ".\n", + cl_ntoh64(port_guid)); + status = IB_ERROR; + goto Exit; + } + + osm_log(p_vend->p_log, OSM_LOG_VERBOSE, + "__osm_vendor_open_ca: " + "Opening HCA 0x%" PRIx64 ".\n", cl_ntoh64(ca_guid)); + + status = ib_open_ca(p_vend->h_al, + ca_guid, + __osm_al_ca_err_callback, p_vend, &p_vend->h_ca); + + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_vendor_open_ca: ERR 3B15: " + "Unable to open CA (%s).\n", ib_get_err_str(status)); + goto Exit; + } + + CL_ASSERT(p_vend->h_ca); + + status = ib_alloc_pd(p_vend->h_ca, IB_PDT_ALIAS, p_vend, &p_vend->h_pd); + + if (status != IB_SUCCESS) { + ib_close_ca(p_vend->h_ca, __osm_al_ca_destroy_callback); + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_vendor_open_ca: ERR 3B16: " + "Unable to allocate protection domain (%s).\n", + ib_get_err_str(status)); + goto Exit; + } + + CL_ASSERT(p_vend->h_pd); + +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +static void +__osm_vendor_init_av(IN const osm_al_bind_info_t * p_bind, + IN ib_av_attr_t * p_av) +{ + memset(p_av, 0, sizeof(*p_av)); + p_av->port_num = p_bind->port_num; + p_av->dlid = IB_LID_PERMISSIVE; +} + +osm_bind_handle_t +osm_vendor_bind(IN osm_vendor_t * const p_vend, + IN osm_bind_info_t * const p_user_bind, + IN osm_mad_pool_t * const p_mad_pool, + IN osm_vend_mad_recv_callback_t mad_recv_callback, + IN osm_vend_mad_send_err_callback_t send_err_callback, + IN void *context) +{ + 
ib_net64_t port_guid; + osm_al_bind_info_t *p_bind = 0; + ib_api_status_t status; + ib_qp_create_t qp_create; + ib_mad_svc_t mad_svc; + ib_av_attr_t av; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_user_bind); + CL_ASSERT(p_mad_pool); + CL_ASSERT(mad_recv_callback); + CL_ASSERT(send_err_callback); + + port_guid = p_user_bind->port_guid; + + osm_log(p_vend->p_log, OSM_LOG_INFO, + "osm_vendor_bind: " + "Binding to port 0x%" PRIx64 ".\n", cl_ntoh64(port_guid)); + + if (p_vend->h_ca == 0) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_vendor_bind: " + "Opening CA that owns port 0x%" PRIx64 ".\n", + port_guid); + + status = __osm_vendor_open_ca(p_vend, port_guid); + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_bind: ERR 3B17: " + "Unable to Open CA (%s).\n", + ib_get_err_str(status)); + goto Exit; + } + } + + p_bind = malloc(sizeof(*p_bind)); + if (p_bind == NULL) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_bind: ERR 3B18: " + "Unable to allocate internal bind object.\n"); + goto Exit; + } + + memset(p_bind, 0, sizeof(*p_bind)); + p_bind->p_vend = p_vend; + p_bind->client_context = context; + p_bind->port_num = osm_vendor_get_port_num(p_vend, port_guid); + p_bind->rcv_callback = mad_recv_callback; + p_bind->send_err_callback = send_err_callback; + p_bind->p_osm_pool = p_mad_pool; + + CL_ASSERT(p_bind->port_num); + + /* + Get the proper QP. 
+	 */
+	memset(&qp_create, 0, sizeof(qp_create));
+
+	/* SM classes use the QP0 alias; everything else goes to QP1. */
+	switch (p_user_bind->mad_class) {
+	case IB_MCLASS_SUBN_LID:
+	case IB_MCLASS_SUBN_DIR:
+		qp_create.qp_type = IB_QPT_QP0_ALIAS;
+		break;
+
+	case IB_MCLASS_SUBN_ADM:
+	default:
+		qp_create.qp_type = IB_QPT_QP1_ALIAS;
+		break;
+	}
+
+	qp_create.sq_depth = p_user_bind->send_q_size;
+	qp_create.rq_depth = p_user_bind->recv_q_size;
+	qp_create.sq_sge = OSM_AL_SQ_SGE;
+	qp_create.rq_sge = OSM_AL_RQ_SGE;
+
+	status = ib_get_spl_qp(p_vend->h_pd,
+			       port_guid,
+			       &qp_create,
+			       p_bind,
+			       __osm_al_err_callback,
+			       &p_bind->pool_key, &p_bind->h_qp);
+
+	if (status != IB_SUCCESS) {
+		free(p_bind);
+		/*
+		 * The Exit path returns p_bind as the bind handle; clear it
+		 * so the caller gets a null handle, not a freed pointer.
+		 */
+		p_bind = NULL;
+		osm_log(p_vend->p_log, OSM_LOG_ERROR,
+			"osm_vendor_bind: ERR 3B19: "
+			"Unable to get QP handle (%s).\n",
+			ib_get_err_str(status));
+		goto Exit;
+	}
+
+	CL_ASSERT(p_bind->h_qp);
+	CL_ASSERT(p_bind->pool_key);
+
+	memset(&mad_svc, 0, sizeof(mad_svc));
+
+	mad_svc.mad_svc_context = p_bind;
+	mad_svc.pfn_mad_send_cb = __osm_al_send_callback;
+	mad_svc.pfn_mad_recv_cb = __osm_al_rcv_callback;
+	mad_svc.mgmt_class = p_user_bind->mad_class;
+	mad_svc.mgmt_version = p_user_bind->class_version;
+	mad_svc.support_unsol = p_user_bind->is_responder;
+	mad_svc.method_array[IB_MAD_METHOD_GET] = TRUE;
+	mad_svc.method_array[IB_MAD_METHOD_SET] = TRUE;
+	mad_svc.method_array[IB_MAD_METHOD_DELETE] = TRUE;
+	mad_svc.method_array[IB_MAD_METHOD_TRAP] = TRUE;
+	mad_svc.method_array[IB_MAD_METHOD_GETTABLE] = TRUE;
+
+	status = ib_reg_mad_svc(p_bind->h_qp, &mad_svc, &p_bind->h_svc);
+
+	if (status != IB_SUCCESS) {
+		free(p_bind);
+		/* Same as above: do not return the freed bind object. */
+		p_bind = NULL;
+		osm_log(p_vend->p_log, OSM_LOG_ERROR,
+			"osm_vendor_bind: ERR 3B21: "
+			"Unable to register QP0 MAD service (%s).\n",
+			ib_get_err_str(status));
+		goto Exit;
+	}
+
+	__osm_vendor_init_av(p_bind, &av);
+
+	status = ib_create_av(p_vend->h_pd, &av, &p_bind->h_dr_av);
+	if (status != IB_SUCCESS) {
+		osm_log(p_vend->p_log, OSM_LOG_ERROR,
+			"osm_vendor_bind: ERR 3B22: "
+			"Unable to create address vector (%s).\n",
+
ib_get_err_str(status)); + + goto Exit; + } + + if (osm_log_is_active(p_vend->p_log, OSM_LOG_DEBUG)) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_vendor_bind: " + "Allocating av handle %p.\n", p_bind->h_dr_av); + } + +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return ((osm_bind_handle_t) p_bind); +} + +ib_mad_t *osm_vendor_get(IN osm_bind_handle_t h_bind, + IN const uint32_t mad_size, + IN osm_vend_wrap_t * const p_vw) +{ + ib_mad_t *p_mad; + osm_al_bind_info_t *p_bind = (osm_al_bind_info_t *) h_bind; + osm_vendor_t *p_vend = p_bind->p_vend; + ib_api_status_t status; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vw); + + p_vw->size = mad_size; + p_vw->h_bind = h_bind; + + /* + Retrieve a MAD element from the pool and give the user direct + access to its buffer. + */ + status = ib_get_mad(p_bind->pool_key, mad_size, &p_vw->p_elem); + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get: ERR 3B25: " + "Unable to acquire MAD (%s).\n", + ib_get_err_str(status)); + + p_mad = NULL; + goto Exit; + } + + CL_ASSERT(p_vw->p_elem); + p_mad = ib_get_mad_buf(p_vw->p_elem); + + if (osm_log_get_level(p_vend->p_log) >= OSM_LOG_DEBUG) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_vendor_get: " + "Acquired MAD %p, size = %u.\n", p_mad, mad_size); + } + +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return (p_mad); +} + +void +osm_vendor_put(IN osm_bind_handle_t h_bind, IN osm_vend_wrap_t * const p_vw) +{ + osm_al_bind_info_t *p_bind = (osm_al_bind_info_t *) h_bind; + osm_vendor_t *p_vend = p_bind->p_vend; + ib_api_status_t status; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vw); + CL_ASSERT(p_vw->p_elem); + CL_ASSERT(p_vw->h_bind == h_bind); + + if (osm_log_get_level(p_vend->p_log) >= OSM_LOG_DEBUG) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_vendor_put: " + "Retiring MAD %p.\n", ib_get_mad_buf(p_vw->p_elem)); + } + + status = ib_put_mad(p_vw->p_elem); + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_put: 
ERR 3B26: " + "Unable to retire MAD (%s).\n", ib_get_err_str(status)); + } + + OSM_LOG_EXIT(p_vend->p_log); +} + +ib_api_status_t +osm_vendor_send(IN osm_bind_handle_t h_bind, + IN osm_madw_t * const p_madw, IN boolean_t const resp_expected) +{ + osm_al_bind_info_t *const p_bind = h_bind; + osm_vendor_t *const p_vend = p_bind->p_vend; + osm_vend_wrap_t *const p_vw = osm_madw_get_vend_ptr(p_madw); + osm_mad_addr_t *const p_mad_addr = osm_madw_get_mad_addr_ptr(p_madw); + ib_mad_t *const p_mad = osm_madw_get_mad_ptr(p_madw); + ib_api_status_t status; + ib_mad_element_t *p_elem; + ib_av_attr_t av; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vw->h_bind == h_bind); + CL_ASSERT(p_vw->p_elem); + + p_elem = p_vw->p_elem; + + /* + If a response is expected to this MAD, then preallocate + a mad wrapper to contain the wire MAD received in the + response. Allocating a wrapper here allows for easier + failure paths than after we already received the wire mad. + */ + if (resp_expected) { + p_vw->p_resp_madw = + osm_mad_pool_get_wrapper_raw(p_bind->p_osm_pool); + if (p_vw->p_resp_madw == NULL) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_send: ERR 3B27: " + "Unable to allocate MAD wrapper.\n"); + status = IB_INSUFFICIENT_RESOURCES; + goto Exit; + } + } else + p_vw->p_resp_madw = NULL; + + /* + For all sends other than directed route SM MADs, + acquire an address vector for the destination. 
+ */ + if (p_mad->mgmt_class != IB_MCLASS_SUBN_DIR) { + memset(&av, 0, sizeof(av)); + av.port_num = p_bind->port_num; + av.dlid = p_mad_addr->dest_lid; + av.static_rate = p_mad_addr->static_rate; + av.path_bits = p_mad_addr->path_bits; + + if ((p_mad->mgmt_class != IB_MCLASS_SUBN_LID) && + (p_mad->mgmt_class != IB_MCLASS_SUBN_DIR)) { + av.sl = p_mad_addr->addr_type.gsi.service_level; + + if (p_mad_addr->addr_type.gsi.global_route) { + av.grh_valid = TRUE; + /* ANIL */ + /* av.grh = p_mad_addr->addr_type.gsi.grh_info; */ + } + } + + if (osm_log_is_active(p_vend->p_log, OSM_LOG_DEBUG)) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_vendor_send: " + "av.port_num 0x%X, " + "av.dlid 0x%X, " + "av.static_rate %d, " + "av.path_bits %d.\n", + av.port_num, cl_ntoh16(av.dlid), + av.static_rate, av.path_bits); + } + + status = ib_create_av(p_vend->h_pd, &av, &p_vw->h_av); + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_send: ERR 3B28: " + "Unable to create address vector (%s).\n", + ib_get_err_str(status)); + + if (p_vw->p_resp_madw) + osm_mad_pool_put(p_bind->p_osm_pool, + p_vw->p_resp_madw); + goto Exit; + } + + if (osm_log_is_active(p_vend->p_log, OSM_LOG_DEBUG)) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_vendor_send: " + "Allocating av handle %p.\n", p_vw->h_av); + } + } else { + p_vw->h_av = p_bind->h_dr_av; + } + + p_elem->h_av = p_vw->h_av; + + p_elem->context1 = p_madw; + p_elem->context2 = NULL; + + p_elem->immediate_data = 0; + p_elem->p_grh = NULL; + p_elem->resp_expected = resp_expected; + p_elem->retry_cnt = OSM_DEFAULT_RETRY_COUNT; + + p_elem->send_opt = IB_SEND_OPT_SIGNALED; + p_elem->timeout_ms = p_vend->timeout; + + /* Completion information. 
*/ + p_elem->status = 0; /* Not trusting AL */ + + if ((p_mad->mgmt_class == IB_MCLASS_SUBN_LID) || + (p_mad->mgmt_class == IB_MCLASS_SUBN_DIR)) { + p_elem->remote_qp = 0; + p_elem->remote_qkey = 0; + } else { + p_elem->remote_qp = p_mad_addr->addr_type.gsi.remote_qp; + p_elem->remote_qkey = p_mad_addr->addr_type.gsi.remote_qkey; + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_vendor_send: " + "remote qp = 0x%X, remote qkey = 0x%X.\n", + cl_ntoh32(p_elem->remote_qp), + cl_ntoh32(p_elem->remote_qkey)); + } + + status = ib_send_mad(p_bind->h_svc, p_elem, NULL); + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_send: ERR 3B29: " + "Send failed (%s).\n", ib_get_err_str(status)); + if (p_vw->p_resp_madw) + osm_mad_pool_put(p_bind->p_osm_pool, p_vw->p_resp_madw); + goto Exit; + } + +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +ib_api_status_t osm_vendor_local_lid_change(IN osm_bind_handle_t h_bind) +{ + osm_al_bind_info_t *p_bind = (osm_al_bind_info_t *) h_bind; + osm_vendor_t *p_vend = p_bind->p_vend; + ib_av_attr_t av; + ib_api_status_t status; + + OSM_LOG_ENTER(p_vend->p_log); + + /* + The only thing we need to do is refresh the directed + route address vector. 
+ */ + __osm_vendor_init_av(p_bind, &av); + + status = ib_destroy_av(p_bind->h_dr_av); + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_local_lid_change: ERR 3B32: " + "Unable to destroy address vector (%s).\n", + ib_get_err_str(status)); + + goto Exit; + } + + status = ib_create_av(p_vend->h_pd, &av, &p_bind->h_dr_av); + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_local_lid_change: ERR 3B33: " + "Unable to create address vector (%s).\n", + ib_get_err_str(status)); + + goto Exit; + } + +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +void osm_vendor_set_sm(IN osm_bind_handle_t h_bind, IN boolean_t is_sm_val) +{ + osm_al_bind_info_t *p_bind = (osm_al_bind_info_t *) h_bind; + osm_vendor_t *p_vend = p_bind->p_vend; + ib_api_status_t status; + ib_port_attr_mod_t attr_mod; + + OSM_LOG_ENTER(p_vend->p_log); + + memset(&attr_mod, 0, sizeof(attr_mod)); + + attr_mod.cap.sm = is_sm_val; + + status = ib_modify_ca(p_vend->h_ca, p_bind->port_num, + IB_CA_MOD_IS_SM, &attr_mod); + + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_set_sm: ERR 3B34: " + "Unable set 'IS_SM' bit to:%u in port attributes (%s).\n", + is_sm_val, ib_get_err_str(status)); + } + + OSM_LOG_EXIT(p_vend->p_log); +} + +void osm_vendor_set_debug(IN osm_vendor_t * const p_vend, IN int32_t level) +{ + +} + +#endif /* OSM_VENDOR_INTF_AL */ diff --git a/libvendor/osm_vendor_ibumad.c b/libvendor/osm_vendor_ibumad.c new file mode 100644 index 0000000..74db9eb --- /dev/null +++ b/libvendor/osm_vendor_ibumad.c @@ -0,0 +1,1272 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_vendor_t (for umad). + * This object represents the OpenIB vendor layer. + * This object is part of the opensm family of objects. 
+ * + * Environment: + * Linux User Mode + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#ifdef OSM_VENDOR_INTF_OPENIB + +#include +#include +#include +#include +#include /* for __be64 with older libibumad */ + +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_VENDOR_IBUMAD_C +#include +#include +#include +#include +#include + +/****s* OpenSM: Vendor UMAD/osm_umad_bind_info_t + * NAME + * osm_umad_bind_info_t + * + * DESCRIPTION + * Structure containing bind information. + * + * SYNOPSIS + */ +typedef struct _osm_umad_bind_info { + osm_vendor_t *p_vend; + void *client_context; + osm_mad_pool_t *p_mad_pool; + osm_vend_mad_recv_callback_t mad_recv_callback; + osm_vend_mad_send_err_callback_t send_err_callback; + ib_net64_t port_guid; + int port_id; + int agent_id; + int agent_id1; /* SMI requires two agents */ + int timeout; + int max_retries; +} osm_umad_bind_info_t; + +typedef struct _umad_receiver { + pthread_t tid; + osm_vendor_t *p_vend; + osm_log_t *p_log; +} umad_receiver_t; + +static void osm_vendor_close_port(osm_vendor_t * const p_vend); + +static void log_send_error(osm_vendor_t * const p_vend, osm_madw_t *p_madw) +{ + if (p_madw->p_mad->mgmt_class != IB_MCLASS_SUBN_DIR) { + /* LID routed */ + OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5410: " + "Send completed with error (%s) -- dropping\n" + "\t\t\tClass 0x%x, Method 0x%X, Attr 0x%X, " + "TID 0x%" PRIx64 ", LID %u\n", + ib_get_err_str(p_madw->status), + p_madw->p_mad->mgmt_class, p_madw->p_mad->method, + cl_ntoh16(p_madw->p_mad->attr_id), + cl_ntoh64(p_madw->p_mad->trans_id), + cl_ntoh16(p_madw->mad_addr.dest_lid)); + } else { + ib_smp_t *p_smp; + + /* Direct routed SMP */ + p_smp = osm_madw_get_smp_ptr(p_madw); + OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5411: " + "DR SMP Send completed with error (%s) -- dropping\n" + "\t\t\tMethod 0x%X, Attr 0x%X, TID 0x%" PRIx64 "\n", + ib_get_err_str(p_madw->status), + p_madw->p_mad->method, + 
cl_ntoh16(p_madw->p_mad->attr_id), + cl_ntoh64(p_madw->p_mad->trans_id)); + osm_dump_smp_dr_path(p_vend->p_log, p_smp, OSM_LOG_ERROR); + } +} + +static void clear_madw(osm_vendor_t * p_vend) +{ + umad_match_t *m, *e, *old_m; + ib_net64_t old_tid; + uint8_t old_mgmt_class; + + OSM_LOG_ENTER(p_vend->p_log); + pthread_mutex_lock(&p_vend->match_tbl_mutex); + for (m = p_vend->mtbl.tbl, e = m + p_vend->mtbl.max; m < e; m++) { + if (m->tid) { + old_m = m; + old_tid = m->tid; + old_mgmt_class = m->mgmt_class; + m->tid = 0; + osm_mad_pool_put(((osm_umad_bind_info_t + *) ((osm_madw_t *) m->v)->h_bind)-> + p_mad_pool, m->v); + pthread_mutex_unlock(&p_vend->match_tbl_mutex); + OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5401: " + "evicting entry %p (tid was 0x%" PRIx64 + " mgmt class 0x%x)\n", + old_m, cl_ntoh64(old_tid), old_mgmt_class); + goto Exit; + } + } + pthread_mutex_unlock(&p_vend->match_tbl_mutex); + +Exit: + OSM_LOG_EXIT(p_vend->p_log); +} + +static osm_madw_t *get_madw(osm_vendor_t * p_vend, ib_net64_t * tid, + uint8_t mgmt_class) +{ + umad_match_t *m, *e; + ib_net64_t mtid = (*tid & CL_HTON64(0x00000000ffffffffULL)); + osm_madw_t *res; + + /* + * Since mtid == 0 is the empty key, we should not + * waste time looking for it + */ + if (mtid == 0 || mgmt_class == 0) + return 0; + + pthread_mutex_lock(&p_vend->match_tbl_mutex); + for (m = p_vend->mtbl.tbl, e = m + p_vend->mtbl.max; m < e; m++) { + if (m->tid == mtid && m->mgmt_class == mgmt_class) { + m->tid = 0; + m->mgmt_class = 0; + *tid = mtid; + res = m->v; + pthread_mutex_unlock(&p_vend->match_tbl_mutex); + return res; + } + } + + pthread_mutex_unlock(&p_vend->match_tbl_mutex); + return 0; +} + +/* + * If match table full, evict LRU (least recently used) transaction. + * Maintain 2 LRUs: one for SMPs, and one for others (GS). + * Evict LRU GS transaction if one is available and only evict LRU SMP + * transaction if no other choice. 
+ */
+/*
+ * Record p_madw in the match table under (tid, mgmt_class).
+ *
+ * A free slot (tid == 0 and mgmt_class == 0) is used when one exists.
+ * Otherwise one pending request is evicted: its status is set to
+ * IB_CANCELED, it is reported through the owning bind's send_err_callback
+ * (called with cb_mutex held), and the new entry takes over that same slot.
+ */
+static void
+put_madw(osm_vendor_t * p_vend, osm_madw_t * p_madw, ib_net64_t tid,
+	 uint8_t mgmt_class)
+{
+	umad_match_t *m, *e, *old_lru, *lru = 0, *lru_smp = 0;
+	osm_madw_t *p_req_madw;
+	osm_umad_bind_info_t *p_bind;
+	ib_net64_t old_tid;
+	uint32_t oldest = ~0, oldest_smp = ~0;
+	uint8_t old_mgmt_class;
+
+	pthread_mutex_lock(&p_vend->match_tbl_mutex);
+	for (m = p_vend->mtbl.tbl, e = m + p_vend->mtbl.max; m < e; m++) {
+		if (m->tid == 0 && m->mgmt_class == 0) {
+			m->tid = tid;
+			m->mgmt_class = mgmt_class;
+			m->v = p_madw;
+			m->version =
+			    cl_atomic_inc((atomic32_t *) & p_vend->mtbl.
+					  last_version);
+			pthread_mutex_unlock(&p_vend->match_tbl_mutex);
+			return;
+		}
+		/* Track the lowest version seen per class (SMP vs. other). */
+		if (m->mgmt_class == IB_MCLASS_SUBN_DIR ||
+		    m->mgmt_class == IB_MCLASS_SUBN_LID) {
+			if (oldest_smp >= m->version) {
+				oldest_smp = m->version;
+				lru_smp = m;
+			}
+		} else {
+			if (oldest >= m->version) {
+				oldest = m->version;
+				lru = m;
+			}
+		}
+	}
+
+	/*
+	 * Table is full. Prefer evicting a non-SMP entry; fall back to the
+	 * SMP candidate only when no other entry exists. Test the pointers
+	 * rather than the version sentinels so that a candidate whose
+	 * version happens to equal ~0 is still recognised.
+	 */
+	if (lru) {
+		old_lru = lru;
+	} else {
+		CL_ASSERT(lru_smp);
+		old_lru = lru_smp;
+	}
+	old_tid = old_lru->tid;
+	old_mgmt_class = old_lru->mgmt_class;
+
+	p_req_madw = old_lru->v;
+	p_bind = p_req_madw->h_bind;
+	p_req_madw->status = IB_CANCELED;
+	log_send_error(p_vend, p_req_madw);
+	pthread_mutex_lock(&p_vend->cb_mutex);
+	(*p_bind->send_err_callback) (p_bind->client_context, p_req_madw);
+	pthread_mutex_unlock(&p_vend->cb_mutex);
+
+	/*
+	 * Reuse exactly the slot that was evicted. Choosing the slot by the
+	 * class of the new MAD could overwrite a live entry that was never
+	 * reported, leave the evicted entry in the table, or dereference a
+	 * null candidate when the table holds only one class.
+	 */
+	old_lru->tid = tid;
+	old_lru->mgmt_class = mgmt_class;
+	old_lru->v = p_madw;
+	old_lru->version =
+	    cl_atomic_inc((atomic32_t *) & p_vend->mtbl.last_version);
+	pthread_mutex_unlock(&p_vend->match_tbl_mutex);
+	OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5402: "
+		"evicting entry %p (tid was 0x%"
PRIx64 + " mgmt class 0x%x)\n", old_lru, + cl_ntoh64(old_tid), old_mgmt_class); +} + +static void +ib_mad_addr_conv(ib_user_mad_t * umad, osm_mad_addr_t * osm_mad_addr, + int is_smi) +{ + ib_mad_addr_t *ib_mad_addr = umad_get_mad_addr(umad); + + memset(osm_mad_addr, 0, sizeof(osm_mad_addr_t)); + osm_mad_addr->dest_lid = ib_mad_addr->lid; + osm_mad_addr->path_bits = ib_mad_addr->path_bits; + + if (is_smi) { + osm_mad_addr->addr_type.smi.source_lid = osm_mad_addr->dest_lid; + osm_mad_addr->addr_type.smi.port_num = 255; /* not used */ + return; + } + + osm_mad_addr->addr_type.gsi.remote_qp = ib_mad_addr->qpn; + osm_mad_addr->addr_type.gsi.remote_qkey = ib_mad_addr->qkey; + osm_mad_addr->addr_type.gsi.pkey_ix = umad_get_pkey(umad); + osm_mad_addr->addr_type.gsi.service_level = ib_mad_addr->sl; + if (ib_mad_addr->grh_present) { + osm_mad_addr->addr_type.gsi.global_route = 1; + osm_mad_addr->addr_type.gsi.grh_info.hop_limit = ib_mad_addr->hop_limit; + osm_mad_addr->addr_type.gsi.grh_info.ver_class_flow = + ib_grh_set_ver_class_flow(6, /* GRH version */ + ib_mad_addr->traffic_class, + ib_mad_addr->flow_label); + memcpy(&osm_mad_addr->addr_type.gsi.grh_info.dest_gid, + &ib_mad_addr->gid, 16); + } +} + +static void *swap_mad_bufs(osm_madw_t * p_madw, void *umad) +{ + void *old; + + old = p_madw->vend_wrap.umad; + p_madw->vend_wrap.umad = umad; + p_madw->p_mad = umad_get_mad(umad); + + return old; +} + +static void unlock_mutex(void *arg) +{ + pthread_mutex_unlock(arg); +} + +static void *umad_receiver(void *p_ptr) +{ + umad_receiver_t *const p_ur = (umad_receiver_t *) p_ptr; + osm_vendor_t *p_vend = p_ur->p_vend; + osm_umad_bind_info_t *p_bind; + osm_mad_addr_t osm_addr; + osm_madw_t *p_madw, *p_req_madw; + ib_mad_t *p_mad, *p_req_mad; + void *umad = 0; + int mad_agent, length; + + OSM_LOG_ENTER(p_ur->p_log); + + for (;;) { + if (!umad && + !(umad = umad_alloc(1, umad_size() + MAD_BLOCK_SIZE))) { + OSM_LOG(p_ur->p_log, OSM_LOG_ERROR, "ERR 5403: " + "can't alloc MAD sized 
umad\n"); + break; + } + + length = MAD_BLOCK_SIZE; + if ((mad_agent = umad_recv(p_vend->umad_port_id, umad, + &length, -1)) < 0) { + if (length <= MAD_BLOCK_SIZE) { + OSM_LOG(p_ur->p_log, OSM_LOG_ERROR, "ERR 5404: " + "recv error on MAD sized umad (%m)\n"); + continue; + } else { + umad_free(umad); + /* Need a larger buffer for RMPP */ + umad = umad_alloc(1, umad_size() + length); + if (!umad) { + OSM_LOG(p_ur->p_log, OSM_LOG_ERROR, + "ERR 5405: " + "can't alloc umad length %d\n", + length); + continue; + } + + if ((mad_agent = umad_recv(p_vend->umad_port_id, + umad, &length, + -1)) < 0) { + OSM_LOG(p_ur->p_log, OSM_LOG_ERROR, + "ERR 5406: " + "recv error on umad length %d (%m)\n", + length); + continue; + } + } + } + + if (mad_agent >= OSM_UMAD_MAX_AGENTS || + !(p_bind = p_vend->agents[mad_agent])) { + OSM_LOG(p_ur->p_log, OSM_LOG_ERROR, "ERR 5407: " + "invalid mad agent %d - dropping\n", mad_agent); + continue; + } + + p_mad = (ib_mad_t *) umad_get_mad(umad); + + ib_mad_addr_conv(umad, &osm_addr, + p_mad->mgmt_class == IB_MCLASS_SUBN_LID || + p_mad->mgmt_class == IB_MCLASS_SUBN_DIR); + + if (!(p_madw = osm_mad_pool_get(p_bind->p_mad_pool, + (osm_bind_handle_t) p_bind, + MAX(length, MAD_BLOCK_SIZE), + &osm_addr))) { + OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5408: " + "request for a new madw failed -- dropping packet\n"); + continue; + } + + /* Need to fix up MAD size if short RMPP packet */ + if (length < MAD_BLOCK_SIZE) + p_madw->mad_size = length; + + /* + * Avoid copying by swapping mad buf pointers. + * Do not use umad after this line of code. 
+ */ + umad = swap_mad_bufs(p_madw, umad); + + /* if status != 0 then we are handling recv timeout on send */ + if (umad_status(p_madw->vend_wrap.umad)) { + if (!(p_req_madw = get_madw(p_vend, &p_mad->trans_id, + p_mad->mgmt_class))) { + OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, + "ERR 5412: " + "Failed to obtain request madw for timed out MAD" + " (class=0x%X method=0x%X attr=0x%X tid=0x%"PRIx64") -- dropping\n", + p_mad->mgmt_class, p_mad->method, + cl_ntoh16(p_mad->attr_id), + cl_ntoh64(p_mad->trans_id)); + } else { + p_req_madw->status = IB_TIMEOUT; + log_send_error(p_vend, p_req_madw); + /* cb frees req_madw */ + pthread_mutex_lock(&p_vend->cb_mutex); + pthread_cleanup_push(unlock_mutex, + &p_vend->cb_mutex); + (*p_bind->send_err_callback) (p_bind-> + client_context, + p_req_madw); + pthread_cleanup_pop(1); + } + + osm_mad_pool_put(p_bind->p_mad_pool, p_madw); + continue; + } + + p_req_madw = 0; + if (ib_mad_is_response(p_mad)) { + p_req_madw = get_madw(p_vend, &p_mad->trans_id, + p_mad->mgmt_class); + if (PF(!p_req_madw)) { + OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, + "ERR 5413: Failed to obtain request " + "madw for received MAD " + "(class=0x%X method=0x%X attr=0x%X " + "tid=0x%"PRIx64") -- dropping\n", + p_mad->mgmt_class, p_mad->method, + cl_ntoh16(p_mad->attr_id), + cl_ntoh64(p_mad->trans_id)); + osm_mad_pool_put(p_bind->p_mad_pool, p_madw); + continue; + } + + /* + * Check that request MAD was really a request, + * and make sure that attribute ID, attribute + * modifier and transaction ID are the same in + * request and response. + * + * Exception for o15-0.2-1.11: + * SA response to a SubnAdmGetMulti() containing a + * MultiPathRecord shall have PathRecord attribute ID. 
+ */ + p_req_mad = osm_madw_get_mad_ptr(p_req_madw); + if (PF(ib_mad_is_response(p_req_mad) || + (p_mad->attr_id != p_req_mad->attr_id && + !(p_mad->mgmt_class == IB_MCLASS_SUBN_ADM && + p_req_mad->attr_id == + IB_MAD_ATTR_MULTIPATH_RECORD && + p_mad->attr_id == IB_MAD_ATTR_PATH_RECORD)) || + p_mad->attr_mod != p_req_mad->attr_mod || + p_mad->trans_id != p_req_mad->trans_id)) { + OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, + "ERR 541A: " + "Response MAD validation failed " + "(request attr=0x%X modif=0x%X " + "tid=0x%"PRIx64", " + "response attr=0x%X modif=0x%X " + "tid=0x%"PRIx64") -- dropping\n", + cl_ntoh16(p_req_mad->attr_id), + cl_ntoh32(p_req_mad->attr_mod), + cl_ntoh64(p_req_mad->trans_id), + cl_ntoh16(p_mad->attr_id), + cl_ntoh32(p_mad->attr_mod), + cl_ntoh64(p_mad->trans_id)); + osm_mad_pool_put(p_bind->p_mad_pool, p_madw); + continue; + } + } + +#ifndef VENDOR_RMPP_SUPPORT + if ((p_mad->mgmt_class != IB_MCLASS_SUBN_DIR) && + (p_mad->mgmt_class != IB_MCLASS_SUBN_LID) && + (ib_rmpp_is_flag_set((ib_rmpp_mad_t *) p_mad, + IB_RMPP_FLAG_ACTIVE))) { + OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5414: " + "class 0x%x method 0x%x RMPP version %d type " + "%d flags 0x%x received -- dropping\n", + p_mad->mgmt_class, p_mad->method, + ((ib_rmpp_mad_t *) p_mad)->rmpp_version, + ((ib_rmpp_mad_t *) p_mad)->rmpp_type, + ((ib_rmpp_mad_t *) p_mad)->rmpp_flags); + osm_mad_pool_put(p_bind->p_mad_pool, p_madw); + continue; + } +#endif + + /* call the CB */ + pthread_mutex_lock(&p_vend->cb_mutex); + pthread_cleanup_push(unlock_mutex, &p_vend->cb_mutex); + (*p_bind->mad_recv_callback) (p_madw, p_bind->client_context, + p_req_madw); + pthread_cleanup_pop(1); + } + + OSM_LOG_EXIT(p_vend->p_log); + return NULL; +} + +static int umad_receiver_start(osm_vendor_t * p_vend) +{ + umad_receiver_t *p_ur = p_vend->receiver; + + p_ur->p_vend = p_vend; + p_ur->p_log = p_vend->p_log; + + if (pthread_create(&p_ur->tid, NULL, umad_receiver, p_ur) != 0) + return -1; + + return 0; +} + +static void 
umad_receiver_stop(umad_receiver_t * p_ur)
+{
+	pthread_cancel(p_ur->tid);
+	pthread_join(p_ur->tid, NULL);
+	p_ur->tid = 0;
+	p_ur->p_vend = NULL;
+	p_ur->p_log = NULL;
+}
+
+/*
+ * Initialize a vendor object for the umad transport.
+ *
+ * Stores the log and transaction timeout, sets the default retry count,
+ * initializes the callback and match-table mutexes, marks the umad port
+ * and issm descriptor as closed (-1), initializes libibumad, records the
+ * local CA names, and allocates the transaction match table. The table
+ * size is DEFAULT_OSM_UMAD_MAX_PENDING unless the OSM_UMAD_MAX_PENDING
+ * environment variable holds a positive number.
+ *
+ * Returns the (non-negative) umad_init() result on success, the negative
+ * value from umad_init() or umad_get_cas_names() on failure, or
+ * IB_INSUFFICIENT_MEMORY when the match table cannot be allocated.
+ */
+ib_api_status_t
+osm_vendor_init(IN osm_vendor_t * const p_vend,
+		IN osm_log_t * const p_log, IN const uint32_t timeout)
+{
+	char *max = NULL;
+	int r, n_cas;
+
+	OSM_LOG_ENTER(p_log);
+
+	p_vend->p_log = p_log;
+	p_vend->timeout = timeout;
+	p_vend->max_retries = OSM_DEFAULT_RETRY_COUNT;
+	pthread_mutex_init(&p_vend->cb_mutex, NULL);
+	pthread_mutex_init(&p_vend->match_tbl_mutex, NULL);
+	p_vend->umad_port_id = -1;
+	p_vend->issmfd = -1;
+
+	/*
+	 * Open our instance of UMAD.
+	 */
+	if ((r = umad_init()) < 0) {
+		OSM_LOG(p_vend->p_log, OSM_LOG_ERROR,
+			"ERR 5415: Error opening UMAD\n");
+		/*
+		 * Stop here: the failure is returned to the caller anyway,
+		 * and carrying on would allocate a match table that
+		 * osm_vendor_new() never frees when it discards p_vend.
+		 */
+		goto Exit;
+	}
+
+	if ((n_cas = umad_get_cas_names(p_vend->ca_names,
+					OSM_UMAD_MAX_CAS)) < 0) {
+		OSM_LOG(p_vend->p_log, OSM_LOG_ERROR,
+			"ERR 5416: umad_get_cas_names failed\n");
+		r = n_cas;
+		goto Exit;
+	}
+
+	p_vend->ca_count = n_cas;
+	p_vend->mtbl.max = DEFAULT_OSM_UMAD_MAX_PENDING;
+
+	/* Optional override of the match table size from the environment. */
+	if ((max = getenv("OSM_UMAD_MAX_PENDING")) != NULL) {
+		int tmp = strtol(max, NULL, 0);
+		if (tmp > 0)
+			p_vend->mtbl.max = tmp;
+		else
+			OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "Error:"
+				"OSM_UMAD_MAX_PENDING=%d is invalid\n",
+				tmp);
+	}
+
+	OSM_LOG(p_vend->p_log, OSM_LOG_INFO, "%d pending umads specified\n",
+		p_vend->mtbl.max);
+
+	p_vend->mtbl.tbl = calloc(p_vend->mtbl.max, sizeof(*(p_vend->mtbl.tbl)));
+	if (!p_vend->mtbl.tbl) {
+		OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "Error:"
+			"failed to allocate vendor match table\n");
+		r = IB_INSUFFICIENT_MEMORY;
+		goto Exit;
+	}
+
+Exit:
+	OSM_LOG_EXIT(p_log);
+	return (r);
+}
+
+osm_vendor_t *osm_vendor_new(IN osm_log_t * const p_log,
+			     IN const uint32_t timeout)
+{
+	osm_vendor_t *p_vend = NULL;
+
+	OSM_LOG_ENTER(p_log);
+
+	if (!timeout) {
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 5433: "
+			"transaction timeout cannot be 0\n");
+		goto Exit;
+	}
+
+	p_vend = malloc(sizeof(*p_vend));
+	if
(p_vend == NULL) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 5417: " + "Unable to allocate vendor object\n"); + goto Exit; + } + + memset(p_vend, 0, sizeof(*p_vend)); + + if (osm_vendor_init(p_vend, p_log, timeout) != IB_SUCCESS) { + free(p_vend); + p_vend = NULL; + } + +Exit: + OSM_LOG_EXIT(p_log); + return (p_vend); +} + +void osm_vendor_delete(IN osm_vendor_t ** const pp_vend) +{ + osm_vendor_close_port(*pp_vend); + + clear_madw(*pp_vend); + /* make sure all ports are closed */ + umad_done(); + + pthread_mutex_destroy(&(*pp_vend)->cb_mutex); + pthread_mutex_destroy(&(*pp_vend)->match_tbl_mutex); + free((*pp_vend)->mtbl.tbl); + free(*pp_vend); + *pp_vend = NULL; +} + +ib_api_status_t +osm_vendor_get_all_port_attr(IN osm_vendor_t * const p_vend, + IN ib_port_attr_t * const p_attr_array, + IN uint32_t * const p_num_ports) +{ + umad_ca_t ca; + ib_port_attr_t *attr = p_attr_array; + unsigned done = 0; + int r = 0, i, j, k; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vend && p_num_ports); + + if (!*p_num_ports) { + r = IB_INVALID_PARAMETER; + OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5418: " + "Ports in should be > 0\n"); + goto Exit; + } + + if (!p_attr_array) { + r = IB_INSUFFICIENT_MEMORY; + *p_num_ports = 0; + goto Exit; + } + + for (i = 0; i < p_vend->ca_count && !done; i++) { + /* For each CA, retrieve the port attributes */ + if (umad_get_ca(p_vend->ca_names[i], &ca) == 0) { + if (ca.node_type < 1 || ca.node_type > 3) + continue; + for (j = 0; j <= ca.numports; j++) { + if (!ca.ports[j]) + continue; + attr->port_guid = ca.ports[j]->port_guid; + attr->lid = ca.ports[j]->base_lid; + attr->port_num = ca.ports[j]->portnum; + attr->sm_lid = ca.ports[j]->sm_lid; + attr->sm_sl = ca.ports[j]->sm_sl; + attr->link_state = ca.ports[j]->state; + if (attr->num_pkeys && attr->p_pkey_table) { + if (attr->num_pkeys > ca.ports[j]->pkeys_size) + attr->num_pkeys = ca.ports[j]->pkeys_size; + for (k = 0; k < attr->num_pkeys; k++) + attr->p_pkey_table[k] = + 
cl_hton16(ca.ports[j]->pkeys[k]); + } + attr->num_pkeys = ca.ports[j]->pkeys_size; + if (attr->num_gids && attr->p_gid_table) { + attr->p_gid_table[0].unicast.prefix = cl_hton64(ca.ports[j]->gid_prefix); + attr->p_gid_table[0].unicast.interface_id = cl_hton64(ca.ports[j]->port_guid); + attr->num_gids = 1; + } + attr++; + if (attr - p_attr_array > *p_num_ports) { + done = 1; + break; + } + } + umad_release_ca(&ca); + } + } + + *p_num_ports = attr - p_attr_array; + +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return r; +} + +static int +osm_vendor_open_port(IN osm_vendor_t * const p_vend, + IN const ib_net64_t port_guid) +{ + __be64 portguids[OSM_UMAD_MAX_PORTS_PER_CA + 1]; + umad_ca_t umad_ca; + int i = 0, umad_port_id = -1; + char *name; + int ca, r; + + CL_ASSERT(p_vend); + + OSM_LOG_ENTER(p_vend->p_log); + + if (p_vend->umad_port_id >= 0) { + umad_port_id = p_vend->umad_port_id; + goto Exit; + } + + if (!port_guid) { + name = NULL; + i = 0; + goto _found; + } + + for (ca = 0; ca < p_vend->ca_count; ca++) { + if ((r = umad_get_ca_portguids(p_vend->ca_names[ca], portguids, + OSM_UMAD_MAX_PORTS_PER_CA + 1)) < 0) { +#ifdef __WIN__ + OSM_LOG(p_vend->p_log, OSM_LOG_VERBOSE, +#else + OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5421: " +#endif + "Unable to get CA %s port guids (%s)\n", + p_vend->ca_names[ca], strerror(r)); + continue; + } + for (i = 0; i < r; i++) + if (port_guid == portguids[i]) { + name = p_vend->ca_names[ca]; + goto _found; + } + } + + /* + * No local CA owns this guid! 
+	 */
+	OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5422: "
+		"Unable to find requested CA guid 0x%" PRIx64 "\n",
+		cl_ntoh64(port_guid));
+	goto Exit;
+
+_found:
+	/* Validate that node is an IB node type (not iWARP) */
+	if (umad_get_ca(name, &umad_ca) < 0) {
+		OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 542A: "
+			"umad_get_ca() failed\n");
+		goto Exit;
+	}
+
+	if (umad_ca.node_type < 1 || umad_ca.node_type > 3) {
+		OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 542D: "
+			"Type %d of node \'%s\' is not an IB node type\n",
+			umad_ca.node_type, umad_ca.ca_name);
+		fprintf(stderr,
+			"Type %d of node \'%s\' is not an IB node type\n",
+			umad_ca.node_type, umad_ca.ca_name);
+		umad_release_ca(&umad_ca);
+		goto Exit;
+	}
+	umad_release_ca(&umad_ca);
+
+	/* Port found, try to open it */
+	if (umad_get_port(name, i, &p_vend->umad_port) < 0) {
+		OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 542B: "
+			"umad_get_port() failed\n");
+		goto Exit;
+	}
+
+	if ((umad_port_id = umad_open_port(p_vend->umad_port.ca_name,
+					   p_vend->umad_port.portnum)) < 0) {
+		OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 542C: "
+			"umad_open_port() failed\n");
+		/*
+		 * Release what umad_get_port() filled in, as the later
+		 * failure paths in this function already do.
+		 */
+		umad_release_port(&p_vend->umad_port);
+		p_vend->umad_port.port_guid = 0;
+		goto Exit;
+	}
+
+	p_vend->umad_port_id = umad_port_id;
+
+	/* start receiver thread */
+	if (!(p_vend->receiver = calloc(1, sizeof(umad_receiver_t)))) {
+		OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5423: "
+			"Unable to alloc receiver struct\n");
+		umad_close_port(umad_port_id);
+		umad_release_port(&p_vend->umad_port);
+		p_vend->umad_port.port_guid = 0;
+		p_vend->umad_port_id = umad_port_id = -1;
+		goto Exit;
+	}
+	if (umad_receiver_start(p_vend) != 0) {
+		OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5420: "
+			"umad_receiver_init failed\n");
+		/*
+		 * No thread was created, so drop the receiver object.
+		 * Leaving it set would make osm_vendor_close_port() cancel
+		 * and join a thread id that was never started.
+		 */
+		free(p_vend->receiver);
+		p_vend->receiver = NULL;
+		umad_close_port(umad_port_id);
+		umad_release_port(&p_vend->umad_port);
+		p_vend->umad_port.port_guid = 0;
+		p_vend->umad_port_id = umad_port_id = -1;
+	}
+
+Exit:
+	OSM_LOG_EXIT(p_vend->p_log);
+	return umad_port_id;
+}
+
+static void osm_vendor_close_port(osm_vendor_t * const p_vend)
+{
+	umad_receiver_t *p_ur;
+	int
i; + + p_ur = p_vend->receiver; + p_vend->receiver = NULL; + if (p_ur) { + umad_receiver_stop(p_ur); + free(p_ur); + } + + if (p_vend->umad_port_id >= 0) { + for (i = 0; i < OSM_UMAD_MAX_AGENTS; i++) + if (p_vend->agents[i]) + umad_unregister(p_vend->umad_port_id, i); + umad_close_port(p_vend->umad_port_id); + umad_release_port(&p_vend->umad_port); + p_vend->umad_port.port_guid = 0; + p_vend->umad_port_id = -1; + } +} + +static int set_bit(int nr, void *method_mask) +{ + long mask, *addr = method_mask; + int retval; + + addr += nr / (8 * sizeof(long)); + mask = 1L << (nr % (8 * sizeof(long))); + retval = (mask & *addr) != 0; + *addr |= mask; + return retval; +} + +osm_bind_handle_t +osm_vendor_bind(IN osm_vendor_t * const p_vend, + IN osm_bind_info_t * const p_user_bind, + IN osm_mad_pool_t * const p_mad_pool, + IN osm_vend_mad_recv_callback_t mad_recv_callback, + IN osm_vend_mad_send_err_callback_t send_err_callback, + IN void *context) +{ + ib_net64_t port_guid; + osm_umad_bind_info_t *p_bind = 0; + long method_mask[16 / sizeof(long)]; + int umad_port_id; + uint8_t rmpp_version; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_user_bind); + CL_ASSERT(p_mad_pool); + CL_ASSERT(mad_recv_callback); + CL_ASSERT(send_err_callback); + + port_guid = p_user_bind->port_guid; + + OSM_LOG(p_vend->p_log, OSM_LOG_INFO, + "Mgmt class 0x%02x binding to port GUID 0x%" PRIx64 "\n", + p_user_bind->mad_class, cl_ntoh64(port_guid)); + + if ((umad_port_id = osm_vendor_open_port(p_vend, port_guid)) < 0) { + OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5424: " + "Unable to open port 0x%" PRIx64 "\n", + cl_ntoh64(port_guid)); + goto Exit; + } + + if (umad_get_issm_path(p_vend->umad_port.ca_name, + p_vend->umad_port.portnum, + p_vend->issm_path, + sizeof(p_vend->issm_path)) < 0) { + OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 542E: " + "Cannot resolve issm path for port %s:%u\n", + p_vend->umad_port.ca_name, p_vend->umad_port.portnum); + goto Exit; + } + + if (!(p_bind = 
malloc(sizeof(*p_bind)))) { + OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5425: " + "Unable to allocate internal bind object\n"); + goto Exit; + } + + memset(p_bind, 0, sizeof(*p_bind)); + p_bind->p_vend = p_vend; + p_bind->port_id = umad_port_id; + p_bind->client_context = context; + p_bind->mad_recv_callback = mad_recv_callback; + p_bind->send_err_callback = send_err_callback; + p_bind->p_mad_pool = p_mad_pool; + p_bind->port_guid = port_guid; + p_bind->timeout = p_user_bind->timeout ? p_user_bind->timeout : + p_vend->timeout; + p_bind->max_retries = p_user_bind->retries ? p_user_bind->retries : + p_vend->max_retries; + + memset(method_mask, 0, sizeof method_mask); + if (p_user_bind->is_responder) { + set_bit(IB_MAD_METHOD_GET, &method_mask); + set_bit(IB_MAD_METHOD_SET, &method_mask); + if (p_user_bind->mad_class == IB_MCLASS_SUBN_ADM) { + set_bit(IB_MAD_METHOD_GETTABLE, &method_mask); + set_bit(IB_MAD_METHOD_DELETE, &method_mask); +#ifdef DUAL_SIDED_RMPP + set_bit(IB_MAD_METHOD_GETMULTI, &method_mask); +#endif + /* Add in IB_MAD_METHOD_GETTRACETABLE */ + /* when supported by OpenSM */ + } + } + if (p_user_bind->is_report_processor) + set_bit(IB_MAD_METHOD_REPORT, &method_mask); + if (p_user_bind->is_trap_processor) { + set_bit(IB_MAD_METHOD_TRAP, &method_mask); + set_bit(IB_MAD_METHOD_TRAP_REPRESS, &method_mask); + } +#ifndef VENDOR_RMPP_SUPPORT + rmpp_version = 0; +#else + /* If SA class, set rmpp_version */ + if (p_user_bind->mad_class == IB_MCLASS_SUBN_ADM) + rmpp_version = 1; + else + rmpp_version = 0; +#endif + + if ((p_bind->agent_id = umad_register(p_vend->umad_port_id, + p_user_bind->mad_class, + p_user_bind->class_version, + rmpp_version, method_mask)) < 0) { + OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5426: " + "Unable to register class %u version %u\n", + p_user_bind->mad_class, p_user_bind->class_version); + free(p_bind); + p_bind = 0; + goto Exit; + } + + if (p_bind->agent_id >= OSM_UMAD_MAX_AGENTS || + p_vend->agents[p_bind->agent_id]) { + 
OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5427: " + "bad agent id %u or duplicate agent for class %u vers %u\n", + p_bind->agent_id, p_user_bind->mad_class, + p_user_bind->class_version); + free(p_bind); + p_bind = 0; + goto Exit; + } + + p_vend->agents[p_bind->agent_id] = p_bind; + + /* If Subn Directed Route class, register Subn LID routed class */ + if (p_user_bind->mad_class == IB_MCLASS_SUBN_DIR) { + if ((p_bind->agent_id1 = umad_register(p_vend->umad_port_id, + IB_MCLASS_SUBN_LID, + p_user_bind-> + class_version, 0, + method_mask)) < 0) { + OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5428: " + "Unable to register class 1 version %u\n", + p_user_bind->class_version); + free(p_bind); + p_bind = 0; + goto Exit; + } + + if (p_bind->agent_id1 >= OSM_UMAD_MAX_AGENTS || + p_vend->agents[p_bind->agent_id1]) { + OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5429: " + "bad agent id %u or duplicate agent for class 1 vers %u\n", + p_bind->agent_id1, p_user_bind->class_version); + free(p_bind); + p_bind = 0; + goto Exit; + } + + p_vend->agents[p_bind->agent_id1] = p_bind; + } + +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return ((osm_bind_handle_t) p_bind); +} + +static void +__osm_vendor_recv_dummy_cb(IN osm_madw_t * p_madw, + IN void *bind_context, IN osm_madw_t * p_req_madw) +{ +#ifdef _DEBUG_ + fprintf(stderr, + "__osm_vendor_recv_dummy_cb: Ignoring received MAD after osm_vendor_unbind\n"); +#endif +} + +static void +__osm_vendor_send_err_dummy_cb(IN void *bind_context, + IN osm_madw_t * p_req_madw) +{ +#ifdef _DEBUG_ + fprintf(stderr, + "__osm_vendor_send_err_dummy_cb: Ignoring send error after osm_vendor_unbind\n"); +#endif +} + +void osm_vendor_unbind(IN osm_bind_handle_t h_bind) +{ + osm_umad_bind_info_t *p_bind = (osm_umad_bind_info_t *) h_bind; + osm_vendor_t *p_vend = p_bind->p_vend; + + OSM_LOG_ENTER(p_vend->p_log); + + pthread_mutex_lock(&p_vend->cb_mutex); + p_bind->mad_recv_callback = __osm_vendor_recv_dummy_cb; + p_bind->send_err_callback = 
__osm_vendor_send_err_dummy_cb;
+	pthread_mutex_unlock(&p_vend->cb_mutex);
+
+	OSM_LOG_EXIT(p_vend->p_log);
+}
+
+/*
+ * Allocate a umad buffer able to hold a MAD of mad_size bytes and record it
+ * in the vendor wrapper p_vw together with the size and the bind handle.
+ *
+ * Returns a pointer to the MAD inside the umad buffer, or NULL when
+ * umad_alloc() returned NULL.
+ */
+ib_mad_t *osm_vendor_get(IN osm_bind_handle_t h_bind,
+			 IN const uint32_t mad_size,
+			 IN osm_vend_wrap_t * const p_vw)
+{
+	osm_umad_bind_info_t *p_bind = (osm_umad_bind_info_t *) h_bind;
+	osm_vendor_t *p_vend = p_bind->p_vend;
+
+	OSM_LOG_ENTER(p_vend->p_log);
+
+	OSM_LOG(p_vend->p_log, OSM_LOG_DEBUG,
+		"Acquiring UMAD for p_madw = %p, size = %u\n", p_vw, mad_size);
+	CL_ASSERT(p_vw);
+	p_vw->size = mad_size;
+	/* the buffer holds the umad header followed by the MAD itself */
+	p_vw->umad = umad_alloc(1, mad_size + umad_size());
+
+	/* track locally */
+	p_vw->h_bind = h_bind;
+
+	OSM_LOG(p_vend->p_log, OSM_LOG_DEBUG,
+		"Acquired UMAD %p, size = %u\n", p_vw->umad, p_vw->size);
+
+	OSM_LOG_EXIT(p_vend->p_log);
+	return (p_vw->umad ? umad_get_mad(p_vw->umad) : NULL);
+}
+
+/*
+ * Release the umad buffer held by the vendor wrapper p_vw and clear the
+ * p_mad pointer of the osm_madw_t that embeds the wrapper.  The wrapper
+ * itself is not freed here.
+ */
+void
+osm_vendor_put(IN osm_bind_handle_t h_bind, IN osm_vend_wrap_t * const p_vw)
+{
+	osm_umad_bind_info_t *p_bind = (osm_umad_bind_info_t *) h_bind;
+	osm_vendor_t *p_vend = p_bind->p_vend;
+	osm_madw_t *p_madw;
+
+	OSM_LOG_ENTER(p_vend->p_log);
+
+	CL_ASSERT(p_vw);
+
+	OSM_LOG(p_vend->p_log, OSM_LOG_DEBUG, "Retiring UMAD %p\n", p_vw->umad);
+
+	/*
+	 * We moved the removal of the transaction to immediately after
+	 * it was looked up.
+	 */
+
+	/* free the mad but the wrapper is part of the madw object */
+	umad_free(p_vw->umad);
+	p_vw->umad = 0;
+	p_madw = PARENT_STRUCT(p_vw, osm_madw_t, vend_wrap);
+	p_madw->p_mad = NULL;
+
+	OSM_LOG_EXIT(p_vend->p_log);
+}
+
+/*
+ * Send the MAD wrapped by p_madw through the agent of the given bind.
+ *
+ * Addressing is filled in per management class: directed route SMPs use
+ * LID 0xffff, LID routed SMPs use the destination LID, and all other
+ * (GS) classes additionally get QP, SL, the well known QP1 Q_Key, an
+ * optional GRH and the pkey index.  For RMPP capable classes the RMPP
+ * header is cleared when the ACTIVE flag is not set; the handling of an
+ * active RMPP header depends on VENDOR_RMPP_SUPPORT.
+ *
+ * When resp_expected is set the madw is recorded with put_madw() before the
+ * send and the timeout is taken from the madw or, if zero, from the bind.
+ *
+ * On a umad_send() failure: with resp_expected the madw is removed again
+ * with get_madw(), its status is set to IB_ERROR and the bind's send error
+ * callback is invoked under cb_mutex; otherwise the madw is returned to the
+ * MAD pool.  On success a madw for which no response is expected is
+ * returned to the MAD pool.
+ *
+ * Returns the value returned by umad_send().
+ */
+ib_api_status_t
+osm_vendor_send(IN osm_bind_handle_t h_bind,
+		IN osm_madw_t * const p_madw, IN boolean_t const resp_expected)
+{
+	osm_umad_bind_info_t *const p_bind = h_bind;
+	osm_vendor_t *const p_vend = p_bind->p_vend;
+	osm_vend_wrap_t *const p_vw = osm_madw_get_vend_ptr(p_madw);
+	osm_mad_addr_t *const p_mad_addr = osm_madw_get_mad_addr_ptr(p_madw);
+	ib_mad_t *const p_mad = osm_madw_get_mad_ptr(p_madw);
+	ib_sa_mad_t *const p_sa = (ib_sa_mad_t *) p_mad;
+	ib_mad_addr_t mad_addr;
+	int ret = -1;
+	int __attribute__((__unused__)) is_rmpp = 0;
+	uint32_t sent_mad_size, timeout = 0;
+	uint64_t tid;
+#ifndef VENDOR_RMPP_SUPPORT
+	uint32_t paylen = 0;
+#endif
+
+	OSM_LOG_ENTER(p_vend->p_log);
+
+	CL_ASSERT(p_vw->h_bind == h_bind);
+	CL_ASSERT(p_mad == umad_get_mad(p_vw->umad));
+
+	/* SMP classes need no GS addressing or RMPP handling */
+	if (p_mad->mgmt_class == IB_MCLASS_SUBN_DIR) {
+		umad_set_addr_net(p_vw->umad, 0xffff, 0, 0, 0);
+		umad_set_grh(p_vw->umad, NULL);
+		goto Resp;
+	}
+	if (p_mad->mgmt_class == IB_MCLASS_SUBN_LID) {
+		umad_set_addr_net(p_vw->umad, p_mad_addr->dest_lid, 0, 0, 0);
+		umad_set_grh(p_vw->umad, NULL);
+		goto Resp;
+	}
+	/* GS classes */
+	umad_set_addr_net(p_vw->umad, p_mad_addr->dest_lid,
+			  p_mad_addr->addr_type.gsi.remote_qp,
+			  p_mad_addr->addr_type.gsi.service_level,
+			  IB_QP1_WELL_KNOWN_Q_KEY);
+	if (p_mad_addr->addr_type.gsi.global_route) {
+		mad_addr.grh_present = 1;
+		mad_addr.gid_index = 0;
+		mad_addr.hop_limit = p_mad_addr->addr_type.gsi.grh_info.hop_limit;
+		ib_grh_get_ver_class_flow(p_mad_addr->addr_type.gsi.grh_info.ver_class_flow,
+					  NULL, &mad_addr.traffic_class,
+					  &mad_addr.flow_label);
+		memcpy(&mad_addr.gid, &p_mad_addr->addr_type.gsi.grh_info.dest_gid, 16);
+		umad_set_grh(p_vw->umad, &mad_addr);
+	} else
+		umad_set_grh(p_vw->umad, NULL);
+	umad_set_pkey(p_vw->umad, p_mad_addr->addr_type.gsi.pkey_ix);
+	if (ib_class_is_rmpp(p_mad->mgmt_class)) {	/* RMPP GS classes */
+		if (!ib_rmpp_is_flag_set((ib_rmpp_mad_t *) p_sa,
+					 IB_RMPP_FLAG_ACTIVE)) {
+			/* Clear RMPP header when RMPP not ACTIVE */
+			p_sa->rmpp_version = 0;
+			p_sa->rmpp_type = 0;
+			p_sa->rmpp_flags = 0;
+			p_sa->rmpp_status = 0;
+			/*
+			 * The closing brace and else branch of this "if" are
+			 * supplied by exactly one of the two preprocessor
+			 * branches below.
+			 */
+#ifdef VENDOR_RMPP_SUPPORT
+		} else
+			is_rmpp = 1;
+		OSM_LOG(p_vend->p_log, OSM_LOG_DEBUG, "RMPP %d length %d\n",
+			ib_rmpp_is_flag_set((ib_rmpp_mad_t *) p_sa,
+					    IB_RMPP_FLAG_ACTIVE),
+			p_madw->mad_size);
+#else
+		} else {
+			p_sa->rmpp_version = 1;
+			p_sa->seg_num = cl_ntoh32(1);	/* first DATA is seg 1 */
+			p_sa->rmpp_flags |= (uint8_t) 0x70;	/* RRespTime of 14 (high 5 bits) */
+			p_sa->rmpp_status = 0;
+			paylen = p_madw->mad_size - IB_SA_MAD_HDR_SIZE;
+			paylen += (IB_SA_MAD_HDR_SIZE - MAD_RMPP_HDR_SIZE);
+			p_sa->paylen_newwin = cl_ntoh32(paylen);
+		}
+#endif
+	}
+
+Resp:
+	/* record the request before sending so a response can be matched */
+	if (resp_expected)
+		put_madw(p_vend, p_madw, p_mad->trans_id, p_mad->mgmt_class);
+
+#ifdef VENDOR_RMPP_SUPPORT
+	sent_mad_size = p_madw->mad_size;
+#else
+	sent_mad_size = is_rmpp ? p_madw->mad_size - IB_SA_MAD_HDR_SIZE :
+	    p_madw->mad_size;
+#endif
+	tid = cl_ntoh64(p_mad->trans_id);
+	/* timeout stays 0 when no response is expected */
+	if (resp_expected) {
+		if (p_madw->timeout)
+			timeout = p_madw->timeout;
+		else
+			timeout = p_bind->timeout;
+	}
+	if ((ret = umad_send(p_bind->port_id, p_bind->agent_id, p_vw->umad,
+			     sent_mad_size, timeout,
+			     p_bind->max_retries)) < 0) {
+		OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5430: "
+			"Send p_madw = %p of size %d, Class 0x%x, Method 0x%X, "
+			"Attr 0x%X, TID 0x%" PRIx64 " failed %d (%m)\n",
+			p_madw, sent_mad_size, p_mad->mgmt_class,
+			p_mad->method, cl_ntoh16(p_mad->attr_id), tid, ret);
+		if (resp_expected) {
+			get_madw(p_vend, &p_mad->trans_id,
+				 p_mad->mgmt_class);	/* remove from aging table */
+			p_madw->status = IB_ERROR;
+			pthread_mutex_lock(&p_vend->cb_mutex);
+			(*p_bind->send_err_callback) (p_bind->client_context, p_madw);	/* cb frees madw */
+			pthread_mutex_unlock(&p_vend->cb_mutex);
+		} else
+			osm_mad_pool_put(p_bind->p_mad_pool, p_madw);
+		goto Exit;
+	}
+
+	if (!resp_expected)
+		osm_mad_pool_put(p_bind->p_mad_pool, p_madw);
+
+	OSM_LOG(p_vend->p_log, OSM_LOG_DEBUG, "Completed sending %s TID 0x%" PRIx64 "\n",
+		resp_expected ? "request" : "response or unsolicited", tid);
+Exit:
+	OSM_LOG_EXIT(p_vend->p_log);
+	return (ret);
+}
+
+/*
+ * Local LID change notification: nothing to do in this vendor layer beyond
+ * the enter/exit log entries.  Always returns 0.
+ */
+ib_api_status_t osm_vendor_local_lid_change(IN osm_bind_handle_t h_bind)
+{
+	osm_umad_bind_info_t *p_bind = (osm_umad_bind_info_t *) h_bind;
+	osm_vendor_t *p_vend = p_bind->p_vend;
+
+	OSM_LOG_ENTER(p_vend->p_log);
+	;
+	OSM_LOG_EXIT(p_vend->p_log);
+	return (0);
+}
+
+/*
+ * Set or clear the IS_SM indication by opening or closing the file at
+ * p_vend->issm_path.  The descriptor is kept in p_vend->issmfd, which is
+ * -1 whenever the file is not open.  Failures are logged only.
+ */
+void osm_vendor_set_sm(IN osm_bind_handle_t h_bind, IN boolean_t is_sm_val)
+{
+	osm_umad_bind_info_t *p_bind = (osm_umad_bind_info_t *) h_bind;
+	osm_vendor_t *p_vend = p_bind->p_vend;
+
+	OSM_LOG_ENTER(p_vend->p_log);
+	if (TRUE == is_sm_val) {
+		p_vend->issmfd = open(p_vend->issm_path, O_NONBLOCK);
+		if (p_vend->issmfd < 0) {
+			OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5431: "
+				"setting IS_SM capmask: cannot open file "
+				"\'%s\': %s\n",
+				p_vend->issm_path, strerror(errno));
+			p_vend->issmfd = -1;
+		}
+	} else if (p_vend->issmfd != -1) {
+		if (0 != close(p_vend->issmfd))
+			OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 5432: "
+				"clearing IS_SM capmask: cannot close: %s\n",
+				strerror(errno));
+		p_vend->issmfd = -1;
+	}
+	OSM_LOG_EXIT(p_vend->p_log);
+}
+
+/* Forward the requested debug level to umad_debug(); p_vend is unused. */
+void osm_vendor_set_debug(IN osm_vendor_t * const p_vend, IN int32_t level)
+{
+	umad_debug(level);
+}
+
+#endif				/* OSM_VENDOR_INTF_OPENIB */
diff --git a/libvendor/osm_vendor_ibumad_sa.c b/libvendor/osm_vendor_ibumad_sa.c
new file mode 100644
index 0000000..8c884b1
--- /dev/null
+++ b/libvendor/osm_vendor_ibumad_sa.c
@@ -0,0 +1,725 @@
+/*
+ * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2007,2009 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * Copyright (c) 2009,2010 HNR Consulting. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.
You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#if HAVE_CONFIG_H
+# include
+#endif /* HAVE_CONFIG_H */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* this struct is the internal rep of the bind handle */
+typedef struct _osmv_sa_bind_info {
+	/* handle returned by the lower level osm_vendor_bind() */
+	osm_bind_handle_t h_bind;
+	/* log object, copied from the vendor object at bind time */
+	osm_log_t *p_log;
+	/* vendor object this SA client is bound through */
+	osm_vendor_t *p_vendor;
+	/* pool the request MADs are taken from and returned to */
+	osm_mad_pool_t *p_mad_pool;
+	/* signalled by the callbacks for OSM_SA_FLAGS_SYNC queries */
+	cl_event_t sync_event;
+	/* time() of the last refresh of the port's base and SM lids */
+	time_t last_lids_update_sec;
+} osmv_sa_bind_info_t;
+
+/*
+  Call back on new mad received:
+
+  We basically only need to set the context of the query.
+  Or report an error.
+
+  A pointer to the actual context of the request (a copy of the original
+  request structure) is attached as the p_madw->context.ni_context.node_guid
+
+  The result handed to the client's pfn_query_cb carries the query context
+  and type of the request copy, the received madw, a status (IB_SUCCESS,
+  IB_REMOTE_ERROR for a non-zero MAD status, IB_ERROR for an empty MAD) and
+  a result count.  The count is 0 unless the status is IB_SUCCESS.
+  The request copy and the request madw are released before returning.
+*/
+static void
+__osmv_sa_mad_rcv_cb(IN osm_madw_t * p_madw,
+		     IN void *bind_context, IN osm_madw_t * p_req_madw)
+{
+	osmv_sa_bind_info_t *p_bind = (osmv_sa_bind_info_t *) bind_context;
+	osmv_query_req_t *p_query_req_copy = NULL;
+	osmv_query_res_t query_res;
+	ib_sa_mad_t *p_sa_mad;
+	ib_net16_t mad_status;
+
+	OSM_LOG_ENTER(p_bind->p_log);
+
+	if (!p_req_madw) {
+		OSM_LOG(p_bind->p_log, OSM_LOG_DEBUG,
+			"Ignoring a non-response mad\n");
+		osm_mad_pool_put(p_bind->p_mad_pool, p_madw);
+		goto Exit;
+	}
+
+	/* obtain the sent context since we store it during send in the ni_ctx */
+	p_query_req_copy = (osmv_query_req_t *)
+	    (uintptr_t)(osm_madw_get_ni_context_ptr(p_req_madw)->node_guid);
+
+	/* provide the context of the original request in the result */
+	query_res.query_context = p_query_req_copy->query_context;
+
+	/* provide the resulting madw */
+	query_res.p_result_madw = p_madw;
+
+	/* never hand an indeterminate record count to the client */
+	query_res.result_cnt = 0;
+
+	/* update the req fields */
+	p_sa_mad = (ib_sa_mad_t *) p_madw->p_mad;
+
+	/* if we got a remote error track it in the status */
+	mad_status = (ib_net16_t) (p_sa_mad->status & IB_SMP_STATUS_MASK);
+	if (mad_status != IB_SUCCESS) {
+		OSM_LOG(p_bind->p_log, OSM_LOG_ERROR, "ERR 5501: "
+			"Remote error: 0x%04X\n", cl_ntoh16(mad_status));
+		query_res.status = IB_REMOTE_ERROR;
+	} else
+		query_res.status = IB_SUCCESS;
+
+	/* what if we have got back an empty mad ? */
+	if (!p_madw->mad_size) {
+		OSM_LOG(p_bind->p_log, OSM_LOG_ERROR, "ERR 5502: "
+			"Got an empty mad\n");
+		query_res.status = IB_ERROR;
+	}
+
+	/*
+	 * Only count records for a usable response: a remote error or an
+	 * empty MAD leaves result_cnt at 0.
+	 */
+	if (query_res.status == IB_SUCCESS) {
+
+		/* if we are in not in a method response of an rmpp nature we must get only 1 */
+		/* HACK: in the future we might need to be smarter for other methods... */
+		if (p_sa_mad->method != IB_MAD_METHOD_GETTABLE_RESP) {
+			query_res.result_cnt = 1;
+		} else {
+#ifndef VENDOR_RMPP_SUPPORT
+			query_res.result_cnt = 1;
+#else
+			/*
+			 * A MAD shorter than the SA header cannot carry any
+			 * record; checking this also keeps the subtraction
+			 * below from wrapping around.
+			 */
+			if (ib_get_attr_size(p_sa_mad->attr_offset) &&
+			    p_madw->mad_size >= IB_SA_MAD_HDR_SIZE) {
+				/* we used the offset value to calculate the
+				   number of records in here */
+				query_res.result_cnt =
+				    (p_madw->mad_size - IB_SA_MAD_HDR_SIZE) /
+				    ib_get_attr_size(p_sa_mad->attr_offset);
+				OSM_LOG(p_bind->p_log, OSM_LOG_DEBUG,
+					"Count = %u = %zu / %u (%zu)\n",
+					query_res.result_cnt,
+					p_madw->mad_size - IB_SA_MAD_HDR_SIZE,
+					ib_get_attr_size(p_sa_mad->attr_offset),
+					(p_madw->mad_size -
+					 IB_SA_MAD_HDR_SIZE) %
+					ib_get_attr_size(p_sa_mad->attr_offset));
+			} else
+				query_res.result_cnt = 0;
+#endif
+		}
+	}
+
+	query_res.query_type = p_query_req_copy->query_type;
+
+	p_query_req_copy->pfn_query_cb(&query_res);
+
+	/* wake up a synchronous caller waiting on the bind's event */
+	if ((p_query_req_copy->flags & OSM_SA_FLAGS_SYNC) == OSM_SA_FLAGS_SYNC)
+		cl_event_signal(&p_bind->sync_event);
+
+Exit:
+
+	/* free the copied query request if found */
+	if (p_query_req_copy)
+		free(p_query_req_copy);
+
+	/* put back the request madw */
+	if (p_req_madw)
+		osm_mad_pool_put(p_bind->p_mad_pool, p_req_madw);
+
+	OSM_LOG_EXIT(p_bind->p_log);
+}
+
+/*
+  Send Error Callback:
+
+  Only report the error and get rid of the mad wrapper
+
+  The client's pfn_query_cb is called with status IB_TIMEOUT, a result
+  count of 0 and the request madw as p_result_madw; the request copy is
+  then freed.
+*/
+static void __osmv_sa_mad_err_cb(IN void *bind_context, IN osm_madw_t * p_madw)
+{
+	osmv_sa_bind_info_t *p_bind = (osmv_sa_bind_info_t *) bind_context;
+	osmv_query_req_t *p_query_req_copy = NULL;
+	osmv_query_res_t query_res;
+
+	OSM_LOG_ENTER(p_bind->p_log);
+
+	/* Obtain the sent context etc */
+	p_query_req_copy = (osmv_query_req_t *)
+	    (uintptr_t)(osm_madw_get_ni_context_ptr(p_madw)->node_guid);
+
+	/* provide the context of the original request in the result */
+	query_res.query_context = p_query_req_copy->query_context;
+
+	query_res.p_result_madw = p_madw;
+
+	query_res.status = IB_TIMEOUT;
+	query_res.result_cnt = 0;
+
+	query_res.query_type = p_query_req_copy->query_type;
+
+	p_query_req_copy->pfn_query_cb(&query_res);
+
+	if ((p_query_req_copy->flags & OSM_SA_FLAGS_SYNC) == OSM_SA_FLAGS_SYNC)
+		cl_event_signal(&p_bind->sync_event);
+
+	free(p_query_req_copy);
+	OSM_LOG_EXIT(p_bind->p_log);
+}
+
+/*****************************************************************************
+ Update lids of vendor umad_port.
+
+ Re-reads the port with umad_get_port() and copies its base_lid and sm_lid
+ into p_vend->umad_port.  Returns IB_ERROR when the port cannot be read,
+ IB_SUCCESS otherwise.
+ *****************************************************************************/
+static ib_api_status_t update_umad_port(osm_vendor_t * p_vend)
+{
+	umad_port_t port;
+	if (umad_get_port(p_vend->umad_port.ca_name,
+			  p_vend->umad_port.portnum, &port) < 0)
+		return IB_ERROR;
+	p_vend->umad_port.base_lid = port.base_lid;
+	p_vend->umad_port.sm_lid = port.sm_lid;
+	umad_release_port(&port);
+	return IB_SUCCESS;
+}
+
+/*
+ * Bind an SA client to the port with the given GUID.
+ *
+ * Allocates the SA bind object, binds the SubnAdm class (version 2) through
+ * osm_vendor_bind() with the receive and send error callbacks of this file
+ * and the bind object as their context, and initializes the event used for
+ * synchronous queries.
+ *
+ * Returns the SA bind object as a handle, or OSM_BIND_INVALID_HANDLE on
+ * allocation, bind or event initialization failure.
+ */
+osm_bind_handle_t
+osmv_bind_sa(IN osm_vendor_t * const p_vend,
+	     IN osm_mad_pool_t * const p_mad_pool, IN ib_net64_t port_guid)
+{
+	osm_bind_info_t bind_info;
+	osm_log_t *p_log = p_vend->p_log;
+	osmv_sa_bind_info_t *p_sa_bind_info;
+	cl_status_t cl_status;
+
+	OSM_LOG_ENTER(p_log);
+
+	OSM_LOG(p_log, OSM_LOG_DEBUG,
+		"Binding to port 0x%" PRIx64 "\n", cl_ntoh64(port_guid));
+
+	bind_info.port_guid = port_guid;
+	bind_info.mad_class = IB_MCLASS_SUBN_ADM;
+	bind_info.class_version = 2;
+	bind_info.is_responder = FALSE;
+	bind_info.is_trap_processor = FALSE;
+	bind_info.is_report_processor = FALSE;
+	/* each queue gets the default that matches its direction */
+	bind_info.send_q_size = OSM_SM_DEFAULT_QP1_SEND_SIZE;
+	bind_info.recv_q_size = OSM_SM_DEFAULT_QP1_RCV_SIZE;
+	bind_info.timeout = p_vend->timeout;
+	bind_info.retries = OSM_DEFAULT_RETRY_COUNT;
+
+	/* allocate the new sa bind info */
+	p_sa_bind_info =
+	    (osmv_sa_bind_info_t *) malloc(sizeof(osmv_sa_bind_info_t));
+	if (!p_sa_bind_info) {
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 5505: "
+			"Failed to allocate new bind structure\n");
+		p_sa_bind_info = OSM_BIND_INVALID_HANDLE;
+		goto Exit;
+	}
+
+	/* store some important context */
+	p_sa_bind_info->p_log = p_log;
+	p_sa_bind_info->p_mad_pool = p_mad_pool;
+	p_sa_bind_info->p_vendor = p_vend;
+
+	/* Bind to the lower level */
+	p_sa_bind_info->h_bind = osm_vendor_bind(p_vend, &bind_info, p_mad_pool, __osmv_sa_mad_rcv_cb, __osmv_sa_mad_err_cb, p_sa_bind_info);	/* context provided to CBs */
+
+	if (p_sa_bind_info->h_bind == OSM_BIND_INVALID_HANDLE) {
+		free(p_sa_bind_info);
+		p_sa_bind_info = OSM_BIND_INVALID_HANDLE;
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 5506: "
+			"Failed to bind to vendor GSI\n");
+		goto Exit;
+	}
+
+	/* update time umad_port is initialized now */
+	p_sa_bind_info->last_lids_update_sec = time(NULL);
+
+	/* initialize the sync_event */
+	cl_event_construct(&p_sa_bind_info->sync_event);
+	cl_status = cl_event_init(&p_sa_bind_info->sync_event, TRUE);
+	if (cl_status != CL_SUCCESS) {
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 5508: "
+			"cl_init_event failed: %s\n", ib_get_err_str(cl_status));
+		/*
+		 * The lower level bind still holds p_sa_bind_info as the
+		 * callback context; detach its callbacks before the context
+		 * is freed.
+		 */
+		osm_vendor_unbind(p_sa_bind_info->h_bind);
+		free(p_sa_bind_info);
+		p_sa_bind_info = OSM_BIND_INVALID_HANDLE;
+	}
+
+Exit:
+	OSM_LOG_EXIT(p_log);
+	return (p_sa_bind_info);
+}
+
+/****t* OSM Vendor SA Client/osmv_sa_mad_data
+ * NAME
+ *    osmv_sa_mad_data
+ *
+ * DESCRIPTION
+ * Extra fields required to perform a mad query
+ *  This struct is passed to the actual send method
+ *
+ * SYNOPSIS
+ */
+typedef struct _osmv_sa_mad_data {
+	/* MAD data. */
+	uint8_t method;
+	ib_net16_t attr_id;
+	ib_net16_t attr_offset;
+	ib_net32_t attr_mod;
+	ib_net64_t comp_mask;
+	void *p_attr;
+} osmv_sa_mad_data_t;
+/*
+ * method
+ *    The method of the mad to be sent
+ *
+ *  attr_id
+ *     Attribute ID
+ *
+ *  attr_offset
+ *     Offset as defined by RMPP
+ *
+ *  attr_mod
+ *     Attribute modifier
+ *
+ *  comp_mask
+ *     The component mask of the query
+ *
+ *  p_attr
+ *     A pointer to the record of the attribute to be sent.
+ *
+ *****/
+
+/*
+ * Send a MAD out on the GSI interface
+ *
+ * Builds an SA request from p_sa_mad_data (method, attribute id/modifier,
+ * component mask and, when the mask is non-zero, the attribute record),
+ * addresses it to the cached SM lid and sends it with a response expected.
+ * A heap copy of *p_query_req travels with the madw (stored in the
+ * ni_context node_guid field) so the callbacks can find the client's
+ * callback and context.
+ *
+ * p_bind        - SA bind object (vendor, pool, lower level handle, event)
+ * p_sa_mad_data - MAD fields for this request
+ * p_query_req   - the client's request; copied, not retained
+ *
+ * Returns the status of the lid refresh or allocation failure if one
+ * occurs, otherwise the result of osm_vendor_send(); for a request with
+ * OSM_SA_FLAGS_SYNC the function waits on the bind's sync event and
+ * returns p_madw->status instead.
+ */
+static ib_api_status_t
+__osmv_send_sa_req(IN osmv_sa_bind_info_t * p_bind,
+		   IN const osmv_sa_mad_data_t * const p_sa_mad_data,
+		   IN const osmv_query_req_t * const p_query_req)
+{
+	ib_api_status_t status;
+	ib_mad_t *p_mad_hdr;
+	ib_sa_mad_t *p_sa_mad;
+	osm_madw_t *p_madw;
+	osm_log_t *p_log = p_bind->p_log;
+	static atomic32_t trans_id;
+	boolean_t sync;
+	osmv_query_req_t *p_query_req_copy;
+	uint32_t sa_size;
+
+	OSM_LOG_ENTER(p_log);
+
+	/*
+	   since the sm_lid might change we obtain it every send
+	   (actually it is cached in the bind object and refreshed
+	   every 30sec by this proc)
+	 */
+	if (time(NULL) > p_bind->last_lids_update_sec + 30) {
+		status = update_umad_port(p_bind->p_vendor);
+		if (status != IB_SUCCESS) {
+			OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 5509: "
+				"Failed to obtain the SM lid\n");
+			goto Exit;
+		}
+		p_bind->last_lids_update_sec = time(NULL);
+	}
+
+	/* Get a MAD wrapper for the send */
+	p_madw = osm_mad_pool_get(p_bind->p_mad_pool,
+				  p_bind->h_bind, MAD_BLOCK_SIZE, NULL);
+
+	if (p_madw == NULL) {
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 5510: "
+			"Unable to acquire MAD\n");
+		status = IB_INSUFFICIENT_RESOURCES;
+		goto Exit;
+	}
+
+	/* Initialize the Sent MAD: */
+
+	/* Initialize the MAD buffer for the send operation. */
+	p_mad_hdr = osm_madw_get_mad_ptr(p_madw);
+	p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
+
+	/* Get a new transaction Id */
+	cl_atomic_inc(&trans_id);
+
+	/* Cleanup the MAD from any residue */
+	memset(p_sa_mad, 0, MAD_BLOCK_SIZE);
+
+	/* Initialize the standard MAD header. */
+	ib_mad_init_new(p_mad_hdr,	/* mad pointer */
+			IB_MCLASS_SUBN_ADM,	/* class */
+			(uint8_t) 2,	/* version */
+			p_sa_mad_data->method,	/* method */
+			cl_hton64((uint64_t) trans_id),	/* tid */
+			p_sa_mad_data->attr_id,	/* attr id */
+			p_sa_mad_data->attr_mod	/* attr mod */);
+
+	/* Set the query information. */
+	p_sa_mad->sm_key = p_query_req->sm_key;
+	p_sa_mad->attr_offset = 0;
+	p_sa_mad->comp_mask = p_sa_mad_data->comp_mask;
+#ifdef DUAL_SIDED_RMPP
+	if (p_sa_mad->method == IB_MAD_METHOD_GETMULTI)
+		p_sa_mad->rmpp_flags = IB_RMPP_FLAG_ACTIVE;
+#endif
+	/* the attribute record is copied only when a component mask is set */
+	if (p_sa_mad->comp_mask) {
+		/* record size from attr_offset when given, else the full SA data size */
+		p_sa_mad_data->attr_offset ? (sa_size = ib_get_attr_size(p_sa_mad_data->attr_offset)) : (sa_size = IB_SA_DATA_SIZE);
+		/* NOTE(review): assumes p_attr points at at least sa_size readable bytes - confirm against callers */
+		memcpy(p_sa_mad->data, p_sa_mad_data->p_attr, sa_size);
+	}
+
+	/*
+	   Provide the address to send to
+	 */
+	p_madw->mad_addr.dest_lid =
+	    cl_hton16(p_bind->p_vendor->umad_port.sm_lid);
+	p_madw->mad_addr.addr_type.smi.source_lid =
+	    cl_hton16(p_bind->p_vendor->umad_port.base_lid);
+	p_madw->mad_addr.addr_type.gsi.remote_qp = CL_HTON32(1);
+	p_madw->resp_expected = TRUE;
+	p_madw->fail_msg = CL_DISP_MSGID_NONE;
+
+	/*
+	   add grh
+	 */
+	if (p_query_req->with_grh) {
+		OSM_LOG(p_log, OSM_LOG_DEBUG, "sending sa query with GRH "
+			"GID 0x%016" PRIx64 " 0x%016" PRIx64 "\n",
+			cl_ntoh64(p_query_req->gid.unicast.prefix),
+			cl_ntoh64(p_query_req->gid.unicast.interface_id));
+		p_madw->mad_addr.addr_type.gsi.global_route = 1;
+		memset(&p_madw->mad_addr.addr_type.gsi.grh_info, 0,
+		       sizeof(p_madw->mad_addr.addr_type.gsi.grh_info));
+		memcpy(&p_madw->mad_addr.addr_type.gsi.grh_info.dest_gid, &(p_query_req->gid), 16);
+	}
+
+	/*
+	   Provide MAD context such that the call back will know what to do.
+	   We have to keep the entire request structure so we know the CB.
+	   Since we can not rely on the client to keep it around until
+	   the response - we duplicate it and will later dispose it (in CB).
+	   To store on the MADW we cast it into what opensm has:
+	   p_madw->context.ni_context.node_guid
+	 */
+	p_query_req_copy = malloc(sizeof(*p_query_req_copy));
+	if (!p_query_req_copy) {
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 5511: "
+			"Unable to acquire memory for query copy\n");
+		osm_mad_pool_put(p_bind->p_mad_pool, p_madw);
+		status = IB_INSUFFICIENT_RESOURCES;
+		goto Exit;
+	}
+	*p_query_req_copy = *p_query_req;
+	osm_madw_get_ni_context_ptr(p_madw)->node_guid =
+	    (ib_net64_t) (uintptr_t)p_query_req_copy;
+
+	/* we can support async as well as sync calls */
+	sync = ((p_query_req->flags & OSM_SA_FLAGS_SYNC) == OSM_SA_FLAGS_SYNC);
+
+	/* send the mad asynchronously */
+	status = osm_vendor_send(osm_madw_get_bind_handle(p_madw),
+				 p_madw, p_madw->resp_expected);
+
+	/* if synchronous - wait on the event */
+	if (sync) {
+		OSM_LOG(p_log, OSM_LOG_DEBUG, "Waiting for async event\n");
+		cl_event_wait_on(&p_bind->sync_event, EVENT_NO_TIMEOUT, FALSE);
+		cl_event_reset(&p_bind->sync_event);
+		/* NOTE(review): p_madw is read after the callback has run; the receive callback returns the request madw to the pool - confirm it is still valid here */
+		status = p_madw->status;
+	}
+
+Exit:
+	OSM_LOG_EXIT(p_log);
+	return status;
+}
+
+/*
+ * Query the SA based on the user's request.
+ */
+ib_api_status_t
+osmv_query_sa(IN osm_bind_handle_t h_bind,
+	      IN const osmv_query_req_t * const p_query_req)
+{
+	union {
+		ib_service_record_t svc_rec;
+		ib_node_record_t node_rec;
+		ib_portinfo_record_t port_info;
+		ib_path_rec_t path_rec;
+#ifdef DUAL_SIDED_RMPP
+		ib_multipath_rec_t multipath_rec;
+#endif
+		ib_class_port_info_t class_port_info;
+	} u;
+	osmv_sa_mad_data_t sa_mad_data;
+	osmv_sa_bind_info_t *p_bind = (osmv_sa_bind_info_t *) h_bind;
+	osmv_user_query_t *p_user_query;
+#ifdef DUAL_SIDED_RMPP
+	osmv_multipath_req_t *p_mpr_req;
+	int i, j;
+#endif
+	osm_log_t *p_log = p_bind->p_log;
+	ib_api_status_t status;
+
+	OSM_LOG_ENTER(p_log);
+
+	/* Set the request information. */
+	sa_mad_data.method = IB_MAD_METHOD_GETTABLE;
+	sa_mad_data.attr_mod = 0;
+	sa_mad_data.attr_offset = 0;
+
+	/* Set the MAD attributes and component mask correctly.
*/ + switch (p_query_req->query_type) { + + case OSMV_QUERY_USER_DEFINED: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 USER_DEFINED\n"); + p_user_query = (osmv_user_query_t *) p_query_req->p_query_input; + if (p_user_query->method) + sa_mad_data.method = p_user_query->method; +#ifdef DUAL_SIDED_RMPP + if (sa_mad_data.method == IB_MAD_METHOD_GETMULTI || + sa_mad_data.method == IB_MAD_METHOD_GETTRACETABLE) + sa_mad_data.attr_offset = p_user_query->attr_offset; +#endif + sa_mad_data.attr_id = p_user_query->attr_id; + sa_mad_data.attr_mod = p_user_query->attr_mod; + sa_mad_data.comp_mask = p_user_query->comp_mask; + sa_mad_data.p_attr = p_user_query->p_attr; + break; + + case OSMV_QUERY_ALL_SVC_RECS: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 SVC_REC_BY_NAME\n"); + sa_mad_data.attr_id = IB_MAD_ATTR_SERVICE_RECORD; + sa_mad_data.comp_mask = 0; + sa_mad_data.p_attr = &u.svc_rec; + break; + + case OSMV_QUERY_SVC_REC_BY_NAME: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 SVC_REC_BY_NAME\n"); + sa_mad_data.method = IB_MAD_METHOD_GET; + sa_mad_data.attr_id = IB_MAD_ATTR_SERVICE_RECORD; + sa_mad_data.comp_mask = IB_SR_COMPMASK_SNAME; + sa_mad_data.p_attr = &u.svc_rec; + memcpy(u.svc_rec.service_name, p_query_req->p_query_input, + sizeof(ib_svc_name_t)); + break; + + case OSMV_QUERY_SVC_REC_BY_ID: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 SVC_REC_BY_ID\n"); + sa_mad_data.attr_id = IB_MAD_ATTR_SERVICE_RECORD; + sa_mad_data.comp_mask = IB_SR_COMPMASK_SID; + sa_mad_data.p_attr = &u.svc_rec; + u.svc_rec.service_id = + *(ib_net64_t *) (p_query_req->p_query_input); + break; + + case OSMV_QUERY_CLASS_PORT_INFO: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 CLASS_PORT_INFO\n"); + sa_mad_data.method = IB_MAD_METHOD_GET; + sa_mad_data.attr_id = IB_MAD_ATTR_CLASS_PORT_INFO; + sa_mad_data.comp_mask = 0; + sa_mad_data.p_attr = &u.class_port_info; + break; + + case OSMV_QUERY_NODE_REC_BY_NODE_GUID: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 NODE_REC_BY_NODE_GUID\n"); + sa_mad_data.attr_id = 
IB_MAD_ATTR_NODE_RECORD; + sa_mad_data.comp_mask = IB_NR_COMPMASK_NODEGUID; + sa_mad_data.p_attr = &u.node_rec; + u.node_rec.node_info.node_guid = + *(ib_net64_t *) (p_query_req->p_query_input); + break; + + case OSMV_QUERY_PORT_REC_BY_LID: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 PORT_REC_BY_LID\n"); + sa_mad_data.attr_id = IB_MAD_ATTR_PORTINFO_RECORD; + sa_mad_data.comp_mask = IB_PIR_COMPMASK_LID; + sa_mad_data.p_attr = &u.port_info; + u.port_info.lid = *(ib_net16_t *) (p_query_req->p_query_input); + break; + + case OSMV_QUERY_PORT_REC_BY_LID_AND_NUM: + sa_mad_data.method = IB_MAD_METHOD_GET; + p_user_query = (osmv_user_query_t *) p_query_req->p_query_input; + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 PORT_REC_BY_LID_AND_NUM\n"); + sa_mad_data.attr_id = IB_MAD_ATTR_PORTINFO_RECORD; + sa_mad_data.comp_mask = + IB_PIR_COMPMASK_LID | IB_PIR_COMPMASK_PORTNUM; + sa_mad_data.p_attr = p_user_query->p_attr; + break; + + case OSMV_QUERY_VLARB_BY_LID_PORT_BLOCK: + sa_mad_data.method = IB_MAD_METHOD_GET; + p_user_query = (osmv_user_query_t *) p_query_req->p_query_input; + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 OSMV_QUERY_VLARB_BY_LID_PORT_BLOCK\n"); + sa_mad_data.attr_id = IB_MAD_ATTR_VLARB_RECORD; + sa_mad_data.comp_mask = + IB_VLA_COMPMASK_LID | IB_VLA_COMPMASK_OUT_PORT | + IB_VLA_COMPMASK_BLOCK; + sa_mad_data.p_attr = p_user_query->p_attr; + break; + + case OSMV_QUERY_SLVL_BY_LID_AND_PORTS: + sa_mad_data.method = IB_MAD_METHOD_GET; + p_user_query = (osmv_user_query_t *) p_query_req->p_query_input; + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 OSMV_QUERY_VLARB_BY_LID_PORT_BLOCK\n"); + sa_mad_data.attr_id = IB_MAD_ATTR_SLVL_RECORD; + sa_mad_data.comp_mask = + IB_SLVL_COMPMASK_LID | IB_SLVL_COMPMASK_OUT_PORT | + IB_SLVL_COMPMASK_IN_PORT; + sa_mad_data.p_attr = p_user_query->p_attr; + break; + + case OSMV_QUERY_PATH_REC_BY_PORT_GUIDS: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 PATH_REC_BY_PORT_GUIDS\n"); + memset(&u.path_rec, 0, sizeof(ib_path_rec_t)); + sa_mad_data.attr_id = 
IB_MAD_ATTR_PATH_RECORD; + sa_mad_data.comp_mask = + (IB_PR_COMPMASK_DGID | IB_PR_COMPMASK_SGID | IB_PR_COMPMASK_NUMBPATH); + u.path_rec.num_path = 0x7f; + sa_mad_data.p_attr = &u.path_rec; + ib_gid_set_default(&u.path_rec.dgid, + ((osmv_guid_pair_t *) (p_query_req-> + p_query_input))-> + dest_guid); + ib_gid_set_default(&u.path_rec.sgid, + ((osmv_guid_pair_t *) (p_query_req-> + p_query_input))-> + src_guid); + break; + + case OSMV_QUERY_PATH_REC_BY_GIDS: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 PATH_REC_BY_GIDS\n"); + memset(&u.path_rec, 0, sizeof(ib_path_rec_t)); + sa_mad_data.attr_id = IB_MAD_ATTR_PATH_RECORD; + sa_mad_data.comp_mask = + (IB_PR_COMPMASK_DGID | IB_PR_COMPMASK_SGID | IB_PR_COMPMASK_NUMBPATH); + u.path_rec.num_path = 0x7f; + sa_mad_data.p_attr = &u.path_rec; + memcpy(&u.path_rec.dgid, + &((osmv_gid_pair_t *) (p_query_req->p_query_input))-> + dest_gid, + sizeof(ib_gid_t)); + memcpy(&u.path_rec.sgid, + &((osmv_gid_pair_t *) (p_query_req->p_query_input))-> + src_gid, + sizeof(ib_gid_t)); + break; + + case OSMV_QUERY_PATH_REC_BY_LIDS: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 PATH_REC_BY_LIDS\n"); + memset(&u.path_rec, 0, sizeof(ib_path_rec_t)); + sa_mad_data.method = IB_MAD_METHOD_GET; + sa_mad_data.attr_id = IB_MAD_ATTR_PATH_RECORD; + sa_mad_data.comp_mask = + (IB_PR_COMPMASK_DLID | IB_PR_COMPMASK_SLID); + sa_mad_data.p_attr = &u.path_rec; + u.path_rec.dlid = + ((osmv_lid_pair_t *) (p_query_req->p_query_input))->dest_lid; + u.path_rec.slid = + ((osmv_lid_pair_t *) (p_query_req->p_query_input))->src_lid; + break; + + case OSMV_QUERY_UD_MULTICAST_SET: + sa_mad_data.method = IB_MAD_METHOD_SET; + p_user_query = (osmv_user_query_t *) p_query_req->p_query_input; + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 OSMV_QUERY_UD_MULTICAST_SET\n"); + sa_mad_data.attr_id = IB_MAD_ATTR_MCMEMBER_RECORD; + sa_mad_data.comp_mask = p_user_query->comp_mask; + sa_mad_data.p_attr = p_user_query->p_attr; + break; + + case OSMV_QUERY_UD_MULTICAST_DELETE: + sa_mad_data.method = 
IB_MAD_METHOD_DELETE; + p_user_query = (osmv_user_query_t *) p_query_req->p_query_input; + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 OSMV_QUERY_UD_MULTICAST_DELETE\n"); + sa_mad_data.attr_id = IB_MAD_ATTR_MCMEMBER_RECORD; + sa_mad_data.comp_mask = p_user_query->comp_mask; + sa_mad_data.p_attr = p_user_query->p_attr; + break; + +#ifdef DUAL_SIDED_RMPP + case OSMV_QUERY_MULTIPATH_REC: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 MULTIPATH_REC\n"); + /* Validate sgid/dgid counts against SA client limit */ + p_mpr_req = (osmv_multipath_req_t *) p_query_req->p_query_input; + if (p_mpr_req->sgid_count + p_mpr_req->dgid_count > + IB_MULTIPATH_MAX_GIDS) { + OSM_LOG(p_log, OSM_LOG_ERROR, "DBG:001 MULTIPATH_REC " + "SGID count %d DGID count %d max count %d\n", + p_mpr_req->sgid_count, p_mpr_req->dgid_count, + IB_MULTIPATH_MAX_GIDS); + CL_ASSERT(0); + return IB_ERROR; + } + memset(&u.multipath_rec, 0, sizeof(ib_multipath_rec_t)); + sa_mad_data.method = IB_MAD_METHOD_GETMULTI; + sa_mad_data.attr_id = IB_MAD_ATTR_MULTIPATH_RECORD; + sa_mad_data.attr_offset = + ib_get_attr_offset(sizeof(ib_multipath_rec_t)); + sa_mad_data.p_attr = &u.multipath_rec; + sa_mad_data.comp_mask = p_mpr_req->comp_mask; + u.multipath_rec.num_path = p_mpr_req->num_path; + if (p_mpr_req->reversible) + u.multipath_rec.num_path |= 0x80; + else + u.multipath_rec.num_path &= ~0x80; + u.multipath_rec.pkey = p_mpr_req->pkey; + ib_multipath_rec_set_sl(&u.multipath_rec, p_mpr_req->sl); + ib_multipath_rec_set_qos_class(&u.multipath_rec, 0); + u.multipath_rec.independence = p_mpr_req->independence; + u.multipath_rec.sgid_count = p_mpr_req->sgid_count; + u.multipath_rec.dgid_count = p_mpr_req->dgid_count; + j = 0; + for (i = 0; i < p_mpr_req->sgid_count; i++, j++) + u.multipath_rec.gids[j] = p_mpr_req->gids[j]; + for (i = 0; i < p_mpr_req->dgid_count; i++, j++) + u.multipath_rec.gids[j] = p_mpr_req->gids[j]; + break; +#endif + + default: + OSM_LOG(p_log, OSM_LOG_ERROR, "DBG:001 UNKNOWN\n"); + CL_ASSERT(0); + return 
IB_ERROR; + } + + status = __osmv_send_sa_req(h_bind, &sa_mad_data, p_query_req); + + OSM_LOG_EXIT(p_log); + return status; +} diff --git a/libvendor/osm_vendor_mlx.c b/libvendor/osm_vendor_mlx.c new file mode 100644 index 0000000..7a40fa5 --- /dev/null +++ b/libvendor/osm_vendor_mlx.c @@ -0,0 +1,768 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * FORWARD REFERENCES + */ +static ib_api_status_t +__osmv_get_send_txn(IN osm_bind_handle_t h_bind, + IN osm_madw_t * const p_madw, + IN boolean_t is_rmpp, + IN boolean_t resp_expected, OUT osmv_txn_ctx_t ** pp_txn); + +static void __osm_vendor_internal_unbind(osm_bind_handle_t h_bind); + +/* + * NAME osm_vendor_new + * + * DESCRIPTION Create and Initialize the osm_vendor_t Object (returns NULL if allocation or initialization fails) + */ + +osm_vendor_t *osm_vendor_new(IN osm_log_t * const p_log, + IN const uint32_t timeout) +{ + ib_api_status_t status; + osm_vendor_t *p_vend; + + OSM_LOG_ENTER(p_log); + + CL_ASSERT(p_log); + + p_vend = malloc(sizeof(*p_vend)); + if (p_vend != NULL) { + memset(p_vend, 0, sizeof(*p_vend)); + + status = osm_vendor_init(p_vend, p_log, timeout); + if (status != IB_SUCCESS) { + osm_vendor_delete(&p_vend); + } + } else { /* p_vend is NULL on this path: log through the caller's p_log */ + osm_log(p_log, OSM_LOG_ERROR, + "osm_vendor_new: ERR 7301: " + "Fail to allocate vendor object.\n"); + } + + OSM_LOG_EXIT(p_log); + return (p_vend); +} + +/* + * NAME osm_vendor_delete + * + * DESCRIPTION Delete all the binds behind the vendor + free the vendor object + */ + +void osm_vendor_delete(IN osm_vendor_t ** const pp_vend) +{ + cl_list_item_t *p_item; + cl_list_obj_t *p_obj; + osm_bind_handle_t bind_h; + osm_log_t *p_log; + + OSM_LOG_ENTER((*pp_vend)->p_log); + p_log = (*pp_vend)->p_log; + + /* go over the bind handles , unbind them and remove from list */ + p_item = cl_qlist_remove_head(&((*pp_vend)->bind_handles)); + while (p_item != cl_qlist_end(&((*pp_vend)->bind_handles))) { + + p_obj = PARENT_STRUCT(p_item, cl_list_obj_t, list_item); + bind_h = (osm_bind_handle_t *) cl_qlist_obj(p_obj); + osm_log(p_log, OSM_LOG_DEBUG, + "osm_vendor_delete: unbinding bind_h:%p \n", bind_h); + + __osm_vendor_internal_unbind(bind_h); + + free(p_obj); + /*removing from list */ + p_item =
cl_qlist_remove_head(&((*pp_vend)->bind_handles)); + } + + if (NULL != ((*pp_vend)->p_transport_info)) { + free((*pp_vend)->p_transport_info); + (*pp_vend)->p_transport_info = NULL; + } + + /* remove the packet randomizer object */ + if ((*pp_vend)->run_randomizer == TRUE) + osm_pkt_randomizer_destroy(&((*pp_vend)->p_pkt_randomizer), + p_log); + + free(*pp_vend); + *pp_vend = NULL; + + OSM_LOG_EXIT(p_log); +} + +/* + * NAME osm_vendor_init + * + * DESCRIPTION Initialize the vendor object + */ + +ib_api_status_t +osm_vendor_init(IN osm_vendor_t * const p_vend, + IN osm_log_t * const p_log, IN const uint32_t timeout) +{ + ib_api_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(p_log); + + p_vend->p_transport_info = NULL; + p_vend->p_log = p_log; + p_vend->resp_timeout = timeout; + p_vend->ttime_timeout = timeout * OSMV_TXN_TIMEOUT_FACTOR; + + cl_qlist_init(&p_vend->bind_handles); + + /* update the run_randomizer flag */ + if (getenv("OSM_PKT_DROP_RATE") != NULL + && atol(getenv("OSM_PKT_DROP_RATE")) != 0) { + /* if the OSM_PKT_DROP_RATE global variable is defined to a non-zero value - + then the randomizer should be called. 
+ Need to create the packet randomizer object */ + p_vend->run_randomizer = TRUE; + status = + osm_pkt_randomizer_init(&(p_vend->p_pkt_randomizer), p_log); + if (status != IB_SUCCESS) + return status; + } else { + p_vend->run_randomizer = FALSE; + p_vend->p_pkt_randomizer = NULL; + } + + OSM_LOG_EXIT(p_log); + return (IB_SUCCESS); +} + +/* + * NAME osm_vendor_bind + * + * DESCRIPTION Create a new bind object under the vendor object (returns OSM_BIND_INVALID_HANDLE on failure) + */ + +osm_bind_handle_t +osm_vendor_bind(IN osm_vendor_t * const p_vend, + IN osm_bind_info_t * const p_bind_info, + IN osm_mad_pool_t * const p_mad_pool, + IN osm_vend_mad_recv_callback_t mad_recv_callback, + IN osm_vend_mad_send_err_callback_t send_err_callback, + IN void *context) +{ + osmv_bind_obj_t *p_bo; + ib_api_status_t status; + char hca_id[32]; + cl_status_t cl_st; + cl_list_obj_t *p_obj; + uint8_t hca_index; + + if (NULL == p_vend || NULL == p_bind_info || NULL == p_mad_pool + || NULL == mad_recv_callback || NULL == send_err_callback) { + if (NULL != p_vend) /* the log handle lives in p_vend, which may be the NULL argument */ + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_bind: ERR 7302: " + "NULL parameter passed in: p_vend=%p p_bind_info=%p p_mad_pool=%p recv_cb=%p send_err_cb=%p\n", + p_vend, p_bind_info, p_mad_pool, mad_recv_callback, + send_err_callback); + return OSM_BIND_INVALID_HANDLE; + } + + p_bo = malloc(sizeof(osmv_bind_obj_t)); + if (NULL == p_bo) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_bind: ERR 7303: could not allocate the bind object\n"); + return OSM_BIND_INVALID_HANDLE; + } + + memset(p_bo, 0, sizeof(osmv_bind_obj_t)); + p_bo->p_vendor = p_vend; + p_bo->recv_cb = mad_recv_callback; + p_bo->send_err_cb = send_err_callback; + p_bo->cb_context = context; + p_bo->p_osm_pool = p_mad_pool; + + /* obtain the hca name and port num from the guid */ + osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG, + "osm_vendor_bind: " + "Finding CA and Port that owns port guid 0x%" PRIx64 ".\n", + cl_ntoh64(p_bind_info->port_guid)); + + status = osm_vendor_get_guid_ca_and_port(p_bo->p_vendor,
p_bind_info->port_guid, + &(p_bo->hca_hndl), + hca_id, + &hca_index, &(p_bo->port_num)); + if (status != IB_SUCCESS) { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "osm_vendor_bind: ERR 7304: " + "Fail to find port number of port guid:0x%016" PRIx64 + "\n", p_bind_info->port_guid); + free(p_bo); + return OSM_BIND_INVALID_HANDLE; + } + + /* Initialize the magic_ptr to the pointer of the p_bo info. + This will be used to signal when the object is being destroyed, so no + real action will be done then. */ + p_bo->magic_ptr = p_bo; + + p_bo->is_closing = FALSE; + + cl_spinlock_construct(&(p_bo->lock)); + cl_st = cl_spinlock_init(&(p_bo->lock)); + if (cl_st != CL_SUCCESS) { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "osm_vendor_bind: ERR 7305: " + "could not initialize the spinlock ...\n"); + free(p_bo); + return OSM_BIND_INVALID_HANDLE; + } + + osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG, + "osm_vendor_bind: osmv_txnmgr_init ... \n"); + if (osmv_txnmgr_init(&p_bo->txn_mgr, p_vend->p_log, &(p_bo->lock)) != + IB_SUCCESS) { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "osm_vendor_bind: ERR 7306: " + "osmv_txnmgr_init failed \n"); + cl_spinlock_destroy(&p_bo->lock); + free(p_bo); + return OSM_BIND_INVALID_HANDLE; + } + + /* Do the real job! 
(Transport-dependent) */ + if (IB_SUCCESS != + osmv_transport_init(p_bind_info, hca_id, hca_index, p_bo)) { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "osm_vendor_bind: ERR 7307: " + "osmv_transport_init failed \n"); + osmv_txnmgr_done((osm_bind_handle_t) p_bo); + cl_spinlock_destroy(&p_bo->lock); + free(p_bo); + return OSM_BIND_INVALID_HANDLE; + } + + /* insert bind handle into db */ + p_obj = malloc(sizeof(cl_list_obj_t)); + if (NULL == p_obj) { + + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "osm_vendor_bind: ERR 7308: " + "osm_vendor_bind: could not allocate the list object\n"); + /* tear the transport down through the bind handle, as __osm_vendor_internal_unbind() does */ + osmv_transport_done((osm_bind_handle_t) p_bo); + osmv_txnmgr_done((osm_bind_handle_t) p_bo); + cl_spinlock_destroy(&p_bo->lock); + free(p_bo); + return OSM_BIND_INVALID_HANDLE; + } + memset(p_obj, 0, sizeof(cl_list_obj_t)); + cl_qlist_set_obj(p_obj, p_bo); + + cl_qlist_insert_head(&p_vend->bind_handles, &p_obj->list_item); + + return (osm_bind_handle_t) p_bo; +} + +/* + * NAME osm_vendor_unbind + * + * DESCRIPTION Destroy the bind object and remove it from the vendor's list + */ + +void osm_vendor_unbind(IN osm_bind_handle_t h_bind) +{ + osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; + osm_log_t *p_log = p_bo->p_vendor->p_log; + cl_list_obj_t *p_obj = NULL; + cl_list_item_t *p_item, *p_item_tmp; + cl_qlist_t *const p_bh_list = + (cl_qlist_t * const)&p_bo->p_vendor->bind_handles; + + OSM_LOG_ENTER(p_log); + + /* go over all the items in the list and remove the specific item */ + p_item = cl_qlist_head(p_bh_list); + while (p_item != cl_qlist_end(p_bh_list)) { + p_obj = PARENT_STRUCT(p_item, cl_list_obj_t, list_item); + if (cl_qlist_obj(p_obj) == h_bind) { + break; + } + p_item_tmp = cl_qlist_next(p_item); + p_item = p_item_tmp; + } + + CL_ASSERT(p_item != cl_qlist_end(p_bh_list)); + + cl_qlist_remove_item(p_bh_list, p_item); + if (p_obj) + free(p_obj); + + if (h_bind != 0) { + __osm_vendor_internal_unbind(h_bind); + } + + OSM_LOG_EXIT(p_log); +} + +/* + * NAME osm_vendor_get
+ * + * DESCRIPTION Allocate the space for a new MAD + */ + +ib_mad_t *osm_vendor_get(IN osm_bind_handle_t h_bind, + IN const uint32_t mad_size, + IN osm_vend_wrap_t * const p_vw) +{ + ib_mad_t *p_mad; + osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; + osm_vendor_t const *p_vend = p_bo->p_vendor; + uint32_t act_mad_size; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vw); + + if (mad_size < MAD_BLOCK_SIZE) { + /* Stupid, but the applications want that! */ + act_mad_size = MAD_BLOCK_SIZE; + } else { + act_mad_size = mad_size; + } + + /* allocate it */ + p_mad = (ib_mad_t *) malloc(act_mad_size); + if (p_mad == NULL) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get: ERR 7309: " + "Error Obtaining MAD buffer.\n"); + goto Exit; + } + + memset(p_mad, 0, act_mad_size); + + if (osm_log_get_level(p_vend->p_log) >= OSM_LOG_DEBUG) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_vendor_get: " + "Allocated MAD %p, size = %u.\n", p_mad, act_mad_size); + } + p_vw->p_mad = p_mad; + +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return (p_mad); +} + +/* + * NAME osm_vendor_send + * + * DESCRIPTION Send a MAD buffer (RMPP or simple send). 
+ * + * Semantics: + * (1) The RMPP send completes when every segment + * is acknowledged (synchronous) + * (2) The simple send completes when the send completion + * is received (asynchronous) + */ + +ib_api_status_t +osm_vendor_send(IN osm_bind_handle_t h_bind, + IN osm_madw_t * const p_madw, IN boolean_t const resp_expected) +{ + ib_api_status_t ret = IB_SUCCESS; + osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; + boolean_t is_rmpp = FALSE, is_rmpp_ds = FALSE; + osmv_txn_ctx_t *p_txn = NULL; + ib_mad_t *p_mad; + osm_log_t *p_log; + osm_mad_pool_t *p_mad_pool; + + if (NULL == h_bind || NULL == p_madw || + NULL == (p_mad = osm_madw_get_mad_ptr(p_madw)) || + NULL == osm_madw_get_mad_addr_ptr(p_madw)) { /* checked before p_bo (alias of h_bind) is dereferenced */ + return IB_INVALID_PARAMETER; + } + p_log = p_bo->p_vendor->p_log; + p_mad_pool = p_bo->p_osm_pool; + OSM_LOG_ENTER(p_log); /* entered only after validation, so every later exit path is paired with OSM_LOG_EXIT */ + is_rmpp = (p_madw->mad_size > MAD_BLOCK_SIZE + || osmv_mad_is_rmpp(p_mad)); + /* is this rmpp double sided? This means we expect a response that can be + an rmpp or not */ + is_rmpp_ds = (TRUE == is_rmpp && TRUE == resp_expected); + + /* Make our operations with the send context atomic */ + osmv_txn_lock(p_bo); + + if (TRUE == p_bo->is_closing) { + + osm_log(p_log, OSM_LOG_ERROR, + "osm_vendor_send: ERR 7310: " + "The handle %p is being unbound, cannot send.\n", + h_bind); + ret = IB_INTERRUPTED; + /* When closing p_bo could be detroyed or is going to , thus could not refer to it */ + goto send_done; + } + + if (TRUE == resp_expected || TRUE == is_rmpp) { + + /* We must run under a transaction framework. + * Get the transaction object (old or new) */ + ret = __osmv_get_send_txn(h_bind, p_madw, is_rmpp, + resp_expected, &p_txn); + if (IB_SUCCESS != ret) { + goto send_done; + } + } + + if (TRUE == is_rmpp) { + /* Do the job - RMPP!
+ * The call returns as all the packets are ACK'ed/upon error + * The txn lock will be released each time the function sleeps + * and re-acquired when it wakes up + */ + ret = osmv_rmpp_send_madw(h_bind, p_madw, p_txn, is_rmpp_ds); + } else { + + /* Do the job - single MAD! + * The call returns as soon as the MAD is put on the wire + */ + ret = osmv_simple_send_madw(h_bind, p_madw, p_txn, FALSE); + } + + if (IB_SUCCESS == ret) { + + if ((TRUE == is_rmpp) && (FALSE == is_rmpp_ds)) { + /* For double-sided sends, the txn continues to live */ + osmv_txn_done(h_bind, osmv_txn_get_key(p_txn), + FALSE /*not in callback */ ); + } + + if (FALSE == resp_expected) { + osm_mad_pool_put(p_mad_pool, p_madw); + } + } else if (IB_INTERRUPTED != ret) { + if (NULL != p_txn) { + osmv_txn_done(h_bind, osmv_txn_get_key(p_txn), + FALSE /*not in callback */ ); + } + + osm_log(p_log, OSM_LOG_ERROR, + "osm_vendor_send: ERR 7311: failed to send MADW %p\n", + p_madw); + + if (TRUE == resp_expected) { + /* Change the status on the p_madw */ + p_madw->status = ret; + /* Only the requester expects the error callback */ + p_bo->send_err_cb(p_bo->cb_context, p_madw); + } else { + /* put back the mad - it is useless ... 
*/ + osm_mad_pool_put(p_mad_pool, p_madw); + } + } else { /* the transaction was aborted due to p_bo exit */ + + osm_mad_pool_put(p_mad_pool, p_madw); + goto aborted; + } +send_done: + + osmv_txn_unlock(p_bo); +aborted: + OSM_LOG_EXIT(p_log); + return ret; +} + +/* + * NAME osm_vendor_put + * + * DESCRIPTION Free the MAD's memory + */ + +void +osm_vendor_put(IN osm_bind_handle_t h_bind, IN osm_vend_wrap_t * const p_vw) +{ + + osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; + osm_vendor_t const *p_vend = p_bo->p_vendor; + + if (p_bo->is_closing != TRUE) { + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vw); + CL_ASSERT(p_vw->p_mad); + + if (osm_log_get_level(p_vend->p_log) >= OSM_LOG_DEBUG) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_vendor_put: " "Retiring MAD %p.\n", + p_vw->p_mad); + } + + free(p_vw->p_mad); + p_vw->p_mad = NULL; + + OSM_LOG_EXIT(p_vend->p_log); + } +} + +/* + * NAME osm_vendor_local_lid_change + * + * DESCRIPTION Notifies the vendor transport layer that the local address + * has changed. This allows the vendor layer to perform + * housekeeping functions such as address vector updates. + */ + +ib_api_status_t osm_vendor_local_lid_change(IN osm_bind_handle_t h_bind) +{ + osm_vendor_t const *p_vend = ((osmv_bind_obj_t *) h_bind)->p_vendor; + OSM_LOG_ENTER(p_vend->p_log); + + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_vendor_local_lid_change: " "Change of LID.\n"); + + OSM_LOG_EXIT(p_vend->p_log); + + return (IB_SUCCESS); + +} + +/* + * NAME osm_vendor_set_sm + * + * DESCRIPTION Modifies the port info for the bound port to set the "IS_SM" bit + * according to the value given (TRUE or FALSE). 
+ */ +#if !(defined(OSM_VENDOR_INTF_TS_NO_VAPI) || defined(OSM_VENDOR_INTF_SIM) || defined(OSM_VENDOR_INTF_TS)) +void osm_vendor_set_sm(IN osm_bind_handle_t h_bind, IN boolean_t is_sm_val) +{ + osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; + osm_vendor_t const *p_vend = p_bo->p_vendor; + VAPI_ret_t status; + VAPI_hca_attr_t attr_mod; + VAPI_hca_attr_mask_t attr_mask; + + OSM_LOG_ENTER(p_vend->p_log); + + memset(&attr_mod, 0, sizeof(attr_mod)); + memset(&attr_mask, 0, sizeof(attr_mask)); + + attr_mod.is_sm = is_sm_val; + attr_mask = HCA_ATTR_IS_SM; + + status = + VAPI_modify_hca_attr(p_bo->hca_hndl, p_bo->port_num, &attr_mod, + &attr_mask); + if (status != VAPI_OK) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_set_sm: ERR 7312: " + "Unable set 'IS_SM' bit to:%u in port attributes (%d).\n", + is_sm_val, status); + } + + OSM_LOG_EXIT(p_vend->p_log); +} + +#endif + +/* + * NAME __osm_vendor_internal_unbind + * + * DESCRIPTION Destroying a bind: + * (1) Wait for the completion of the sends in flight + * (2) Destroy the associated data structures + */ + +static void __osm_vendor_internal_unbind(osm_bind_handle_t h_bind) +{ + osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; + osm_log_t *p_log = p_bo->p_vendor->p_log; + + OSM_LOG_ENTER(p_log); + + /* "notifying" all that from now on no new sends can be done */ + p_bo->txn_mgr.p_event_wheel->closing = TRUE; + + osmv_txn_lock(p_bo); + + /* + the is_closing is set under lock we we know we only need to + check for it after obtaining the lock + */ + p_bo->is_closing = TRUE; + + /* notifying all sleeping rmpp sends to exit */ + osmv_txn_abort_rmpp_txns(h_bind); + + /* unlock the bo to allow for any residual mads to be dispatched */ + osmv_txn_unlock(p_bo); + osm_log(p_log, OSM_LOG_DEBUG, + "__osm_vendor_internal_unbind: destroying transport mgr.. 
\n"); + /* wait for the receiver thread to exit */ + osmv_transport_done(h_bind); + + /* lock to avoid any collissions while we cleanup the structs */ + osmv_txn_lock(p_bo); + osm_log(p_log, OSM_LOG_DEBUG, + "__osm_vendor_internal_unbind: destroying txn mgr.. \n"); + osmv_txnmgr_done(h_bind); + osm_log(p_log, OSM_LOG_DEBUG, + "__osm_vendor_internal_unbind: destroying bind lock.. \n"); + osmv_txn_unlock(p_bo); + + /* + we intentionally let the p_bo and its lock leak - + as we did not implement a way to track active bind handles provided to + the client - and the client might use them + + cl_spinlock_destroy(&p_bo->lock); + free(p_bo); + */ + + OSM_LOG_EXIT(p_log); +} + +/* + * NAME __osmv_get_send_txn + * + * DESCRIPTION Return a transaction object that corresponds to this MAD. + * Optionally, create it, if the new request (query) is sent or received. + */ + +static ib_api_status_t +__osmv_get_send_txn(IN osm_bind_handle_t h_bind, + IN osm_madw_t * const p_madw, + IN boolean_t is_rmpp, + IN boolean_t resp_expected, OUT osmv_txn_ctx_t ** pp_txn) +{ + ib_api_status_t ret; + uint64_t tid, key; + osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; + ib_mad_t *p_mad = osm_madw_get_mad_ptr(p_madw); + + OSM_LOG_ENTER(p_bo->p_vendor->p_log); + CL_ASSERT(NULL != pp_txn); + + key = tid = cl_ntoh64(p_mad->trans_id); + if (TRUE == resp_expected) { + /* Create a unique identifier at the requester side */ + key = osmv_txn_uniq_key(tid); + } + + /* We must run under a transaction framework */ + ret = osmv_txn_lookup(h_bind, key, pp_txn); + if (IB_NOT_FOUND == ret) { + /* Generally, we start a new transaction */ + ret = osmv_txn_init(h_bind, tid, key, pp_txn); + if (IB_SUCCESS != ret) { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "__osmv_get_send_txn: ERR 7313: " + "The transaction id=0x%" PRIx64 " failed to init.\n", + tid); + goto get_send_txn_done; + } + } else { + CL_ASSERT(NULL != *pp_txn); + /* The transaction context exists. 
+ * This is legal only if I am going to return an + * (RMPP?) reply to an RMPP request sent by the other part + * (double-sided RMPP transfer) + */ + if (FALSE == is_rmpp + || FALSE == osmv_txn_is_rmpp_init_by_peer(*pp_txn)) { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "__osmv_get_send_txn: ERR 7314: " + "The transaction id=0x%" PRIx64 " is not unique. Send failed.\n", + tid); + + ret = IB_INVALID_SETTING; + goto get_send_txn_done; + } + + if (TRUE == resp_expected) { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "__osmv_get_send_txn: ERR 7315: " + "The transaction id=0x%" PRIx64 " can't expect a response. Send failed.\n", + tid); + + ret = IB_INVALID_PARAMETER; + goto get_send_txn_done; + } + } + + if (TRUE == is_rmpp) { + ret = osmv_txn_init_rmpp_sender(h_bind, *pp_txn, p_madw); + if (IB_SUCCESS != ret) { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "__osmv_get_send_txn: ERR 7316: " + "The transaction id=0x%" PRIx64 " failed to init the rmpp mad. Send failed.\n", + tid); + osmv_txn_done(h_bind, key, FALSE); /* the txn was looked up / created under key, which differs from tid when resp_expected */ + goto get_send_txn_done; + } + } + + /* Save a reference to the MAD in the txn context + * We'll need to match it in two cases: + * (1) When the response is returned, if I am the requester + * (2) In RMPP retransmissions + */ + osmv_txn_set_madw(*pp_txn, p_madw); + +get_send_txn_done: + OSM_LOG_EXIT(p_bo->p_vendor->p_log); + + return ret; +} + +void osm_vendor_set_debug(IN osm_vendor_t * const p_vend, IN int32_t level) +{ + +} diff --git a/libvendor/osm_vendor_mlx_anafa.c b/libvendor/osm_vendor_mlx_anafa.c new file mode 100644 index 0000000..fbaab1d --- /dev/null +++ b/libvendor/osm_vendor_mlx_anafa.c @@ -0,0 +1,751 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/** + * FORWARD REFERENCES + */ +static ib_api_status_t +__osmv_get_send_txn(IN osm_bind_handle_t h_bind, + IN osm_madw_t * const p_madw, + IN boolean_t is_rmpp, + IN boolean_t resp_expected, OUT osmv_txn_ctx_t ** pp_txn); + +static void __osm_vendor_internal_unbind(osm_bind_handle_t h_bind); + +/* + * NAME osm_vendor_new + * + * DESCRIPTION Create and Initialize the osm_vendor_t Object (returns NULL if allocation or initialization fails) + */ + +osm_vendor_t *osm_vendor_new(IN osm_log_t * const p_log, + IN const uint32_t timeout) +{ + ib_api_status_t status; + osm_vendor_t *p_vend; + + OSM_LOG_ENTER(p_log); + + CL_ASSERT(p_log); + + p_vend = malloc(sizeof(*p_vend)); + if (p_vend != NULL) { + memset(p_vend, 0, sizeof(*p_vend)); + status = osm_vendor_init(p_vend, p_log, timeout); + if (status != IB_SUCCESS) { + osm_vendor_delete(&p_vend); + } + } else { /* p_vend is NULL on this path: log through the caller's p_log */ + osm_log(p_log, OSM_LOG_ERROR, + "osm_vendor_new: ERR 7401: " + "Fail to allocate vendor object.\n"); + } + + OSM_LOG_EXIT(p_log); + return (p_vend); +} + +/* + * NAME osm_vendor_delete + * + * DESCRIPTION Delete all the binds behind the vendor + free the vendor object + */ + +void osm_vendor_delete(IN osm_vendor_t ** const pp_vend) +{ + cl_list_item_t *p_item; + cl_list_obj_t *p_obj; + osm_bind_handle_t bind_h; + osm_log_t *p_log; + + OSM_LOG_ENTER((*pp_vend)->p_log); + p_log = (*pp_vend)->p_log; + + /* go over the bind handles , unbind them and remove from list */ + /* Note that if we reached here due to problem in the init, then + the bind_handles list is not initialized yet */ + if ((*pp_vend)->bind_handles.state == CL_INITIALIZED) { + p_item = cl_qlist_remove_head(&((*pp_vend)->bind_handles)); + while (p_item != cl_qlist_end(&((*pp_vend)->bind_handles))) { + + p_obj = PARENT_STRUCT(p_item, cl_list_obj_t, list_item); + bind_h =
(osm_bind_handle_t *) cl_qlist_obj(p_obj); + osm_log(p_log, OSM_LOG_DEBUG, + "osm_vendor_delete: unbinding bind_h:%p \n", + bind_h); + + __osm_vendor_internal_unbind(bind_h); + + free(p_obj); + /* removing from list */ + p_item = + cl_qlist_remove_head(&((*pp_vend)->bind_handles)); + } + } + + if (NULL != ((*pp_vend)->p_transport_info)) { + free((*pp_vend)->p_transport_info); + (*pp_vend)->p_transport_info = NULL; + } + + /* remove the packet randomizer object */ + if ((*pp_vend)->run_randomizer == TRUE) + osm_pkt_randomizer_destroy(&((*pp_vend)->p_pkt_randomizer), + p_log); + + free(*pp_vend); + *pp_vend = NULL; + + OSM_LOG_EXIT(p_log); +} + +/* + * NAME osm_vendor_init + * + * DESCRIPTION Initialize the vendor object + */ + +ib_api_status_t +osm_vendor_init(IN osm_vendor_t * const p_vend, + IN osm_log_t * const p_log, IN const uint32_t timeout) +{ + ib_api_status_t status = IB_SUCCESS; + char device_file[16]; + int device_fd; + + OSM_LOG_ENTER(p_log); + + p_vend->p_log = p_log; + p_vend->resp_timeout = timeout; + p_vend->ttime_timeout = timeout * OSMV_TXN_TIMEOUT_FACTOR; + + p_vend->p_transport_info = (osmv_TOPSPIN_ANAFA_transport_info_t *) + malloc(sizeof(osmv_TOPSPIN_ANAFA_transport_info_t)); + if (!p_vend->p_transport_info) { + return IB_ERROR; + } + + memset(p_vend->p_transport_info, 0, + sizeof(osmv_TOPSPIN_ANAFA_transport_info_t)); + + /* update the run_randomizer flag */ + if (getenv("OSM_PKT_DROP_RATE") != NULL + && atol(getenv("OSM_PKT_DROP_RATE")) != 0) { + /* if the OSM_PKT_DROP_RATE global variable is defined + to a non-zero value - + then the randomizer should be called. 
+ Need to create the packet randomizer object */ + p_vend->run_randomizer = TRUE; + status = + osm_pkt_randomizer_init(&(p_vend->p_pkt_randomizer), p_log); + if (status != IB_SUCCESS) + return status; + } else { + p_vend->run_randomizer = FALSE; + p_vend->p_pkt_randomizer = NULL; + } + + /* open TopSpin file device */ + sprintf(device_file, "/dev/ts_ua0"); + device_fd = open("/dev/ts_ua0", O_RDWR); + if (device_fd < 0) { + fprintf(stderr, "Fatal: Fail to open the file:%s(%d)\n", + device_file, errno); + return IB_ERROR; + } + + ((osmv_TOPSPIN_ANAFA_transport_info_t *) p_vend->p_transport_info)-> + device_fd = device_fd; + + cl_qlist_init(&p_vend->bind_handles); + + OSM_LOG_EXIT(p_log); + return (IB_SUCCESS); +} + +/* + * NAME osm_vendor_bind + * + * DESCRIPTION Create a new bind object under the vendor object (returns OSM_BIND_INVALID_HANDLE on failure) + */ + +osm_bind_handle_t +osm_vendor_bind(IN osm_vendor_t * const p_vend, + IN osm_bind_info_t * const p_bind_info, + IN osm_mad_pool_t * const p_mad_pool, + IN osm_vend_mad_recv_callback_t mad_recv_callback, + IN osm_vend_mad_send_err_callback_t send_err_callback, + IN void *context) +{ + osmv_bind_obj_t *p_bo; + cl_status_t cl_st; + cl_list_obj_t *p_obj; + uint8_t hca_idx = 0; + + if (NULL == p_vend || NULL == p_bind_info || NULL == p_mad_pool + || NULL == mad_recv_callback || NULL == send_err_callback) { + if (NULL != p_vend) /* p_bo is not allocated yet; the log handle comes from p_vend, which may be the NULL argument */ + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_bind: ERR 7402: " + "NULL parameter passed in: p_vend=%p p_bind_info=%p p_mad_pool=%p recv_cb=%p send_err_cb=%p\n", + p_vend, p_bind_info, p_mad_pool, mad_recv_callback, + send_err_callback); + return OSM_BIND_INVALID_HANDLE; + } + + p_bo = malloc(sizeof(osmv_bind_obj_t)); + if (NULL == p_bo) { /* p_bo is NULL here: log through p_vend, validated above */ + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_bind: ERR 7403: " + "could not allocate the bind object\n"); + return OSM_BIND_INVALID_HANDLE; + } + + memset(p_bo, 0, sizeof(osmv_bind_obj_t)); + p_bo->p_vendor = p_vend; + p_bo->recv_cb = mad_recv_callback; + p_bo->send_err_cb =
send_err_callback; + p_bo->cb_context = context; + p_bo->p_osm_pool = p_mad_pool; + p_bo->port_num = 1; /* anafa2 has one port */ + p_bo->hca_hndl = 0; /* only one ca on anafa system */ + + /* obtain the hca name and port num from the guid */ + osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG, + "osm_vendor_bind: " + "Finding CA and Port that owns port guid 0x%" PRIx64 ".\n", + cl_ntoh64(p_bind_info->port_guid)); + + p_bo->is_closing = FALSE; + cl_spinlock_construct(&(p_bo->lock)); + cl_st = cl_spinlock_init(&(p_bo->lock)); + if (cl_st != CL_SUCCESS) { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "osm_vendor_bind: ERR 7405: " + "could not initialize the spinlock ...\n"); + free(p_bo); + return OSM_BIND_INVALID_HANDLE; + } + + osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG, + "osm_vendor_bind: osmv_txnmgr_init ... \n"); + if (osmv_txnmgr_init(&p_bo->txn_mgr, p_vend->p_log, &(p_bo->lock)) != + IB_SUCCESS) { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "osm_vendor_bind: ERR 7406: " + "osmv_txnmgr_init failed \n"); + cl_spinlock_destroy(&p_bo->lock); + free(p_bo); + return OSM_BIND_INVALID_HANDLE; + } + + /* Do the real job! 
(Transport-dependent) */ + if (IB_SUCCESS != + osmv_transport_init(p_bind_info, OSMV_ANAFA_ID, hca_idx, p_bo)) { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "osm_vendor_bind: ERR 7407: " + "osmv_transport_init failed \n"); + osmv_txnmgr_done((osm_bind_handle_t) p_bo); + cl_spinlock_destroy(&p_bo->lock); + free(p_bo); + return OSM_BIND_INVALID_HANDLE; + } + + /* insert bind handle into db */ + p_obj = malloc(sizeof(cl_list_obj_t)); + if (NULL == p_obj) { + + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "osm_vendor_bind: ERR 7408: " + "osm_vendor_bind: could not allocate the list object\n"); + + osmv_transport_done(p_bo->p_transp_mgr); + osmv_txnmgr_done((osm_bind_handle_t) p_bo); + cl_spinlock_destroy(&p_bo->lock); + free(p_bo); + return OSM_BIND_INVALID_HANDLE; + } + if (p_obj) + memset(p_obj, 0, sizeof(cl_list_obj_t)); + cl_qlist_set_obj(p_obj, p_bo); + + cl_qlist_insert_head(&p_vend->bind_handles, &p_obj->list_item); + + return (osm_bind_handle_t) p_bo; +} + +/* + * NAME osm_vendor_unbind + * + * DESCRIPTION Destroy the bind object and remove it from the vendor's list + */ + +void osm_vendor_unbind(IN osm_bind_handle_t h_bind) +{ + osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; + osm_log_t *p_log = p_bo->p_vendor->p_log; + cl_list_obj_t *p_obj; + cl_list_item_t *p_item, *p_item_tmp; + cl_qlist_t *const p_bh_list = + (cl_qlist_t * const)&p_bo->p_vendor->bind_handles; + + OSM_LOG_ENTER(p_log); + + /* go over all the items in the list and remove the specific item */ + p_item = cl_qlist_head(&p_bo->p_vendor->bind_handles); + while (p_item != cl_qlist_end(&p_bo->p_vendor->bind_handles)) { + p_obj = PARENT_STRUCT(p_item, cl_list_obj_t, list_item); + if (cl_qlist_obj(p_obj) == h_bind) { + break; + } + p_item_tmp = cl_qlist_next(p_item); + p_item = p_item_tmp; + } + + CL_ASSERT(p_item != cl_qlist_end(p_bh_list)); + + cl_qlist_remove_item(p_bh_list, p_item); + free(p_obj); + + __osm_vendor_internal_unbind(h_bind); + + OSM_LOG_EXIT(p_bo->p_vendor->p_log); +} 
+
+/*
+ * NAME osm_vendor_get
+ *
+ * DESCRIPTION Allocate the space for a new MAD
+ *
+ * The zero-filled buffer is at least MAD_BLOCK_SIZE bytes long, even
+ * when a smaller mad_size is requested.  On success the buffer is also
+ * recorded in p_vw->p_mad; on allocation failure NULL is returned and
+ * p_vw is left untouched.
+ */
+
+ib_mad_t *osm_vendor_get(IN osm_bind_handle_t h_bind,
+			 IN const uint32_t mad_size,
+			 IN osm_vend_wrap_t * const p_vw)
+{
+	osmv_bind_obj_t *p_bind = (osmv_bind_obj_t *) h_bind;
+	osm_log_t *p_log = p_bind->p_vendor->p_log;
+	/* Stupid, but the applications want at least one full block! */
+	const uint32_t alloc_size =
+	    (mad_size < MAD_BLOCK_SIZE) ? MAD_BLOCK_SIZE : mad_size;
+	ib_mad_t *p_buf;
+
+	OSM_LOG_ENTER(p_log);
+
+	CL_ASSERT(p_vw);
+
+	p_buf = (ib_mad_t *) malloc(alloc_size);
+	if (p_buf != NULL) {
+		memset(p_buf, 0, alloc_size);
+
+		if (osm_log_get_level(p_log) >= OSM_LOG_DEBUG) {
+			osm_log(p_log, OSM_LOG_DEBUG,
+				"osm_vendor_get: "
+				"Allocated MAD %p, size = %u.\n", p_buf,
+				alloc_size);
+		}
+		p_vw->p_mad = p_buf;
+	} else {
+		osm_log(p_log, OSM_LOG_ERROR,
+			"osm_vendor_get: ERR 7409: "
+			"Error Obtaining MAD buffer.\n");
+	}
+
+	OSM_LOG_EXIT(p_log);
+	return (p_buf);
+}
+
+/*
+ * NAME osm_vendor_send
+ *
+ * DESCRIPTION Send a MAD buffer (RMPP or simple send).
+ *
+ * Semantics:
+ *  (1) The RMPP send completes when every segment
+ *      is acknowledged (synchronous)
+ *  (2) The simple send completes when the send completion
+ *      is received (asynchronous)
+ *
+ * Returns IB_INVALID_PARAMETER when the handle, the MAD wrapper, its
+ * MAD or its address is NULL, IB_INTERRUPTED when the handle is being
+ * unbound, otherwise the status of the transaction setup / the send.
+ * On a failed send the MAD wrapper is handed to the send error callback
+ * (when a response was expected) or returned to the MAD pool.
+ */
+
+ib_api_status_t
+osm_vendor_send(IN osm_bind_handle_t h_bind,
+		IN osm_madw_t * const p_madw, IN boolean_t const resp_expected)
+{
+	ib_api_status_t ret = IB_SUCCESS;
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+	boolean_t is_rmpp = FALSE, is_rmpp_ds = FALSE;
+	osmv_txn_ctx_t *p_txn = NULL;
+	ib_mad_t *p_mad;
+
+	/* Validate the arguments before anything dereferences h_bind:
+	 * even the entry log needs p_bo->p_vendor. */
+	if (NULL == h_bind || NULL == p_madw ||
+	    NULL == (p_mad = osm_madw_get_mad_ptr(p_madw)) ||
+	    NULL == osm_madw_get_mad_addr_ptr(p_madw)) {
+
+		return IB_INVALID_PARAMETER;
+	}
+
+	OSM_LOG_ENTER(p_bo->p_vendor->p_log);
+
+	is_rmpp = (p_madw->mad_size > MAD_BLOCK_SIZE
+		   || osmv_mad_is_rmpp(p_mad));
+	is_rmpp_ds = (TRUE == is_rmpp && TRUE == resp_expected);
+
+	/* Make our operations with the send context atomic */
+	osmv_txn_lock(p_bo);
+
+	if (TRUE == p_bo->is_closing) {
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
+			"osm_vendor_send: ERR 7410: "
+			"The handle %p is being unbound, cannot send.\n",
+			h_bind);
+		ret = IB_INTERRUPTED;
+		goto send_done;
+	}
+
+	if (TRUE == resp_expected || TRUE == is_rmpp) {
+
+		/* We must run under a transaction framework.
+		 * Get the transaction object (old or new) */
+		ret = __osmv_get_send_txn(h_bind, p_madw, is_rmpp,
+					  resp_expected, &p_txn);
+		if (IB_SUCCESS != ret) {
+			goto send_done;
+		}
+	}
+
+	if (TRUE == is_rmpp) {
+		/* Do the job - RMPP!
+		 * The call returns as all the packets are ACK'ed/upon error
+		 * The txn lock will be released each time the function sleeps
+		 * and re-acquired when it wakes up
+		 */
+		ret = osmv_rmpp_send_madw(h_bind, p_madw, p_txn, is_rmpp_ds);
+	} else {
+
+		/* Do the job - single MAD!
+		 * The call returns as soon as the MAD is put on the wire
+		 */
+		ret = osmv_simple_send_madw(h_bind, p_madw, p_txn, FALSE);	/* anafa2 */
+	}
+
+	if (IB_SUCCESS == ret) {
+
+		if ((TRUE == is_rmpp) && (FALSE == is_rmpp_ds)) {
+			/* For double-sided sends, the txn continues to live */
+			osmv_txn_done(h_bind, osmv_txn_get_key(p_txn),
+				      FALSE /*not in callback */ );
+		}
+
+		if (FALSE == resp_expected) {
+			osm_mad_pool_put(p_bo->p_osm_pool, p_madw);
+		}
+	} else {
+		if (NULL != p_txn) {
+			osmv_txn_done(h_bind, osmv_txn_get_key(p_txn),
+				      FALSE /*not in callback */ );
+		}
+
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
+			"osm_vendor_send: ERR 7411: failed to send MADW %p\n",
+			p_madw);
+
+		if (TRUE == resp_expected) {
+			/* Change the status on the p_madw */
+			p_madw->status = ret;
+			/* Only the requester expects the error callback */
+			p_bo->send_err_cb(p_bo->cb_context, p_madw);
+		} else {
+			/* put back the mad - it is useless ... */
+			osm_mad_pool_put(p_bo->p_osm_pool, p_madw);
+		}
+	}
+
+send_done:
+
+	osmv_txn_unlock(p_bo);
+
+	OSM_LOG_EXIT(p_bo->p_vendor->p_log);
+	return ret;
+}
+
+/*
+ * NAME osm_vendor_put
+ *
+ * DESCRIPTION Free the MAD's memory
+ *
+ * The buffer referenced by p_vw->p_mad is freed and the wrapper's
+ * pointer is reset to NULL.
+ */
+
+void
+osm_vendor_put(IN osm_bind_handle_t h_bind, IN osm_vend_wrap_t * const p_vw)
+{
+
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+	osm_vendor_t const *p_vend = p_bo->p_vendor;
+
+	OSM_LOG_ENTER(p_vend->p_log);
+
+	CL_ASSERT(p_vw);
+	CL_ASSERT(p_vw->p_mad);
+
+	if (osm_log_get_level(p_vend->p_log) >= OSM_LOG_DEBUG) {
+		osm_log(p_vend->p_log, OSM_LOG_DEBUG,
+			"osm_vendor_put: " "Retiring MAD %p.\n", p_vw->p_mad);
+	}
+
+	free(p_vw->p_mad);
+	p_vw->p_mad = NULL;
+
+	OSM_LOG_EXIT(p_vend->p_log);
+}
+
+/*
+ * NAME osm_vendor_local_lid_change
+ *
+ * DESCRIPTION Notifies the vendor transport layer that the local address
+ *             has changed.  This allows the vendor layer to perform
+ *             housekeeping functions such as address vector updates.
+ */
+
+/* This transport only logs the event and always returns IB_SUCCESS. */
+ib_api_status_t osm_vendor_local_lid_change(IN osm_bind_handle_t h_bind)
+{
+	osm_vendor_t const *p_vend = ((osmv_bind_obj_t *) h_bind)->p_vendor;
+	OSM_LOG_ENTER(p_vend->p_log);
+
+	osm_log(p_vend->p_log, OSM_LOG_DEBUG,
+		"osm_vendor_local_lid_change: " "Change of LID.\n");
+
+	OSM_LOG_EXIT(p_vend->p_log);
+
+	return (IB_SUCCESS);
+
+}
+
+/*
+ * NAME osm_vendor_set_sm
+ *
+ * DESCRIPTION Modifies the port info for the bound port to set the "IS_SM" bit.
+ *
+ * The request is issued as a TS_IB_IOCSPORTINFO ioctl on the transport
+ * manager's device descriptor.  A failing ioctl is only logged.
+ */
+
+void osm_vendor_set_sm(IN osm_bind_handle_t h_bind, IN boolean_t is_sm_val)
+{
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+	osm_vendor_t const *p_vend = p_bo->p_vendor;
+	osmv_TOPSPIN_ANAFA_transport_mgr_t *p_mgr;
+	int ioctl_ret;
+	osm_ts_set_port_info_ioctl port_info;
+
+	OSM_LOG_ENTER(p_vend->p_log);
+
+	/* Only three members are filled in below; clear the rest so that
+	 * no uninitialized stack bytes are handed to the driver. */
+	memset(&port_info, 0, sizeof(port_info));
+	port_info.port = 0;	/* anafa has only 1 port */
+	port_info.port_info.valid_fields = IB_PORT_IS_SM;
+	port_info.port_info.is_sm = is_sm_val;
+
+	p_mgr = (osmv_TOPSPIN_ANAFA_transport_mgr_t *) p_bo->p_transp_mgr;
+	ioctl_ret = ioctl(p_mgr->device_fd, TS_IB_IOCSPORTINFO, &port_info);
+
+	if (ioctl_ret < 0) {
+		osm_log(p_vend->p_log, OSM_LOG_ERROR,
+			"osm_vendor_set_sm: ERR 7412: "
+			"Unable set 'IS_SM' bit to:%u in port attributes (%d). errno=%d\n",
+			is_sm_val, ioctl_ret, errno);
+	}
+
+	OSM_LOG_EXIT(p_vend->p_log);
+}
+
+/*
+ * NAME __osm_vendor_internal_unbind
+ *
+ * DESCRIPTION Destroying a bind:
+ *    (1) Wait for the completion of the sends in flight
+ *    (2) Destroy the associated data structures
+ *
+ * The bind object itself and its lock are deliberately NOT released
+ * (see the comment at the end of the function).
+ */
+
+static void __osm_vendor_internal_unbind(osm_bind_handle_t h_bind)
+{
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+	osm_log_t *p_log = p_bo->p_vendor->p_log;
+
+	OSM_LOG_ENTER(p_log);
+
+	/* "notifying" all that from now on no new sends can be done */
+	p_bo->txn_mgr.p_event_wheel->closing = TRUE;
+
+	osmv_txn_lock(p_bo);
+	p_bo->is_closing = TRUE;
+
+	/* notifying all sleeping rmpp sends to exit */
+	osmv_txn_abort_rmpp_txns(h_bind);
+
+	/* frees all data in bind handle */
+	osm_log(p_log, OSM_LOG_DEBUG,
+		"__osm_vendor_internal_unbind: destroying transport mgr.. \n");
+	/* the transport is torn down with the txn lock released */
+	osmv_txn_unlock(p_bo);
+
+	osmv_transport_done(h_bind);
+	osm_log(p_log, OSM_LOG_DEBUG,
+		"__osm_vendor_internal_unbind: destroying txn mgr.. \n");
+	osmv_txn_lock(p_bo);
+	osmv_txnmgr_done(h_bind);
+	osm_log(p_log, OSM_LOG_DEBUG,
+		"__osm_vendor_internal_unbind: destroying bind lock.. \n");
+
+	osmv_txn_unlock(p_bo);
+	/*
+	   we intentionally let the p_bo and its lock leak -
+	   as we did not implement a way to track active bind handles provided to
+	   the client - and the client might use them
+
+	   cl_spinlock_destroy(&p_bo->lock);
+	   free(p_bo);
+	 */
+
+	OSM_LOG_EXIT(p_log);
+}
+
+/*
+ * NAME __osmv_get_send_txn
+ *
+ * DESCRIPTION Return a transaction object that corresponds to this MAD.
+ *      Optionally, create it, if the new request (query) is sent or received.
+ */
+
+/*
+ * The transaction is looked up (and, if missing, created) under "key":
+ * the MAD's TID itself, or a unique key derived from it when a response
+ * is expected.  Returns IB_SUCCESS with *pp_txn set, IB_INVALID_SETTING
+ * for a duplicate TID, IB_INVALID_PARAMETER when a reply to the peer's
+ * RMPP request itself expects a response, or the failure status of the
+ * transaction / RMPP sender initialization.
+ */
+static ib_api_status_t
+__osmv_get_send_txn(IN osm_bind_handle_t h_bind,
+		    IN osm_madw_t * const p_madw,
+		    IN boolean_t is_rmpp,
+		    IN boolean_t resp_expected, OUT osmv_txn_ctx_t ** pp_txn)
+{
+	ib_api_status_t ret;
+	uint64_t tid, key;
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+	ib_mad_t *p_mad = osm_madw_get_mad_ptr(p_madw);
+
+	OSM_LOG_ENTER(p_bo->p_vendor->p_log);
+	CL_ASSERT(NULL != pp_txn);
+
+	key = tid = cl_ntoh64(p_mad->trans_id);
+	if (TRUE == resp_expected) {
+		/* Create a unique identifier at the requester side */
+		key = osmv_txn_uniq_key(tid);
+	}
+
+	/* We must run under a transaction framework */
+	ret = osmv_txn_lookup(h_bind, key, pp_txn);
+	if (IB_NOT_FOUND == ret) {
+		/* Generally, we start a new transaction */
+		ret = osmv_txn_init(h_bind, tid, key, pp_txn);
+		if (IB_SUCCESS != ret) {
+			goto get_send_txn_done;
+		}
+	} else {
+		CL_ASSERT(NULL != *pp_txn);
+		/* The transaction context exists.
+		 * This is legal only if I am going to return an
+		 * (RMPP?) reply to an RMPP request sent by the other part
+		 * (double-sided RMPP transfer)
+		 */
+		if (FALSE == is_rmpp
+		    || FALSE == osmv_txn_is_rmpp_init_by_peer(*pp_txn)) {
+			osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
+				"__osmv_get_send_txn: ERR 7413: "
+				"The transaction id=0x%" PRIX64 " is not unique. Send failed.\n",
+				tid);
+
+			ret = IB_INVALID_SETTING;
+			goto get_send_txn_done;
+		}
+
+		if (TRUE == resp_expected) {
+			osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
+				"__osmv_get_send_txn: ERR 7414: "
+				"The transaction id=%" PRIX64 " can\'t expect a response. Send failed.\n",
+				tid);
+
+			ret = IB_INVALID_PARAMETER;
+			goto get_send_txn_done;
+		}
+	}
+
+	if (TRUE == is_rmpp) {
+		ret = osmv_txn_init_rmpp_sender(h_bind, *pp_txn, p_madw);
+		if (IB_SUCCESS != ret) {
+			/* The transaction is registered under "key", which
+			 * differs from the TID when a response is expected */
+			osmv_txn_done(h_bind, key, FALSE);
+			*pp_txn = NULL;
+			goto get_send_txn_done;
+		}
+	}
+
+	/* Save a reference to the MAD in the txn context
+	 * We'll need to match it in two cases:
+	 *  (1) When the response is returned, if I am the requester
+	 *  (2) In RMPP retransmissions
+	 */
+	osmv_txn_set_madw(*pp_txn, p_madw);
+
+get_send_txn_done:
+	OSM_LOG_EXIT(p_bo->p_vendor->p_log);
+
+	return ret;
+}
+
+/* Intentionally empty: this transport has no debug level to set. */
+void osm_vendor_set_debug(IN osm_vendor_t * const p_vend, IN int32_t level)
+{
+
+}
diff --git a/libvendor/osm_vendor_mlx_dispatcher.c b/libvendor/osm_vendor_mlx_dispatcher.c
new file mode 100644
index 0000000..ba6c4e8
--- /dev/null
+++ b/libvendor/osm_vendor_mlx_dispatcher.c
@@ -0,0 +1,710 @@
+/*
+ * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include + +typedef enum _osmv_disp_route { + + OSMV_ROUTE_DROP, + OSMV_ROUTE_SIMPLE, + OSMV_ROUTE_RMPP, + +} osmv_disp_route_t; + +/** + * FORWARD REFERENCES TO PRIVATE FUNCTIONS + */ + +static osmv_disp_route_t +__osmv_dispatch_route(IN osm_bind_handle_t h_bind, + IN const ib_mad_t * p_mad, OUT osmv_txn_ctx_t ** pp_txn); + +static void +__osmv_dispatch_simple_mad(IN osm_bind_handle_t h_bind, + IN const ib_mad_t * p_mad, + IN osmv_txn_ctx_t * p_txn, + IN const osm_mad_addr_t * p_mad_addr); + +static void +__osmv_dispatch_rmpp_mad(IN osm_bind_handle_t h_bind, + IN const ib_mad_t * p_mad, + IN osmv_txn_ctx_t * p_txn, + IN const osm_mad_addr_t * p_mad_addr); + +static void +__osmv_dispatch_rmpp_snd(IN osm_bind_handle_t h_bind, + IN const ib_mad_t * p_mad, + IN osmv_txn_ctx_t * p_txn, + IN const osm_mad_addr_t * p_mad_addr); + +static ib_api_status_t +__osmv_dispatch_rmpp_rcv(IN osm_bind_handle_t h_bind, + IN const ib_mad_t * p_mad, + IN osmv_txn_ctx_t * p_txn, + IN const osm_mad_addr_t * p_mad_addr); + +static ib_api_status_t +__osmv_dispatch_accept_seg(IN osm_bind_handle_t h_bind, + IN osmv_txn_ctx_t * p_txn, + IN const ib_mad_t * p_mad); +static void +__osmv_dispatch_send_ack(IN osm_bind_handle_t h_bind, + IN const ib_mad_t * p_req_mad, + IN osmv_txn_ctx_t * p_txn, + IN const osm_mad_addr_t * p_mad_addr); + +/* + * NAME + * osmv_dispatch_mad + * + 
* DESCRIPTION
+ *   Lower-level MAD dispatcher.
+ *   Implements a switch between the following MAD consumers:
+ *   (1) Non-RMPP consumer (DATA)
+ *   (2) RMPP receiver     (DATA/ABORT/STOP)
+ *   (3) RMPP sender       (ACK/ABORT/STOP)
+ *
+ * PARAMETERS
+ *   h_bind                The bind handle
+ *   p_mad_buf             The 256 byte buffer of individual MAD
+ *   p_mad_addr            The MAD originator's address
+ *
+ * RETURN VALUE
+ *   IB_INTERRUPTED when the bind handle is being closed,
+ *   IB_SUCCESS otherwise (also when the MAD is dropped).
+ */
+
+ib_api_status_t
+osmv_dispatch_mad(IN osm_bind_handle_t h_bind,
+		  IN const void *p_mad_buf,
+		  IN const osm_mad_addr_t * p_mad_addr)
+{
+	ib_api_status_t ret = IB_SUCCESS;
+	const ib_mad_t *p_mad = (ib_mad_t *) p_mad_buf;
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+	osmv_txn_ctx_t *p_txn = NULL;
+	osm_log_t *p_log;
+
+	/* Check the arguments before the handle is first dereferenced */
+	CL_ASSERT(NULL != h_bind && NULL != p_mad && NULL != p_mad_addr);
+
+	p_log = p_bo->p_vendor->p_log;
+
+	OSM_LOG_ENTER(p_bo->p_vendor->p_log);
+
+	osmv_txn_lock(p_bo);
+
+	if (TRUE == p_bo->is_closing) {
+
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+			"The bind handle %p is being closed. "
+			"The MAD will not be dispatched.\n", p_bo);
+
+		ret = IB_INTERRUPTED;
+		goto dispatch_mad_done;
+	}
+
+	/*
+	   Add call for packet drop randomizer.
+	   This is a testing feature. If run_randomizer flag is set to TRUE,
+	   the randomizer will be called, and randomly will drop
+	   a packet. This is used for simulating unstable fabric.
+	 */
+	if (p_bo->p_vendor->run_randomizer == TRUE) {
+		/* Try the randomizer */
+		if (osm_pkt_randomizer_mad_drop(p_bo->p_vendor->p_log,
+						p_bo->p_vendor->
+						p_pkt_randomizer,
+						p_mad) == TRUE) {
+			osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+				"The MAD will not be dispatched.\n");
+			goto dispatch_mad_done;
+		}
+	}
+
+	switch (__osmv_dispatch_route(h_bind, p_mad, &p_txn)) {
+
+	case OSMV_ROUTE_DROP:
+		break;		/* Do nothing */
+
+	case OSMV_ROUTE_SIMPLE:
+		__osmv_dispatch_simple_mad(h_bind, p_mad, p_txn, p_mad_addr);
+		break;
+
+	case OSMV_ROUTE_RMPP:
+		__osmv_dispatch_rmpp_mad(h_bind, p_mad, p_txn, p_mad_addr);
+		break;
+
+	default:
+		CL_ASSERT(FALSE);
+	}
+
+dispatch_mad_done:
+	osmv_txn_unlock(p_bo);
+
+	OSM_LOG_EXIT(p_log);
+	return ret;
+}
+
+/*
+ *  NAME            __osmv_dispatch_route()
+ *
+ *  DESCRIPTION     Decide which way to handle the received MAD: simple txn/RMPP/drop
+ *
+ *  *pp_txn receives the result of the transaction lookup by the MAD's TID.
+ */
+
+static osmv_disp_route_t
+__osmv_dispatch_route(IN osm_bind_handle_t h_bind,
+		      IN const ib_mad_t * p_mad, OUT osmv_txn_ctx_t ** pp_txn)
+{
+	ib_api_status_t ret;
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+	boolean_t is_resp = ib_mad_is_response(p_mad);
+	boolean_t is_txn;
+	uint64_t key = cl_ntoh64(p_mad->trans_id);
+
+	CL_ASSERT(NULL != pp_txn);
+
+	ret = osmv_txn_lookup(h_bind, key, pp_txn);
+	is_txn = (IB_SUCCESS == ret);
+
+	if (FALSE == is_txn && TRUE == is_resp) {
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+			"Received a response to a non-started/aged-out transaction (tid=0x%" PRIx64 "). "
+			"Dropping the MAD.\n", key);
+		return OSMV_ROUTE_DROP;
+	}
+
+	if (TRUE == osmv_mad_is_rmpp(p_mad)) {
+		/* An RMPP transaction. The filtering is more delicate there */
+		return OSMV_ROUTE_RMPP;
+	}
+
+	if (TRUE == is_txn && FALSE == is_resp) {
+		/* Does this MAD try to start a transaction with duplicate tid? */
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+			"Duplicate TID 0x%" PRIx64 " received (not a response). "
+			"Dropping the MAD.\n", key);
+
+		return OSMV_ROUTE_DROP;
+	}
+
+	return OSMV_ROUTE_SIMPLE;
+}
+
+/*
+ *  NAME            __osmv_dispatch_simple_mad()
+ *
+ *  DESCRIPTION     Handle a MAD that is part of non-RMPP transfer
+ *
+ *  The MAD is copied into a wrapper taken from the MAD pool and handed
+ *  to the receive callback.  With a transaction (p_txn != NULL) the MAD
+ *  is a response: it is paired with the request and the txn is closed.
+ */
+
+static void
+__osmv_dispatch_simple_mad(IN osm_bind_handle_t h_bind,
+			   IN const ib_mad_t * p_mad,
+			   IN osmv_txn_ctx_t * p_txn,
+			   IN const osm_mad_addr_t * p_mad_addr)
+{
+	osm_madw_t *p_madw;
+	ib_mad_t *p_mad_buf;
+	osm_madw_t *p_req_madw = NULL;
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+
+	OSM_LOG_ENTER(p_bo->p_vendor->p_log);
+
+	/* Build the MAD wrapper to be returned to the user.
+	 * The actual storage for the MAD is allocated there.
+	 */
+	p_madw =
+	    osm_mad_pool_get(p_bo->p_osm_pool, h_bind, MAD_BLOCK_SIZE,
+			     p_mad_addr);
+
+	if (NULL == p_madw) {
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
+			"__osmv_dispatch_simple_mad: ERR 6501: "
+			"Out Of Memory - could not allocate a buffer of size %d\n",
+			MAD_BLOCK_SIZE);
+
+		goto dispatch_simple_mad_done;
+	}
+
+	p_mad_buf = osm_madw_get_mad_ptr(p_madw);
+	/* Copy the payload to the MAD buffer */
+	memcpy((void *)p_mad_buf, (void *)p_mad, MAD_BLOCK_SIZE);
+
+	if (NULL != p_txn) {
+		/* This is a RESPONSE MAD. Pair it with the REQUEST MAD, pass upstream */
+		p_req_madw = p_txn->p_madw;
+		CL_ASSERT(NULL != p_req_madw);
+
+		p_mad_buf->trans_id = cl_hton64(osmv_txn_get_tid(p_txn));
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+			"Restoring the original TID to 0x%" PRIx64 "\n",
+			cl_ntoh64(p_mad_buf->trans_id));
+
+		/* Reply matched, transaction complete */
+		osmv_txn_done(h_bind, osmv_txn_get_key(p_txn), FALSE);
+	} else {
+		/* This is a REQUEST MAD. Don't create a context, pass upstream */
+	}
+
+	/* Do the job ! */
+	p_bo->recv_cb(p_madw, p_bo->cb_context, p_req_madw);
+
+dispatch_simple_mad_done:
+	OSM_LOG_EXIT(p_bo->p_vendor->p_log);
+}
+
+/*
+ *  NAME            __osmv_dispatch_rmpp_mad()
+ *
+ *  DESCRIPTION     Handle a MAD that is part of RMPP transfer
+ *
+ *  Without a matching transaction, only the first DATA segment is
+ *  accepted: it opens a new sender-initiated transfer.  The MAD is then
+ *  passed to the RMPP receiver or sender handler, depending on the
+ *  transaction's RMPP state.
+ */
+
+static void
+__osmv_dispatch_rmpp_mad(IN osm_bind_handle_t h_bind,
+			 IN const ib_mad_t * p_mad,
+			 IN osmv_txn_ctx_t * p_txn,
+			 IN const osm_mad_addr_t * p_mad_addr)
+{
+	ib_api_status_t status = IB_SUCCESS;
+	uint64_t key = cl_ntoh64(p_mad->trans_id);
+	boolean_t is_init_by_peer = FALSE;
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+	osm_madw_t *p_madw;
+
+	OSM_LOG_ENTER(p_bo->p_vendor->p_log);
+
+	if (NULL == p_txn) {
+		if (FALSE == osmv_rmpp_is_data(p_mad)
+		    || FALSE == osmv_rmpp_is_first(p_mad)) {
+			osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+				"The MAD does not match any transaction "
+				"and does not start a sender-initiated RMPP transfer.\n");
+			goto dispatch_rmpp_mad_done;
+		}
+
+		/* IB Spec 13.6.2.2. This is a Sender Initiated Transfer.
+		   My peer is the requester and RMPP Sender. I am the RMPP Receiver.
+		 */
+		status = osmv_txn_init(h_bind, /*tid==key */ key, key, &p_txn);
+		if (IB_SUCCESS != status) {
+			goto dispatch_rmpp_mad_done;
+		}
+
+		is_init_by_peer = TRUE;
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+			"A new sender-initiated transfer (TID=0x%" PRIx64 ") started\n",
+			key);
+	}
+
+	if (OSMV_TXN_RMPP_NONE == osmv_txn_get_rmpp_state(p_txn)) {
+		/* Case 1: Fall through from above.
+		 * Case 2: When the transaction was initiated by me
+		 *         (a single request MAD), there was an uncertainty
+		 *         whether the reply will be RMPP. Now it's resolved,
+		 *         since the reply is RMPP!
+		 */
+		status =
+		    osmv_txn_init_rmpp_receiver(h_bind, p_txn, is_init_by_peer);
+		if (IB_SUCCESS != status) {
+			goto dispatch_rmpp_mad_done;
+		}
+	}
+
+	switch (osmv_txn_get_rmpp_state(p_txn)) {
+
+	case OSMV_TXN_RMPP_RECEIVER:
+		status =
+		    __osmv_dispatch_rmpp_rcv(h_bind, p_mad, p_txn, p_mad_addr);
+		if (IB_SUCCESS != status) {
+			if (FALSE == osmv_txn_is_rmpp_init_by_peer(p_txn)) {
+				/* This is a requester, still waiting for the reply. Apply the callback */
+				/* update the status of the p_madw */
+				p_madw = osmv_txn_get_madw(p_txn);
+				p_madw->status = status;
+				p_bo->send_err_cb(p_bo->cb_context, p_madw);
+			}
+
+			/* ABORT/STOP/LOCAL ERROR */
+			osmv_txn_done(h_bind, osmv_txn_get_key(p_txn), FALSE);
+		}
+		break;
+
+	case OSMV_TXN_RMPP_SENDER:
+		__osmv_dispatch_rmpp_snd(h_bind, p_mad, p_txn, p_mad_addr);
+		/* If an error happens here, it's the sender thread to cleanup the txn */
+		break;
+
+	default:
+		CL_ASSERT(FALSE);
+	}
+
+dispatch_rmpp_mad_done:
+	OSM_LOG_EXIT(p_bo->p_vendor->p_log);
+}
+
+/*
+ *  NAME            __osmv_dispatch_rmpp_snd()
+ *
+ *  DESCRIPTION     MAD handling by an RMPP sender (ACK/ABORT/STOP)
+ *
+ *  A valid ACK of the last segment of the current window advances the
+ *  send window and signals the send context's event.  ABORT/STOP and
+ *  inconsistent ACKs are reported through osmv_rmpp_snd_error().
+ */
+
+static void
+__osmv_dispatch_rmpp_snd(IN osm_bind_handle_t h_bind,
+			 IN const ib_mad_t * p_mad,
+			 IN osmv_txn_ctx_t * p_txn,
+			 IN const osm_mad_addr_t * p_mad_addr)
+{
+	osmv_rmpp_send_ctx_t *p_send_ctx = osmv_txn_get_rmpp_send_ctx(p_txn);
+
+	uint32_t old_wl = p_send_ctx->window_last;
+	uint32_t total_segs = osmv_rmpp_send_ctx_get_num_segs(p_send_ctx);
+	uint32_t seg_num = cl_ntoh32(((ib_rmpp_mad_t *) p_mad)->seg_num);
+	uint32_t new_wl = cl_ntoh32(((ib_rmpp_mad_t *) p_mad)->paylen_newwin);
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+
+	OSM_LOG_ENTER(p_bo->p_vendor->p_log);
+
+	if (TRUE == osmv_rmpp_is_abort_stop(p_mad)) {
+
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
+			"__osmv_dispatch_rmpp_snd: ERR 6502: "
+			"The remote side sent an ABORT/STOP indication.\n");
+		osmv_rmpp_snd_error(p_send_ctx, IB_REMOTE_ERROR);
+		goto dispatch_rmpp_snd_done;
+	}
+
+	if (FALSE == osmv_rmpp_is_ack(p_mad)) {
+
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+			"Not supposed to receive DATA packets --> dropping the MAD\n");
+		goto dispatch_rmpp_snd_done;
+	}
+
+	/* Continue processing the ACK */
+	if (seg_num > old_wl) {
+
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
+			"__osmv_dispatch_rmpp_snd: ERR 6503: "
+			"ACK received for a non-sent segment %d\n", seg_num);
+
+		osmv_rmpp_send_nak(h_bind, p_mad, p_mad_addr,
+				   IB_RMPP_TYPE_ABORT, IB_RMPP_STATUS_S2B);
+
+		osmv_rmpp_snd_error(p_send_ctx, IB_REMOTE_ERROR);
+		goto dispatch_rmpp_snd_done;
+	}
+
+	osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+		"__osmv_dispatch_rmpp_snd: "
+		"New WL = %u Old WL = %u Total Segs = %u\n",
+		new_wl, old_wl, total_segs);
+
+	if (new_wl < old_wl) {
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
+			"__osmv_dispatch_rmpp_snd: ERR 6508: "
+			"The receiver requests a smaller WL (%d) than before (%d)\n",
+			new_wl, old_wl);
+
+		osmv_rmpp_send_nak(h_bind, p_mad, p_mad_addr,
+				   IB_RMPP_TYPE_ABORT, IB_RMPP_STATUS_W2S);
+
+		osmv_rmpp_snd_error(p_send_ctx, IB_REMOTE_ERROR);
+		goto dispatch_rmpp_snd_done;
+	}
+
+	/* Update the sender's window, and optionally wake up the sender thread
+	 * Note! A single ACK can acknowledge a whole range of segments: [WF..SEG_NUM]
+	 */
+	osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+		"ACK for seg_num #%d accepted.\n", seg_num);
+
+	if (seg_num == old_wl) {
+
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+			"The send window [%d:%d] is totally acknowledged.\n",
+			p_send_ctx->window_first, old_wl);
+
+		p_send_ctx->window_first = seg_num + 1;
+		p_send_ctx->window_last =
+		    (new_wl < total_segs) ? new_wl : total_segs;
+
+		/* Remove the response timeout event for the window */
+		osmv_txn_remove_timeout_ev(h_bind, osmv_txn_get_key(p_txn));
+
+		/* Wake up the sending thread */
+		cl_event_signal(&p_send_ctx->event);
+	}
+
+dispatch_rmpp_snd_done:
+	OSM_LOG_EXIT(p_bo->p_vendor->p_log);
+}
+
+/*
+ *  NAME           __osmv_dispatch_rmpp_rcv()
+ *
+ *  DESCRIPTION    MAD handling by an RMPP receiver (DATA/ABORT/STOP)
+ *
+ *  Stores the segment, answers with an ACK (or an ABORT/STOP NAK) and,
+ *  once the last segment has arrived, reassembles the whole MAD into a
+ *  wrapper taken from the MAD pool and hands it to the receive callback.
+ *
+ *  Returns IB_SUCCESS, or the error that terminates the transfer
+ *  (IB_REMOTE_ERROR, IB_ERROR, IB_INSUFFICIENT_MEMORY or the reassembly
+ *  status); the caller closes the transaction on error.
+ */
+
+static ib_api_status_t
+__osmv_dispatch_rmpp_rcv(IN osm_bind_handle_t h_bind,
+			 IN const ib_mad_t * p_mad,
+			 IN osmv_txn_ctx_t * p_txn,
+			 IN const osm_mad_addr_t * p_mad_addr)
+{
+	ib_api_status_t status = IB_SUCCESS;
+	osmv_rmpp_recv_ctx_t *p_recv_ctx = osmv_txn_get_rmpp_recv_ctx(p_txn);
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+	boolean_t is_last1 = FALSE, is_last2 = FALSE;
+	osm_madw_t *p_new_madw = NULL, *p_req_madw = NULL;
+	ib_mad_t *p_mad_buf;
+	uint32_t size = 0;
+	uint64_t key = osmv_txn_get_key(p_txn);
+	uint64_t tid = osmv_txn_get_tid(p_txn);
+
+	OSM_LOG_ENTER(p_bo->p_vendor->p_log);
+
+	if (TRUE == osmv_rmpp_is_ack(p_mad)) {
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+			"Not supposed to receive ACK's --> dropping the MAD\n");
+
+		goto dispatch_rmpp_rcv_done;
+	}
+
+	if (TRUE == osmv_rmpp_is_abort_stop(p_mad)) {
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+			"__osmv_dispatch_rmpp_rcv: ERR 6504: "
+			"The Remote Side stopped sending\n");
+
+		status = IB_REMOTE_ERROR;
+		goto dispatch_rmpp_rcv_done;
+	}
+
+	status = __osmv_dispatch_accept_seg(h_bind, p_txn, p_mad);
+	switch (status) {
+
+	case IB_SUCCESS:
+
+		/* Check whether this is the legal last MAD */
+		/* Criteria #1: the received MAD is marked last */
+		is_last1 = osmv_rmpp_is_last(p_mad);
+
+		/* Criteria #2: the total accumulated length hits the advertised one */
+		is_last2 = is_last1;
+
+		size = osmv_rmpp_recv_ctx_get_byte_num_from_first(p_recv_ctx);
+		if (size > 0) {
+			is_last2 =
+			    (osmv_rmpp_recv_ctx_get_cur_byte_num(p_recv_ctx) >=
+			     size);
+		}
+
+		if (is_last1 != is_last2) {
+
+			osmv_rmpp_send_nak(h_bind, p_mad, p_mad_addr,
+					   IB_RMPP_TYPE_ABORT,
+					   IB_RMPP_STATUS_BAD_LEN);
+
+			status = IB_ERROR;
+			goto dispatch_rmpp_rcv_done;
+		}
+
+		/* TBD  Consider an optimization - sending an ACK
+		 * only for the last segment in the window
+		 */
+		__osmv_dispatch_send_ack(h_bind, p_mad, p_txn, p_mad_addr);
+		break;
+
+	case IB_INSUFFICIENT_RESOURCES:
+		/* An out-of-order segment received. Send the ACK anyway */
+		__osmv_dispatch_send_ack(h_bind, p_mad, p_txn, p_mad_addr);
+		status = IB_SUCCESS;
+		goto dispatch_rmpp_rcv_done;
+
+	case IB_INSUFFICIENT_MEMORY:
+		osmv_rmpp_send_nak(h_bind, p_mad, p_mad_addr,
+				   IB_RMPP_TYPE_STOP, IB_RMPP_STATUS_RESX);
+		goto dispatch_rmpp_rcv_done;
+
+	default:
+		/* Illegal return code */
+		CL_ASSERT(FALSE);
+	}
+
+	if (TRUE != is_last1) {
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+			"RMPP MADW assembly continues, TID=0x%" PRIx64 "\n", tid);
+		goto dispatch_rmpp_rcv_done;
+	}
+
+	/* This is the last packet. */
+	if (0 == size) {
+		/* The total size was not advertised in the first packet */
+		size = osmv_rmpp_recv_ctx_get_byte_num_from_last(p_recv_ctx);
+	}
+
+	/*
+	   NOTE: the received mad might not be >= 256 bytes.
+	   some MADs might contain several SA records but still be
+	   less than a full MAD.
+	   We have to use RMPP to send them over since on a regular
+	   "simple" MAD there is no way to know how many records were sent
+	 */
+
+	/* Build the MAD wrapper to be returned to the user.
+	 * The actual storage for the MAD is allocated there.
+	 */
+	p_new_madw =
+	    osm_mad_pool_get(p_bo->p_osm_pool, h_bind, size, p_mad_addr);
+	if (NULL == p_new_madw) {
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
+			"__osmv_dispatch_rmpp_rcv: ERR 6506: "
+			"Out Of Memory - could not allocate %d bytes for the MADW\n",
+			size);
+
+		status = IB_INSUFFICIENT_MEMORY;
+		goto dispatch_rmpp_rcv_done;
+	}
+
+	p_req_madw = osmv_txn_get_madw(p_txn);
+	p_mad_buf = osm_madw_get_mad_ptr(p_new_madw);
+	status = osmv_rmpp_recv_ctx_reassemble_arbt_mad(p_recv_ctx, size,
+							(uint8_t *) p_mad_buf);
+	if (IB_SUCCESS != status) {
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
+			"__osmv_dispatch_rmpp_rcv: ERR 6507: "
+			"Internal error - could not reassemble the result MAD\n");
+		/* The wrapper never reaches the user - give it back to
+		 * the pool instead of leaking it */
+		osm_mad_pool_put(p_bo->p_osm_pool, p_new_madw);
+		goto dispatch_rmpp_rcv_done;	/* What can happen here? */
+	}
+
+	/* The MAD is assembled, we are about to apply the callback.
+	 * Delete the transaction context, unless the transaction is double sided */
+	if (FALSE == osmv_txn_is_rmpp_init_by_peer(p_txn)
+	    || FALSE == osmv_mad_is_multi_resp(p_mad)) {
+
+		osmv_txn_done(h_bind, key, FALSE);
+	}
+
+	osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+		"RMPP MADW %p assembly complete, TID=0x%" PRIx64 "\n", p_new_madw,
+		tid);
+
+	p_mad_buf->trans_id = cl_hton64(tid);
+	osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+		"Restoring the original TID to 0x%" PRIx64 "\n",
+		cl_ntoh64(p_mad_buf->trans_id));
+
+	/* Finally, do the job! */
+	p_bo->recv_cb(p_new_madw, p_bo->cb_context, p_req_madw);
+
+dispatch_rmpp_rcv_done:
+	OSM_LOG_EXIT(p_bo->p_vendor->p_log);
+	return status;
+}
+
+/*
+ *  NAME            __osmv_dispatch_accept_seg()
+ *
+ *  DESCRIPTION     Store a DATA segment at the RMPP receiver side,
+ *                  if one is received in order.
+ */
+
+/*
+ * Returns IB_SUCCESS when the segment was stored,
+ * IB_INSUFFICIENT_RESOURCES when it is not the expected (next in order)
+ * segment, or the failure status of osmv_rmpp_recv_ctx_store_mad_seg().
+ */
+static ib_api_status_t
+__osmv_dispatch_accept_seg(IN osm_bind_handle_t h_bind,
+			   IN osmv_txn_ctx_t * p_txn, IN const ib_mad_t * p_mad)
+{
+	ib_api_status_t ret = IB_SUCCESS;
+	uint32_t seg_num = cl_ntoh32(((ib_rmpp_mad_t *) p_mad)->seg_num);
+	osmv_rmpp_recv_ctx_t *p_recv_ctx = osmv_txn_get_rmpp_recv_ctx(p_txn);
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+	uint64_t tid = osmv_txn_get_tid(p_txn);
+
+	OSM_LOG_ENTER(p_bo->p_vendor->p_log);
+
+	if (seg_num != p_recv_ctx->expected_seg) {
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+			"TID 0x%" PRIx64 ": can't accept this segment (%d) - "
+			"this is a Go-Back-N implementation\n", tid, seg_num);
+		ret = IB_INSUFFICIENT_RESOURCES;
+		goto accept_seg_done;
+	}
+
+	/* Store the packet's copy in the reassembly list.
+	 * Promote the expected segment counter.
+	 */
+	ret = osmv_rmpp_recv_ctx_store_mad_seg(p_recv_ctx, (uint8_t *) p_mad);
+	if (IB_SUCCESS != ret) {
+		goto accept_seg_done;
+	}
+
+	osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+		"TID 0x%" PRIx64 ": segment %d accepted\n", tid, seg_num);
+	p_recv_ctx->expected_seg = seg_num + 1;
+
+accept_seg_done:
+	/* single exit, so that every log entry is paired with an exit */
+	OSM_LOG_EXIT(p_bo->p_vendor->p_log);
+	return ret;
+}
+
+/*
+ *  NAME            __osmv_dispatch_send_ack()
+ *
+ *  DESCRIPTION     Acknowledge the segment carried by p_req_mad and
+ *                  publish the receiver's new window last value.
+ *
+ *  ISSUES
+ *    Consider sending the ACK from an async thread
+ *    if problems with the receiving side processing arise.
+ */
+
+static void
+__osmv_dispatch_send_ack(IN osm_bind_handle_t h_bind,
+			 IN const ib_mad_t * p_req_mad,
+			 IN osmv_txn_ctx_t * p_txn,
+			 IN const osm_mad_addr_t * p_mad_addr)
+{
+	osmv_rmpp_recv_ctx_t *p_recv_ctx = osmv_txn_get_rmpp_recv_ctx(p_txn);
+
+	/* ACK the segment # that was accepted */
+	uint32_t seg_num = cl_ntoh32(((ib_rmpp_mad_t *) p_req_mad)->seg_num);
+
+	/* NOTE! The receiver can publish the New Window Last (NWL) value
+	 * that is greater than the total number of segments to be sent.
+	 * It's the sender's responsibility to compute the correct number
+	 * of segments to send in the next burst.
+	 */
+	uint32_t nwl = p_recv_ctx->expected_seg + OSMV_RMPP_RECV_WIN - 1;
+
+	osmv_rmpp_send_ack(h_bind, p_req_mad, seg_num, nwl, p_mad_addr);
+}
diff --git a/libvendor/osm_vendor_mlx_hca.c b/libvendor/osm_vendor_mlx_hca.c
new file mode 100644
index 0000000..910e81b
--- /dev/null
+++ b/libvendor/osm_vendor_mlx_hca.c
@@ -0,0 +1,522 @@
+/*
+ * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#if defined(OSM_VENDOR_INTF_MTL) | defined(OSM_VENDOR_INTF_TS) +#undef IN +#undef OUT +#include +#include +#include +#include +#include +#include + +/******************************************************************************** + * + * Provide the functionality for selecting an HCA port and obtaining its GUID. + * + ********************************************************************************/ + +typedef struct _osm_ca_info { + ib_net64_t guid; + size_t attr_size; + ib_ca_attr_t *p_attr; +} osm_ca_info_t; + +/********************************************************************** + * Convert the given GID to a GUID by copying the 8 bytes that start at + * byte offset 8 of the GID into *guid. + * Always returns IB_SUCCESS. + **********************************************************************/ +ib_api_status_t +__osm_vendor_gid_to_guid(IN u_int8_t * gid, OUT VAPI_gid_t * guid) +{ + memcpy(guid, gid + 8, 8); + return (IB_SUCCESS); +} + +/********************************************************************** + * Returns a pointer to the port attribute of the specified port + * owned by this CA. + * index is used directly as a subscript into p_attr->p_port_attr; + * no bounds check is made against num_ports. + ************************************************************************/ +static ib_port_attr_t *__osm_ca_info_get_port_attr_ptr(IN const osm_ca_info_t * + const p_ca_info, + IN const uint8_t index) +{ + return (&p_ca_info->p_attr->p_port_attr[index]); +} + +/******************************************************************************** + * get the CA names available on the system + * NOTE: user of this function needs to deallocate p_hca_ids after usage.
+ ********************************************************************************/ +static ib_api_status_t +__osm_vendor_get_ca_ids(IN osm_vendor_t * const p_vend, + IN VAPI_hca_id_t ** const p_hca_ids, + IN uint32_t * const p_num_guids) +{ + ib_api_status_t status; + VAPI_ret_t vapi_res; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_hca_ids); + CL_ASSERT(p_num_guids); + + /* first call is just to get the number */ + vapi_res = EVAPI_list_hcas(0, p_num_guids, NULL); + + /* fail ? */ + if (vapi_res == VAPI_EINVAL_PARAM) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_vendor_get_ca_ids: ERR 3D08: : " + "Bad parameter in calling: EVAPI_list_hcas. (%d)\n", + vapi_res); + status = IB_ERROR; + goto Exit; + } + + /* NO HCA ? */ + if (*p_num_guids == 0) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_vendor_get_ca_ids: ERR 3D09: " + "No available channel adapters.\n"); + status = IB_INSUFFICIENT_RESOURCES; + goto Exit; + } + + /* allocate and really call - user of this function needs to deallocate it */ + *p_hca_ids = + (VAPI_hca_id_t *) malloc(*p_num_guids * sizeof(VAPI_hca_id_t)); + if (*p_hca_ids == NULL) { + /* never hand a NULL array to EVAPI_list_hcas() */ + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_vendor_get_ca_ids: " + "Unable to allocate the CA id array.\n"); + status = IB_INSUFFICIENT_MEMORY; + goto Exit; + } + + /* now call it really */ + vapi_res = EVAPI_list_hcas(*p_num_guids, p_num_guids, *p_hca_ids); + + /* too many ? */ + if (vapi_res == VAPI_EAGAIN) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_vendor_get_ca_ids: ERR 3D10: " + "More CA GUIDs than allocated array (%d).\n", + *p_num_guids); + status = IB_ERROR; + goto Exit; + } + + /* fail ?
*/ + if (vapi_res != VAPI_OK) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_vendor_get_ca_ids: ERR 3D11: : " + "Bad parameter in calling: EVAPI_list_hcas.\n"); + status = IB_ERROR; + goto Exit; + } + + if (osm_log_is_active(p_vend->p_log, OSM_LOG_DEBUG)) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__osm_vendor_get_ca_ids: " + "Detected %u local channel adapters.\n", *p_num_guids); + } + + status = IB_SUCCESS; + +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +/********************************************************************** + * Initialize an Info Struct for the Given HCA by its Id + * + * On success p_ca_info->p_attr and p_ca_info->p_attr->p_port_attr are + * malloc'ed here and owned by the caller. On failure IB_ERROR is + * returned; anything already stored in p_ca_info is left for the caller + * to release, while the temporary GID table is always freed here. + **********************************************************************/ +static ib_api_status_t +__osm_ca_info_init(IN osm_vendor_t * const p_vend, + IN VAPI_hca_id_t ca_id, OUT osm_ca_info_t * const p_ca_info) +{ + ib_api_status_t status = IB_ERROR; + VAPI_ret_t vapi_res; + VAPI_hca_hndl_t hca_hndl; + VAPI_hca_vendor_t hca_vendor; + VAPI_hca_cap_t hca_cap; + VAPI_hca_port_t hca_port; + uint8_t port_num; + IB_gid_t *p_port_gid; + uint16_t maxNumGids; + + OSM_LOG_ENTER(p_vend->p_log); + + /* get the HCA handle */ + vapi_res = EVAPI_get_hca_hndl(ca_id, &hca_hndl); + if (vapi_res != VAPI_OK) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: ERR 3D05: " + "Fail to get HCA handle (%u).\n", vapi_res); + goto Exit; + } + + if (osm_log_is_active(p_vend->p_log, OSM_LOG_DEBUG)) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__osm_ca_info_init: " "Querying CA %s.\n", ca_id); + } + + /* query and get the HCA capability */ + vapi_res = VAPI_query_hca_cap(hca_hndl, &hca_vendor, &hca_cap); + if (vapi_res != VAPI_OK) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: ERR 3D06: " + "Fail to get HCA Capabilities (%u).\n", vapi_res); + goto Exit; + } + + /* get the guid of the HCA */ + memcpy(&(p_ca_info->guid), hca_cap.node_guid, 8 * sizeof(u_int8_t)); + p_ca_info->attr_size = 1; + p_ca_info->p_attr = (ib_ca_attr_t *) malloc(sizeof(ib_ca_attr_t)); + if (p_ca_info->p_attr == NULL) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: " + "Unable to allocate the CA attributes.\n"); + goto Exit; + } +
memcpy(&(p_ca_info->p_attr->ca_guid), hca_cap.node_guid, + 8 * sizeof(u_int8_t)); + + /* now obtain the attributes of the ports */ + p_ca_info->p_attr->num_ports = hca_cap.phys_port_num; + p_ca_info->p_attr->p_port_attr = + (ib_port_attr_t *) malloc(hca_cap.phys_port_num * + sizeof(ib_port_attr_t)); + if (p_ca_info->p_attr->p_port_attr == NULL + && hca_cap.phys_port_num != 0) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: " + "Unable to allocate the port attributes array.\n"); + /* there is no port array to walk or free */ + p_ca_info->p_attr->num_ports = 0; + goto Exit; + } + + for (port_num = 0; port_num < p_ca_info->p_attr->num_ports; port_num++) { + + /* query the port attributes */ + vapi_res = + VAPI_query_hca_port_prop(hca_hndl, port_num + 1, &hca_port); + if (vapi_res != VAPI_OK) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: ERR 3D07: " + "Fail to get HCA Port Attributes (%d).\n", + vapi_res); + goto Exit; + } + + /* first call to know the size of the gid table */ + vapi_res = + VAPI_query_hca_gid_tbl(hca_hndl, port_num + 1, 0, + &maxNumGids, NULL); + p_port_gid = (IB_gid_t *) malloc(maxNumGids * sizeof(IB_gid_t)); + if (p_port_gid == NULL) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: " + "Unable to allocate the port GID table.\n"); + goto Exit; + } + + vapi_res = + VAPI_query_hca_gid_tbl(hca_hndl, port_num + 1, maxNumGids, + &maxNumGids, p_port_gid); + if (vapi_res != VAPI_OK) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: ERR 3D12: " + "Fail to get HCA Port GID (%d).\n", vapi_res); + /* the GID table is local to this iteration - do not leak it */ + free(p_port_gid); + goto Exit; + } + + __osm_vendor_gid_to_guid(p_port_gid[0], + (IB_gid_t *) & p_ca_info->p_attr-> + p_port_attr[port_num].port_guid); + p_ca_info->p_attr->p_port_attr[port_num].lid = hca_port.lid; + p_ca_info->p_attr->p_port_attr[port_num].link_state = + hca_port.state; + p_ca_info->p_attr->p_port_attr[port_num].sm_lid = + hca_port.sm_lid; + + free(p_port_gid); + } + + status = IB_SUCCESS; +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +void +osm_ca_info_destroy(IN osm_vendor_t * const p_vend, + IN osm_ca_info_t * const p_ca_info, IN uint8_t num_ca) +{ + osm_ca_info_t *p_ca; + uint8_t i; + + OSM_LOG_ENTER(p_vend->p_log); + + for (i = 0; i < num_ca; i++) { + p_ca = &p_ca_info[i]; + + if (NULL != p_ca->p_attr) { + if (0 != p_ca->p_attr->num_ports) { + free(p_ca->p_attr->p_port_attr); + } + + free(p_ca->p_attr); + } + } + +
free(p_ca_info); + + OSM_LOG_EXIT(p_vend->p_log); +} + +/********************************************************************** + * Fill in the array of port_attr with all available ports on ALL the + * avilable CAs on this machine. + * ALSO - + * Update the vendor object list of ca_info structs + **********************************************************************/ +ib_api_status_t +osm_vendor_get_all_port_attr(IN osm_vendor_t * const p_vend, + IN ib_port_attr_t * const p_attr_array, + IN uint32_t * const p_num_ports) +{ + ib_api_status_t status; + + uint32_t ca; + uint32_t ca_count = 0; + uint32_t port_count = 0; + uint8_t port_num; + uint32_t total_ports = 0; + VAPI_hca_id_t *p_ca_ids = NULL; + osm_ca_info_t *p_ca_infos = NULL; + uint32_t attr_array_sz = *p_num_ports; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vend); + + /* determine the number of CA's */ + status = __osm_vendor_get_ca_ids(p_vend, &p_ca_ids, &ca_count); + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_all_port_attr: ERR 3D13: " + "Fail to get CA Ids.\n"); + goto Exit; + } + + /* Allocate an array big enough to hold the ca info objects */ + p_ca_infos = malloc(ca_count * sizeof(osm_ca_info_t)); + if (p_ca_infos == NULL) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_all_port_attr: ERR 3D14: " + "Unable to allocate CA information array.\n"); + goto Exit; + } + + memset(p_ca_infos, 0, ca_count * sizeof(osm_ca_info_t)); + + /* + * For each CA, retrieve the CA info attributes + */ + for (ca = 0; ca < ca_count; ca++) { + status = + __osm_ca_info_init(p_vend, p_ca_ids[ca], &p_ca_infos[ca]); + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_all_port_attr: ERR 3D15: " + "Unable to initialize CA Info object (%s).\n", + ib_get_err_str(status)); + goto Exit; + } + total_ports += p_ca_infos[ca].p_attr->num_ports; + } + + *p_num_ports = total_ports; + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + 
"osm_vendor_get_all_port_attr: total ports:%u \n", total_ports); + + /* + * If the user supplied enough storage, return the port guids, + * otherwise, return the appropriate error. + */ + if (attr_array_sz >= total_ports) { + for (ca = 0; ca < ca_count; ca++) { + uint32_t num_ports; + + num_ports = p_ca_infos[ca].p_attr->num_ports; + + for (port_num = 0; port_num < num_ports; port_num++) { + p_attr_array[port_count] = + *__osm_ca_info_get_port_attr_ptr(&p_ca_infos + [ca], + port_num); + port_count++; + } + } + } else { + status = IB_INSUFFICIENT_MEMORY; + goto Exit; + } + + status = IB_SUCCESS; + +Exit: + if (p_ca_ids) + free(p_ca_ids); + + if (p_ca_infos) { + osm_ca_info_destroy(p_vend, p_ca_infos, ca_count); + } + + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +/********************************************************************** + * Given the vendor obj and a guid + * return the ca id and port number that have that guid + **********************************************************************/ + +ib_api_status_t +osm_vendor_get_guid_ca_and_port(IN osm_vendor_t * const p_vend, + IN ib_net64_t const guid, + OUT VAPI_hca_hndl_t * p_hca_hndl, + OUT VAPI_hca_id_t * p_hca_id, + OUT uint8_t * p_hca_idx, + OUT uint32_t * p_port_num) +{ + + ib_api_status_t status; + VAPI_hca_id_t *p_ca_ids = NULL; + VAPI_ret_t vapi_res; + VAPI_hca_hndl_t hca_hndl; + VAPI_hca_vendor_t hca_vendor; + VAPI_hca_cap_t hca_cap; + IB_gid_t *p_port_gid = NULL; + uint16_t maxNumGids; + ib_net64_t port_guid; + uint32_t ca, portIdx, ca_count; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vend); + + /* + * 1) Determine the number of CA's + * 2) Allocate an array big enough to hold the ca info objects. + * 3) Call again to retrieve the guids. 
+ */ + status = __osm_vendor_get_ca_ids(p_vend, &p_ca_ids, &ca_count); + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_guid_ca_and_port: ERR 3D16: " + "Fail to get CA Ids.\n"); + goto Exit; + } + + /* + * For each CA, retrieve the CA info attributes + * + * NOTE: status holds IB_SUCCESS from here on, so every failure + * path below must set it before jumping to Exit. + */ + for (ca = 0; ca < ca_count; ca++) { + /* get the HCA handle */ + vapi_res = EVAPI_get_hca_hndl(p_ca_ids[ca], &hca_hndl); + if (vapi_res != VAPI_OK) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_guid_ca_and_port: ERR 3D17: " + "Fail to get HCA handle (%u).\n", vapi_res); + status = IB_ERROR; + goto Exit; + } + + /* get the CA attributes - to know how many ports it has: */ + if (osm_log_is_active(p_vend->p_log, OSM_LOG_DEBUG)) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_vendor_get_guid_ca_and_port: " + "Querying CA %s.\n", p_ca_ids[ca]); + } + + /* query and get the HCA capability */ + vapi_res = VAPI_query_hca_cap(hca_hndl, &hca_vendor, &hca_cap); + if (vapi_res != VAPI_OK) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_guid_ca_and_port: ERR 3D18: " + "Fail to get HCA Capabilities (%u).\n", + vapi_res); + status = IB_ERROR; + goto Exit; + } + + /* go over all ports - to obtain their guids */ + for (portIdx = 0; portIdx < hca_cap.phys_port_num; portIdx++) { + vapi_res = + VAPI_query_hca_gid_tbl(hca_hndl, portIdx + 1, 0, + &maxNumGids, NULL); + p_port_gid = + (IB_gid_t *) malloc(maxNumGids * sizeof(IB_gid_t)); + if (p_port_gid == NULL) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_guid_ca_and_port: " + "Unable to allocate the port GID table.\n"); + status = IB_INSUFFICIENT_MEMORY; + goto Exit; + } + + /* get the port guid */ + vapi_res = + VAPI_query_hca_gid_tbl(hca_hndl, portIdx + 1, + maxNumGids, &maxNumGids, + p_port_gid); + if (vapi_res != VAPI_OK) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_guid_ca_and_port: ERR 3D19: " + "Fail to get HCA Port GID (%d).\n", + vapi_res); + /* p_port_gid is released at Exit */ + status = IB_ERROR; + goto Exit; + } + + /* convert to SF style */ + __osm_vendor_gid_to_guid(p_port_gid[0], + (VAPI_gid_t *) & port_guid); + + /* finally did we find it ?
*/ + if (port_guid == guid) { + *p_hca_hndl = hca_hndl; + memcpy(p_hca_id, p_ca_ids[ca], + sizeof(VAPI_hca_id_t)); + *p_hca_idx = ca; + *p_port_num = portIdx + 1; + status = IB_SUCCESS; + goto Exit; + } + + free(p_port_gid); + p_port_gid = NULL; + } /* ALL PORTS */ + } /* all HCAs */ + + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_guid_ca_and_port: ERR 3D20: " + "Fail to find HCA and Port for Port Guid 0x%" PRIx64 "\n", + cl_ntoh64(guid)); + status = IB_INVALID_GUID; + +Exit: + if (p_ca_ids != NULL) + free(p_ca_ids); + if (p_port_gid != NULL) + free(p_port_gid); + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +#endif diff --git a/libvendor/osm_vendor_mlx_hca_anafa.c b/libvendor/osm_vendor_mlx_hca_anafa.c new file mode 100644 index 0000000..a12ed2b --- /dev/null +++ b/libvendor/osm_vendor_mlx_hca_anafa.c @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#if defined(OSM_VENDOR_INTF_ANAFA) +#undef IN +#undef OUT + +#include +#include +#include + +#include +#include +#include + +#include +#include + +/******************************************************************************** + * + * Provide the functionality for selecting an HCA Port and Obtaining it's guid. + * + ********************************************************************************/ + +typedef struct _osm_ca_info { + /* ib_net64_t guid; ?? */ + /* size_t attr_size; ?? 
*/ + ib_ca_attr_t attr; +} osm_ca_info_t; + +/********************************************************************** + * Convert the given GID to a GUID by copying the 8 bytes that start at + * byte offset 8 of the GID into *p_guid. Always returns IB_SUCCESS. + **********************************************************************/ +ib_api_status_t +__osm_vendor_gid_to_guid(IN tTS_IB_GID gid, OUT ib_net64_t * p_guid) +{ + memcpy(p_guid, gid + 8, 8); + return (IB_SUCCESS); +} + +/********************************************************************** + * Initialize an Info Struct for the Given HCA by its Id + * + * On success p_ca_info->attr.p_port_attr points to a one-element array + * malloc'ed here; the caller must free() it. On failure IB_ERROR is + * returned and nothing is left allocated. + **********************************************************************/ +static ib_api_status_t +__osm_ca_info_init(IN osm_vendor_t * const p_vend, + OUT osm_ca_info_t * const p_ca_info) +{ + ib_api_status_t status = IB_ERROR; + int ioctl_ret = 0; + osmv_TOPSPIN_ANAFA_transport_info_t *p_tpot_info = + p_vend->p_transport_info; + osm_ts_gid_entry_ioctl gid_ioctl; + osm_ts_get_port_info_ioctl port_info; + struct ib_get_dev_info_ioctl dev_info; + + OSM_LOG_ENTER(p_vend->p_log); + + /* query HCA guid */ + ioctl_ret = ioctl(p_tpot_info->device_fd, TS_IB_IOCGDEVINFO, &dev_info); + if (ioctl_ret != 0) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: ERR 7001: " + "Fail to get HCA Capabilities (%d).\n", ioctl_ret); + goto Exit; + } + + memcpy(&(p_ca_info->attr.ca_guid), dev_info.dev_info.node_guid, + 8 * sizeof(uint8_t)); + +/* now obtain the attributes of the ports - on our case port 1*/ + + port_info.port = 1; + ioctl_ret = + ioctl(p_tpot_info->device_fd, TS_IB_IOCGPORTINFO, &port_info); + if (ioctl_ret) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: ERR 7002: " + "Fail to get HCA Port Attributes (%d).\n", ioctl_ret); + goto Exit; + } + + gid_ioctl.port = 1; + gid_ioctl.index = 0; + ioctl_ret = + ioctl(p_tpot_info->device_fd, TS_IB_IOCGGIDENTRY, &gid_ioctl); + if (ioctl_ret) { +
osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: ERR 7003: " + "Fail to get HCA Port GID (%d).\n", ioctl_ret); + goto Exit; + } + + /* allocate the port array only after both queries have succeeded, + * so the failure paths above have nothing to release */ + p_ca_info->attr.num_ports = 1; + p_ca_info->attr.p_port_attr = + (ib_port_attr_t *) malloc(1 * sizeof(ib_port_attr_t)); + if (p_ca_info->attr.p_port_attr == NULL) { + p_ca_info->attr.num_ports = 0; + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: " + "Unable to allocate the port attributes array.\n"); + goto Exit; + } + + __osm_vendor_gid_to_guid(gid_ioctl.gid_entry, + &(p_ca_info->attr.p_port_attr[0].port_guid)); + p_ca_info->attr.p_port_attr[0].lid = port_info.port_info.lid; + p_ca_info->attr.p_port_attr[0].link_state = + port_info.port_info.port_state; + p_ca_info->attr.p_port_attr[0].sm_lid = port_info.port_info.sm_lid; + + status = IB_SUCCESS; +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +/********************************************************************** + * Fill in port_attr + * ALSO - + * Update the vendor object list of ca_info structs + * + * On entry *p_num_ports is the capacity of p_attr_array; it must be at + * least 1 and is set to 1 on success. + **********************************************************************/ +ib_api_status_t +osm_vendor_get_all_port_attr(IN osm_vendor_t * const p_vend, + IN ib_port_attr_t * const p_attr_array, + IN uint32_t * const p_num_ports) +{ + ib_api_status_t status; + osm_ca_info_t ca_info; + uint32_t attr_array_sz = *p_num_ports; + + OSM_LOG_ENTER(p_vend->p_log); + CL_ASSERT(p_vend); + + /* anafa has one port - the user didnt supply enough storage space */ + if (attr_array_sz < 1) { + status = IB_INSUFFICIENT_MEMORY; + goto Exit; + } + + /* + * retrieve the CA info attributes + */ + status = __osm_ca_info_init(p_vend, &ca_info); + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_all_port_attr: ERR 7004: " + "Unable to initialize CA Info object (%s).\n", + ib_get_err_str(status)); + goto Exit; + } + + *p_num_ports = 1; + + p_attr_array[0] = ca_info.attr.p_port_attr[0]; /* anafa has only one port */ + /* ca_info is local to this call: release the array it owns */ + free(ca_info.attr.p_port_attr); + status = IB_SUCCESS; + +Exit: + + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +#endif diff --git a/libvendor/osm_vendor_mlx_hca_pfs.c b/libvendor/osm_vendor_mlx_hca_pfs.c new file mode 100644 index 0000000..1190e9c --- /dev/null +++ b/libvendor/osm_vendor_mlx_hca_pfs.c @@ -0,0 +1,749 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc.
All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#if defined(OSM_VENDOR_INTF_MTL) | defined(OSM_VENDOR_INTF_TS) +#undef IN +#undef OUT +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/******************************************************************************** + * + * Provides the functionality for selecting an HCA Port and Obtaining it's guid. + * This version is based on /proc/infiniband file system. 
So it is limited to + * The gen1 of openib.org stack. + * + ********************************************************************************/ + +typedef struct _osm_ca_info { + ib_net64_t guid; + size_t attr_size; + ib_ca_attr_t *p_attr; + +} osm_ca_info_t; + +/********************************************************************** + * Returns a pointer to the port attribute of the specified port + * owned by this CA. + * index is used directly as a subscript into p_attr->p_port_attr; + * no bounds check is made against num_ports. + ************************************************************************/ +static ib_port_attr_t *__osm_ca_info_get_port_attr_ptr(IN const osm_ca_info_t * + const p_ca_info, + IN const uint8_t index) +{ + return (&p_ca_info->p_attr->p_port_attr[index]); +} + +/********************************************************************** + * Obtain the number of local CAs by scanning /proc/infiniband/core + * Returns 0 when that directory cannot be opened. + **********************************************************************/ +int __hca_pfs_get_num_cas() +{ + int num_cas = 0; + DIR *dp; + struct dirent *ep; + + dp = opendir("/proc/infiniband/core"); + if (dp != NULL) { + while ((ep = readdir(dp))) { + /* CA entries are directories named ca<N>; only the "ca" prefix is checked here */ + if ((ep->d_type == DT_DIR) + && !strncmp(ep->d_name, "ca", 2)) { + num_cas++; + } + } + closedir(dp); + } + return num_cas; +} + +/* + Sample contents of a /proc/infiniband/core/caN/info file: + name: InfiniHost0 + provider: tavor + node GUID: 0002:c900:0120:3470 + ports: 2 + vendor ID: 0x2c9 + device ID: 0x5a44 + HW revision: 0xa1 + FW revision: 0x300020080 +*/ +typedef struct _pfs_ca_info { + char name[32]; + char provider[32]; + uint64_t guid; + uint8_t num_ports; + uint32_t vend_id; + uint16_t dev_id; + uint16_t rev_id; + uint64_t fw_rev; +} pfs_ca_info_t; + +/********************************************************************** + * Parse the CA Info file available in /proc/infiniband/core/caN/info + * Only the name, guid and num_ports fields of *pfs_ca_info are filled in. + **********************************************************************/ +static ib_api_status_t +__parse_ca_info_file(IN osm_vendor_t * const p_vend, + IN uint32_t idx, OUT pfs_ca_info_t * pfs_ca_info) +{ +
ib_api_status_t status = IB_ERROR; + int info_file; + char file_name[256]; + char file_buffer[3200]; + char *p_ch; + int g1, g2, g3, g4; + int num_ports; + uint32_t len; + + OSM_LOG_ENTER(p_vend->p_log); + + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__parse_ca_info_file: " "Querying CA %d.\n", idx); + + /* we use the proc file system so we must be able to open the info file .. */ + sprintf(file_name, "/proc/infiniband/core/ca%d/info", idx); + info_file = open(file_name, O_RDONLY); + /* open() reports failure with -1; 0 is a valid descriptor */ + if (info_file < 0) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_ca_info_file: ERR 5205: " + "Fail to open HCA:%d info file:(%s).\n", idx, + file_name); + goto Exit; + } + + /* read in the file, leaving room for the terminating NUL */ + len = read(info_file, file_buffer, sizeof(file_buffer) - 1); + close(info_file); + /* a failed read() returns -1, which wraps to a huge value in the unsigned len */ + if (len >= sizeof(file_buffer)) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_ca_info_file: " + "Fail to read HCA:%d info file:(%s).\n", idx, + file_name); + goto Exit; + } + file_buffer[len] = '\0'; + + /* + parse the file ... + name: InfiniHost0 + provider: tavor + node GUID: 0002:c900:0120:3470 + ports: 2 + vendor ID: 0x2c9 + device ID: 0x5a44 + HW revision: 0xa1 + FW revision: 0x300020080 + */ + if (!(p_ch = strstr(file_buffer, "name:"))) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_ca_info_file: ERR 5206: " + "Fail to obtain HCA name.
In info file:(%s).\n", + file_buffer); + goto Exit; + } + /* name[] holds 32 bytes: bound the conversion to 31 characters + NUL */ + if (sscanf(p_ch, "name: %31s", pfs_ca_info->name) != 1) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_ca_info_file: ERR 5207: " + "Fail to parse name in info file:(%s).\n", p_ch); + goto Exit; + } + + /* get the guid of the HCA */ + if (!(p_ch = strstr(file_buffer, "node GUID:"))) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_ca_info_file: ERR 5208: " + "Fail to obtain GUID in info file:(%s).\n", + file_buffer); + goto Exit; + } + if (sscanf(p_ch, "node GUID: %x:%x:%x:%x", &g1, &g2, &g3, &g4) != 4) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_ca_info_file: ERR 5209: " + "Fail to parse GUID in info file:(%s).\n", p_ch); + goto Exit; + } + /* assemble the four 16-bit groups, most significant group first */ + pfs_ca_info->guid = (uint64_t) g1 << 48 | (uint64_t) g2 << 32 + | (uint64_t) g3 << 16 | (uint64_t) g4; + + /* obtain number of ports */ + if (!(p_ch = strstr(file_buffer, "ports:"))) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_ca_info_file: ERR 5210: " + "Fail to obtain number of ports in info file:(%s).\n", + file_buffer); + goto Exit; + } + if (sscanf(p_ch, "ports: %d", &num_ports) != 1) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_ca_info_file: ERR 5211: " + "Fail to parse num ports in info file:(%s).\n", p_ch); + goto Exit; + } + pfs_ca_info->num_ports = num_ports; + + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__parse_ca_info_file: " + "CA1 = name:%s guid:0x%016llx ports:%d\n", + pfs_ca_info->name, (unsigned long long) pfs_ca_info->guid, + pfs_ca_info->num_ports); + + status = IB_SUCCESS; +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return status; +} + +/* + state: ACTIVE + LID: 0x0001 + LMC: 0x0000 + SM LID: 0x0001 + SM SL: 0x0000 + Capabilities: IsSM + IsTrapSupported + IsAutomaticMigrationSupported + IsSLMappingSupported + IsLEDInfoSupported + IsSystemImageGUIDSupported + IsVendorClassSupported + IsCapabilityMaskNoticeSupported +*/ +typedef struct _pfs_port_info { + uint8_t state; + uint16_t lid; + uint8_t lmc; + uint16_t sm_lid; + uint8_t sm_sl; +}
pfs_port_info_t; + +/********************************************************************** + * Parse the Port Info file available in /proc/infiniband/core/caN/portM/info + * Port num is 1..N + * Fills state, lid, lmc, sm_lid and sm_sl of *pfs_port_info; an + * unrecognized state string is stored as 0. + **********************************************************************/ +static ib_api_status_t +__parse_port_info_file(IN osm_vendor_t * const p_vend, + IN uint32_t hca_idx, + IN uint8_t port_num, OUT pfs_port_info_t * pfs_port_info) +{ + ib_api_status_t status = IB_ERROR; + int info_file; + char file_name[256]; + char file_buffer[3200]; + char state[12]; + char *p_ch; + int lid, sm_lid, lmc, sm_sl; + uint32_t len; + + OSM_LOG_ENTER(p_vend->p_log); + + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__parse_port_info_file: " + "Parsing Proc File System Port Info CA %d Port %d.\n", hca_idx, + port_num); + + /* we use the proc file system so we must be able to open the info file .. */ + sprintf(file_name, "/proc/infiniband/core/ca%d/port%d/info", hca_idx, + port_num); + info_file = open(file_name, O_RDONLY); + /* open() reports failure with -1; 0 is a valid descriptor */ + if (info_file < 0) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5212: " + "Fail to open HCA:%d Port:%d info file:(%s).\n", + hca_idx, port_num, file_name); + goto Exit; + } + + /* read in the file, leaving room for the terminating NUL */ + len = read(info_file, file_buffer, sizeof(file_buffer) - 1); + close(info_file); + /* a failed read() returns -1, which wraps to a huge value in the unsigned len */ + if (len >= sizeof(file_buffer)) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: " + "Fail to read HCA:%d Port:%d info file:(%s).\n", + hca_idx, port_num, file_name); + goto Exit; + } + file_buffer[len] = '\0'; + + /* + parse the file ... + state: ACTIVE + LID: 0x0001 + LMC: 0x0000 + SM LID: 0x0001 + SM SL: 0x0000 + ... + */ + if (!(p_ch = strstr(file_buffer, "state:"))) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5213: " + "Fail to obtain port state.
In info file:(%s).\n", + file_buffer); + goto Exit; + } + /* state[] holds 12 bytes: bound the conversion to 11 characters + NUL */ + if (sscanf(p_ch, "state: %11s", state) != 1) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5214: " + "Fail to parse state from info file:(%s).\n", p_ch); + goto Exit; + } + + if (!strcmp(state, "ACTIVE")) + pfs_port_info->state = IB_LINK_ACTIVE; + else if (!strcmp(state, "DOWN")) + pfs_port_info->state = IB_LINK_DOWN; + else if (!strcmp(state, "INIT")) + pfs_port_info->state = IB_LINK_INIT; + else if (!strcmp(state, "ARMED")) + pfs_port_info->state = IB_LINK_ARMED; + else + pfs_port_info->state = 0; + + /* get lid */ + if (!(p_ch = strstr(file_buffer, "LID:"))) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5215: " + "Fail to obtain port lid. In info file:(%s).\n", + file_buffer); + goto Exit; + } + if (sscanf(p_ch, "LID: %x", &lid) != 1) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5216: " + "Fail to parse lid from info file:(%s).\n", p_ch); + goto Exit; + } + pfs_port_info->lid = lid; + /* get LMC */ + if (!(p_ch = strstr(file_buffer, "LMC:"))) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5217: " + "Fail to obtain port LMC. In info file:(%s).\n", + file_buffer); + goto Exit; + } + if (sscanf(p_ch, "LMC: %x", &lmc) != 1) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5218: " + "Fail to parse LMC from info file:(%s).\n", p_ch); + goto Exit; + } + pfs_port_info->lmc = lmc; + + /* get SM LID */ + if (!(p_ch = strstr(file_buffer, "SM LID:"))) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5219: " + "Fail to obtain port SM LID.
In info file:(%s).\n", + file_buffer); + goto Exit; + } + if (sscanf(p_ch, "SM LID: %x", &sm_lid) != 1) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5220: " + "Fail to parse SM LID from info file:(%s).\n", p_ch); + goto Exit; + } + pfs_port_info->sm_lid = sm_lid; + + /* get SM SL */ + if (!(p_ch = strstr(file_buffer, "SM SL:"))) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5221: " + "Fail to obtain port SM SL. In info file:(%s).\n", + file_buffer); + goto Exit; + } + if (sscanf(p_ch, "SM SL: %x", &sm_sl) != 1) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5222: " + "Fail to parse SM SL from info file:(%s).\n", p_ch); + goto Exit; + } + pfs_port_info->sm_sl = sm_sl; + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__parse_port_info_file: " + "Obtained Port:%d = state:%d, lid:0x%04X, lmc:%d, sm_lid:0x%04X, sm_sl:%d\n", + port_num, pfs_port_info->state, pfs_port_info->lid, + pfs_port_info->lmc, pfs_port_info->sm_lid, + pfs_port_info->sm_sl); + + status = IB_SUCCESS; +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return status; +} + +/********************************************************************** + * Parse the port gid_table file to obtain the port guid. + * The guid is built from the last four 16-bit groups of GID entry 0. + * File format is: + * [ 0] fe80:0000:0000:0000:0002:c900:0120:3472 + **********************************************************************/ +static ib_api_status_t +__get_port_guid_from_port_gid_tbl(IN osm_vendor_t * const p_vend, + IN uint32_t hca_idx, + IN uint8_t port_num, OUT uint64_t * port_guid) +{ + ib_api_status_t status = IB_ERROR; + int info_file; + char file_name[256]; + char file_buffer[3200]; + char *p_ch; + int g[8]; + uint32_t len; + + OSM_LOG_ENTER(p_vend->p_log); + + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__get_port_guid_from_port_gid_tbl: " + "Parsing Proc File System Port Guid Table CA %d Port %d.\n", + hca_idx, port_num); + + /* we use the proc file system so we must be able to open the info file ..
*/ + sprintf(file_name, "/proc/infiniband/core/ca%d/port%d/gid_table", + hca_idx, port_num); + info_file = open(file_name, O_RDONLY); + if (!info_file) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__get_port_guid_from_port_gid_tbl: ERR 5223: " + "Fail to open HCA:%d Port:%d gid_table file:(%s).\n", + hca_idx, port_num, file_name); + goto Exit; + } + + /* read in the file */ + len = read(info_file, file_buffer, 3200); + close(info_file); + file_buffer[len] = '\0'; + + /* + parse the file ... + [ 0] fe80:0000:0000:0000:0002:c900:0120:3472 + ... + */ + if (!(p_ch = strstr(file_buffer, "[ 0]"))) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__get_port_guid_from_port_gid_tbl: ERR 5224: " + "Fail to obtain first gid index. In gid_table file:(%s).\n", + file_buffer); + goto Exit; + } + if (sscanf(p_ch + 6, "%x:%x:%x:%x:%x:%x:%x:%x", + &g[7], &g[6], &g[5], &g[4], &g[3], &g[2], &g[1], &g[0]) != 8) + { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__get_port_guid_from_port_gid_tbl: ERR 5225: " + "Fail to parse gid from gid_table file:(%s).\n", p_ch); + goto Exit; + } + + *port_guid = + (uint64_t) g[3] << 48 | (uint64_t) g[2] << 32 | (uint64_t) g[1] << + 16 | g[0]; + status = IB_SUCCESS; +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return status; +} + +/********************************************************************** + * Initialize an Info Struct for the Given HCA by its index 1..N + **********************************************************************/ +static ib_api_status_t +__osm_ca_info_init(IN osm_vendor_t * const p_vend, + IN uint32_t const idx, OUT osm_ca_info_t * const p_ca_info) +{ + ib_api_status_t status = IB_ERROR; + uint8_t port_num; + uint64_t port_guid; + + pfs_ca_info_t pfs_ca_info; + + OSM_LOG_ENTER(p_vend->p_log); + + /* parse the CA info file */ + if (__parse_ca_info_file(p_vend, idx, &pfs_ca_info) != IB_SUCCESS) + goto Exit; + + p_ca_info->guid = cl_hton64(pfs_ca_info.guid); + + /* set size of attributes and allocate them */ + p_ca_info->attr_size = 1; + 
p_ca_info->p_attr = (ib_ca_attr_t *) malloc(sizeof(ib_ca_attr_t)); + + p_ca_info->p_attr->ca_guid = p_ca_info->guid; + p_ca_info->p_attr->num_ports = pfs_ca_info.num_ports; + + /* now obtain the attributes of the ports */ + p_ca_info->p_attr->p_port_attr = + (ib_port_attr_t *) malloc(pfs_ca_info.num_ports * + sizeof(ib_port_attr_t)); + + /* get all the ports info */ + for (port_num = 1; port_num <= pfs_ca_info.num_ports; port_num++) { + pfs_port_info_t pfs_port_info; + /* query the port attributes */ + if (__parse_port_info_file + (p_vend, idx, port_num, &pfs_port_info)) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: ERR 5226: " + "Fail to get HCA:%d Port:%d Attributes.\n", idx, + port_num); + goto Exit; + } + + /* HACK: the lids should have been converted to network but the rest of the code + is wrong and provdes them as is (host order) - so we stick with it. */ + p_ca_info->p_attr->p_port_attr[port_num - 1].lid = + pfs_port_info.lid; + p_ca_info->p_attr->p_port_attr[port_num - 1].link_state = + pfs_port_info.state; + p_ca_info->p_attr->p_port_attr[port_num - 1].sm_lid = + pfs_port_info.sm_lid; + + /* get the port guid */ + if (__get_port_guid_from_port_gid_tbl + (p_vend, idx, port_num, &port_guid)) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: ERR 5227: " + "Fail to get HCA:%d Port:%d Guid.\n", idx, + port_num); + goto Exit; + } + p_ca_info->p_attr->p_port_attr[port_num - 1].port_guid = + cl_hton64(port_guid); + } + + status = IB_SUCCESS; +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +void +osm_ca_info_destroy(IN osm_vendor_t * const p_vend, + IN osm_ca_info_t * const p_ca_info, IN uint8_t num_ca) +{ + osm_ca_info_t *p_ca; + uint8_t i; + + OSM_LOG_ENTER(p_vend->p_log); + + for (i = 0; i < num_ca; i++) { + p_ca = &p_ca_info[i]; + + if (NULL != p_ca->p_attr) { + if (0 != p_ca->p_attr->num_ports) { + free(p_ca->p_attr->p_port_attr); + } + + free(p_ca->p_attr); + } + } + + free(p_ca_info); + + 
OSM_LOG_EXIT(p_vend->p_log); +} + +/********************************************************************** + * Fill in the array of port_attr with all available ports on ALL the + * avilable CAs on this machine. + **********************************************************************/ +ib_api_status_t +osm_vendor_get_all_port_attr(IN osm_vendor_t * const p_vend, + IN ib_port_attr_t * const p_attr_array, + IN uint32_t * const p_num_ports) +{ + ib_api_status_t status = IB_SUCCESS; + + uint32_t caIdx; + uint32_t ca_count = 0; + uint32_t port_count = 0; + uint8_t port_num; + uint32_t total_ports = 0; + osm_ca_info_t *p_ca_infos = NULL; + uint32_t attr_array_sz = *p_num_ports; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vend); + + /* determine the number of CA's */ + ca_count = __hca_pfs_get_num_cas(); + if (!ca_count) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_all_port_attr: ERR 5228: " + "Fail to get Any CA Ids.\n"); + goto Exit; + } + + /* Allocate an array big enough to hold the ca info objects */ + p_ca_infos = malloc(ca_count * sizeof(osm_ca_info_t)); + if (p_ca_infos == NULL) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_all_port_attr: ERR 5229: " + "Unable to allocate CA information array.\n"); + goto Exit; + } + + memset(p_ca_infos, 0, ca_count * sizeof(osm_ca_info_t)); + + /* + * For each CA, retrieve the CA info attributes + */ + for (caIdx = 1; caIdx <= ca_count; caIdx++) { + status = + __osm_ca_info_init(p_vend, caIdx, &p_ca_infos[caIdx - 1]); + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_all_port_attr: ERR 5230: " + "Unable to initialize CA Info object (%s).\n", + ib_get_err_str(status)); + goto Exit; + } + total_ports += p_ca_infos[caIdx - 1].p_attr->num_ports; + } + + *p_num_ports = total_ports; + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_vendor_get_all_port_attr: total ports:%u \n", total_ports); + + /* + * If the user supplied enough storage, return the port guids, + * 
otherwise, return the appropriate error. + */ + if (attr_array_sz >= total_ports) { + for (caIdx = 1; caIdx <= ca_count; caIdx++) { + uint32_t num_ports; + + num_ports = p_ca_infos[caIdx - 1].p_attr->num_ports; + + for (port_num = 0; port_num < num_ports; port_num++) { + p_attr_array[port_count] = + *__osm_ca_info_get_port_attr_ptr(&p_ca_infos + [caIdx - + 1], + port_num); + port_count++; + } + } + } else { + status = IB_INSUFFICIENT_MEMORY; + goto Exit; + } + + status = IB_SUCCESS; + +Exit: + if (p_ca_infos) { + osm_ca_info_destroy(p_vend, p_ca_infos, ca_count); + } + + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +/********************************************************************** + * Given the vendor obj and a port guid + * return the ca id and port number that have that guid + **********************************************************************/ + +ib_api_status_t +osm_vendor_get_guid_ca_and_port(IN osm_vendor_t * const p_vend, + IN ib_net64_t const guid, + OUT uint32_t * p_hca_hndl, + OUT char *p_hca_id, + OUT uint8_t * p_hca_idx, + OUT uint32_t * p_port_num) +{ + uint32_t caIdx; + uint32_t ca_count = 0; + uint8_t port_num; + ib_api_status_t status = IB_ERROR; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vend); + + /* determine the number of CA's */ + ca_count = __hca_pfs_get_num_cas(); + if (!ca_count) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_guid_ca_and_port: ERR 5231: " + "Fail to get Any CA Ids.\n"); + goto Exit; + } + + /* + * For each CA, retrieve the CA info attributes + */ + for (caIdx = 1; caIdx <= ca_count; caIdx++) { + pfs_ca_info_t pfs_ca_info; + if (__parse_ca_info_file(p_vend, caIdx, &pfs_ca_info) == + IB_SUCCESS) { + /* get all the ports info */ + for (port_num = 1; port_num <= pfs_ca_info.num_ports; + port_num++) { + uint64_t port_guid; + if (!__get_port_guid_from_port_gid_tbl + (p_vend, caIdx, port_num, &port_guid)) { + if (cl_hton64(port_guid) == guid) { + osm_log(p_vend->p_log, + OSM_LOG_DEBUG, + 
"osm_vendor_get_guid_ca_and_port: " + "Found Matching guid on HCA:%d Port:%d.\n", + caIdx, port_num); + strcpy(p_hca_id, + pfs_ca_info.name); + *p_port_num = port_num; + *p_hca_idx = caIdx - 1; + *p_hca_hndl = 0; + status = IB_SUCCESS; + goto Exit; + } + } + } + } + } + + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_guid_ca_and_port: ERR 5232: " + "Fail to find HCA and Port for Port Guid 0x%" PRIx64 "\n", + cl_ntoh64(guid)); + status = IB_INVALID_GUID; + +Exit: + + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +#endif diff --git a/libvendor/osm_vendor_mlx_hca_sim.c b/libvendor/osm_vendor_mlx_hca_sim.c new file mode 100644 index 0000000..a3055a3 --- /dev/null +++ b/libvendor/osm_vendor_mlx_hca_sim.c @@ -0,0 +1,859 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#if defined(OSM_VENDOR_INTF_SIM) +#undef IN +#undef OUT + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + 
/******************************************************************************
*
* Provides the functionality for selecting an HCA Port and Obtaining its guid.
* This version is based on $IBMGTSIM_DIR/$IBMGTSIM_NODE file system.
* This is a mimic of the OpenIB gen1 file system
*
******************************************************************************/

/**********************************************************************
 * Return the directory that holds the simulated node's files:
 * "$IBMGTSIM_DIR/$IBMGTSIM_NODE".
 *
 * A missing IBMGTSIM_DIR falls back to /tmp/ibmgtsim and a missing
 * IBMGTSIM_NODE falls back to H-1; a warning is printed for each.
 * The environment is consulted on the first call only; the result is
 * kept in a static buffer, so the returned pointer stays valid and must
 * not be freed. The first call is not safe against concurrent callers.
 **********************************************************************/
char *__get_simulator_dir(void)
{
	static char *ibmgtSimDir = NULL;
	static char *defaultIbmgtSimDir = "/tmp/ibmgtsim";
	static char *ibmgtSimNode = NULL;
	static char dirName[1024];
	int len;

	/* we use the first pointer to know if we were here */
	if (ibmgtSimDir != NULL)
		return dirName;

	/* obtain the simulator directory */
	ibmgtSimDir = getenv("IBMGTSIM_DIR");
	if (ibmgtSimDir == NULL) {
		printf
		    ("-W- Environment variable: IBMGTSIM_DIR does not exist.\n");
		printf
		    (" Please create one used by the simulator.\n");
		printf(" Using /tmp/ibmgtsim as default.\n");
		ibmgtSimDir = defaultIbmgtSimDir;
	}

	/* obtain the node name we simulate */
	ibmgtSimNode = getenv("IBMGTSIM_NODE");
	if (ibmgtSimNode == NULL) {
		printf
		    ("-W- Environment variable: IBMGTSIM_NODE does not exist.\n");
		printf
		    (" This variable should be the name of the node you wish to simulate.\n");
		printf(" Using H-1 as default.\n");
		ibmgtSimNode = "H-1";
	}

	/*
	 * Both parts come from the environment and can be arbitrarily long,
	 * so the bounded formatter replaces the former sprintf().
	 */
	len = snprintf(dirName, sizeof(dirName), "%s/%s", ibmgtSimDir,
		       ibmgtSimNode);
	if (len < 0)
		dirName[0] = '\0';
	else if ((size_t) len >= sizeof(dirName))
		printf
		    ("-W- Simulator directory name is too long, truncated to: %s\n",
		     dirName);

	return dirName;
}

/*
 * Per-CA record handed to the vendor layer: the CA guid (network byte
 * order), the number of attribute records and the attribute block itself.
 */
typedef struct _osm_ca_info {
	ib_net64_t guid;
	size_t attr_size;
	ib_ca_attr_t *p_attr;

} osm_ca_info_t;

/**********************************************************************
 * Returns a pointer to the port attribute of the specified port
 * owned by this CA. The index is zero based and is not range checked.
 ************************************************************************/
static ib_port_attr_t *__osm_ca_info_get_port_attr_ptr(IN const osm_ca_info_t *
							const p_ca_info,
							IN const uint8_t index)
{
	return (&p_ca_info->p_attr->p_port_attr[index]);
}

/**********************************************************************
 * Tell whether a directory entry name has the CA format ca[1-9][0-9]*.
 * Returns 1 for a match and 0 otherwise.
 **********************************************************************/
static int __hca_sim_is_ca_name(IN const char *name)
{
	const char *p_digit;

	if (strncmp(name, "ca", 2))
		return 0;

	/* the number must be present and must not start with a zero */
	p_digit = name + 2;
	if (*p_digit < '1' || *p_digit > '9')
		return 0;

	for (p_digit++; *p_digit != '\0'; p_digit++)
		if (*p_digit < '0' || *p_digit > '9')
			return 0;

	return 1;
}

/**********************************************************************
 * Obtain the number of local CAs by scanning the simulator directory
 * (the simulator's stand-in for /proc/infiniband/core).
 * Returns 0 when the directory cannot be opened or holds no CA entry.
 **********************************************************************/
int __hca_sim_get_num_cas(void)
{
	int num_cas = 0;
	DIR *dp;
	struct dirent *ep;

	dp = opendir(__get_simulator_dir());
	if (dp == NULL) {
		printf("__hca_sim_get_num_cas: ERROR: fail to open dir %s\n",
		       __get_simulator_dir());
		return num_cas;
	}

	/*
	 * CAs are directories with the format ca[1-9][0-9]*. The entry
	 * type is deliberately not checked (d_type is not filled in by
	 * every file system); the name check keeps entries such as
	 * "cache" from being counted as CAs.
	 */
	while ((ep = readdir(dp)) != NULL) {
		if (__hca_sim_is_ca_name(ep->d_name))
			num_cas++;
	}
	closedir(dp);

	return num_cas;
}

/*
 name: InfiniHost0
 provider: tavor
 node GUID: 0002:c900:0120:3470
 ports: 2
 vendor ID: 0x2c9
 device ID: 0x5a44
 HW revision: 0xa1
 FW revision: 0x300020080
*/
/* In-memory form of the CA info file shown above. */
typedef struct _sim_ca_info {
	char name[32];
	char provider[32];
	uint64_t guid;
	uint8_t num_ports;
	uint32_t vend_id;
	uint16_t dev_id;
	uint16_t rev_id;
	uint64_t fw_rev;
} sim_ca_info_t;

/**********************************************************************
 * Parse the CA Info file available in ibmgtSimDir/caN/info
**********************************************************************/ +static ib_api_status_t +__parse_ca_info_file(IN osm_vendor_t * const p_vend, + IN uint32_t idx, OUT sim_ca_info_t * sim_ca_info) +{ + ib_api_status_t status = IB_ERROR; + int info_file; + char file_name[256]; + char file_buffer[3200]; + char *p_ch; + int g1, g2, g3, g4; + int num_ports; + uint32_t len; + + OSM_LOG_ENTER(p_vend->p_log); + + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__parse_ca_info_file: " "Querying CA %d.\n", idx); + + /* we use the proc file system so we must be able to open the info file .. */ + sprintf(file_name, "%s/ca%d/info", __get_simulator_dir(), idx); + info_file = open(file_name, O_RDONLY); + if (!info_file) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_ca_info_file: ERR 5105: " + "Fail to open HCA:%d info file:(%s).\n", idx, + file_name); + goto Exit; + } + + /* read in the file */ + len = read(info_file, file_buffer, 3200); + close(info_file); + file_buffer[len] = '\0'; + + /* + parse the file ... + name: InfiniHost0 + provider: tavor + node GUID: 0002:c900:0120:3470 + ports: 2 + vendor ID: 0x2c9 + device ID: 0x5a44 + HW revision: 0xa1 + FW revision: 0x300020080 + */ + if (!(p_ch = strstr(file_buffer, "name:"))) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_ca_info_file: ERR 5106: " + "Fail to obtain HCA name. 
In info file:(%s).\n", + file_buffer); + goto Exit; + } + if (sscanf(p_ch, "name: %s", sim_ca_info->name) != 1) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_ca_info_file: ERR 5107: " + "Fail to parse name in info file:(%s).\n", p_ch); + goto Exit; + } + + /* get the guid of the HCA */ + if (!(p_ch = strstr(file_buffer, "node GUID:"))) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_ca_info_file: ERR 5108: " + "Fail to obtain GUID in info file:(%s).\n", + file_buffer); + goto Exit; + } + if (sscanf(p_ch, "node GUID: %x:%x:%x:%x", &g1, &g2, &g3, &g4) != 4) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_ca_info_file: ERR 5109: " + "Fail to parse GUID in info file:(%s).\n", p_ch); + goto Exit; + } + sim_ca_info->guid = (uint64_t) g1 << 48 | (uint64_t) g1 << 32 + | (uint64_t) g1 << 16 | (uint64_t) g3; + + /* obtain number of ports */ + if (!(p_ch = strstr(file_buffer, "ports:"))) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_ca_info_file: ERR 5110: " + "Fail to obtain number of ports in info file:(%s).\n", + file_buffer); + goto Exit; + } + if (sscanf(p_ch, "ports: %d", &num_ports) != 1) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_ca_info_file: ERR 5111: " + "Fail to parse num ports in info file:(%s).\n", p_ch); + goto Exit; + } + sim_ca_info->num_ports = num_ports; + + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__parse_ca_info_file: " + "CA1 = name:%s guid:0x%" PRIx64 " ports:%d\n", + sim_ca_info->name, sim_ca_info->guid, sim_ca_info->num_ports); + + status = IB_SUCCESS; +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return status; +} + +/* + state: ACTIVE + LID: 0x0001 + LMC: 0x0000 + SM LID: 0x0001 + SM SL: 0x0000 + Capabilities: IsSM + IsTrapSupported + IsAutomaticMigrationSupported + IsSLMappingSupported + IsLEDInfoSupported + IsSystemImageGUIDSupported + IsVendorClassSupported + IsCapabilityMaskNoticeSupported +*/ +typedef struct _sim_port_info { + uint8_t state; + uint16_t lid; + uint8_t lmc; + uint16_t sm_lid; + uint8_t sm_sl; +} 
sim_port_info_t; + +/********************************************************************** + * Parse the Port Info file available in ibmgtSimDir/caN/portM/info + * Port num is 1..N + **********************************************************************/ +static ib_api_status_t +__parse_port_info_file(IN osm_vendor_t * const p_vend, + IN uint32_t hca_idx, + IN uint8_t port_num, OUT sim_port_info_t * sim_port_info) +{ + ib_api_status_t status = IB_ERROR; + int info_file; + char file_name[256]; + char file_buffer[3200]; + char state[12]; + char *p_ch; + int lid, sm_lid, lmc, sm_sl; + uint32_t len; + + OSM_LOG_ENTER(p_vend->p_log); + + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__parse_port_info_file: " + "Parsing Proc File System Port Info CA %d Port %d.\n", hca_idx, + port_num); + + /* we use the proc file system so we must be able to open the info file .. */ + sprintf(file_name, "%s/ca%d/port%d/info", __get_simulator_dir(), + hca_idx, port_num); + info_file = open(file_name, O_RDONLY); + if (!info_file) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5112: " + "Fail to open HCA:%d Port:%d info file:(%s).\n", + hca_idx, port_num, file_name); + goto Exit; + } + + /* read in the file */ + len = read(info_file, file_buffer, 3200); + close(info_file); + file_buffer[len] = '\0'; + + /* + parse the file ... + state: ACTIVE + LID: 0x0001 + LMC: 0x0000 + SM LID: 0x0001 + SM SL: 0x0000 + ... + */ + if (!(p_ch = strstr(file_buffer, "state:"))) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5113: " + "Fail to obtain port state. 
In info file:(%s).\n", + file_buffer); + goto Exit; + } + if (sscanf(p_ch, "state: %s", state) != 1) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5114: " + "Fail to parse state from info file:(%s).\n", p_ch); + goto Exit; + } + + if (!strcmp(state, "ACTIVE")) + sim_port_info->state = IB_LINK_ACTIVE; + else if (!strcmp(state, "DOWN")) + sim_port_info->state = IB_LINK_DOWN; + else if (!strcmp(state, "INIT")) + sim_port_info->state = IB_LINK_INIT; + else if (!strcmp(state, "ARMED")) + sim_port_info->state = IB_LINK_ARMED; + else + sim_port_info->state = 0; + + /* get lid */ + if (!(p_ch = strstr(file_buffer, "LID:"))) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5115: " + "Fail to obtain port lid. In info file:(%s).\n", + file_buffer); + goto Exit; + } + if (sscanf(p_ch, "LID: %x", &lid) != 1) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5116: " + "Fail to parse lid from info file:(%s).\n", p_ch); + goto Exit; + } + sim_port_info->lid = lid; + /* get LMC */ + if (!(p_ch = strstr(file_buffer, "LMC:"))) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5117: " + "Fail to obtain port LMC. In info file:(%s).\n", + file_buffer); + goto Exit; + } + if (sscanf(p_ch, "LMC: %x", &lmc) != 1) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5118: " + "Fail to parse LMC from info file:(%s).\n", p_ch); + goto Exit; + } + sim_port_info->lmc = lmc; + + /* get SM LID */ + if (!(p_ch = strstr(file_buffer, "SM LID:"))) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5119: " + "Fail to obtain port SM LID. 
In info file:(%s).\n", + file_buffer); + goto Exit; + } + if (sscanf(p_ch, "SM LID: %x", &sm_lid) != 1) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5120: " + "Fail to parse SM LID from info file:(%s).\n", p_ch); + goto Exit; + } + sim_port_info->sm_lid = sm_lid; + + /* get SM LID */ + if (!(p_ch = strstr(file_buffer, "SM SL:"))) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5121: " + "Fail to obtain port SM SL. In info file:(%s).\n", + file_buffer); + goto Exit; + } + if (sscanf(p_ch, "SM SL: %x", &sm_sl) != 1) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__parse_port_info_file: ERR 5122: " + "Fail to parse SM SL from info file:(%s).\n", p_ch); + goto Exit; + } + sim_port_info->sm_sl = sm_sl; + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__parse_port_info_file: " + "Obtained Port:%d = state:%d, lid:0x%04X, lmc:%d, sm_lid:0x%04X, sm_sl:%d\n", + port_num, sim_port_info->state, sim_port_info->lid, + sim_port_info->lmc, sim_port_info->sm_lid, + sim_port_info->sm_sl); + + status = IB_SUCCESS; +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return status; +} + +/********************************************************************** + * Parse the port guid_tbl file to obtain the port guid. + * File format is: + * [ 0] fe80:0000:0000:0000:0002:c900:0120:3472 + **********************************************************************/ +static ib_api_status_t +__get_port_guid_from_port_gid_tbl(IN osm_vendor_t * const p_vend, + IN uint32_t hca_idx, + IN uint8_t port_num, OUT uint64_t * port_guid) +{ + ib_api_status_t status = IB_ERROR; + int info_file; + char file_name[256]; + char file_buffer[3200]; + char *p_ch; + int g[8]; + uint32_t len; + + OSM_LOG_ENTER(p_vend->p_log); + + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__get_port_guid_from_port_gid_tbl: " + "Parsing Proc File System Port Guid Table CA %d Port %d.\n", + hca_idx, port_num); + + /* we use the proc file system so we must be able to open the info file .. 
*/ + sprintf(file_name, "%s/ca%d/port%d/gid_table", + __get_simulator_dir(), hca_idx, port_num); + info_file = open(file_name, O_RDONLY); + if (!info_file) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__get_port_guid_from_port_gid_tbl: ERR 5123: " + "Fail to open HCA:%d Port:%d gid_table file:(%s).\n", + hca_idx, port_num, file_name); + goto Exit; + } + + /* read in the file */ + len = read(info_file, file_buffer, 3200); + close(info_file); + file_buffer[len] = '\0'; + + /* + parse the file ... + [ 0] fe80:0000:0000:0000:0002:c900:0120:3472 + ... + */ + if (!(p_ch = strstr(file_buffer, "[ 0]"))) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__get_port_guid_from_port_gid_tbl: ERR 5124: " + "Fail to obtain first gid index. In gid_table file:(%s).\n", + file_buffer); + goto Exit; + } + if (sscanf(p_ch + 6, "%x:%x:%x:%x:%x:%x:%x:%x", + &g[7], &g[6], &g[5], &g[4], &g[3], &g[2], &g[1], &g[0]) != 8) + { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__get_port_guid_from_port_gid_tbl: ERR 5125: " + "Fail to parse gid from gid_table file:(%s).\n", p_ch); + goto Exit; + } + + *port_guid = + (uint64_t) g[3] << 48 | (uint64_t) g[2] << 32 | (uint64_t) g[1] << + 16 | g[0]; + status = IB_SUCCESS; +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return status; +} + +/********************************************************************** + * Initialize an Info Struct for the Given HCA by its index 1..N + **********************************************************************/ +static ib_api_status_t +__osm_ca_info_init(IN osm_vendor_t * const p_vend, + IN uint32_t const idx, OUT osm_ca_info_t * const p_ca_info) +{ + ib_api_status_t status = IB_ERROR; + uint8_t port_num; + uint64_t port_guid; + + sim_ca_info_t sim_ca_info; + + OSM_LOG_ENTER(p_vend->p_log); + + /* parse the CA info file */ + if (__parse_ca_info_file(p_vend, idx, &sim_ca_info) != IB_SUCCESS) + goto Exit; + + p_ca_info->guid = cl_hton64(sim_ca_info.guid); + + /* set size of attributes and allocate them */ + p_ca_info->attr_size = 1; 
+ p_ca_info->p_attr = (ib_ca_attr_t *) malloc(sizeof(ib_ca_attr_t)); + + p_ca_info->p_attr->ca_guid = p_ca_info->guid; + p_ca_info->p_attr->num_ports = sim_ca_info.num_ports; + + /* now obtain the attributes of the ports */ + p_ca_info->p_attr->p_port_attr = + (ib_port_attr_t *) malloc(sim_ca_info.num_ports * + sizeof(ib_port_attr_t)); + + /* get all the ports info */ + for (port_num = 1; port_num <= sim_ca_info.num_ports; port_num++) { + sim_port_info_t sim_port_info; + /* query the port attributes */ + if (__parse_port_info_file + (p_vend, idx, port_num, &sim_port_info)) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: ERR 5126: " + "Fail to get HCA:%d Port:%d Attributes.\n", idx, + port_num); + goto Exit; + } + + /* HACK: the lids should have been converted to network but the rest of the code + is wrong and provdes them as is (host order) - so we stick with it. */ + p_ca_info->p_attr->p_port_attr[port_num - 1].lid = + sim_port_info.lid; + p_ca_info->p_attr->p_port_attr[port_num - 1].link_state = + sim_port_info.state; + p_ca_info->p_attr->p_port_attr[port_num - 1].sm_lid = + sim_port_info.sm_lid; + + /* get the port guid */ + if (__get_port_guid_from_port_gid_tbl + (p_vend, idx, port_num, &port_guid)) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: ERR 5127: " + "Fail to get HCA:%d Port:%d Guid.\n", idx, + port_num); + goto Exit; + } + p_ca_info->p_attr->p_port_attr[port_num - 1].port_guid = + cl_hton64(port_guid); + } + + status = IB_SUCCESS; +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +void +osm_ca_info_destroy(IN osm_vendor_t * const p_vend, + IN osm_ca_info_t * const p_ca_info, IN uint8_t num_ca) +{ + osm_ca_info_t *p_ca; + uint8_t i; + + OSM_LOG_ENTER(p_vend->p_log); + + for (i = 0; i < num_ca; i++) { + p_ca = &p_ca_info[i]; + + if (NULL != p_ca->p_attr) { + if (0 != p_ca->p_attr->num_ports) { + free(p_ca->p_attr->p_port_attr); + } + + free(p_ca->p_attr); + } + } + + free(p_ca_info); + + 
OSM_LOG_EXIT(p_vend->p_log); +} + +/********************************************************************** + * Fill in the array of port_attr with all available ports on ALL the + * avilable CAs on this machine. + **********************************************************************/ +ib_api_status_t +osm_vendor_get_all_port_attr(IN osm_vendor_t * const p_vend, + IN ib_port_attr_t * const p_attr_array, + IN uint32_t * const p_num_ports) +{ + ib_api_status_t status = IB_SUCCESS; + + uint32_t caIdx; + uint32_t ca_count = 0; + uint32_t port_count = 0; + uint8_t port_num; + uint32_t total_ports = 0; + osm_ca_info_t *p_ca_infos = NULL; + uint32_t attr_array_sz = *p_num_ports; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vend); + + /* determine the number of CA's */ + ca_count = __hca_sim_get_num_cas(); + if (!ca_count) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_all_port_attr: ERR 5128: " + "Fail to get Any CA Ids.\n"); + goto Exit; + } + + /* Allocate an array big enough to hold the ca info objects */ + p_ca_infos = malloc(ca_count * sizeof(osm_ca_info_t)); + if (p_ca_infos == NULL) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_all_port_attr: ERR 5129: " + "Unable to allocate CA information array.\n"); + goto Exit; + } + + memset(p_ca_infos, 0, ca_count * sizeof(osm_ca_info_t)); + + /* + * For each CA, retrieve the CA info attributes + */ + for (caIdx = 1; caIdx <= ca_count; caIdx++) { + status = + __osm_ca_info_init(p_vend, caIdx, &p_ca_infos[caIdx - 1]); + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_all_port_attr: ERR 5130: " + "Unable to initialize CA Info object (%s).\n", + ib_get_err_str(status)); + goto Exit; + } + total_ports += p_ca_infos[caIdx - 1].p_attr->num_ports; + } + + *p_num_ports = total_ports; + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_vendor_get_all_port_attr: total ports:%u \n", total_ports); + + /* + * If the user supplied enough storage, return the port guids, + * 
otherwise, return the appropriate error. + */ + if (attr_array_sz >= total_ports) { + for (caIdx = 1; caIdx <= ca_count; caIdx++) { + uint32_t num_ports; + + num_ports = p_ca_infos[caIdx - 1].p_attr->num_ports; + + for (port_num = 0; port_num < num_ports; port_num++) { + p_attr_array[port_count] = + *__osm_ca_info_get_port_attr_ptr(&p_ca_infos + [caIdx - + 1], + port_num); + port_count++; + } + } + } else { + status = IB_INSUFFICIENT_MEMORY; + goto Exit; + } + + status = IB_SUCCESS; + +Exit: + if (p_ca_infos) { + osm_ca_info_destroy(p_vend, p_ca_infos, ca_count); + } + + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +/********************************************************************** + * Given the vendor obj and a port guid + * return the ca id and port number that have that guid + **********************************************************************/ + +ib_api_status_t +osm_vendor_get_guid_ca_and_port(IN osm_vendor_t * const p_vend, + IN ib_net64_t const guid, + OUT uint32_t * p_hca_hndl, + OUT char *p_hca_id, + OUT uint8_t * p_hca_idx, + OUT uint32_t * p_port_num) +{ + uint32_t caIdx; + uint32_t ca_count = 0; + uint8_t port_num; + ib_api_status_t status = IB_ERROR; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vend); + + /* determine the number of CA's */ + ca_count = __hca_sim_get_num_cas(); + if (!ca_count) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_guid_ca_and_port: ERR 5131: " + "Fail to get Any CA Ids.\n"); + goto Exit; + } + + /* + * For each CA, retrieve the CA info attributes + */ + for (caIdx = 1; caIdx <= ca_count; caIdx++) { + sim_ca_info_t sim_ca_info; + if (__parse_ca_info_file(p_vend, caIdx, &sim_ca_info) == + IB_SUCCESS) { + /* get all the ports info */ + for (port_num = 1; port_num <= sim_ca_info.num_ports; + port_num++) { + uint64_t port_guid; + if (!__get_port_guid_from_port_gid_tbl + (p_vend, caIdx, port_num, &port_guid)) { + if (cl_hton64(port_guid) == guid) { + osm_log(p_vend->p_log, + OSM_LOG_DEBUG, + 
"osm_vendor_get_guid_ca_and_port: " + "Found Matching guid on HCA:%d Port:%d.\n", + caIdx, port_num); + strcpy(p_hca_id, + sim_ca_info.name); + *p_port_num = port_num; + *p_hca_idx = caIdx - 1; + *p_hca_hndl = 0; + status = IB_SUCCESS; + goto Exit; + } + } + } + } + } + + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_guid_ca_and_port: ERR 5132: " + "Fail to find HCA and Port for Port Guid 0x%" PRIx64 "\n", + cl_ntoh64(guid)); + status = IB_INVALID_GUID; + +Exit: + + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +/********************************************************************** + * Given the vendor obj HCA ID and Port Num + * update the given port guid if found. Return 0 on success. + **********************************************************************/ + +ib_api_status_t +osm_vendor_get_guid_by_ca_and_port(IN osm_vendor_t * const p_vend, + IN char *hca_id, + IN uint32_t port_num, + OUT uint64_t * p_port_guid) +{ + uint32_t caIdx; + uint32_t ca_count = 0; + ib_api_status_t status = IB_ERROR; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vend); + + /* determine the number of CA's */ + ca_count = __hca_sim_get_num_cas(); + if (!ca_count) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_guid_by_ca_and_port: ERR 5133: " + "Fail to get Any CA Ids.\n"); + goto Exit; + } + + /* + * For each CA, retrieve the CA info attributes + */ + for (caIdx = 1; caIdx <= ca_count; caIdx++) { + sim_ca_info_t sim_ca_info; + if (__parse_ca_info_file(p_vend, caIdx, &sim_ca_info) == + IB_SUCCESS) { + /* if not identical by id - go to next one */ + if (strcmp(sim_ca_info.name, hca_id)) + continue; + + if ((port_num < 1) + || (port_num > sim_ca_info.num_ports)) { + return 1; + } + + if (!__get_port_guid_from_port_gid_tbl + (p_vend, caIdx, port_num, p_port_guid)) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_vendor_get_guid_by_ca_and_port: " + "Found Matching guid on HCA:%d Port:%d.\n", + caIdx, port_num); + status = IB_SUCCESS; + goto Exit; + } + } + 
} + + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_guid_by_ca_and_port: ERR 5134: " + "Fail to find HCA:%s\n", hca_id); + status = IB_INVALID_GUID; + +Exit: + + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +#endif diff --git a/libvendor/osm_vendor_mlx_ibmgt.c b/libvendor/osm_vendor_mlx_ibmgt.c new file mode 100644 index 0000000..9df6624 --- /dev/null +++ b/libvendor/osm_vendor_mlx_ibmgt.c @@ -0,0 +1,783 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +/* AUTHOR Edward Bortnikov + * + * DESCRIPTION + * The lower-level MAD transport interface implementation + * that allows sending a single MAD/receiving a callback + * when a single MAD is received. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include + +typedef struct _osmv_IBMGT_transport_mgr_ { + IB_MGT_mad_type_t mad_type; + uint8_t mgmt_class; /* for gsi */ + /* for communication between send call back and send mad */ + boolean_t is_send_ok; + cl_event_t send_done; +} osmv_IBMGT_transport_mgr_t; + +typedef struct _osmv_IBMGT_transport_info_ { + IB_MGT_mad_hndl_t smi_h; + cl_qlist_t *p_smi_list; + + IB_MGT_mad_hndl_t gsi_h; + /* holds bind object list for every binded mgmt class */ + cl_qlist_t *gsi_mgmt_lists[15]; +} osmv_IBMGT_transport_info_t; + +static void +__osmv_IBMGT_rcv_desc_to_osm_addr(IN IB_MGT_mad_rcv_desc_t * p_rcv_desc, + IN uint8_t is_smi, + OUT osm_mad_addr_t * p_mad_addr); + +static void +__osmv_IBMGT_osm_addr_to_ibmgt_addr(IN const osm_mad_addr_t * p_mad_addr, + IN uint8_t is_smi, OUT IB_ud_av_t * p_av); + +void +__osmv_IBMGT_send_cb(IN IB_MGT_mad_hndl_t mad_hndl, + IN u_int64_t wrid, + IN IB_comp_status_t status, IN void *private_ctx_p); + +void +__osmv_IBMGT_rcv_cb(IN IB_MGT_mad_hndl_t mad_hndl, + IN void *private_ctx_p, + IN void *payload_p, + IN IB_MGT_mad_rcv_desc_t * rcv_remote_info_p); + +/* + * NAME + * osmv_transport_init + * + * DESCRIPTION + * Setup the MAD transport infrastructure (filters, callbacks etc). 
+ */ + +ib_api_status_t +osmv_transport_init(IN osm_bind_info_t * p_info, + IN char hca_id[VENDOR_HCA_MAXNAMES], + IN uint8_t hca_idx, IN osmv_bind_obj_t * p_bo) +{ + ib_api_status_t st = IB_SUCCESS; + IB_MGT_ret_t ret; + IB_MGT_mad_type_t mad_type; + osmv_IBMGT_transport_mgr_t *p_mgr; + osmv_IBMGT_transport_info_t *p_tpot_info; + cl_list_obj_t *p_obj = NULL; + osm_log_t *p_log = p_bo->p_vendor->p_log; + int i; + + UNUSED_PARAM(hca_idx); + + /* if first bind, allocate tranport_info at vendor */ + if (NULL == p_bo->p_vendor->p_transport_info) { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG, + "osmv_transport_init: first bind() for the vendor\n"); + p_bo->p_vendor->p_transport_info + = (osmv_IBMGT_transport_info_t *) + malloc(sizeof(osmv_IBMGT_transport_info_t)); + if (NULL == p_bo->p_vendor->p_transport_info) { + return IB_INSUFFICIENT_MEMORY; + } + memset(p_bo->p_vendor->p_transport_info, 0, + sizeof(osmv_IBMGT_transport_info_t)); + p_tpot_info = + (osmv_IBMGT_transport_info_t *) (p_bo->p_vendor-> + p_transport_info); + + p_tpot_info->smi_h = 0xffffffff; + p_tpot_info->p_smi_list = NULL; + + p_tpot_info->gsi_h = 0xffffffff; + for (i = 0; i < 15; i++) { + + p_tpot_info->gsi_mgmt_lists[i] = NULL; + } + + } else { + + p_tpot_info = + (osmv_IBMGT_transport_info_t *) (p_bo->p_vendor-> + p_transport_info); + } + + /* Initialize the magic_ptr to the pointer of the p_bo info. + This will be used to signal when the object is being destroyed, so no + real action will be done then. 
*/ + p_bo->magic_ptr = p_bo; + + /* allocate transport mgr */ + p_mgr = malloc(sizeof(osmv_IBMGT_transport_mgr_t)); + if (NULL == p_mgr) { + free(p_tpot_info); + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "osmv_transport_init: ERR 7201: " "alloc failed \n"); + return IB_INSUFFICIENT_MEMORY; + } + + memset(p_mgr, 0, sizeof(osmv_IBMGT_transport_mgr_t)); + + p_bo->p_transp_mgr = p_mgr; + + switch (p_info->mad_class) { + case IB_MCLASS_SUBN_LID: + case IB_MCLASS_SUBN_DIR: + mad_type = IB_MGT_SMI; + break; + + case IB_MCLASS_SUBN_ADM: + default: + mad_type = IB_MGT_GSI; + break; + } + + /* we only support one class registration per SMI/GSI !!! */ + switch (mad_type) { + case IB_MGT_SMI: + /* we do not need to bind the handle if already available */ + osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG, + "osmv_transport_init: SMI bind\n"); + + if (p_tpot_info->smi_h == 0xffffffff) { + ret = IB_MGT_get_handle(hca_id, + p_bo->port_num, + IB_MGT_SMI, + &(p_tpot_info->smi_h)); + if (IB_MGT_OK != ret) { + osm_log(p_log, OSM_LOG_ERROR, + "osmv_transport_init: ERR 7202: " + "IB_MGT_get_handle for smi failed \n"); + st = IB_ERROR; + free(p_mgr); + goto Exit; + } + + osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG, + "osmv_transport_init: got smi handle:%d \n", + p_tpot_info->smi_h); + + ret = IB_MGT_bind_sm(p_tpot_info->smi_h); + if (IB_MGT_OK != ret) { + osm_log(p_log, OSM_LOG_ERROR, + "osmv_transport_init: ERR 7203: " + "IB_MGT_bind_sm failed \n"); + st = IB_ERROR; + free(p_mgr); + goto Exit; + } + + /* init smi list */ + p_tpot_info->p_smi_list = malloc(sizeof(cl_qlist_t)); + if (NULL == p_tpot_info->p_smi_list) { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "osmv_transport_init: ERR 7204: " + "alloc failed \n"); + IB_MGT_unbind_sm(p_tpot_info->smi_h); + IB_MGT_release_handle(p_tpot_info->smi_h); + free(p_mgr); + return IB_INSUFFICIENT_MEMORY; + } + memset(p_tpot_info->p_smi_list, 0, sizeof(cl_qlist_t)); + cl_qlist_init(p_tpot_info->p_smi_list); + + 
osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG, + "osmv_transport_init: before reg_cb\n"); + ret = IB_MGT_reg_cb(p_tpot_info->smi_h, + &__osmv_IBMGT_rcv_cb, + p_bo, + &__osmv_IBMGT_send_cb, + p_tpot_info->p_smi_list, + IB_MGT_RCV_CB_MASK | + IB_MGT_SEND_CB_MASK); + if (ret != IB_SUCCESS) { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "osmv_transport_init: ERR 7205: " + "reg_cb failed with return code:%x \n", + ret); + IB_MGT_unbind_sm(p_tpot_info->smi_h); + IB_MGT_release_handle(p_tpot_info->smi_h); + free(p_tpot_info->p_smi_list); + free(p_mgr); + st = IB_ERROR; + goto Exit; + } + + } + /* insert to list of smi's - for raising callbacks later on */ + p_obj = malloc(sizeof(cl_list_obj_t)); + if (p_obj) + memset(p_obj, 0, sizeof(cl_list_obj_t)); + cl_qlist_set_obj(p_obj, p_bo); + cl_qlist_insert_tail(p_tpot_info->p_smi_list, + &p_obj->list_item); + + break; + + case IB_MGT_GSI: + /* we do not need to bind the handle if already available */ + osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG, + "osmv_transport_init: ERR 7206: GSI bind\n"); + if (p_tpot_info->gsi_h == 0xffffffff) { + ret = IB_MGT_get_handle(hca_id, + p_bo->port_num, + IB_MGT_GSI, + &(p_tpot_info->gsi_h)); + if (IB_MGT_OK != ret) { + osm_log(p_log, OSM_LOG_ERROR, + "osmv_transport_init: ERR 7207: " + "IB_MGT_get_handle for gsi failed \n"); + st = IB_ERROR; + free(p_mgr); + goto Exit; + } + } + + /* this mgmt class was not binded yet */ + if (p_tpot_info->gsi_mgmt_lists[p_info->mad_class] == NULL) { + ret = + IB_MGT_bind_gsi_class(p_tpot_info->gsi_h, + p_info->mad_class); + if (IB_MGT_OK != ret) { + osm_log(p_log, OSM_LOG_ERROR, + "osmv_transport_init: ERR 7208: " + "IB_MGT_bind_gsi_class failed \n"); + st = IB_ERROR; + free(p_mgr); + goto Exit; + } + + p_tpot_info->gsi_mgmt_lists[p_info->mad_class] = + malloc(sizeof(cl_qlist_t)); + if (NULL == + p_tpot_info->gsi_mgmt_lists[p_info->mad_class]) { + IB_MGT_unbind_gsi_class(p_tpot_info->gsi_h, + p_info->mad_class); + free(p_mgr); + return 
IB_INSUFFICIENT_MEMORY; + } + memset(p_tpot_info->gsi_mgmt_lists[p_info->mad_class], + 0, sizeof(cl_qlist_t)); + cl_qlist_init(p_tpot_info-> + gsi_mgmt_lists[p_info->mad_class]); + } + /* insert to list of smi's - for raising callbacks later on */ + p_obj = malloc(sizeof(cl_list_obj_t)); + if (p_obj) + memset(p_obj, 0, sizeof(cl_list_obj_t)); + cl_qlist_set_obj(p_obj, p_bo); + cl_qlist_insert_tail(p_tpot_info-> + gsi_mgmt_lists[p_info->mad_class], + &p_obj->list_item); + + p_mgr->mgmt_class = p_info->mad_class; + ret = IB_MGT_reg_cb(p_tpot_info->gsi_h, + &__osmv_IBMGT_rcv_cb, + p_bo, + &__osmv_IBMGT_send_cb, + p_bo, + IB_MGT_RCV_CB_MASK | IB_MGT_SEND_CB_MASK); + + if (ret != IB_SUCCESS) { + IB_MGT_unbind_gsi_class(p_tpot_info->gsi_h, + p_mgr->mgmt_class); + free(p_tpot_info->gsi_mgmt_lists[p_mgr->mgmt_class]); + free(p_mgr); + st = IB_ERROR; + goto Exit; + } + + break; + + default: + osm_log(p_log, OSM_LOG_ERROR, + "osmv_transport_init: ERR 7209: unrecognized mgmt class \n"); + st = IB_ERROR; + free(p_mgr); + goto Exit; + } + + osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG, + "osmv_transport_init: GSI bind\n"); + cl_event_construct(&p_mgr->send_done); + cl_event_init(&p_mgr->send_done, TRUE); + p_mgr->is_send_ok = FALSE; + p_mgr->mad_type = mad_type; + +Exit: + /* OSM_LOG_EXIT(p_log ); */ + return (ib_api_status_t) st; +} + +/* + * NAME + * osmv_transport_send_mad + * + * DESCRIPTION + * Send a single MAD (256 byte) + */ + +ib_api_status_t +osmv_transport_mad_send(IN const osm_bind_handle_t h_bind, + IN void *p_ib_mad, IN const osm_mad_addr_t * p_mad_addr) +{ + + osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; + osmv_IBMGT_transport_info_t *p_tpot_info = + (osmv_IBMGT_transport_info_t *) (p_bo->p_vendor->p_transport_info); + osm_vendor_t const *p_vend = p_bo->p_vendor; + ib_api_status_t status; + IB_ud_av_t av; + IB_MGT_ret_t ret; + ib_mad_t *p_mad = p_ib_mad; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_bo->p_vendor->p_transport_info); + + /* + * For all 
sends other than directed route SM MADs, + * acquire an address vector for the destination. + */ + if (p_mad->mgmt_class != IB_MCLASS_SUBN_DIR) { + __osmv_IBMGT_osm_addr_to_ibmgt_addr(p_mad_addr, + p_mad->mgmt_class == + IB_MCLASS_SUBN_LID, &av); + } else { + /* is a directed route - we need to construct a permissive address */ + memset(&av, 0, sizeof(av)); + /* we do not need port number since it is part of the mad_hndl */ + av.dlid = IB_LID_PERMISSIVE; + } + + /* send it */ + if ((p_mad->mgmt_class == IB_MCLASS_SUBN_DIR) || + (p_mad->mgmt_class == IB_MCLASS_SUBN_LID)) { + + /* SMI CASE */ + if (osm_log_is_active(p_vend->p_log, OSM_LOG_DEBUG)) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osmv_transport_mad_send: " + "av.dlid:0x%X, " + "av.static_rate:%d, " + "av.path_bits:%d.\n", + cl_ntoh16(av.dlid), av.static_rate, + av.src_path_bits); + } + + ret = IB_MGT_send_mad(p_tpot_info->smi_h, p_mad, /* actual payload */ + &av, /* address vector */ + (u_int64_t) CAST_P2LONG(p_bo), + IB_MGT_DEFAULT_SEND_TIME); + } else { + /* GSI CASE - Support Remote QP */ + if (osm_log_is_active(p_vend->p_log, OSM_LOG_DEBUG)) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osmv_transport_mad_send: " + "av.dlid:0x%X, av.static_rate:%d, av.path_bits:%d, remote qp:%d \n", + cl_ntoh16(av.dlid), av.static_rate, + av.src_path_bits, + cl_ntoh32(p_mad_addr->addr_type.gsi.remote_qp) + ); + } + + ret = IB_MGT_send_mad_to_qp(p_tpot_info->gsi_h, p_mad, /* actual payload */ + &av, /* address vector */ + (u_int64_t) CAST_P2LONG(p_bo), + IB_MGT_DEFAULT_SEND_TIME, + cl_ntoh32(p_mad_addr->addr_type.gsi. + remote_qp)); + + } + + status = IB_SUCCESS; + if (ret != IB_MGT_OK) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osmv_transport_mad_send: ERR 7210: " + "Error sending mad (%d).\n", ret); + status = IB_ERROR; + } else { + osmv_IBMGT_transport_mgr_t *p_mgr = + (osmv_IBMGT_transport_mgr_t *) (p_bo->p_transp_mgr); + + /* Let the others work when I am sleeping ... 
*/ + osmv_txn_unlock(p_bo); + + cl_event_wait_on(&(p_mgr->send_done), 0xffffffff, TRUE); + + /* Re-acquire the lock */ + osmv_txn_lock(p_bo); + + if (TRUE == p_bo->is_closing) { + + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osmv_transport_mad_send: ERR 7211: " + "The handle %p is being unbound, cannot send.\n", + h_bind); + status = IB_ERROR; + } + + if (p_mgr->is_send_ok == FALSE) { + status = IB_ERROR; + } + } + + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +void osmv_transport_done(IN const osm_bind_handle_t h_bind) +{ + osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; + osm_log_t *p_log = p_bo->p_vendor->p_log; + osmv_IBMGT_transport_mgr_t *p_mgr; + osmv_IBMGT_transport_info_t *p_tpot_info; + IB_MGT_ret_t ret; + cl_list_obj_t *p_obj = NULL; + cl_list_item_t *p_item, *p_item_tmp; + int i; + cl_qlist_t *p_list = NULL; + + OSM_LOG_ENTER(p_log); + + CL_ASSERT(p_bo); + + /* First of all - zero out the magic_ptr, so if a callback is called - + it'll know that we are currently closing down, and will not handle the + mad. 
*/ + p_bo->magic_ptr = 0; + + p_mgr = (osmv_IBMGT_transport_mgr_t *) (p_bo->p_transp_mgr); + p_tpot_info = + (osmv_IBMGT_transport_info_t *) (p_bo->p_vendor->p_transport_info); + + switch (p_mgr->mad_type) { + case IB_MGT_SMI: + p_list = p_tpot_info->p_smi_list; + + /* remove from the bindings list */ + p_item = cl_qlist_head(p_list); + while (p_item != cl_qlist_end(p_list)) { + p_obj = PARENT_STRUCT(p_item, cl_list_obj_t, list_item); + if (cl_qlist_obj(p_obj) == h_bind) { + break; + } + p_item_tmp = cl_qlist_next(p_item); + p_item = p_item_tmp; + } + + CL_ASSERT(p_item != cl_qlist_end(p_list)); + cl_qlist_remove_item(p_list, p_item); + if (p_obj) + free(p_obj); + + /* no one is binded to smi anymore - we can free the list, unbind & realease the hndl */ + if (cl_is_qlist_empty(p_list) == TRUE) { + free(p_list); + p_list = NULL; + + ret = IB_MGT_unbind_sm(p_tpot_info->smi_h); + if (ret != IB_MGT_OK) { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "osmv_transport_done: ERR 7212: " + "Failed to unbind sm\n"); + } + + ret = IB_MGT_release_handle(p_tpot_info->smi_h); + if (ret != IB_MGT_OK) { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "osmv_transport_done: ERR 7213: " + "Failed to release smi handle\n"); + } + p_tpot_info->smi_h = 0xffffffff; + } + break; + + case IB_MGT_GSI: + p_list = p_tpot_info->gsi_mgmt_lists[p_mgr->mgmt_class]; + /* remove from the bindings list */ + p_item = cl_qlist_head(p_list); + while (p_item != cl_qlist_end(p_list)) { + p_obj = PARENT_STRUCT(p_item, cl_list_obj_t, list_item); + if (cl_qlist_obj(p_obj) == h_bind) { + break; + } + p_item_tmp = cl_qlist_next(p_item); + p_item = p_item_tmp; + } + + CL_ASSERT(p_item != cl_qlist_end(p_list)); + cl_qlist_remove_item(p_list, p_item); + if (p_obj) + free(p_obj); + + /* no one is binded to this class anymore - we can free the list and unbind this class */ + if (cl_is_qlist_empty(p_list) == TRUE) { + free(p_list); + p_list = NULL; + + ret = + IB_MGT_unbind_gsi_class(p_tpot_info->gsi_h, + 
p_mgr->mgmt_class); + if (ret != IB_MGT_OK) { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "osmv_transport_done: ERR 7214: " + "Failed to unbind gsi class\n"); + } + } + + /* all the mgmt classes are unbinded - release gsi handle */ + for (i = 0; i < 15; i++) { + if (p_tpot_info->gsi_mgmt_lists[i] != NULL) { + break; + } + } + + if (i == 15) { + ret = IB_MGT_release_handle(p_tpot_info->gsi_h); + if (ret != IB_MGT_OK) { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "osmv_transport_done: ERR 7215: " + "Failed to release gsi handle\n"); + } + p_tpot_info->gsi_h = 0xffffffff; + } + } /* end switch */ + + free(p_mgr); +} + +/********************************************************************** + * IB_MGT Receive callback : invoked after each receive + **********************************************************************/ +void +__osmv_IBMGT_rcv_cb(IN IB_MGT_mad_hndl_t mad_hndl, + IN void *private_ctx_p, + IN void *payload_p, + IN IB_MGT_mad_rcv_desc_t * rcv_remote_info_p) +{ + osmv_bind_obj_t *p_bo; + osm_mad_addr_t mad_addr; + cl_list_item_t *p_item; + cl_list_obj_t *p_obj; + cl_qlist_t *p_list; + ib_mad_t *p_mad = (ib_mad_t *) payload_p; + osm_vendor_t *p_vendor; + osmv_IBMGT_transport_info_t *p_tinfo; + + __osmv_IBMGT_rcv_desc_to_osm_addr(rcv_remote_info_p, + ((p_mad->mgmt_class == + IB_MCLASS_SUBN_LID) + || (p_mad->mgmt_class == + IB_MCLASS_SUBN_DIR)), &mad_addr); + + /* different handling of SMI and GSI */ + if ((p_mad->mgmt_class == IB_MCLASS_SUBN_DIR) || + (p_mad->mgmt_class == IB_MCLASS_SUBN_LID)) { + /* SMI CASE */ + p_bo = (osmv_bind_obj_t *) private_ctx_p; + /* Make sure the p_bo object is still relevant */ + if ((p_bo->magic_ptr != p_bo) || p_bo->is_closing) + return; + + p_vendor = p_bo->p_vendor; + p_tinfo = + (osmv_IBMGT_transport_info_t *) p_vendor->p_transport_info; + p_list = p_tinfo->p_smi_list; + } else { + /* GSI CASE */ + p_bo = (osmv_bind_obj_t *) private_ctx_p; + /* Make sure the p_bo object is still relevant */ + if ((p_bo->magic_ptr 
!= p_bo) || p_bo->is_closing) + return; + + p_vendor = p_bo->p_vendor; + p_tinfo = + (osmv_IBMGT_transport_info_t *) p_vendor->p_transport_info; + p_list = p_tinfo->gsi_mgmt_lists[p_mad->mgmt_class]; + } + + /* go over the bindings list and send the mad, one of them will accept it, + the others will drope + */ + p_item = cl_qlist_head(p_list); + while (p_item != cl_qlist_end(p_list)) { + p_obj = PARENT_STRUCT(p_item, cl_list_obj_t, list_item); + p_bo = cl_qlist_obj(p_obj); + /* give upper layer the mad */ + osmv_dispatch_mad((osm_bind_handle_t) p_bo, payload_p, + &mad_addr); + /* Make sure the p_bo object is still relevant */ + if ((p_bo->magic_ptr != p_bo) || p_bo->is_closing) + return; + + p_item = cl_qlist_next(p_item); + } +} + +/********************************************************************** + * IB_MGT Send callback : invoked after each send + **********************************************************************/ +void +__osmv_IBMGT_send_cb(IN IB_MGT_mad_hndl_t mad_hndl, + IN u_int64_t wrid, + IN IB_comp_status_t status, IN void *private_ctx_p) +{ + osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) CAST_P2LONG(wrid); + + osmv_IBMGT_transport_mgr_t *p_mgr = + (osmv_IBMGT_transport_mgr_t *) p_bo->p_transp_mgr; + + /* Make sure the p_bo object is still relevant */ + if (p_bo->magic_ptr != p_bo) + return; + + /* we assume that each send on a bind object is synchronized, and no paralel sends + from diffrent threads with same object can be made */ + if (status == IB_COMP_SUCCESS) { + p_mgr->is_send_ok = TRUE; + } else + p_mgr->is_send_ok = FALSE; + cl_event_signal(&p_mgr->send_done); + +} + +/********************************************************************** + * IB_MGT to OSM ADDRESS VECTOR + **********************************************************************/ +static void +__osmv_IBMGT_rcv_desc_to_osm_addr(IN IB_MGT_mad_rcv_desc_t * p_rcv_desc, + IN uint8_t is_smi, + OUT osm_mad_addr_t * p_mad_addr) +{ + /* p_mad_addr->dest_lid = p_osm->subn.sm_base_lid; - 
for resp we use the dest lid ... */ + p_mad_addr->dest_lid = cl_hton16(p_rcv_desc->remote_lid); + p_mad_addr->static_rate = 0; /* HACK - we do not know the rate ! */ + p_mad_addr->path_bits = p_rcv_desc->local_path_bits; + /* Clear the grh any way to avoid unset fields */ + memset(&p_mad_addr->addr_type.gsi.grh_info, 0, + sizeof(p_mad_addr->addr_type.gsi.grh_info)); + + if (is_smi) { + /* SMI */ + p_mad_addr->addr_type.smi.source_lid = + cl_hton16(p_rcv_desc->remote_lid); + p_mad_addr->addr_type.smi.port_num = 99; /* HACK - if used - should fail */ + } else { + /* GSI */ + /* seems to me there is a IBMGT bug reversing the QPN ... */ + /* Does IBMGT supposed to provide the QPN is network or HOST ? */ + p_mad_addr->addr_type.gsi.remote_qp = cl_hton32(p_rcv_desc->qp); + + p_mad_addr->addr_type.gsi.remote_qkey = IB_QP1_WELL_KNOWN_Q_KEY; + /* we do have the p_mad_addr->pkey_ix but how to get the PKey by index ? */ + /* the only way seems to be to use VAPI_query_hca_pkey_tbl and obtain */ + /* the full PKey table - than go by the index. */ + /* since this does not seem reasonable to me I simply use the default */ + /* There is a TAVOR limitation that only one P_KEY is supported per */ + /* QP - so QP1 must use IB_DEFAULT_PKEY */ + p_mad_addr->addr_type.gsi.pkey_ix = 0; + p_mad_addr->addr_type.gsi.service_level = p_rcv_desc->sl; + + p_mad_addr->addr_type.gsi.global_route = p_rcv_desc->grh_flag; + /* copy the GRH data if relevant */ + if (p_mad_addr->addr_type.gsi.global_route) { + p_mad_addr->addr_type.gsi.grh_info.ver_class_flow = + ib_grh_set_ver_class_flow(p_rcv_desc->grh. + IP_version, + p_rcv_desc->grh. + traffic_class, + p_rcv_desc->grh. 
+ flow_label); + p_mad_addr->addr_type.gsi.grh_info.hop_limit = + p_rcv_desc->grh.hop_limit; + memcpy(&p_mad_addr->addr_type.gsi.grh_info.src_gid.raw, + &p_rcv_desc->grh.sgid, sizeof(ib_net64_t)); + memcpy(&p_mad_addr->addr_type.gsi.grh_info.dest_gid.raw, + p_rcv_desc->grh.dgid, sizeof(ib_net64_t)); + } + } +} + +/********************************************************************** + * OSM ADDR VECTOR TO IB_MGT + **********************************************************************/ +void +__osmv_IBMGT_osm_addr_to_ibmgt_addr(IN const osm_mad_addr_t * p_mad_addr, + IN uint8_t is_smi, OUT IB_ud_av_t * p_av) +{ + + /* For global destination or Multicast address: */ + u_int8_t ver; + + memset(p_av, 0, sizeof(IB_ud_av_t)); + + p_av->src_path_bits = p_mad_addr->path_bits; + p_av->static_rate = p_mad_addr->static_rate; + p_av->dlid = cl_ntoh16(p_mad_addr->dest_lid); + + if (is_smi) { + p_av->sl = 0; /* Just to note we use 0 here. */ + } else { + p_av->sl = p_mad_addr->addr_type.gsi.service_level; + p_av->grh_flag = p_mad_addr->addr_type.gsi.global_route; + + if (p_mad_addr->addr_type.gsi.global_route) { + ib_grh_get_ver_class_flow(p_mad_addr->addr_type.gsi. + grh_info.ver_class_flow, &ver, + &p_av->traffic_class, + &p_av->flow_label); + p_av->hop_limit = + p_mad_addr->addr_type.gsi.grh_info.hop_limit; + p_av->sgid_index = 0; /* we always use source GID 0 */ + memcpy(&p_av->dgid, + &p_mad_addr->addr_type.gsi.grh_info.dest_gid.raw, + sizeof(ib_net64_t)); + + } + } +} diff --git a/libvendor/osm_vendor_mlx_rmpp_ctx.c b/libvendor/osm_vendor_mlx_rmpp_ctx.c new file mode 100644 index 0000000..f59620e --- /dev/null +++ b/libvendor/osm_vendor_mlx_rmpp_ctx.c @@ -0,0 +1,361 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include + +#include +#include + +ib_api_status_t +osmv_rmpp_send_ctx_init(osmv_rmpp_send_ctx_t * p_ctx, void *p_arbt_mad, + uint32_t mad_sz, osm_log_t * p_log) +{ + ib_api_status_t st = IB_SUCCESS; + cl_status_t cl_st; + + CL_ASSERT(p_ctx); + if (NULL == p_arbt_mad) { + return IB_INVALID_PARAMETER; + } + + if (osmv_mad_is_sa((ib_mad_t *) p_arbt_mad)) { + p_ctx->is_sa_mad = TRUE; + } else + p_ctx->is_sa_mad = FALSE; + + p_ctx->mad_sz = mad_sz; + + cl_event_construct(&p_ctx->event); + cl_st = cl_event_init(&p_ctx->event, FALSE); + if (cl_st != CL_SUCCESS) { + return IB_ERROR; + } + + st = osmv_rmpp_sar_init(&p_ctx->sar, p_arbt_mad, p_ctx->mad_sz, + p_ctx->is_sa_mad); + if (st == IB_SUCCESS) { + p_ctx->window_first = 1; + p_ctx->window_last = 1; + } + + p_ctx->p_log = p_log; + return st; +} + +void osmv_rmpp_send_ctx_done(IN osmv_rmpp_send_ctx_t * p_ctx) +{ + CL_ASSERT(p_ctx); + cl_event_destroy(&p_ctx->event); + osmv_rmpp_sar_done(&p_ctx->sar); + free(p_ctx); +} + +uint32_t osmv_rmpp_send_ctx_get_num_segs(IN osmv_rmpp_send_ctx_t * p_send_ctx) +{ + uint32_t data_len, data_sz, num; + + CL_ASSERT(p_send_ctx); + + if (p_send_ctx->is_sa_mad) { + data_len = p_send_ctx->mad_sz - IB_SA_MAD_HDR_SIZE; + data_sz = IB_SA_DATA_SIZE; + } else { + data_len = p_send_ctx->mad_sz - MAD_RMPP_HDR_SIZE; + data_sz = MAD_RMPP_DATA_SIZE; + } + + num = data_len / data_sz; + if (0 == data_len || (data_len % data_sz) > 0) { + num++; + } + + return num; +} + +ib_api_status_t +osmv_rmpp_send_ctx_get_seg(IN osmv_rmpp_send_ctx_t * p_send_ctx, + IN uint32_t seg_idx, + IN uint32_t resp_timeout, OUT void *p_buf) +{ + ib_api_status_t st = IB_SUCCESS; + uint32_t num_segs, paylen = 0; + ib_rmpp_mad_t *p_rmpp_mad; + + OSM_LOG_ENTER(p_send_ctx->p_log); + CL_ASSERT(p_send_ctx); + + st = osmv_rmpp_sar_get_mad_seg(&p_send_ctx->sar, seg_idx, p_buf); + if (st != IB_SUCCESS) { + goto Exit; + } + + p_rmpp_mad = 
(ib_rmpp_mad_t *) p_buf; + /* Set the relevant bits in the RMPP hdr */ + p_rmpp_mad->rmpp_status = IB_RMPP_STATUS_SUCCESS; + p_rmpp_mad->rmpp_flags |= IB_RMPP_FLAG_ACTIVE; + p_rmpp_mad->rmpp_flags |= resp_timeout << 3; + + num_segs = osmv_rmpp_send_ctx_get_num_segs(p_send_ctx); + + if (1 == seg_idx) { + p_rmpp_mad->rmpp_flags |= IB_RMPP_FLAG_FIRST; + + /* This is the first segment - + the reported paylen is the total amount of data. + */ + if (p_send_ctx->is_sa_mad) { + /* sa mad hdr sz */ + paylen = p_send_ctx->mad_sz - IB_SA_MAD_HDR_SIZE; + paylen += + num_segs * (IB_SA_MAD_HDR_SIZE - MAD_RMPP_HDR_SIZE); + } else { + /* mad hdr sz */ + paylen = p_send_ctx->mad_sz - MAD_RMPP_HDR_SIZE; + } + } + + if (seg_idx == num_segs) { + p_rmpp_mad->rmpp_flags |= IB_RMPP_FLAG_LAST; + + /* + This is the last segment - + the reported paylen is only the amount of data left on this segment. + */ + if (p_send_ctx->is_sa_mad) { + paylen = p_send_ctx->mad_sz - IB_SA_MAD_HDR_SIZE; + paylen -= (num_segs - 1) * IB_SA_DATA_SIZE; + paylen += (IB_SA_MAD_HDR_SIZE - MAD_RMPP_HDR_SIZE); + } else { + paylen = p_send_ctx->mad_sz - MAD_RMPP_HDR_SIZE; + paylen -= + (num_segs - 1) * (MAD_BLOCK_SIZE - + MAD_RMPP_HDR_SIZE); + } + } + + p_rmpp_mad->rmpp_type = IB_RMPP_TYPE_DATA; + p_rmpp_mad->rmpp_version = 1; + p_rmpp_mad->paylen_newwin = cl_ntoh32(paylen); + p_rmpp_mad->seg_num = cl_ntoh32(seg_idx); + +Exit: + OSM_LOG_EXIT(p_send_ctx->p_log); + return st; +} + +ib_api_status_t +osmv_rmpp_recv_ctx_init(osmv_rmpp_recv_ctx_t * p_ctx, osm_log_t * p_log) +{ + ib_api_status_t st = IB_SUCCESS; + + CL_ASSERT(p_ctx); + + p_ctx->is_sa_mad = FALSE; + + p_ctx->p_rbuf = malloc(sizeof(cl_qlist_t)); + if (p_ctx->p_rbuf) { + memset(p_ctx->p_rbuf, 0, sizeof(cl_qlist_t)); + cl_qlist_init(p_ctx->p_rbuf); + p_ctx->expected_seg = 1; + } else + st = IB_INSUFFICIENT_MEMORY; + + p_ctx->p_log = p_log; + + return st; +} + +void osmv_rmpp_recv_ctx_done(IN osmv_rmpp_recv_ctx_t * p_ctx) +{ + cl_list_item_t *p_list_item; + 
cl_list_obj_t *p_obj; + + CL_ASSERT(p_ctx); + + /* go over all the items in the list and remove them */ + p_list_item = cl_qlist_remove_head(p_ctx->p_rbuf); + while (p_list_item != cl_qlist_end(p_ctx->p_rbuf)) { + + p_obj = PARENT_STRUCT(p_list_item, cl_list_obj_t, list_item); + + free(cl_qlist_obj(p_obj)); + free(p_obj); + + p_list_item = cl_qlist_remove_head(p_ctx->p_rbuf); + } + + osmv_rmpp_sar_done(&p_ctx->sar); + + free(p_ctx->p_rbuf); + free(p_ctx); +} + +ib_api_status_t +osmv_rmpp_recv_ctx_store_mad_seg(IN osmv_rmpp_recv_ctx_t * p_recv_ctx, + IN void *p_mad) +{ + cl_list_obj_t *p_obj = NULL; + void *p_list_mad; + + OSM_LOG_ENTER(p_recv_ctx->p_log); + + CL_ASSERT(p_recv_ctx); + p_list_mad = malloc(MAD_BLOCK_SIZE); + if (NULL == p_list_mad) { + return IB_INSUFFICIENT_MEMORY; + } + memset(p_list_mad, 0, MAD_BLOCK_SIZE); + memcpy(p_list_mad, p_mad, MAD_BLOCK_SIZE); + + p_obj = malloc(sizeof(cl_list_obj_t)); + if (NULL == p_obj) { + free(p_list_mad); + return IB_INSUFFICIENT_MEMORY; + } + memset(p_obj, 0, sizeof(cl_list_obj_t)); + cl_qlist_set_obj(p_obj, p_list_mad); + + cl_qlist_insert_tail(p_recv_ctx->p_rbuf, &p_obj->list_item); + + if (osmv_mad_is_sa((ib_mad_t *) p_mad)) { + p_recv_ctx->is_sa_mad = TRUE; + } + + return IB_SUCCESS; + +} + +uint32_t +osmv_rmpp_recv_ctx_get_cur_byte_num(IN osmv_rmpp_recv_ctx_t * p_recv_ctx) +{ + uint32_t num_segs; + + num_segs = cl_qlist_count(p_recv_ctx->p_rbuf); + if (p_recv_ctx->is_sa_mad) + return ((num_segs * IB_SA_DATA_SIZE) + IB_SA_MAD_HDR_SIZE); + else + return ((num_segs * MAD_RMPP_DATA_SIZE) + MAD_RMPP_HDR_SIZE); +} + +uint32_t +osmv_rmpp_recv_ctx_get_byte_num_from_first(IN osmv_rmpp_recv_ctx_t * p_recv_ctx) +{ + cl_list_item_t *p_item; + cl_list_obj_t *p_obj; + void *p_list_mad; + uint32_t num_bytes, num_segs; + + p_item = cl_qlist_head(p_recv_ctx->p_rbuf); + p_obj = PARENT_STRUCT(p_item, cl_list_obj_t, list_item); + p_list_mad = cl_qlist_obj(p_obj); + + /* mad data sz */ + num_bytes = cl_ntoh32(((ib_rmpp_mad_t *) 
p_list_mad)->paylen_newwin); + if (0 != num_bytes) { + if (p_recv_ctx->is_sa_mad) { + /* sa mad hdr sz */ + num_segs = cl_qlist_count(p_recv_ctx->p_rbuf); + num_bytes -= + num_segs * (IB_SA_MAD_HDR_SIZE - MAD_RMPP_HDR_SIZE); + num_bytes += IB_SA_MAD_HDR_SIZE; + } else { + /* mad hdr sz */ + num_bytes += MAD_RMPP_HDR_SIZE; + } + } + + return num_bytes; +} + +uint32_t +osmv_rmpp_recv_ctx_get_byte_num_from_last(IN osmv_rmpp_recv_ctx_t * p_recv_ctx) +{ + cl_list_item_t *p_item; + cl_list_obj_t *p_obj; + void *p_list_mad; + uint32_t num_bytes, num_segs; + + p_item = cl_qlist_tail(p_recv_ctx->p_rbuf); + p_obj = PARENT_STRUCT(p_item, cl_list_obj_t, list_item); + p_list_mad = cl_qlist_obj(p_obj); + + /* mad data sz */ + num_segs = cl_qlist_count(p_recv_ctx->p_rbuf); + num_bytes = cl_ntoh32(((ib_rmpp_mad_t *) p_list_mad)->paylen_newwin); + + if (0 != num_bytes) { + if (p_recv_ctx->is_sa_mad) { + /* sa mad hdr sz */ + num_bytes += MAD_RMPP_HDR_SIZE; + num_bytes += (num_segs - 1) * IB_SA_DATA_SIZE; + } else { + /* mad hdr sz */ + num_bytes += MAD_RMPP_HDR_SIZE; + num_bytes += (num_segs - 1) * MAD_RMPP_DATA_SIZE; + } + } + + return num_bytes; +} + +/* assuming that the last rmpp pkt arrived so that data member: total_bytes has the right value */ +ib_api_status_t +osmv_rmpp_recv_ctx_reassemble_arbt_mad(IN osmv_rmpp_recv_ctx_t * p_recv_ctx, + IN uint32_t size, IN void *p_arbt_mad) +{ + ib_api_status_t st = IB_SUCCESS; + + CL_ASSERT(p_recv_ctx); + + st = osmv_rmpp_sar_init(&p_recv_ctx->sar, p_arbt_mad, size, + p_recv_ctx->is_sa_mad); + if (st != IB_SUCCESS) { + return st; + } + + st = osmv_rmpp_sar_reassemble_arbt_mad(&p_recv_ctx->sar, + p_recv_ctx->p_rbuf); + + osmv_rmpp_sar_done(&p_recv_ctx->sar); + + return st; +} diff --git a/libvendor/osm_vendor_mlx_sa.c b/libvendor/osm_vendor_mlx_sa.c new file mode 100644 index 0000000..bfc643e --- /dev/null +++ b/libvendor/osm_vendor_mlx_sa.c @@ -0,0 +1,800 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. 
+ * Copyright (c) 2002-2005,2008 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009,2010 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include + +/* this struct is the internal rep of the bind handle */ +typedef struct _osmv_sa_bind_info { + osm_bind_handle_t h_bind; + osm_log_t *p_log; + osm_vendor_t *p_vendor; + osm_mad_pool_t *p_mad_pool; + uint64_t port_guid; + cl_event_t sync_event; + uint64_t last_lids_update_sec; + uint16_t lid; + uint16_t sm_lid; +} osmv_sa_bind_info_t; + +/* + Call back on new mad received: + + We basically only need to set the context of the query. + Or report an error. + + A pointer to the actual context of the request (a copy of the oriignal + request structure) is attached as the p_madw->context.ni_context.node_guid +*/ +static void +__osmv_sa_mad_rcv_cb(IN osm_madw_t * p_madw, + IN void *bind_context, IN osm_madw_t * p_req_madw) +{ + osmv_sa_bind_info_t *p_bind = (osmv_sa_bind_info_t *) bind_context; + osmv_query_req_t *p_query_req_copy = NULL; + osmv_query_res_t query_res; + ib_sa_mad_t *p_sa_mad; + ib_net16_t mad_status; + + OSM_LOG_ENTER(p_bind->p_log); + + if (!p_req_madw) { + OSM_LOG(p_bind->p_log, OSM_LOG_DEBUG, + "Ignoring a non-response mad\n"); + osm_mad_pool_put(p_bind->p_mad_pool, p_madw); + goto Exit; + } + + /* obtain the sent context */ + p_query_req_copy = + (osmv_query_req_t *) (p_req_madw->context.arb_context.context1); + + /* provide the context of the original request in the result */ + query_res.query_context = p_query_req_copy->query_context; + + /* provide the resulting madw */ + query_res.p_result_madw = p_madw; + + /* update the req fields */ + p_sa_mad = (ib_sa_mad_t *) p_madw->p_mad; + + /* if we got a remote error track it in the status */ + mad_status = (ib_net16_t) (p_sa_mad->status & IB_SMP_STATUS_MASK); + if (mad_status != IB_SUCCESS) { + OSM_LOG(p_bind->p_log, OSM_LOG_ERROR, "ERR 0501: " + "Remote error:0x%04X\n", cl_ntoh16(mad_status)); + query_res.status = IB_REMOTE_ERROR; + } else + query_res.status = 
IB_SUCCESS; + + /* what if we have got back an empty mad ? */ + if (!p_madw->mad_size) { + OSM_LOG(p_bind->p_log, OSM_LOG_ERROR, "ERR 0502: " + "Got an empty mad\n"); + query_res.status = IB_ERROR; + } + + if (IB_SUCCESS == mad_status) { + + /* if we are in not in a method response of an rmpp nature we must get only 1 */ + /* HACK: in the future we might need to be smarter for other methods... */ + if (p_sa_mad->method != IB_MAD_METHOD_GETTABLE_RESP) { + query_res.result_cnt = 1; + } else { +#ifndef VENDOR_RMPP_SUPPORT + if (mad_status != IB_SUCCESS) + query_res.result_cnt = 0; + else + query_res.result_cnt = 1; +#else + /* we used the offset value to calculate the number of + records in here */ + if (ib_get_attr_size(p_sa_mad->attr_offset) == 0) { + query_res.result_cnt = 0; + OSM_LOG(p_bind->p_log, OSM_LOG_DEBUG, + "Count = 0\n"); + } + else { + query_res.result_cnt = + (p_madw->mad_size - IB_SA_MAD_HDR_SIZE) / + ib_get_attr_size(p_sa_mad->attr_offset); + OSM_LOG(p_bind->p_log, OSM_LOG_DEBUG, + "Count = %u = %zu / %u (%zu)\n", + query_res.result_cnt, + p_madw->mad_size - IB_SA_MAD_HDR_SIZE, + ib_get_attr_size(p_sa_mad->attr_offset), + (p_madw->mad_size - IB_SA_MAD_HDR_SIZE) % + ib_get_attr_size(p_sa_mad->attr_offset)); + } +#endif + } + } + + query_res.query_type = p_query_req_copy->query_type; + + p_query_req_copy->pfn_query_cb(&query_res); + + if ((p_query_req_copy->flags & OSM_SA_FLAGS_SYNC) == OSM_SA_FLAGS_SYNC) + cl_event_signal(&p_bind->sync_event); + +Exit: + + /* free the copied query request if found */ + if (p_query_req_copy) + free(p_query_req_copy); + + /* put back the request madw */ + if (p_req_madw) + osm_mad_pool_put(p_bind->p_mad_pool, p_req_madw); + + OSM_LOG_EXIT(p_bind->p_log); +} + +/* + Send Error Callback: + + Only report the error and get rid of the mad wrapper +*/ +static void __osmv_sa_mad_err_cb(IN void *bind_context, IN osm_madw_t * p_madw) +{ + osmv_sa_bind_info_t *p_bind = (osmv_sa_bind_info_t *) bind_context; + osmv_query_req_t 
*p_query_req_copy = NULL; + osmv_query_res_t query_res; + + OSM_LOG_ENTER(p_bind->p_log); + + /* Obtain the sent context etc */ + p_query_req_copy = + (osmv_query_req_t *) (p_madw->context.arb_context.context1); + + /* provide the context of the original request in the result */ + query_res.query_context = p_query_req_copy->query_context; + + query_res.p_result_madw = p_madw; + + query_res.status = IB_TIMEOUT; + query_res.result_cnt = 0; + query_res.p_result_madw->status = IB_TIMEOUT; + p_madw->status = IB_TIMEOUT; + query_res.query_type = p_query_req_copy->query_type; + + p_query_req_copy->pfn_query_cb(&query_res); + + if ((p_query_req_copy->flags & OSM_SA_FLAGS_SYNC) == OSM_SA_FLAGS_SYNC) + cl_event_signal(&p_bind->sync_event); + + free(p_query_req_copy); + OSM_LOG_EXIT(p_bind->p_log); +} + +/***************************************************************************** + This routine needs to be invoked on every send - since the SM LID and Local + lid might change. To do that without any major perfoermance impact we cache + the results and time they were obtained. Refresh only twice a minute. + To avoid the need to use statics and risk a race - we require the refresh time + to be stored in the context of the results. Also this coveres cases were + we query for multiple guids. 
+ *****************************************************************************/ +static ib_api_status_t +__osmv_get_lid_and_sm_lid_by_port_guid(IN osm_vendor_t * const p_vend, + IN ib_net64_t port_guid, + IN OUT uint64_t * p_lids_update_time_sec, + OUT uint16_t * lid, + OUT uint16_t * sm_lid) +{ + + ib_api_status_t status; + ib_port_attr_t *p_attr_array; + uint32_t num_ports; + uint32_t port_num; + + OSM_LOG_ENTER(p_vend->p_log); + + /* use prevous values if current time is close enough to previous query */ + if (cl_get_time_stamp_sec() <= *p_lids_update_time_sec + 30) { + OSM_LOG(p_vend->p_log, OSM_LOG_DEBUG, + "Using previously stored lid:0x%04x sm_lid:0x%04x\n", + *lid, *sm_lid); + status = IB_SUCCESS; + goto Exit; + } + + /* obtain the number of available ports */ + num_ports = 0; + status = osm_vendor_get_all_port_attr(p_vend, NULL, &num_ports); + if (status != IB_INSUFFICIENT_MEMORY) { + OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 0503: " + "Expected to get the IB_INSUFFICIENT_MEMORY but got: %s\n", + ib_get_err_str(status)); + status = IB_ERROR; + goto Exit; + } + + OSM_LOG(p_vend->p_log, OSM_LOG_DEBUG, + "Found total of %u ports. 
Looking for guid:0x%016" PRIx64 "\n", + num_ports, cl_ntoh64(port_guid)); + + /* allocate the attributes */ + p_attr_array = + (ib_port_attr_t *) malloc(sizeof(ib_port_attr_t) * num_ports); + + /* obtain the attributes */ + status = osm_vendor_get_all_port_attr(p_vend, p_attr_array, &num_ports); + if (status != IB_SUCCESS) { + OSM_LOG(p_vend->p_log, OSM_LOG_ERROR, "ERR 0504: " + "Failed to get port attributes (error: %s)\n", + ib_get_err_str(status)); + free(p_attr_array); + goto Exit; + } + + status = IB_ERROR; + /* find the port requested in the list */ + for (port_num = 0; (port_num < num_ports) && (status == IB_ERROR); + port_num++) { + if (p_attr_array[port_num].port_guid == port_guid) { + *lid = p_attr_array[port_num].lid; + *sm_lid = p_attr_array[port_num].sm_lid; + *p_lids_update_time_sec = cl_get_time_stamp_sec(); + status = IB_SUCCESS; + OSM_LOG(p_vend->p_log, OSM_LOG_DEBUG, + "Found guid:0x%016" PRIx64 " with idx:%d\n", + cl_ntoh64(port_guid), port_num); + } + } + + free(p_attr_array); + +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +osm_bind_handle_t +osmv_bind_sa(IN osm_vendor_t * const p_vend, + IN osm_mad_pool_t * const p_mad_pool, IN ib_net64_t port_guid) +{ + osm_bind_info_t bind_info; + osm_log_t *p_log = p_vend->p_log; + ib_api_status_t status = IB_SUCCESS; + osmv_sa_bind_info_t *p_sa_bind_info; + cl_status_t cl_status; + + OSM_LOG_ENTER(p_log); + + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Binding to port 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); + + bind_info.port_guid = port_guid; + bind_info.mad_class = IB_MCLASS_SUBN_ADM; + bind_info.class_version = 2; + bind_info.is_responder = FALSE; + bind_info.is_trap_processor = FALSE; + bind_info.is_report_processor = FALSE; + bind_info.send_q_size = 256; + bind_info.recv_q_size = 256; + + /* allocate the new sa bind info */ + p_sa_bind_info = + (osmv_sa_bind_info_t *) malloc(sizeof(osmv_sa_bind_info_t)); + if (!p_sa_bind_info) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0505: " + "Failed to allocate 
new bind structure\n"); + p_sa_bind_info = OSM_BIND_INVALID_HANDLE; + goto Exit; + } + + /* store some important context */ + p_sa_bind_info->p_log = p_log; + p_sa_bind_info->port_guid = port_guid; + p_sa_bind_info->p_mad_pool = p_mad_pool; + p_sa_bind_info->p_vendor = p_vend; + p_sa_bind_info->last_lids_update_sec = 0; + + /* Bind to the lower level */ + p_sa_bind_info->h_bind = osm_vendor_bind(p_vend, &bind_info, p_mad_pool, __osmv_sa_mad_rcv_cb, __osmv_sa_mad_err_cb, p_sa_bind_info); /* context provided to CBs */ + + if (p_sa_bind_info->h_bind == OSM_BIND_INVALID_HANDLE) { + free(p_sa_bind_info); + p_sa_bind_info = OSM_BIND_INVALID_HANDLE; + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0506: " + "Failed to bind to vendor GSI\n"); + goto Exit; + } + + /* obtain the sm_lid from the vendor */ + status = + __osmv_get_lid_and_sm_lid_by_port_guid(p_vend, port_guid, + &p_sa_bind_info-> + last_lids_update_sec, + &p_sa_bind_info->lid, + &p_sa_bind_info->sm_lid); + if (status != IB_SUCCESS) { + free(p_sa_bind_info); + p_sa_bind_info = OSM_BIND_INVALID_HANDLE; + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0507: " + "Failed to obtain the SM lid\n"); + goto Exit; + } + + /* initialize the sync_event */ + cl_event_construct(&p_sa_bind_info->sync_event); + cl_status = cl_event_init(&p_sa_bind_info->sync_event, TRUE); + if (cl_status != CL_SUCCESS) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0508: " + "cl_init_event failed: %s\n", ib_get_err_str(cl_status)); + free(p_sa_bind_info); + p_sa_bind_info = OSM_BIND_INVALID_HANDLE; + } + +Exit: + OSM_LOG_EXIT(p_log); + return (p_sa_bind_info); +} + +/****t* OSM Vendor SA Client/osmv_sa_mad_data + * NAME + * osmv_sa_mad_data + * + * DESCRIPTION + * Extra fields required to perform a mad query + * This struct is passed to the actual send method + * + * SYNOPSIS + */ +typedef struct _osmv_sa_mad_data { + /* MAD data. 
*/ + uint8_t method; + ib_net16_t attr_id; + ib_net16_t attr_offset; + ib_net32_t attr_mod; + ib_net64_t comp_mask; + void *p_attr; +} osmv_sa_mad_data_t; +/* + * method + * The method of the mad to be sent + * + * attr_id + * Attribute ID + * + * attr_offset + * Offset as defined by RMPP + * + * attr_mod + * Attribute modifier + * + * comp_mask + * The component mask of the query + * + * p_attr + * A pointer to the record of the attribute to be sent. + * + *****/ + +/* Send a MAD out on the GSI interface */ +static ib_api_status_t +__osmv_send_sa_req(IN osmv_sa_bind_info_t * p_bind, + IN const osmv_sa_mad_data_t * const p_sa_mad_data, + IN const osmv_query_req_t * const p_query_req) +{ + ib_api_status_t status; + ib_mad_t *p_mad_hdr; + ib_sa_mad_t *p_sa_mad; + osm_madw_t *p_madw; + osm_log_t *p_log = p_bind->p_log; + static atomic32_t trans_id; + boolean_t sync; + osmv_query_req_t *p_query_req_copy; + uint32_t sa_size; + + OSM_LOG_ENTER(p_log); + + /* + since the sm_lid might change we obtain it every send + (actually it is cached in the bind object and refreshed + every 30sec by this proc) + */ + status = + __osmv_get_lid_and_sm_lid_by_port_guid(p_bind->p_vendor, + p_bind->port_guid, + &p_bind-> + last_lids_update_sec, + &p_bind->lid, + &p_bind->sm_lid); + if (status != IB_SUCCESS) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0509: " + "Failed to obtain the SM lid\n"); + goto Exit; + } + + /* Get a MAD wrapper for the send */ + p_madw = osm_mad_pool_get(p_bind->p_mad_pool, + p_bind->h_bind, MAD_BLOCK_SIZE, NULL); + + if (p_madw == NULL) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0510: " + "Unable to acquire MAD\n"); + status = IB_INSUFFICIENT_RESOURCES; + goto Exit; + } + + /* Initialize the Sent MAD: */ + + /* Initialize the MAD buffer for the send operation. 
*/ + p_mad_hdr = osm_madw_get_mad_ptr(p_madw); + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); + + /* Get a new transaction Id */ + cl_atomic_inc(&trans_id); + + /* Cleanup the MAD from any residue */ + memset(p_sa_mad, 0, MAD_BLOCK_SIZE); + + /* Initialize the standard MAD header. */ + ib_mad_init_new(p_mad_hdr, /* mad pointer */ + IB_MCLASS_SUBN_ADM, /* class */ + (uint8_t) 2, /* version */ + p_sa_mad_data->method, /* method */ + cl_hton64((uint64_t) trans_id), /* tid */ + p_sa_mad_data->attr_id, /* attr id */ + p_sa_mad_data->attr_mod /* attr mod */); + + /* Set the query information. */ + p_sa_mad->sm_key = p_query_req->sm_key; + p_sa_mad->attr_offset = 0; + p_sa_mad->comp_mask = p_sa_mad_data->comp_mask; + if (p_sa_mad->comp_mask) { + p_sa_mad_data->attr_offset ? (sa_size = ib_get_attr_size(p_sa_mad_data->attr_offset)) : (sa_size = IB_SA_DATA_SIZE); + memcpy(p_sa_mad->data, p_sa_mad_data->p_attr, sa_size); + } + + /* + Provide the address to send to + */ + /* Patch to handle IBAL - host order , where it should take destination lid in network order */ +#ifdef OSM_VENDOR_INTF_AL + p_madw->mad_addr.dest_lid = p_bind->sm_lid; +#else + p_madw->mad_addr.dest_lid = cl_hton16(p_bind->sm_lid); +#endif + p_madw->mad_addr.addr_type.smi.source_lid = cl_hton16(p_bind->lid); + p_madw->mad_addr.addr_type.gsi.remote_qp = CL_HTON32(1); + p_madw->resp_expected = TRUE; + p_madw->fail_msg = CL_DISP_MSGID_NONE; + + /* + Provide MAD context such that the call back will know what to do. + We have to keep the entire request structure so we know the CB. + Since we can not rely on the client to keep it around until + the response - we duplicate it and will later dispose it (in CB). 
+ To store on the MADW we cast it into what opensm has: + p_madw->context.arb_context.context1 + */ + p_query_req_copy = malloc(sizeof(*p_query_req_copy)); + if (!p_query_req_copy) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0511: " + "Unable to acquire memory for query copy\n"); + osm_mad_pool_put(p_bind->p_mad_pool, p_madw); + status = IB_INSUFFICIENT_RESOURCES; + goto Exit; + } + *p_query_req_copy = *p_query_req; + p_madw->context.arb_context.context1 = p_query_req_copy; + + /* we can support async as well as sync calls */ + sync = ((p_query_req->flags & OSM_SA_FLAGS_SYNC) == OSM_SA_FLAGS_SYNC); + + /* send the mad asynchronously */ + status = osm_vendor_send(osm_madw_get_bind_handle(p_madw), + p_madw, p_madw->resp_expected); + + /* if synchronous - wait on the event */ + if (sync) { + OSM_LOG(p_log, OSM_LOG_DEBUG, "Waiting for async event\n"); + cl_event_wait_on(&p_bind->sync_event, EVENT_NO_TIMEOUT, FALSE); + cl_event_reset(&p_bind->sync_event); + status = p_madw->status; + } + +Exit: + OSM_LOG_EXIT(p_log); + return status; +} + +/* + * Query the SA based on the user's request. + * + * h_bind - SA bind handle (an osmv_sa_bind_info_t under the hood) + * p_query_req - request; query_type selects the attribute, method and + * component mask, p_query_input carries the type-specific input + * + * Builds the record for the selected query type and passes it to + * __osmv_send_sa_req(). Returns that call's status, or IB_ERROR for an + * unknown query type. + */ +ib_api_status_t +osmv_query_sa(IN osm_bind_handle_t h_bind, + IN const osmv_query_req_t * const p_query_req) +{ + union { + ib_service_record_t svc_rec; + ib_node_record_t node_rec; + ib_portinfo_record_t port_info; + ib_path_rec_t path_rec; +#ifdef DUAL_SIDED_RMPP + ib_multipath_rec_t multipath_rec; +#endif + ib_class_port_info_t class_port_info; + /* __osmv_send_sa_req copies IB_SA_DATA_SIZE bytes from p_attr when + attr_offset is 0, so the union must be at least that large */ + uint8_t raw[IB_SA_DATA_SIZE]; + } u; + osmv_sa_mad_data_t sa_mad_data; + osmv_sa_bind_info_t *p_bind = (osmv_sa_bind_info_t *) h_bind; + osmv_user_query_t *p_user_query; +#ifdef DUAL_SIDED_RMPP + osmv_multipath_req_t *p_mpr_req; + int i, j; +#endif + osm_log_t *p_log = p_bind->p_log; + ib_api_status_t status; + + OSM_LOG_ENTER(p_log); + + /* most cases below fill in only the queried field; zero the rest so no + uninitialized stack bytes are copied into the outgoing mad */ + memset(&u, 0, sizeof(u)); + + /* Set the request information. */ + sa_mad_data.method = IB_MAD_METHOD_GETTABLE; + sa_mad_data.attr_mod = 0; + sa_mad_data.attr_offset = 0; + + /* Set the MAD attributes and component mask correctly.
*/ + switch (p_query_req->query_type) { + + case OSMV_QUERY_USER_DEFINED: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 USER_DEFINED\n"); + p_user_query = (osmv_user_query_t *) p_query_req->p_query_input; + if (p_user_query->method) + sa_mad_data.method = p_user_query->method; +#ifdef DUAL_SIDED_RMPP + if (sa_mad_data.method == IB_MAD_METHOD_GETMULTI || + sa_mad_data.method == IB_MAD_METHOD_GETTRACETABLE) + sa_mad_data.attr_offset = p_user_query->attr_offset; +#endif + sa_mad_data.attr_id = p_user_query->attr_id; + sa_mad_data.attr_mod = p_user_query->attr_mod; + sa_mad_data.comp_mask = p_user_query->comp_mask; + sa_mad_data.p_attr = p_user_query->p_attr; + break; + + case OSMV_QUERY_ALL_SVC_RECS: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 ALL_SVC_RECS\n"); + sa_mad_data.attr_id = IB_MAD_ATTR_SERVICE_RECORD; + sa_mad_data.comp_mask = 0; + sa_mad_data.p_attr = &u.svc_rec; + break; + + case OSMV_QUERY_SVC_REC_BY_NAME: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 SVC_REC_BY_NAME\n"); + sa_mad_data.method = IB_MAD_METHOD_GET; + sa_mad_data.attr_id = IB_MAD_ATTR_SERVICE_RECORD; + sa_mad_data.comp_mask = IB_SR_COMPMASK_SNAME; + sa_mad_data.p_attr = &u.svc_rec; + memcpy(u.svc_rec.service_name, p_query_req->p_query_input, + sizeof(ib_svc_name_t)); + break; + + case OSMV_QUERY_SVC_REC_BY_ID: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 SVC_REC_BY_ID\n"); + sa_mad_data.attr_id = IB_MAD_ATTR_SERVICE_RECORD; + sa_mad_data.comp_mask = IB_SR_COMPMASK_SID; + sa_mad_data.p_attr = &u.svc_rec; + u.svc_rec.service_id = + *(ib_net64_t *) (p_query_req->p_query_input); + break; + + case OSMV_QUERY_CLASS_PORT_INFO: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 CLASS_PORT_INFO\n"); + sa_mad_data.method = IB_MAD_METHOD_GET; + sa_mad_data.attr_id = IB_MAD_ATTR_CLASS_PORT_INFO; + sa_mad_data.comp_mask = 0; + sa_mad_data.p_attr = &u.class_port_info; + break; + + case OSMV_QUERY_NODE_REC_BY_NODE_GUID: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 NODE_REC_BY_NODE_GUID\n"); + sa_mad_data.attr_id =
IB_MAD_ATTR_NODE_RECORD; + sa_mad_data.comp_mask = IB_NR_COMPMASK_NODEGUID; + sa_mad_data.p_attr = &u.node_rec; + u.node_rec.node_info.node_guid = + *(ib_net64_t *) (p_query_req->p_query_input); + break; + + case OSMV_QUERY_PORT_REC_BY_LID: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 PORT_REC_BY_LID\n"); + sa_mad_data.attr_id = IB_MAD_ATTR_PORTINFO_RECORD; + sa_mad_data.comp_mask = IB_PIR_COMPMASK_LID; + sa_mad_data.p_attr = &u.port_info; + u.port_info.lid = *(ib_net16_t *) (p_query_req->p_query_input); + break; + + case OSMV_QUERY_PORT_REC_BY_LID_AND_NUM: + sa_mad_data.method = IB_MAD_METHOD_GET; + p_user_query = (osmv_user_query_t *) p_query_req->p_query_input; + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 PORT_REC_BY_LID_AND_NUM\n"); + sa_mad_data.attr_id = IB_MAD_ATTR_PORTINFO_RECORD; + sa_mad_data.comp_mask = + IB_PIR_COMPMASK_LID | IB_PIR_COMPMASK_PORTNUM; + sa_mad_data.p_attr = p_user_query->p_attr; + break; + + case OSMV_QUERY_VLARB_BY_LID_PORT_BLOCK: + sa_mad_data.method = IB_MAD_METHOD_GET; + p_user_query = (osmv_user_query_t *) p_query_req->p_query_input; + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 OSMV_QUERY_VLARB_BY_LID_PORT_BLOCK\n"); + sa_mad_data.attr_id = IB_MAD_ATTR_VLARB_RECORD; + sa_mad_data.comp_mask = + IB_VLA_COMPMASK_LID | IB_VLA_COMPMASK_OUT_PORT | + IB_VLA_COMPMASK_BLOCK; + sa_mad_data.p_attr = p_user_query->p_attr; + break; + + case OSMV_QUERY_SLVL_BY_LID_AND_PORTS: + sa_mad_data.method = IB_MAD_METHOD_GET; + p_user_query = (osmv_user_query_t *) p_query_req->p_query_input; + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 OSMV_QUERY_SLVL_BY_LID_AND_PORTS\n"); + sa_mad_data.attr_id = IB_MAD_ATTR_SLVL_RECORD; + sa_mad_data.comp_mask = + IB_SLVL_COMPMASK_LID | IB_SLVL_COMPMASK_OUT_PORT | + IB_SLVL_COMPMASK_IN_PORT; + sa_mad_data.p_attr = p_user_query->p_attr; + break; + + case OSMV_QUERY_PATH_REC_BY_PORT_GUIDS: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 PATH_REC_BY_PORT_GUIDS\n"); + memset(&u.path_rec, 0, sizeof(ib_path_rec_t)); + sa_mad_data.attr_id =
IB_MAD_ATTR_PATH_RECORD; + sa_mad_data.comp_mask = + (IB_PR_COMPMASK_DGID | IB_PR_COMPMASK_SGID | IB_PR_COMPMASK_NUMBPATH); + u.path_rec.num_path = 0x7f; + sa_mad_data.p_attr = &u.path_rec; + ib_gid_set_default(&u.path_rec.dgid, + ((osmv_guid_pair_t *) (p_query_req-> + p_query_input))-> + dest_guid); + ib_gid_set_default(&u.path_rec.sgid, + ((osmv_guid_pair_t *) (p_query_req-> + p_query_input))-> + src_guid); + break; + + case OSMV_QUERY_PATH_REC_BY_GIDS: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 PATH_REC_BY_GIDS\n"); + memset(&u.path_rec, 0, sizeof(ib_path_rec_t)); + sa_mad_data.attr_id = IB_MAD_ATTR_PATH_RECORD; + sa_mad_data.comp_mask = + (IB_PR_COMPMASK_DGID | IB_PR_COMPMASK_SGID | IB_PR_COMPMASK_NUMBPATH); + u.path_rec.num_path = 0x7f; + sa_mad_data.p_attr = &u.path_rec; + memcpy(&u.path_rec.dgid, + &((osmv_gid_pair_t *) (p_query_req->p_query_input))-> + dest_gid, sizeof(ib_gid_t)); + memcpy(&u.path_rec.sgid, + &((osmv_gid_pair_t *) (p_query_req->p_query_input))-> + src_gid, sizeof(ib_gid_t)); + break; + + case OSMV_QUERY_PATH_REC_BY_LIDS: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 PATH_REC_BY_LIDS\n"); + memset(&u.path_rec, 0, sizeof(ib_path_rec_t)); + sa_mad_data.method = IB_MAD_METHOD_GET; + sa_mad_data.attr_id = IB_MAD_ATTR_PATH_RECORD; + sa_mad_data.comp_mask = + (IB_PR_COMPMASK_DLID | IB_PR_COMPMASK_SLID); + sa_mad_data.p_attr = &u.path_rec; + u.path_rec.dlid = + ((osmv_lid_pair_t *) (p_query_req->p_query_input))-> + dest_lid; + u.path_rec.slid = + ((osmv_lid_pair_t *) (p_query_req->p_query_input))->src_lid; + break; + + case OSMV_QUERY_UD_MULTICAST_SET: + sa_mad_data.method = IB_MAD_METHOD_SET; + p_user_query = (osmv_user_query_t *) p_query_req->p_query_input; + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 OSMV_QUERY_UD_MULTICAST_SET\n"); + sa_mad_data.attr_id = IB_MAD_ATTR_MCMEMBER_RECORD; + sa_mad_data.comp_mask = p_user_query->comp_mask; + sa_mad_data.p_attr = p_user_query->p_attr; + break; + + case OSMV_QUERY_UD_MULTICAST_DELETE: + sa_mad_data.method = 
IB_MAD_METHOD_DELETE; + p_user_query = (osmv_user_query_t *) p_query_req->p_query_input; + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 OSMV_QUERY_UD_MULTICAST_DELETE\n"); + sa_mad_data.attr_id = IB_MAD_ATTR_MCMEMBER_RECORD; + sa_mad_data.comp_mask = p_user_query->comp_mask; + sa_mad_data.p_attr = p_user_query->p_attr; + break; + +#ifdef DUAL_SIDED_RMPP + case OSMV_QUERY_MULTIPATH_REC: + OSM_LOG(p_log, OSM_LOG_DEBUG, "DBG:001 MULTIPATH_REC\n"); + /* Validate sgid/dgid counts against SA client limit */ + p_mpr_req = (osmv_multipath_req_t *) p_query_req->p_query_input; + if (p_mpr_req->sgid_count + p_mpr_req->dgid_count > + IB_MULTIPATH_MAX_GIDS) { + OSM_LOG(p_log, OSM_LOG_ERROR, "DBG:001 MULTIPATH_REC " + "SGID count %d DGID count %d max count %d\n", + p_mpr_req->sgid_count, p_mpr_req->dgid_count, + IB_MULTIPATH_MAX_GIDS); + CL_ASSERT(0); + return IB_ERROR; + } + memset(&u.multipath_rec, 0, sizeof(ib_multipath_rec_t)); + sa_mad_data.method = IB_MAD_METHOD_GETMULTI; + sa_mad_data.attr_id = IB_MAD_ATTR_MULTIPATH_RECORD; + sa_mad_data.attr_offset = + ib_get_attr_offset(sizeof(ib_multipath_rec_t)); + sa_mad_data.p_attr = &u.multipath_rec; + sa_mad_data.comp_mask = p_mpr_req->comp_mask; + u.multipath_rec.num_path = p_mpr_req->num_path; + if (p_mpr_req->reversible) + u.multipath_rec.num_path |= 0x80; + else + u.multipath_rec.num_path &= ~0x80; + u.multipath_rec.pkey = p_mpr_req->pkey; + ib_multipath_rec_set_sl(&u.multipath_rec, p_mpr_req->sl); + ib_multipath_rec_set_qos_class(&u.multipath_rec, 0); + u.multipath_rec.independence = p_mpr_req->independence; + u.multipath_rec.sgid_count = p_mpr_req->sgid_count; + u.multipath_rec.dgid_count = p_mpr_req->dgid_count; + j = 0; + for (i = 0; i < p_mpr_req->sgid_count; i++, j++) + u.multipath_rec.gids[j] = p_mpr_req->gids[j]; + for (i = 0; i < p_mpr_req->dgid_count; i++, j++) + u.multipath_rec.gids[j] = p_mpr_req->gids[j]; + break; +#endif + + default: + OSM_LOG(p_log, OSM_LOG_ERROR, "DBG:001 UNKNOWN\n"); + CL_ASSERT(0); + return 
IB_ERROR; + } + + status = __osmv_send_sa_req(h_bind, &sa_mad_data, p_query_req); + + OSM_LOG_EXIT(p_log); + return status; +} diff --git a/libvendor/osm_vendor_mlx_sar.c b/libvendor/osm_vendor_mlx_sar.c new file mode 100644 index 0000000..5523284 --- /dev/null +++ b/libvendor/osm_vendor_mlx_sar.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include + +ib_api_status_t +osmv_rmpp_sar_init(osmv_rmpp_sar_t * p_sar, void *p_arbt_mad, + uint32_t mad_size, boolean_t is_sa_mad) +{ + CL_ASSERT(p_sar); + p_sar->p_arbt_mad = p_arbt_mad; + if (is_sa_mad) { + p_sar->data_len = mad_size - IB_SA_MAD_HDR_SIZE; + p_sar->hdr_sz = IB_SA_MAD_HDR_SIZE; + p_sar->data_sz = IB_SA_DATA_SIZE; + } else { + p_sar->data_len = mad_size - MAD_RMPP_HDR_SIZE; + p_sar->hdr_sz = MAD_RMPP_HDR_SIZE; + p_sar->data_sz = MAD_RMPP_DATA_SIZE; + } + return IB_SUCCESS; +} + +void osmv_rmpp_sar_done(osmv_rmpp_sar_t * p_sar) +{ + p_sar->p_arbt_mad = NULL; +} + +/* the big mad should be with mad header, rmpp header ( &sa hdr) space */ +ib_api_status_t +osmv_rmpp_sar_get_mad_seg(IN osmv_rmpp_sar_t * p_sar, + IN uint32_t seg_idx, OUT void *p_buf) +{ + void *p_seg; + uint32_t sz_left; + uint32_t num_segs; + + CL_ASSERT(p_sar); + + num_segs = p_sar->data_len / p_sar->data_sz; + if ((p_sar->data_len % p_sar->data_sz) > 0) { + num_segs++; + } + + if ((seg_idx > num_segs) && (seg_idx != 1)) { + return IB_NOT_FOUND; + } + + /* cleanup */ + memset(p_buf, 0, MAD_BLOCK_SIZE); + + /* attach header */ + memcpy(p_buf, p_sar->p_arbt_mad, p_sar->hdr_sz); + + /* fill data */ + p_seg = + (char *)p_sar->p_arbt_mad + p_sar->hdr_sz + + ((seg_idx - 1) * p_sar->data_sz); + sz_left = p_sar->data_len - ((seg_idx - 1) * p_sar->data_sz); + if (sz_left > p_sar->data_sz) + memcpy((char *)p_buf + p_sar->hdr_sz, (char *)p_seg, + p_sar->data_sz); + else + memcpy((char *)p_buf + p_sar->hdr_sz, (char *)p_seg, sz_left); + + return IB_SUCCESS; +} + +/* turns a list of mads to one big mad - including header */ +/* ALSO - deallocates the list */ +ib_api_status_t +osmv_rmpp_sar_reassemble_arbt_mad(osmv_rmpp_sar_t * p_sar, cl_qlist_t * p_bufs) +{ + void *buf_tmp, *p_mad; + cl_list_item_t *p_item; + cl_list_obj_t *p_obj; + uint32_t space_left = p_sar->data_len + p_sar->hdr_sz; + + 
CL_ASSERT(p_sar); + CL_ASSERT(FALSE == cl_is_qlist_empty(p_bufs)); + + /* attach header */ + p_mad = p_sar->p_arbt_mad; + p_item = cl_qlist_head(p_bufs); + p_obj = PARENT_STRUCT(p_item, cl_list_obj_t, list_item); + buf_tmp = cl_qlist_obj(p_obj); + memcpy(p_mad, buf_tmp, p_sar->hdr_sz); + p_mad = (char *)p_mad + p_sar->hdr_sz; + space_left -= p_sar->hdr_sz; + + /* reassemble data */ + while (FALSE == cl_is_qlist_empty(p_bufs)) { + + p_item = cl_qlist_remove_head(p_bufs); + p_obj = PARENT_STRUCT(p_item, cl_list_obj_t, list_item); + buf_tmp = cl_qlist_obj(p_obj); + + if (FALSE == cl_is_qlist_empty(p_bufs)) { + memcpy((char *)p_mad, (char *)buf_tmp + p_sar->hdr_sz, + p_sar->data_sz); + p_mad = (char *)p_mad + p_sar->data_sz; + space_left -= p_sar->data_sz; + } else { + /* the last mad on the list */ + memcpy((char *)p_mad, (char *)buf_tmp + p_sar->hdr_sz, + space_left); + p_mad = (char *)p_mad + space_left; + } + + free(buf_tmp); + free(p_obj); + } + + return IB_SUCCESS; +} diff --git a/libvendor/osm_vendor_mlx_sender.c b/libvendor/osm_vendor_mlx_sender.c new file mode 100644 index 0000000..319a54e --- /dev/null +++ b/libvendor/osm_vendor_mlx_sender.c @@ -0,0 +1,390 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include + +static ib_api_status_t +__osmv_rmpp_send_segment(IN osm_bind_handle_t h_bind, + IN osmv_txn_ctx_t * p_txn, IN uint32_t seg_num); + +/****d* OSM Vendor/osmv_simple_send_madw + * NAME + * osmv_simple_send_madw + * + * DESCRIPTION + * Send a single MAD (256 bytes). + * + * If this MAD requires a response, set the timeout event. + * The function call returns when the MAD's send completion is received. 
+ *
+ */
+
+ib_api_status_t
+osmv_simple_send_madw(IN osm_bind_handle_t h_bind,
+		      IN osm_madw_t * const p_madw,
+		      IN osmv_txn_ctx_t * p_txn, IN boolean_t is_retry)
+{
+	osmv_bind_obj_t *p_bind = (osmv_bind_obj_t *) h_bind;
+	osm_mad_addr_t *p_dest = osm_madw_get_mad_addr_ptr(p_madw);
+	uint8_t wire_buf[MAD_BLOCK_SIZE];
+	ib_mad_t *p_wire_mad = (ib_mad_t *) wire_buf;
+	uint64_t txn_key = 0;
+	ib_api_status_t rc;
+
+	OSM_LOG_ENTER(p_bind->p_vendor->p_log);
+
+	CL_ASSERT(p_madw->mad_size <= MAD_BLOCK_SIZE);
+
+	/* Work on a zero-padded private copy of the caller's MAD */
+	memset(p_wire_mad, 0, MAD_BLOCK_SIZE);
+	memcpy(p_wire_mad, osm_madw_get_mad_ptr(p_madw), p_madw->mad_size);
+
+	/* With a transaction, the on-wire TID is the transaction key */
+	if (NULL != p_txn) {
+		txn_key = osmv_txn_get_key(p_txn);
+		p_wire_mad->trans_id = cl_hton64(txn_key);
+	}
+
+	/*
+	 * Testing feature: when run_randomizer is TRUE the packet drop
+	 * randomizer is consulted and may randomly discard the MAD,
+	 * simulating an unstable fabric. A dropped MAD is reported
+	 * as a successful send.
+	 */
+	if ((p_bind->p_vendor->run_randomizer == TRUE) &&
+	    (osm_pkt_randomizer_mad_drop(p_bind->p_vendor->p_log,
+					 p_bind->p_vendor->p_pkt_randomizer,
+					 p_wire_mad) == TRUE)) {
+		osm_log(p_bind->p_vendor->p_log, OSM_LOG_DEBUG,
+			"The MAD will not be sent. \n");
+		rc = IB_SUCCESS;
+	} else {
+		rc = osmv_transport_mad_send(h_bind, p_wire_mad, p_dest);
+	}
+
+	/* Arm the response timer only for the first send of a transaction */
+	if ((IB_SUCCESS == rc) && (NULL != p_txn) && (!is_retry)) {
+		rc = osmv_txn_set_timeout_ev(h_bind, txn_key,
+					     p_bind->p_vendor->resp_timeout);
+	}
+
+	OSM_LOG_EXIT(p_bind->p_vendor->p_log);
+	return rc;
+}
+
+/***** OSM Vendor/osmv_rmpp_send_madw
+ * NAME
+ *   osmv_rmpp_send_madw
+ *
+ * DESCRIPTION
+ *   Send a single message (MAD wrapper of arbitrary length).
+ *   Follow the RMPP semantics
+ *   (segmentation, send window, timeouts etc).
+ *
+ *   The function call returns either when the whole message
+ *   has been acknowledged, or upon error.
+ *
+ * ASSUMPTIONS
+ *   The RMPP sender context is set up
+ *
+ * RETURN VALUES
+ *   IB_SUCCESS when every segment was acknowledged (and, for a
+ *   double-sided transfer, the ACK on segment 0 was sent);
+ *   IB_INTERRUPTED when the bind handle is being closed;
+ *   otherwise the first error reported by the send path or by the
+ *   RMPP send context.
+ */
+
+ib_api_status_t
+osmv_rmpp_send_madw(IN osm_bind_handle_t h_bind,
+		    IN osm_madw_t * const p_madw,
+		    IN osmv_txn_ctx_t * p_txn, IN boolean_t is_rmpp_ds)
+{
+	ib_api_status_t ret = IB_SUCCESS;
+	uint32_t i, total_segs;
+
+	osmv_rmpp_send_ctx_t *p_send_ctx = osmv_txn_get_rmpp_send_ctx(p_txn);
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+
+	OSM_LOG_ENTER(p_bo->p_vendor->p_log);
+
+	total_segs = osmv_rmpp_send_ctx_get_num_segs(p_send_ctx);
+	CL_ASSERT(total_segs >= 1);
+
+	/* In the double-sided transfer, wait for ACK 0 */
+
+	for (;;) {
+
+		if (p_send_ctx->window_first > total_segs) {
+
+			/* Every segment is acknowledged */
+			break;
+		}
+
+		/* Send the next burst. */
+		for (i = p_send_ctx->window_first; i <= p_send_ctx->window_last;
+		     i++) {
+
+			/* Send a segment and setup a timeout timer */
+			ret = __osmv_rmpp_send_segment(h_bind, p_txn, i);
+			if (IB_SUCCESS != ret) {
+				goto send_done;
+			}
+		}
+
+		/* Set the Response Timeout for the ACK on the last DATA segment */
+		ret = osmv_txn_set_timeout_ev(h_bind, osmv_txn_get_key(p_txn),
+					      p_bo->p_vendor->resp_timeout);
+		if (IB_SUCCESS != ret) {
+			goto send_done;
+		}
+
+		/* Going to sleep. Let the others access the transaction DB */
+		osmv_txn_unlock(p_bo);
+
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+			"RMPP Sender thread (madw=%p) going to sleep ...\n",
+			p_madw);
+
+		/* Await the next event to happen */
+		cl_event_wait_on(&p_send_ctx->event,
+				 EVENT_NO_TIMEOUT, TRUE /* interruptible */ );
+
+		/* Got a signal from the MAD dispatcher/timeout handler */
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+			"RMPP Sender thread (madw=%p) waking up on a signal ...\n",
+			p_madw);
+
+		/* Let's see what changed... Make this atomic - re-acquire the lock. */
+		osmv_txn_lock(p_bo);
+
+		if (TRUE == p_bo->is_closing) {
+			osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
+				"osmv_rmpp_send_madw: ERR 6601: "
+				"The bind handle %p is being closed. "
+				"Stopping the RMPP Send of MADW %p\n",
+				h_bind, p_madw);
+
+			/*
+			 * Leave through the common exit so that the
+			 * function-exit log record is emitted. The value
+			 * returned to the caller is still IB_INTERRUPTED.
+			 */
+			ret = IB_INTERRUPTED;
+			goto send_done;
+		}
+
+		/* STOP? ABORT? TIMEOUT? */
+		if (IB_SUCCESS != p_send_ctx->status) {
+			osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
+				"osmv_rmpp_send_madw: ERR 6602: "
+				"An error (%s) happened during the RMPP send of %p. Bailing out.\n",
+				ib_get_err_str(p_send_ctx->status), p_madw);
+			ret = p_send_ctx->status;
+			goto send_done;
+		}
+	}
+
+	if (TRUE == is_rmpp_ds) {
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+			"Double-sided RMPP - switching to be the receiver.\n");
+
+		ret = osmv_txn_init_rmpp_receiver(h_bind, p_txn, FALSE
+						  /*Send was initiated by me */
+		    );
+
+		if (IB_SUCCESS == ret) {
+			/* Send ACK on the 0 segment */
+			ret = __osmv_rmpp_send_segment(h_bind, p_txn, 0);
+		}
+	}
+
+send_done:
+	OSM_LOG_EXIT(p_bo->p_vendor->p_log);
+	return ret;
+}
+
+/*
+ * NAME                osmv_rmpp_send_ack
+ *
+ * DESCRIPTION         Build an RMPP ACK from a copy of the request MAD
+ *                     (inverted method, type ACK, the given segment
+ *                     number and new window last, ACTIVE flag) and send
+ *                     it to p_mad_addr.
+ *
+ *                     When OSMV_RANDOM_DROP is defined the ACK may be
+ *                     dropped on purpose; IB_SUCCESS is returned then.
+ */
+
+ib_api_status_t
+osmv_rmpp_send_ack(IN osm_bind_handle_t h_bind,
+		   IN const ib_mad_t * p_req_mad,
+		   IN uint32_t seg_num,
+		   IN uint32_t nwl, IN const osm_mad_addr_t * p_mad_addr)
+{
+	uint8_t resp_mad[MAD_BLOCK_SIZE];
+	ib_rmpp_mad_t *p_resp_mad = (ib_rmpp_mad_t *) resp_mad;
+
+#ifdef OSMV_RANDOM_DROP
+	if (TRUE == osmv_random_drop()) {
+		osm_log(((osmv_bind_obj_t *) h_bind)->p_vendor->p_log,
+			OSM_LOG_DEBUG,
+			"Error injection - dropping the RMPP ACK\n");
+		return IB_SUCCESS;
+	}
+#endif
+
+	/* Start from the request so that all header fields are echoed */
+	memcpy(p_resp_mad, p_req_mad, MAD_BLOCK_SIZE);
+
+	p_resp_mad->common_hdr.method = osmv_invert_method(p_req_mad->method);
+	p_resp_mad->rmpp_type = IB_RMPP_TYPE_ACK;
+	p_resp_mad->seg_num = cl_hton32(seg_num);
+	p_resp_mad->paylen_newwin = cl_hton32(nwl);
+	p_resp_mad->rmpp_flags = IB_RMPP_FLAG_ACTIVE;
+
+	return osmv_transport_mad_send(h_bind, p_resp_mad, p_mad_addr);
+}
+
+/*
+
* NAME                osmv_rmpp_send_nak
+ *
+ * DESCRIPTION         Send the RMPP ABORT or STOP packet
+ *
+ *                     The response is a copy of the request MAD with the
+ *                     method inverted, rmpp_type set to nak_type and
+ *                     rmpp_status set to status.
+ */
+
+ib_api_status_t
+osmv_rmpp_send_nak(IN osm_bind_handle_t h_bind,
+		   IN const ib_mad_t * p_req_mad,
+		   IN const osm_mad_addr_t * p_mad_addr,
+		   IN uint8_t nak_type, IN uint8_t status)
+{
+	uint8_t resp_mad[MAD_BLOCK_SIZE];
+	ib_rmpp_mad_t *p_resp_mad = (ib_rmpp_mad_t *) resp_mad;
+
+	memcpy(p_resp_mad, p_req_mad, MAD_BLOCK_SIZE);
+
+	p_resp_mad->common_hdr.method = osmv_invert_method(p_req_mad->method);
+	p_resp_mad->rmpp_type = nak_type;
+	p_resp_mad->rmpp_status = status;
+
+	return osmv_transport_mad_send(h_bind, p_resp_mad, p_mad_addr);
+}
+
+/*
+ * NAME              __osmv_rmpp_send_segment
+ *
+ * DESCRIPTION       Build a MAD for a specific segment and send it
+ *
+ *                   seg_num 0 is special: it stands for the ACK of the
+ *                   double-sided handshake and is sent through
+ *                   osmv_rmpp_send_ack. Any other seg_num is a DATA
+ *                   segment taken from the transaction's send context.
+ *
+ *                   Returns the status of building/sending the segment.
+ *                   A segment dropped on purpose (error injection or the
+ *                   packet randomizer) is reported as IB_SUCCESS.
+ */
+
+static ib_api_status_t
+__osmv_rmpp_send_segment(IN osm_bind_handle_t h_bind,
+			 IN osmv_txn_ctx_t * p_txn, IN uint32_t seg_num)
+{
+	ib_api_status_t ret;
+	osmv_rmpp_send_ctx_t *p_send_ctx;
+	uint8_t mad_buf[MAD_BLOCK_SIZE];
+	ib_mad_t *p_mad = (ib_mad_t *) mad_buf;
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+	osm_mad_addr_t *p_mad_addr =
+	    osm_madw_get_mad_addr_ptr(osmv_txn_get_madw(p_txn));
+	uint32_t timeout = p_bo->p_vendor->resp_timeout;
+	uint64_t key;
+
+	OSM_LOG_ENTER(p_bo->p_vendor->p_log);
+
+#ifdef OSMV_RANDOM_DROP
+	if (TRUE == osmv_random_drop()) {
+
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+			"Error injection - simulating the RMPP segment drop\n");
+		/* Use the common exit so the function-exit record is logged */
+		ret = IB_SUCCESS;
+		goto seg_done;
+	}
+#endif
+
+	p_send_ctx = osmv_txn_get_rmpp_send_ctx(p_txn);
+	key = osmv_txn_get_key(p_txn);
+
+	if (0 != seg_num) {
+		ret =
+		    osmv_rmpp_send_ctx_get_seg(p_send_ctx, seg_num, timeout,
+					       p_mad);
+		CL_ASSERT(IB_SUCCESS == ret);
+		/*
+		 * The assertion may be compiled out. Never put the
+		 * uninitialized stack buffer on the wire if the segment
+		 * could not be built.
+		 */
+		if (IB_SUCCESS != ret) {
+			goto seg_done;
+		}
+
+		/* Put the segment to the wire ! */
+		p_mad->trans_id = cl_hton64(key);
+
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+			"Sending RMPP segment #%d, on-wire TID=0x%" PRIx64 "\n",
+			seg_num, p_mad->trans_id);
+
+		/*
+		   Add call for packet drop randomizer.
+		   This is a testing feature. If run_randomizer flag is set to TRUE,
+		   the randomizer will be called, and randomly will drop
+		   a packet. This is used for simulating unstable fabric.
+		 */
+		if (p_bo->p_vendor->run_randomizer == TRUE) {
+			/* Try the randomizer */
+			if (osm_pkt_randomizer_mad_drop(p_bo->p_vendor->p_log,
+							p_bo->p_vendor->
+							p_pkt_randomizer,
+							p_mad) == TRUE) {
+				osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+					"The MAD will not be sent. \n");
+				ret = IB_SUCCESS;
+			} else {
+				ret =
+				    osmv_transport_mad_send((osm_bind_handle_t)
+							    p_bo, p_mad,
+							    p_mad_addr);
+			}
+		} else {
+			ret =
+			    osmv_transport_mad_send((osm_bind_handle_t) p_bo,
+						    p_mad, p_mad_addr);
+		}
+	} else {
+		/* This is an ACK for double-sided handshake. Give it a special treatment. */
+
+		/* It doesn't really matter which data to put. Only the header matters. */
+		ret = osmv_rmpp_send_ctx_get_seg(p_send_ctx, 1, timeout, p_mad);
+		CL_ASSERT(IB_SUCCESS == ret);
+		/* Same guard as above: no header, no ACK */
+		if (IB_SUCCESS != ret) {
+			goto seg_done;
+		}
+
+		p_mad->trans_id = cl_hton64(key);
+		ret =
+		    osmv_rmpp_send_ack((osm_bind_handle_t) p_bo, p_mad,
+				       0 /* segnum */ ,
+				       OSMV_RMPP_RECV_WIN /* NWL */ ,
+				       p_mad_addr);
+	}
+
+seg_done:
+	OSM_LOG_EXIT(p_bo->p_vendor->p_log);
+	return ret;
+}
diff --git a/libvendor/osm_vendor_mlx_sim.c b/libvendor/osm_vendor_mlx_sim.c
new file mode 100644
index 0000000..d2b2b64
--- /dev/null
+++ b/libvendor/osm_vendor_mlx_sim.c
@@ -0,0 +1,441 @@
+/*
+ * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* AUTHOR Eitan Zahavi + * + * DESCRIPTION + * The lower-level MAD transport interface implementation + * that allows sending a single MAD/receiving a callback + * when a single MAD is received. 
+ */
+
+#if HAVE_CONFIG_H
+#  include
+#endif				/* HAVE_CONFIG_H */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+/* the simulator messages definition */
+#include
+
+typedef struct _osmv_ibms_transport_mgr {
+	ibms_conn_handle_t conHdl;	/* the connection handle we talk to */
+	ibms_bind_msg_t filter;	/* the bind message defining the filtering */
+	cl_thread_t receiver;	/* the thread waiting for incoming messages */
+} osmv_ibms_transport_mgr_t;
+
+static void
+__osmv_ibms_mad_addr_to_osm_addr(IN osm_vendor_t const *p_vend,
+				 IN struct _ibms_mad_addr *p_ibms_addr,
+				 IN uint8_t is_smi,
+				 OUT osm_mad_addr_t * p_osm_addr);
+
+static void
+__osmv_ibms_osm_addr_to_mad_addr(IN const osm_mad_addr_t * p_osm_addr,
+				 IN uint8_t is_smi,
+				 OUT struct _ibms_mad_addr *p_ibms_addr);
+
+/* this is the callback function the "server" will call on incoming
+   messages. p_ctx is the bind object registered in osmv_transport_init.
+   The MAD is logged, its simulator address is converted to an
+   osm_mad_addr_t and it is handed to osmv_dispatch_mad. Nothing is done
+   when the bind object is stale or closing. */
+void __osmv_ibms_receiver_callback(void *p_ctx, ibms_mad_msg_t * p_mad)
+{
+	osm_mad_addr_t mad_addr;
+	osmv_bind_obj_t *const p_bo = (osmv_bind_obj_t *) p_ctx;
+	ib_api_status_t status = IB_SUCCESS;
+
+	/* Make sure the p_bo object is still relevant */
+	if ((p_bo->magic_ptr != p_bo) || p_bo->is_closing)
+		return;
+
+	{
+		OSM_LOG_ENTER(p_bo->p_vendor->p_log);
+
+		/* some logging */
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+			"__osmv_ibms_receiver_callback: "
+			"MAD QPN:%d SLID:0x%04x class:0x%02x "
+			"method:0x%02x attr:0x%04x status:0x%04x "
+			"tid:0x%016" PRIx64 "\n",
+			p_mad->addr.dqpn,
+			cl_ntoh16(p_mad->addr.slid),
+			p_mad->header.mgmt_class,
+			p_mad->header.method,
+			cl_ntoh16(p_mad->header.attr_id),
+			cl_ntoh16(p_mad->header.status),
+			cl_ntoh64(p_mad->header.trans_id));
+
+		/* first arrange an address */
+		__osmv_ibms_mad_addr_to_osm_addr(p_bo->p_vendor,
+						 &p_mad->addr,
+						 (((ib_mad_t *) & p_mad->
+						   header)->mgmt_class ==
+						  IB_MCLASS_SUBN_LID)
+						 ||
+						 (((ib_mad_t *) & p_mad->
+						   header)->mgmt_class ==
+						  IB_MCLASS_SUBN_DIR),
+						 &mad_addr);
+
+		/* call the receiver callback */
+
+		status =
+		    osmv_dispatch_mad((osm_bind_handle_t) p_bo,
+				      (void *)&p_mad->header, &mad_addr);
+
+		OSM_LOG_EXIT(p_bo->p_vendor->p_log);
+	}
+}
+
+ib_api_status_t
+osm_vendor_get_guid_by_ca_and_port(IN osm_vendor_t * const p_vend,
+				   IN char *hca_id,
+				   IN uint32_t port_num,
+				   OUT uint64_t * p_port_guid);
+
+/*
+ * NAME
+ *   osmv_transport_init
+ *
+ * DESCRIPTION
+ *   Setup the MAD transport infrastructure (filters, callbacks etc).
+ *
+ *   Connects to the simulator for the port GUID of (hca_id,
+ *   p_bo->port_num) and binds input filters for the MAD class in
+ *   p_info: QP0 with both SMI classes for IB_MCLASS_SUBN_LID/DIR,
+ *   QP1 with p_info->mad_class otherwise.
+ *
+ * RETURN VALUES
+ *   IB_SUCCESS, IB_INSUFFICIENT_MEMORY, IB_INVALID_GUID or IB_ERROR.
+ *   On any failure nothing is left allocated or connected and p_bo is
+ *   not modified.
+ */
+
+ib_api_status_t
+osmv_transport_init(IN osm_bind_info_t * p_info,
+		    IN char hca_id[VENDOR_HCA_MAXNAMES],
+		    IN uint8_t hca_idx, IN osmv_bind_obj_t * p_bo)
+{
+	ibms_conn_handle_t conHdl;	/* the connection we talk to the simulator through */
+	osmv_ibms_transport_mgr_t *p_mgr =
+	    malloc(sizeof(osmv_ibms_transport_mgr_t));
+	int qpn;
+	int ibms_status;
+	uint64_t port_guid;
+
+	if (!p_mgr) {
+		return IB_INSUFFICIENT_MEMORY;
+	}
+
+	memset(p_mgr, 0, sizeof(osmv_ibms_transport_mgr_t));
+
+	/* create the client socket connected to the simulator */
+	/* also perform the "connect" message - such that we
+	   validate the target guid */
+	if (osm_vendor_get_guid_by_ca_and_port
+	    (p_bo->p_vendor, hca_id, p_bo->port_num, &port_guid)) {
+		free(p_mgr);
+		return IB_INVALID_GUID;
+	}
+
+	conHdl =
+	    ibms_connect(port_guid, __osmv_ibms_receiver_callback,
+			 (void *)p_bo);
+	if (!conHdl) {
+		printf("fail to connect to the server.\n");
+		free(p_mgr);
+		return IB_ERROR;
+	}
+
+	/*
+	 * Create the MAD filter on this file handle.
+	 */
+
+	p_mgr->filter.port = p_bo->port_num;
+	p_mgr->filter.only_input = 1;
+	p_mgr->filter.mask =
+	    IBMS_BIND_MASK_PORT |
+	    IBMS_BIND_MASK_INPUT | IBMS_BIND_MASK_QP | IBMS_BIND_MASK_CLASS;
+
+	switch (p_info->mad_class) {
+	case IB_MCLASS_SUBN_LID:
+	case IB_MCLASS_SUBN_DIR:
+		qpn = 0;
+		p_mgr->filter.qpn = qpn;
+		p_mgr->filter.mgt_class = IB_MCLASS_SUBN_LID;
+		ibms_status = ibms_bind(conHdl, &p_mgr->filter);
+		if (ibms_status) {
+			goto bind_failed;
+		}
+
+		p_mgr->filter.mgt_class = IB_MCLASS_SUBN_DIR;
+		ibms_status = ibms_bind(conHdl, &p_mgr->filter);
+		if (ibms_status) {
+			goto bind_failed;
+		}
+
+		break;
+
+	case IB_MCLASS_SUBN_ADM:
+	default:
+		qpn = 1;
+		p_mgr->filter.qpn = qpn;
+		p_mgr->filter.mgt_class = p_info->mad_class;
+		ibms_status = ibms_bind(conHdl, &p_mgr->filter);
+		if (ibms_status) {
+			goto bind_failed;
+		}
+		break;
+	}
+
+	p_mgr->conHdl = conHdl;
+
+	p_bo->p_transp_mgr = p_mgr;
+
+	/* Initialize the magic_ptr to the pointer of the p_bo info.
+	   This will be used to signal when the object is being destroyed, so no
+	   real action will be done then. */
+	p_bo->magic_ptr = p_bo;
+
+	return IB_SUCCESS;
+
+bind_failed:
+	/* Neither the connection nor the manager was published to p_bo
+	   yet, so they must be released here or they are lost. */
+	ibms_disconnect(conHdl);
+	free(p_mgr);
+	return IB_ERROR;
+}
+
+/*
+ * NAME
+ *   osmv_transport_mad_send
+ *
+ * DESCRIPTION
+ *   Send a single MAD (256 byte)
+ *
+ *   Returns IB_INVALID_CALLBACK when the bind object is stale or
+ *   closing, IB_ERROR when the simulator refuses the MAD and
+ *   IB_SUCCESS otherwise.
+ */
+
+ib_api_status_t
+osmv_transport_mad_send(IN const osm_bind_handle_t h_bind,
+			IN void *p_mad, IN const osm_mad_addr_t * p_mad_addr)
+{
+
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+	osm_vendor_t const *p_vend = p_bo->p_vendor;
+	int ret;
+	ibms_mad_msg_t mad_msg;
+	ib_api_status_t status;
+
+	const ib_mad_t *p_mad_hdr = p_mad;
+
+	OSM_LOG_ENTER(p_vend->p_log);
+
+	memset(&mad_msg, 0, sizeof(mad_msg));
+
+	/* Make sure the p_bo object is still relevant */
+	if ((p_bo->magic_ptr != p_bo) || p_bo->is_closing)
+		return IB_INVALID_CALLBACK;
+
+	/*
+	 * Copy the MAD over to the sent mad
+	 */
+	memcpy(&mad_msg.header, p_mad_hdr, MAD_BLOCK_SIZE);
+
+	/*
+	 * For all sends other than directed route SM MADs,
+	 * acquire an address vector for the destination.
+	 */
+	if (p_mad_hdr->mgmt_class != IB_MCLASS_SUBN_DIR) {
+
+		__osmv_ibms_osm_addr_to_mad_addr(p_mad_addr,
+						 p_mad_hdr->mgmt_class ==
+						 IB_MCLASS_SUBN_LID,
+						 &mad_msg.addr);
+	} else {
+		/* is a directed route - we need to construct a permissive address */
+		/* we do not need port number since it is part of the mad_hndl */
+		mad_msg.addr.dlid = IB_LID_PERMISSIVE;
+		mad_msg.addr.slid = IB_LID_PERMISSIVE;
+		mad_msg.addr.sqpn = 0;
+		mad_msg.addr.dqpn = 0;
+	}
+
+	osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+		"osmv_transport_mad_send: "
+		"Sending QPN:%d DLID:0x%04x class:0x%02x "
+		"method:0x%02x attr:0x%04x status:0x%04x "
+		"tid:0x%016" PRIx64 "\n",
+		mad_msg.addr.dqpn,
+		cl_ntoh16(mad_msg.addr.dlid),
+		mad_msg.header.mgmt_class,
+		mad_msg.header.method,
+		cl_ntoh16(mad_msg.header.attr_id),
+		cl_ntoh16(mad_msg.header.status),
+		cl_ntoh64(mad_msg.header.trans_id)
+	    );
+
+	/* send it */
+	ret =
+	    ibms_send(((osmv_ibms_transport_mgr_t *) (p_bo->p_transp_mgr))->
+		      conHdl, &mad_msg);
+	if (ret) {
+		osm_log(p_vend->p_log, OSM_LOG_ERROR,
+			"osmv_transport_mad_send: ERR 5304: "
+			"Error sending mad (%d).\n", ret);
+		status = IB_ERROR;
+		goto Exit;
+	}
+
+	status = IB_SUCCESS;
+
+Exit:
+	OSM_LOG_EXIT(p_vend->p_log);
+	return (status);
+}
+
+/*
+ * Tear down the transport of a bind object: invalidate magic_ptr so
+ * that late callbacks are ignored, disconnect from the simulator and
+ * free the transport manager.
+ */
+void osmv_transport_done(IN const osm_bind_handle_t h_bind)
+{
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+	osmv_ibms_transport_mgr_t *p_tpot_mgr;
+
+	/* Validate the handle before it is dereferenced */
+	CL_ASSERT(p_bo);
+
+	p_tpot_mgr = (osmv_ibms_transport_mgr_t *) (p_bo->p_transp_mgr);
+
+	/* First of all - zero out the magic_ptr, so if a callback is called -
+	   it'll know that we are currently closing down, and will not handle the
+	   mad. */
+	p_bo->magic_ptr = 0;
+	/* usleep(3000000); */
+
+	ibms_disconnect(p_tpot_mgr->conHdl);
+
+	/* the connection is gone - release the transport manager */
+	free(p_tpot_mgr);
+}
+
+/* Fill the simulator address used for sending from an osm_mad_addr_t */
+static void
+__osmv_ibms_osm_addr_to_mad_addr(IN const osm_mad_addr_t * p_osm_addr,
+				 IN uint8_t is_smi,
+				 OUT struct _ibms_mad_addr *p_ibms_addr)
+{
+
+	/* For global destination or Multicast address: */
+	p_ibms_addr->dlid = cl_ntoh16(p_osm_addr->dest_lid);
+	p_ibms_addr->sl = p_osm_addr->addr_type.gsi.service_level;
+	if (is_smi) {
+		p_ibms_addr->sqpn = 0;
+		p_ibms_addr->dqpn = 0;
+	} else {
+		p_ibms_addr->sqpn = 1;
+		p_ibms_addr->dqpn =
+		    cl_ntoh32(p_osm_addr->addr_type.gsi.remote_qp);
+	}
+	/*
+	   HACK we limit to the first PKey Index assuming it will
+	   always be the default PKey
+	 */
+	p_ibms_addr->pkey_index = 0;
+}
+
+/* Build the osm_mad_addr_t of the sender of a received simulator MAD */
+static void
+__osmv_ibms_mad_addr_to_osm_addr(IN osm_vendor_t const *p_vend,
+				 IN struct _ibms_mad_addr *p_ibms_addr,
+				 IN uint8_t is_smi,
+				 OUT osm_mad_addr_t * p_osm_addr)
+{
+	memset(p_osm_addr, 0, sizeof(osm_mad_addr_t));
+	p_osm_addr->dest_lid = cl_hton16(p_ibms_addr->slid);
+	p_osm_addr->static_rate = 0;
+	p_osm_addr->path_bits = 0;
+	if (is_smi) {
+		/* SMI */
+		p_osm_addr->addr_type.smi.source_lid =
+		    cl_hton16(p_ibms_addr->slid);
+		p_osm_addr->addr_type.smi.port_num = 1;	/* TODO add if required p_ibms_addr->port; */
+	} else {
+		/* GSI */
+		p_osm_addr->addr_type.gsi.remote_qp =
+		    cl_ntoh32(p_ibms_addr->sqpn);
+		p_osm_addr->addr_type.gsi.remote_qkey = IB_QP1_WELL_KNOWN_Q_KEY;
+		p_osm_addr->addr_type.gsi.pkey_ix = p_ibms_addr->pkey_index;
+		p_osm_addr->addr_type.gsi.service_level = p_ibms_addr->sl;
+
+		p_osm_addr->addr_type.gsi.global_route = FALSE;
+		/* copy the GRH data if relevant - TopSpin imp doesnt relate to GRH!!! */
+		/*
+		   if (p_osm_addr->addr_type.gsi.global_route)
+		   {
+		   p_osm_addr->addr_type.gsi.grh_info.ver_class_flow =
+		   ib_grh_set_ver_class_flow(p_rcv_desc->grh.IP_version,
+		   p_rcv_desc->grh.traffic_class,
+		   p_rcv_desc->grh.flow_label);
+		   p_osm_addr->addr_type.gsi.grh_info.hop_limit =  p_rcv_desc->grh.hop_limit;
+		   memcpy(&p_osm_addr->addr_type.gsi.grh_info.src_gid.raw,
+		   &p_rcv_desc->grh.sgid, sizeof(ib_net64_t));
+		   memcpy(&p_osm_addr->addr_type.gsi.grh_info.dest_gid.raw,
+		   p_rcv_desc->grh.dgid,  sizeof(ib_net64_t));
+		   }
+		 */
+	}
+}
+
+/*
+ * NAME            osm_vendor_set_sm
+ *
+ * DESCRIPTION     Modifies the port info for the bound port to set the "IS_SM" bit
+ *                 according to the value given (TRUE or FALSE).
+ */
+
+void osm_vendor_set_sm(IN osm_bind_handle_t h_bind, IN boolean_t is_sm_val)
+{
+	osmv_bind_obj_t *p_bind = (osmv_bind_obj_t *) h_bind;
+	osm_vendor_t const *p_vendor = p_bind->p_vendor;
+	osmv_ibms_transport_mgr_t *p_mgr;
+	ibms_cap_msg_t cap_msg;
+
+	OSM_LOG_ENTER(p_vendor->p_log);
+
+	/* Only the IS_SM capability bit is touched; set or clear it */
+	cap_msg.mask = IB_PORT_CAP_IS_SM;
+	cap_msg.capabilities = is_sm_val ? IB_PORT_CAP_IS_SM : 0;
+
+	p_mgr = (osmv_ibms_transport_mgr_t *) (p_bind->p_transp_mgr);
+
+	/* A non-zero result from the simulator is logged, not returned */
+	if (ibms_set_cap(p_mgr->conHdl, &cap_msg)) {
+		osm_log(p_vendor->p_log, OSM_LOG_ERROR,
+			"osm_vendor_set_sm: ERR 5312: "
+			"Unable set 'IS_SM' bit to:%u in port attributes.\n",
+			is_sm_val);
+	}
+
+	OSM_LOG_EXIT(p_vendor->p_log);
+}
diff --git a/libvendor/osm_vendor_mlx_ts.c b/libvendor/osm_vendor_mlx_ts.c
new file mode 100644
index 0000000..d7ab3b3
--- /dev/null
+++ b/libvendor/osm_vendor_mlx_ts.c
@@ -0,0 +1,505 @@
+/*
+ * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*  AUTHOR                 Edward Bortnikov
+ *
+ *  DESCRIPTION
+ *     The lower-level MAD transport interface implementation
+ *     that allows sending a single MAD/receiving a callback
+ *     when a single MAD is received.
+ */
+
+#if HAVE_CONFIG_H
+#  include
+#endif				/* HAVE_CONFIG_H */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+typedef struct _osmv_TOPSPIN_transport_mgr_ {
+	int device_fd;
+	osm_ts_user_mad_filter filter;
+	cl_thread_t receiver;
+} osmv_TOPSPIN_transport_mgr_t;
+
+static void
+__osmv_TOPSPIN_mad_addr_to_osm_addr(IN osm_vendor_t const *p_vend,
+				    IN struct ib_mad *p_mad,
+				    IN uint8_t is_smi,
+				    OUT osm_mad_addr_t * p_mad_addr);
+
+static void
+__osmv_TOPSPIN_osm_addr_to_mad_addr(IN const osm_mad_addr_t * p_mad_addr,
+				    IN uint8_t is_smi,
+				    OUT struct ib_mad *p_mad);
+
+/*
+ * Receiver thread body. p_ctx is the bind object. The thread reads one
+ * MAD at a time from the device file, converts its address and passes
+ * it to osmv_dispatch_mad. It stops on a short/failed read or when the
+ * dispatcher reports IB_INTERRUPTED.
+ *
+ * The bind object is re-validated around every call that may take a
+ * while. When it is found stale or closing the thread returns at once,
+ * deliberately without touching p_bo any further.
+ */
+void __osmv_TOPSPIN_receiver_thr(void *p_ctx)
+{
+	osmv_bind_obj_t *const p_bo = (osmv_bind_obj_t *) p_ctx;
+	struct ib_mad rx_mad;
+	osm_mad_addr_t src_addr;
+	ib_api_status_t rc = IB_SUCCESS;
+	uint8_t is_smi;
+	int nread;
+
+	OSM_LOG_ENTER(p_bo->p_vendor->p_log);
+
+	/* Nothing to do if the bind object is already gone */
+	if ((p_bo->magic_ptr != p_bo) || p_bo->is_closing)
+		return;
+
+	/* we set the type of cancelation for this thread */
+	/* pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); */
+
+	for (;;) {
+		/* Re-check before blocking in read() */
+		if ((p_bo->magic_ptr != p_bo) || p_bo->is_closing)
+			return;
+
+		/* one MAD per read; it is passed on to the dispatcher below */
+		nread =
+		    read(((osmv_TOPSPIN_transport_mgr_t *) (p_bo->
+							    p_transp_mgr))->
+			 device_fd, &rx_mad, sizeof(rx_mad));
+
+		/* The object may have been torn down while we were blocked */
+		if ((p_bo->magic_ptr != p_bo) || p_bo->is_closing)
+			return;
+
+		if (nread != sizeof(rx_mad)) {
+			osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
+				"__osmv_TOPSPIN_receiver_thr: ERR 6803: "
+				"error with read, bytes = %d, errno = %d\n",
+				nread, errno);
+			break;
+		}
+
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+			"__osmv_TOPSPIN_receiver_thr: "
+			"MAD QPN:%d SLID:0x%04x class:0x%02x "
+			"method:0x%02x attr:0x%04x status:0x%04x "
+			"tid:0x%016" PRIx64 "\n",
+			rx_mad.dqpn,
+			cl_ntoh16(rx_mad.slid),
+			rx_mad.mgmt_class,
+			rx_mad.r_method,
+			cl_ntoh16(rx_mad.attribute_id),
+			cl_ntoh16(rx_mad.status),
+			cl_ntoh64(rx_mad.transaction_id));
+
+		/* SMI traffic is either LID-routed or directed-route SM class */
+		is_smi =
+		    (((ib_mad_t *) & rx_mad)->mgmt_class ==
+		     IB_MCLASS_SUBN_LID)
+		    || (((ib_mad_t *) & rx_mad)->mgmt_class ==
+			IB_MCLASS_SUBN_DIR);
+
+		/* build the sender address */
+		__osmv_TOPSPIN_mad_addr_to_osm_addr(p_bo->p_vendor,
+						    &rx_mad, is_smi,
+						    &src_addr);
+
+		/* hand the MAD to the dispatcher */
+		rc = osmv_dispatch_mad((osm_bind_handle_t) p_bo,
+				       (void *)&rx_mad, &src_addr);
+
+		/* The dispatcher may have run long; validate once more */
+		if ((p_bo->magic_ptr != p_bo) || p_bo->is_closing)
+			return;
+
+		if (IB_INTERRUPTED == rc) {
+
+			osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+				"__osmv_TOPSPIN_receiver_thr: "
+				"The bind handle %p is being closed. "
+				"Breaking the loop.\n", p_bo);
+			break;
+		}
+	}
+
+	OSM_LOG_EXIT(p_bo->p_vendor->p_log);
+}
+
+/*
+ * NAME
+ *   osmv_transport_init
+ *
+ * DESCRIPTION
+ *   Setup the MAD transport infrastructure (filters, callbacks etc).
+ *
+ *   Opens /dev/ts_ua<hca_idx>, registers input MAD filters for the
+ *   class in p_info (QP0 with both SMI classes for
+ *   IB_MCLASS_SUBN_LID/DIR, QP1 with p_info->mad_class otherwise) and
+ *   starts the receiver thread.
+ *
+ *   If the device cannot be opened or a filter cannot be registered,
+ *   the file descriptor and the transport manager are released and
+ *   p_bo is left untouched.
+ */
+
+ib_api_status_t
+osmv_transport_init(IN osm_bind_info_t * p_info,
+		    IN char hca_id[VENDOR_HCA_MAXNAMES],
+		    IN uint8_t hca_idx, IN osmv_bind_obj_t * p_bo)
+{
+	cl_status_t cl_st;
+	char device_file[16];
+	int device_fd;
+	int ts_ioctl_ret;
+	osmv_TOPSPIN_transport_mgr_t *p_mgr =
+	    malloc(sizeof(osmv_TOPSPIN_transport_mgr_t));
+	int qpn;
+
+	if (!p_mgr) {
+		return IB_INSUFFICIENT_MEMORY;
+	}
+
+	memset(p_mgr, 0, sizeof(osmv_TOPSPIN_transport_mgr_t));
+
+	/* open TopSpin file device */
+	/* HACK: assume last char in hostid is the HCA index */
+	snprintf(device_file, sizeof(device_file), "/dev/ts_ua%u", hca_idx);
+	device_fd = open(device_file, O_RDWR);
+	if (device_fd < 0) {
+		fprintf(stderr, "Fatal: Fail to open the file:%s err:%d\n",
+			device_file, errno);
+		free(p_mgr);
+		return IB_ERROR;
+	}
+
+	/*
+	 * Create the MAD filter on this file handle.
+	 */
+
+	p_mgr->filter.port = p_bo->port_num;
+	p_mgr->filter.direction = TS_IB_MAD_DIRECTION_IN;
+	p_mgr->filter.mask =
+	    TS_IB_MAD_FILTER_DIRECTION |
+	    TS_IB_MAD_FILTER_PORT |
+	    TS_IB_MAD_FILTER_QPN | TS_IB_MAD_FILTER_MGMT_CLASS;
+
+	switch (p_info->mad_class) {
+	case IB_MCLASS_SUBN_LID:
+	case IB_MCLASS_SUBN_DIR:
+		qpn = 0;
+		p_mgr->filter.qpn = qpn;
+		p_mgr->filter.mgmt_class = IB_MCLASS_SUBN_LID;
+		ts_ioctl_ret =
+		    ioctl(device_fd, TS_IB_IOCSMADFILTADD, &p_mgr->filter);
+		if (ts_ioctl_ret < 0) {
+			goto filter_failed;
+		}
+
+		p_mgr->filter.mgmt_class = IB_MCLASS_SUBN_DIR;
+		ts_ioctl_ret =
+		    ioctl(device_fd, TS_IB_IOCSMADFILTADD, &p_mgr->filter);
+		if (ts_ioctl_ret < 0) {
+			goto filter_failed;
+		}
+
+		break;
+
+	case IB_MCLASS_SUBN_ADM:
+	default:
+		qpn = 1;
+		p_mgr->filter.qpn = qpn;
+		p_mgr->filter.mgmt_class = p_info->mad_class;
+		ts_ioctl_ret =
+		    ioctl(device_fd, TS_IB_IOCSMADFILTADD, &p_mgr->filter);
+		if (ts_ioctl_ret < 0) {
+			goto filter_failed;
+		}
+		break;
+	}
+
+	p_mgr->device_fd = device_fd;
+
+	p_bo->p_transp_mgr = p_mgr;
+
+	/* Initialize the magic_ptr to the pointer of the p_bo info.
+	   This will be used to signal when the object is being destroyed, so no
+	   real action will be done then. */
+	p_bo->magic_ptr = p_bo;
+
+	/* init receiver thread */
+	cl_st =
+	    cl_thread_init(&p_mgr->receiver, __osmv_TOPSPIN_receiver_thr,
+			   (void *)p_bo, "osmv TOPSPIN rcv thr");
+
+	return (ib_api_status_t) cl_st;
+
+filter_failed:
+	/* Nothing was published to p_bo yet - release what we own */
+	close(device_fd);
+	free(p_mgr);
+	return IB_ERROR;
+}
+
+/*
+ * NAME
+ *   osmv_transport_mad_send
+ *
+ * DESCRIPTION
+ *   Send a single MAD (256 byte)
+ *
+ *   Returns IB_INVALID_CALLBACK when the bind object is stale or
+ *   closing, IB_ERROR when the device does not accept the whole MAD
+ *   and IB_SUCCESS otherwise.
+ */
+
+ib_api_status_t
+osmv_transport_mad_send(IN const osm_bind_handle_t h_bind,
+			IN void *p_mad, IN const osm_mad_addr_t * p_mad_addr)
+{
+
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+	osm_vendor_t const *p_vend = p_bo->p_vendor;
+	struct ib_mad ts_mad;
+	int ret;
+	ib_api_status_t status;
+
+	const ib_mad_t *p_mad_hdr = p_mad;
+
+	OSM_LOG_ENTER(p_vend->p_log);
+
+	memset(&ts_mad, 0, sizeof(ts_mad));
+
+	/* Make sure the p_bo object is still relevant */
+	if ((p_bo->magic_ptr != p_bo) || p_bo->is_closing)
+		return IB_INVALID_CALLBACK;
+
+	/*
+	 * Copy the MAD over to the sent mad
+	 */
+	memcpy(&ts_mad, p_mad_hdr, MAD_BLOCK_SIZE);
+
+	/*
+	 * For all sends other than directed route SM MADs,
+	 * acquire an address vector for the destination.
+	 */
+	if (p_mad_hdr->mgmt_class != IB_MCLASS_SUBN_DIR) {
+
+		__osmv_TOPSPIN_osm_addr_to_mad_addr(p_mad_addr,
+						    p_mad_hdr->mgmt_class ==
+						    IB_MCLASS_SUBN_LID,
+						    &ts_mad);
+	} else {
+		/* is a directed route - we need to construct a permissive address */
+		/* we do not need port number since it is part of the mad_hndl */
+		ts_mad.dlid = IB_LID_PERMISSIVE;
+		ts_mad.slid = IB_LID_PERMISSIVE;
+		ts_mad.sqpn = 0;
+		ts_mad.dqpn = 0;
+	}
+
+	ts_mad.port = p_bo->port_num;
+
+	osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+		"osmv_transport_mad_send: "
+		"Sending QPN:%d DLID:0x%04x class:0x%02x "
+		"method:0x%02x attr:0x%04x status:0x%04x "
+		"tid:0x%016" PRIx64 "\n",
+		ts_mad.dqpn,
+		cl_ntoh16(ts_mad.dlid),
+		ts_mad.mgmt_class,
+		ts_mad.r_method,
+		cl_ntoh16(ts_mad.attribute_id),
+		cl_ntoh16(ts_mad.status), cl_ntoh64(ts_mad.transaction_id)
+	    );
+
+	/* send it */
+	ret =
+	    write(((osmv_TOPSPIN_transport_mgr_t *) (p_bo->p_transp_mgr))->
+		  device_fd, &ts_mad, sizeof(ts_mad));
+
+	if (ret != sizeof(ts_mad)) {
+		osm_log(p_vend->p_log, OSM_LOG_ERROR,
+			"osmv_transport_mad_send: ERR 6804: "
+			"Error sending mad (%d).\n", ret);
+		status = IB_ERROR;
+		goto Exit;
+	}
+
+	status = IB_SUCCESS;
+
+Exit:
+	OSM_LOG_EXIT(p_vend->p_log);
+	return (status);
+}
+
+/*
+   register a new mad type to the opened device file
+   and send a mad through - the main idea is to make
+   the filter catch it such that the read unblocks
+
+   This is best effort: failures are logged and otherwise ignored.
+*/
+void __osm_transport_gen_dummy_mad(osmv_bind_obj_t * p_bo)
+{
+	struct ib_mad ts_mad;
+	osmv_TOPSPIN_transport_mgr_t *p_mgr =
+	    (osmv_TOPSPIN_transport_mgr_t *) (p_bo->p_transp_mgr);
+	struct ib_get_port_info_ioctl port_data;
+	int ts_ioctl_ret;
+	int ts_write_ret;
+
+	/* prepare the mad fields following the stored filter on the bind */
+	memset(&ts_mad, 0, sizeof(ts_mad));
+	ts_mad.format_version = 1;
+	ts_mad.mgmt_class = p_mgr->filter.mgmt_class;
+	ts_mad.attribute_id = 0x2;
+	ts_mad.class_version = 1;
+	/* NOTE(review): byte-swapping a method value looks odd - confirm */
+	ts_mad.r_method = cl_ntoh16(0x2);
+	ts_mad.port = p_bo->port_num;
+	ts_mad.sqpn = p_mgr->filter.qpn;
+	ts_mad.dqpn = p_mgr->filter.qpn;
+	ts_mad.slid = 0xffff;
+	/* we must send to our local lid ... */
+	/* zero first, so a failed query cannot leak stack garbage as lid */
+	memset(&port_data, 0, sizeof(port_data));
+	port_data.port = p_bo->port_num;
+	ts_ioctl_ret = ioctl(p_mgr->device_fd, TS_IB_IOCGPORTINFO, &port_data);
+	if (ts_ioctl_ret < 0) {
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
+			"__osm_transport_gen_dummy_mad: "
+			"Unable to get the local port info (%d).\n",
+			ts_ioctl_ret);
+	}
+	ts_mad.dlid = port_data.port_info.lid;
+	ts_mad.transaction_id = 0x9999;
+	ts_write_ret = write(p_mgr->device_fd, &ts_mad, sizeof(ts_mad));
+	if (ts_write_ret != sizeof(ts_mad)) {
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
+			"__osm_transport_gen_dummy_mad: "
+			"Error sending the dummy mad (%d).\n", ts_write_ret);
+	}
+}
+
+/*
+ * Tear down the transport of a bind object: invalidate magic_ptr,
+ * unblock and destroy the receiver thread, close the device file and
+ * free the transport manager.
+ */
+void osmv_transport_done(IN const osm_bind_handle_t h_bind)
+{
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+	osmv_TOPSPIN_transport_mgr_t *p_tpot_mgr;
+
+	/* Validate the handle before it is dereferenced */
+	CL_ASSERT(p_bo);
+
+	p_tpot_mgr = (osmv_TOPSPIN_transport_mgr_t *) (p_bo->p_transp_mgr);
+
+	/* First of all - zero out the magic_ptr, so if a callback is called -
+	   it'll know that we are currently closing down, and will not handle the
+	   mad. */
+	p_bo->magic_ptr = 0;
+	/* usleep(3000000); */
+
+	/* seems the only way to abort a blocking read is to make it read something */
+	__osm_transport_gen_dummy_mad(p_bo);
+	cl_thread_destroy(&(p_tpot_mgr->receiver));
+	/* the receiver is gone - nobody else uses the device file */
+	close(p_tpot_mgr->device_fd);
+	free(p_tpot_mgr);
+}
+
+/* Fill the addressing fields of an outgoing MAD from an osm_mad_addr_t */
+static void
+__osmv_TOPSPIN_osm_addr_to_mad_addr(IN const osm_mad_addr_t * p_mad_addr,
+				    IN uint8_t is_smi, OUT struct ib_mad *p_mad)
+{
+
+	/* For global destination or Multicast address: */
+	p_mad->dlid = cl_ntoh16(p_mad_addr->dest_lid);
+	p_mad->sl = p_mad_addr->addr_type.gsi.service_level;
+	if (is_smi) {
+		p_mad->sqpn = 0;
+		p_mad->dqpn = 0;
+	} else {
+		p_mad->sqpn = 1;
+		p_mad->dqpn = cl_ntoh32(p_mad_addr->addr_type.gsi.remote_qp);
+	}
+	/*
+	   HACK we limit to the first PKey Index assuming it will
+	   always be the default PKey
+	 */
+	p_mad->pkey_index = 0;
+}
+
+/* Build the osm_mad_addr_t of the sender of a received MAD */
+static void
+__osmv_TOPSPIN_mad_addr_to_osm_addr(IN osm_vendor_t const *p_vend,
+				    IN struct ib_mad *p_mad,
+				    IN uint8_t is_smi,
+				    OUT osm_mad_addr_t * p_mad_addr)
+{
+	/* Start from a clean address, as the simulator transport does;
+	   fields that are not set below must not carry stack garbage */
+	memset(p_mad_addr, 0, sizeof(osm_mad_addr_t));
+	p_mad_addr->dest_lid = cl_hton16(p_mad->slid);
+	p_mad_addr->static_rate = 0;
+	p_mad_addr->path_bits = 0;
+	if (is_smi) {
+		/* SMI */
+		p_mad_addr->addr_type.smi.source_lid = cl_hton16(p_mad->slid);
+		p_mad_addr->addr_type.smi.port_num = p_mad->port;
+	} else {
+		/* GSI */
+		p_mad_addr->addr_type.gsi.remote_qp = cl_ntoh32(p_mad->sqpn);
+		p_mad_addr->addr_type.gsi.remote_qkey = IB_QP1_WELL_KNOWN_Q_KEY;
+		/*  There is a TAVOR limitation that only one P_KEY is supported per */
+		/*  QP - so QP1 must use IB_DEFAULT_PKEY */
+		p_mad_addr->addr_type.gsi.pkey_ix = p_mad->pkey_index;
+		p_mad_addr->addr_type.gsi.service_level = p_mad->sl;
+
+		p_mad_addr->addr_type.gsi.global_route = FALSE;
+		/* copy the GRH data if relevant - TopSpin imp doesnt relate to GRH!!! */
+		/*
+		   if (p_mad_addr->addr_type.gsi.global_route)
+		   {
+		   p_mad_addr->addr_type.gsi.grh_info.ver_class_flow =
+		   ib_grh_set_ver_class_flow(p_rcv_desc->grh.IP_version,
+		   p_rcv_desc->grh.traffic_class,
+		   p_rcv_desc->grh.flow_label);
+		   p_mad_addr->addr_type.gsi.grh_info.hop_limit =  p_rcv_desc->grh.hop_limit;
+		   memcpy(&p_mad_addr->addr_type.gsi.grh_info.src_gid.raw,
+		   &p_rcv_desc->grh.sgid, sizeof(ib_net64_t));
+		   memcpy(&p_mad_addr->addr_type.gsi.grh_info.dest_gid.raw,
+		   p_rcv_desc->grh.dgid,  sizeof(ib_net64_t));
+		   }
+		 */
+	}
+}
+
+/*
+ * NAME            osm_vendor_set_sm
+ *
+ * DESCRIPTION     Modifies the port info for the bound port to set the "IS_SM" bit
+ *                 according to the value given (TRUE or FALSE).
+ */ +#if (defined(OSM_VENDOR_INTF_TS_NO_VAPI) || defined(OSM_VENDOR_INTF_TS)) + +void osm_vendor_set_sm(IN osm_bind_handle_t h_bind, IN boolean_t is_sm_val) +{ + osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; + osm_vendor_t const *p_vend = p_bo->p_vendor; + int ts_ioctl_ret; + int device_fd = + ((osmv_TOPSPIN_transport_mgr_t *) (p_bo->p_transp_mgr))->device_fd; + struct ib_set_port_info_ioctl set_port_data; + + OSM_LOG_ENTER(p_vend->p_log); + + memset(&set_port_data, 0, sizeof(set_port_data)); + + set_port_data.port = p_bo->port_num; + set_port_data.port_info.valid_fields = IB_PORT_IS_SM; + set_port_data.port_info.is_sm = is_sm_val; + ts_ioctl_ret = ioctl(device_fd, TS_IB_IOCSPORTINFO, &set_port_data); + if (ts_ioctl_ret < 0) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_set_sm: ERR 6805: " + "Unable set 'IS_SM' bit to:%u in port attributes (%d).\n", + is_sm_val, ts_ioctl_ret); + } + + OSM_LOG_EXIT(p_vend->p_log); +} + +#endif diff --git a/libvendor/osm_vendor_mlx_ts_anafa.c b/libvendor/osm_vendor_mlx_ts_anafa.c new file mode 100644 index 0000000..b88ac64 --- /dev/null +++ b/libvendor/osm_vendor_mlx_ts_anafa.c @@ -0,0 +1,416 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* AUTHOR Edward Bortnikov + * + * DESCRIPTION + * The lower-level MAD transport interface implementation + * that allows sending a single MAD/receiving a callback + * when a single MAD is received. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +static void +__osmv_TOPSPIN_ANAFA_mad_addr_to_osm_addr(IN osm_vendor_t const *p_vend, + IN struct ib_mad *p_mad, + IN uint8_t is_smi, + OUT osm_mad_addr_t * p_mad_addr); + +static void +__osmv_TOPSPIN_ANAFA_osm_addr_to_mad_addr(IN const osm_mad_addr_t * + p_mad_addr, IN uint8_t is_smi, + OUT struct ib_mad *p_mad); + +void __osmv_TOPSPIN_ANAFA_receiver_thr(void *p_ctx) +{ + int ts_ret_code; + struct ib_mad mad; + osm_mad_addr_t mad_addr; + osmv_bind_obj_t *const p_bo = (osmv_bind_obj_t *) p_ctx; + ib_api_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(p_bo->p_vendor->p_log); + + /* Make sure the p_bo object is still relevant */ + if ((p_bo->magic_ptr != p_bo) || p_bo->is_closing) + return; + + /* we set the type of cancelation for this thread */ + /* pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); */ + + while (1) { + /* Make sure 
the p_bo object is still relevant */ + if ((p_bo->magic_ptr != p_bo) || p_bo->is_closing) + return; + + /* we read one mad at a time and pass it to the read callback function */ + ts_ret_code = + read(((osmv_TOPSPIN_ANAFA_transport_mgr_t *) (p_bo-> + p_transp_mgr))-> + device_fd, &mad, sizeof(mad)); + + /* Make sure the p_bo object is still relevant */ + if ((p_bo->magic_ptr != p_bo) || p_bo->is_closing) + return; + + if (ts_ret_code != sizeof(mad)) { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "__osmv_TOPSPIN_ANAFA_receiver_thr: ERR 6903: " + "error with read, bytes = %d\n", ts_ret_code); + break; + } else { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG, + "__osmv_TOPSPIN_ANAFA_receiver_thr: " + "MAD QPN:%d SLID:0x%04x class:0x%02x " + "method:0x%02x attr:0x%04x status:0x%04x " + "tid:0x%016" PRIx64 "\n", + mad.dqpn, + cl_ntoh16(mad.slid), + mad.mgmt_class, + mad.r_method, + cl_ntoh16(mad.attribute_id), + cl_ntoh16(mad.status), + cl_ntoh64(mad.transaction_id)); + + /* first arrange an address */ + __osmv_TOPSPIN_ANAFA_mad_addr_to_osm_addr + (p_bo->p_vendor, &mad, + (((ib_mad_t *) & mad)->mgmt_class == + IB_MCLASS_SUBN_LID) + || (((ib_mad_t *) & mad)->mgmt_class == + IB_MCLASS_SUBN_DIR), &mad_addr); + + /* call the receiver callback */ + + status = + osmv_dispatch_mad((osm_bind_handle_t) p_bo, + (void *)&mad, &mad_addr); + + /* Make sure the p_bo object is still relevant */ + if (p_bo->magic_ptr != p_bo) + return; + + if (IB_INTERRUPTED == status) { + + osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG, + "__osmv_TOPSPIN_ANAFA_receiver_thr: " + "The bind handle %p is being closed. " + "Breaking the loop.\n", p_bo); + break; + } + } + } + + OSM_LOG_EXIT(p_bo->p_vendor->p_log); +} + +/* + * NAME + * osmv_transport_init + * + * DESCRIPTION + * Setup the MAD transport infrastructure (filters, callbacks etc). 
+ */
+
+/*
+ * Opens the TopSpin user-access device, installs the inbound MAD filter(s)
+ * for the class requested in p_info->mad_class and publishes the freshly
+ * allocated transport manager through p_bo->p_transp_mgr.
+ *
+ *  - IB_MCLASS_SUBN_LID / IB_MCLASS_SUBN_DIR: two filters on QP0, one per
+ *    subnet management class.
+ *  - any other class: a single filter on QP1 for that class.
+ *
+ * hca_id and hca_idx are not used by this implementation.
+ *
+ * Returns IB_INSUFFICIENT_MEMORY when the manager cannot be allocated and
+ * IB_ERROR when the device cannot be opened or a filter cannot be added.
+ * On every such failure the descriptor and the manager are released here,
+ * because p_bo->p_transp_mgr has not been set yet and no caller could
+ * reach them otherwise.
+ */
+ib_api_status_t
+osmv_transport_init(IN osm_bind_info_t * p_info,
+		    IN char hca_id[VENDOR_HCA_MAXNAMES],
+		    IN uint8_t hca_idx, IN osmv_bind_obj_t * p_bo)
+{
+	cl_status_t cl_st;
+
+	int ts_ioctl_ret;
+	int device_fd;
+	char *device_file = "/dev/ts_ua0";
+	osm_ts_user_mad_filter filter;
+	osmv_TOPSPIN_ANAFA_transport_mgr_t *p_mgr;
+
+	p_mgr = malloc(sizeof(osmv_TOPSPIN_ANAFA_transport_mgr_t));
+	if (!p_mgr) {
+		return IB_INSUFFICIENT_MEMORY;
+	}
+
+	memset(p_mgr, 0, sizeof(osmv_TOPSPIN_ANAFA_transport_mgr_t));
+
+	/* open TopSpin file device */
+	device_fd = open(device_file, O_RDWR);
+	if (device_fd < 0) {
+		fprintf(stderr, "Fatal: Fail to open the file:%s err:%d\n",
+			device_file, errno);
+		/* nothing owns p_mgr yet - do not leak it */
+		free(p_mgr);
+		return IB_ERROR;
+	}
+	p_mgr->device_fd = device_fd;
+
+	/*
+	 * Create the MAD filter on this file handle.
+	 */
+
+	filter.port = 0;	/* Victor */
+	filter.direction = TS_IB_MAD_DIRECTION_IN;
+	filter.mask =
+	    TS_IB_MAD_FILTER_DIRECTION |
+	    TS_IB_MAD_FILTER_PORT |
+	    TS_IB_MAD_FILTER_QPN | TS_IB_MAD_FILTER_MGMT_CLASS;
+
+	switch (p_info->mad_class) {
+	case IB_MCLASS_SUBN_LID:
+	case IB_MCLASS_SUBN_DIR:
+		filter.qpn = 0;
+		filter.mgmt_class = IB_MCLASS_SUBN_LID;
+		ts_ioctl_ret = ioctl(device_fd, TS_IB_IOCSMADFILTADD, &filter);
+		if (ts_ioctl_ret >= 0) {
+			/* the second filter is only attempted if the first one stuck */
+			filter.mgmt_class = IB_MCLASS_SUBN_DIR;
+			ts_ioctl_ret =
+			    ioctl(device_fd, TS_IB_IOCSMADFILTADD, &filter);
+		}
+		break;
+
+	case IB_MCLASS_SUBN_ADM:
+	default:
+		filter.qpn = 1;
+		filter.mgmt_class = p_info->mad_class;
+		ts_ioctl_ret = ioctl(device_fd, TS_IB_IOCSMADFILTADD, &filter);
+		break;
+	}
+
+	if (ts_ioctl_ret < 0) {
+		/* single failure exit for all filter registrations:
+		 * give back the descriptor and the manager */
+		close(device_fd);
+		free(p_mgr);
+		return IB_ERROR;
+	}
+
+	p_bo->p_transp_mgr = p_mgr;
+
+	/* Initialize the magic_ptr to the pointer of the p_bo info. 
+ This will be used to signal when the object is being destroyed, so no + real action will be done then. */ + p_bo->magic_ptr = p_bo; + + /* init receiver thread */ + cl_st = + cl_thread_init(&p_mgr->receiver, __osmv_TOPSPIN_ANAFA_receiver_thr, + (void *)p_bo, "osmv TOPSPIN_ANAFA rcv thr"); + + return (ib_api_status_t) cl_st; +} + +/* + * NAME + * osmv_transport_send_mad + * + * DESCRIPTION + * Send a single MAD (256 byte) + */ + +ib_api_status_t +osmv_transport_mad_send(IN const osm_bind_handle_t h_bind, + IN void *p_mad, IN const osm_mad_addr_t * p_mad_addr) +{ + + osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; + osm_vendor_t const *p_vend = p_bo->p_vendor; + struct ib_mad ts_mad = { 0 }; + int ret; + ib_api_status_t status; + + const ib_mad_t *p_mad_hdr = p_mad; + + OSM_LOG_ENTER(p_vend->p_log); + + /* Make sure the p_bo object is still relevant */ + if (p_bo->magic_ptr != p_bo) + return IB_INVALID_CALLBACK; + + /* + * Copy the MAD over to the sent mad + */ + memcpy(&ts_mad, p_mad_hdr, MAD_BLOCK_SIZE); + + /* + * For all sends other than directed route SM MADs, + * acquire an address vector for the destination. 
+ */ + if (p_mad_hdr->mgmt_class != IB_MCLASS_SUBN_DIR) { + + __osmv_TOPSPIN_ANAFA_osm_addr_to_mad_addr(p_mad_addr, + p_mad_hdr-> + mgmt_class == + IB_MCLASS_SUBN_LID, + &ts_mad); + } else { + /* is a directed route - we need to construct a permissive address */ + /* we do not need port number since it is part of the mad_hndl */ + ts_mad.dlid = IB_LID_PERMISSIVE; + ts_mad.slid = IB_LID_PERMISSIVE; + } + if ((p_mad_hdr->mgmt_class == IB_MCLASS_SUBN_DIR) || + (p_mad_hdr->mgmt_class == IB_MCLASS_SUBN_LID)) { + ts_mad.sqpn = 0; + ts_mad.dqpn = 0; + } else { + ts_mad.sqpn = 1; + ts_mad.dqpn = 1; + } + + /* ts_mad.port = p_bo->port_num; */ + ts_mad.port = 0; /* Victor */ + + /* send it */ + ret = + write(((osmv_TOPSPIN_ANAFA_transport_mgr_t *) (p_bo-> + p_transp_mgr))-> + device_fd, &ts_mad, sizeof(ts_mad)); + + if (ret != sizeof(ts_mad)) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osmv_transport_mad_send: ERR 6904: " + "Error sending mad (%d).\n", ret); + status = IB_ERROR; + goto Exit; + } + + status = IB_SUCCESS; + +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +void osmv_transport_done(IN const osm_bind_handle_t h_bind) +{ + osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; + osmv_TOPSPIN_ANAFA_transport_mgr_t *p_tpot_mgr = + (osmv_TOPSPIN_ANAFA_transport_mgr_t *) (p_bo->p_transp_mgr); + + CL_ASSERT(p_bo); + + /* First of all - zero out the magic_ptr, so if a callback is called - + it'll know that we are currently closing down, and will not handle the + mad. 
*/ + p_bo->magic_ptr = 0; + + /* usleep(3000000); */ + + /* pthread_cancel (p_tpot_mgr->receiver.osd.id); */ + cl_thread_destroy(&(p_tpot_mgr->receiver)); + free(p_tpot_mgr); +} + +static void +__osmv_TOPSPIN_ANAFA_osm_addr_to_mad_addr(IN const osm_mad_addr_t * p_mad_addr, + IN uint8_t is_smi, + OUT struct ib_mad *p_mad) +{ + + /* For global destination or Multicast address: */ + p_mad->dlid = cl_ntoh16(p_mad_addr->dest_lid); + p_mad->sl = p_mad_addr->addr_type.gsi.service_level; + if (is_smi) { + p_mad->sqpn = 0; + p_mad->dqpn = 0; + } else { + p_mad->sqpn = 1; + p_mad->dqpn = p_mad_addr->addr_type.gsi.remote_qp; + } + /* + HACK we limit to the first PKey Index assuming it will + always be the default PKey + */ + p_mad->pkey_index = 0; +} + +static void +__osmv_TOPSPIN_ANAFA_mad_addr_to_osm_addr(IN osm_vendor_t const *p_vend, + IN struct ib_mad *p_mad, + IN uint8_t is_smi, + OUT osm_mad_addr_t * p_mad_addr) +{ + p_mad_addr->dest_lid = cl_hton16(p_mad->slid); + p_mad_addr->static_rate = 0; + p_mad_addr->path_bits = 0; + if (is_smi) { + /* SMI */ + p_mad_addr->addr_type.smi.source_lid = cl_hton16(p_mad->slid); + p_mad_addr->addr_type.smi.port_num = p_mad->port; + } else { + /* GSI */ + p_mad_addr->addr_type.gsi.remote_qp = p_mad->sqpn; + p_mad_addr->addr_type.gsi.remote_qkey = IB_QP1_WELL_KNOWN_Q_KEY; + p_mad_addr->addr_type.gsi.pkey_ix = p_mad->pkey_index; + p_mad_addr->addr_type.gsi.service_level = p_mad->sl; + + p_mad_addr->addr_type.gsi.global_route = FALSE; + /* copy the GRH data if relevant - TopSpin imp doesnt relate to GRH!!! 
*/ + /* + if (p_mad_addr->addr_type.gsi.global_route) + { + p_mad_addr->addr_type.gsi.grh_info.ver_class_flow = + ib_grh_set_ver_class_flow(p_rcv_desc->grh.IP_version, + p_rcv_desc->grh.traffic_class, + p_rcv_desc->grh.flow_label); + p_mad_addr->addr_type.gsi.grh_info.hop_limit = p_rcv_desc->grh.hop_limit; + memcpy(&p_mad_addr->addr_type.gsi.grh_info.src_gid.raw, + &p_rcv_desc->grh.sgid, sizeof(ib_net64_t)); + memcpy(&p_mad_addr->addr_type.gsi.grh_info.dest_gid.raw, + p_rcv_desc->grh.dgid, sizeof(ib_net64_t)); + } + */ + } +} diff --git a/libvendor/osm_vendor_mlx_txn.c b/libvendor/osm_vendor_mlx_txn.c new file mode 100644 index 0000000..72b6435 --- /dev/null +++ b/libvendor/osm_vendor_mlx_txn.c @@ -0,0 +1,680 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005,2009 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include + +#include +#include +#include +#include +#include + +static ib_api_status_t +__osmv_txnmgr_lookup(IN osmv_txn_mgr_t * p_tx_mgr, + IN uint64_t key, OUT osmv_txn_ctx_t ** pp_txn); + +static ib_api_status_t +__osmv_txnmgr_insert_txn(IN osmv_txn_mgr_t * p_tx_mgr, + IN osmv_txn_ctx_t * p_txn, IN uint64_t key); + +static ib_api_status_t +__osmv_txnmgr_remove_txn(IN osmv_txn_mgr_t * p_tx_mgr, + IN uint64_t key, OUT osmv_txn_ctx_t ** pp_txn); + +static void __osmv_txn_all_done(osm_bind_handle_t h_bind); + +static uint64_t +__osmv_txn_timeout_cb(IN uint64_t key, + IN uint32_t num_regs, IN void *cb_context); + +ib_api_status_t +osmv_txn_init(IN osm_bind_handle_t h_bind, + IN uint64_t tid, IN uint64_t key, OUT osmv_txn_ctx_t ** pp_txn) +{ + ib_api_status_t st; + osmv_txn_ctx_t *p_txn; + osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; + + OSM_LOG_ENTER(p_bo->p_vendor->p_log); + + CL_ASSERT(NULL != h_bind && NULL != pp_txn); + + osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG, + "Starting transaction 0x%016" PRIx64 + " (key=0x%016" PRIx64 ")\n", tid, key); + + p_txn = malloc(sizeof(osmv_txn_ctx_t)); + if (!p_txn) { + return IB_INSUFFICIENT_MEMORY; + } + + memset(p_txn, 0, sizeof(osmv_txn_ctx_t)); + p_txn->p_log = p_bo->txn_mgr.p_log; + p_txn->tid = tid; + p_txn->key = key; + p_txn->p_madw = NULL; + p_txn->rmpp_txfr.rmpp_state = OSMV_TXN_RMPP_NONE; + + /* insert into transaction manager DB */ + st = __osmv_txnmgr_insert_txn(&p_bo->txn_mgr, p_txn, key); + if (IB_SUCCESS != st) { + osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR, + "osmv_txn_init: ERR 6703: " + "Failed to insert to transaction 0x%016" PRIx64 + " (key=0x%016" PRIx64 
") to manager DB\n", + tid, key); + goto insert_txn_failed; + } + + *pp_txn = p_txn; + OSM_LOG_EXIT(p_bo->p_vendor->p_log); + return IB_SUCCESS; + +insert_txn_failed: + free(p_txn); + + OSM_LOG_EXIT(p_bo->p_vendor->p_log); + return st; +} + +ib_api_status_t +osmv_txn_init_rmpp_sender(IN osm_bind_handle_t h_bind, + IN osmv_txn_ctx_t * p_txn, IN osm_madw_t * p_madw) +{ + ib_api_status_t st; + + CL_ASSERT(p_txn); + + /* Double-Sided RMPP Direction Switch */ + osmv_txn_remove_timeout_ev(h_bind, osmv_txn_get_key(p_txn)); + + p_txn->rmpp_txfr.rmpp_state = OSMV_TXN_RMPP_SENDER; + p_txn->rmpp_txfr.p_rmpp_send_ctx = malloc(sizeof(osmv_rmpp_send_ctx_t)); + + if (!p_txn->rmpp_txfr.p_rmpp_send_ctx) { + return IB_INSUFFICIENT_MEMORY; + } + + memset(p_txn->rmpp_txfr.p_rmpp_send_ctx, 0, + sizeof(osmv_rmpp_send_ctx_t)); + + st = osmv_rmpp_send_ctx_init(p_txn->rmpp_txfr.p_rmpp_send_ctx, + (void *)p_madw->p_mad, + p_madw->mad_size, p_txn->p_log); + return st; +} + +ib_api_status_t +osmv_txn_init_rmpp_receiver(IN osm_bind_handle_t h_bind, + IN osmv_txn_ctx_t * p_txn, + IN boolean_t is_init_by_peer) +{ + ib_api_status_t st; + osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; + uint64_t key = osmv_txn_get_key(p_txn); + + CL_ASSERT(p_txn); + + /* Double-Sided RMPP Direction Switch */ + osmv_txn_remove_timeout_ev(h_bind, key); + + /* Set the Transaction Timeout value */ + st = osmv_txn_set_timeout_ev(h_bind, key, + p_bo->p_vendor->ttime_timeout); + if (IB_SUCCESS != st) { + + return st; + } + + p_txn->rmpp_txfr.rmpp_state = OSMV_TXN_RMPP_RECEIVER; + p_txn->rmpp_txfr.is_rmpp_init_by_peer = is_init_by_peer; + + p_txn->rmpp_txfr.p_rmpp_recv_ctx = malloc(sizeof(osmv_rmpp_recv_ctx_t)); + + if (!p_txn->rmpp_txfr.p_rmpp_recv_ctx) { + + osmv_txn_remove_timeout_ev(h_bind, key); + return IB_INSUFFICIENT_MEMORY; + } + + memset(p_txn->rmpp_txfr.p_rmpp_recv_ctx, 0, + sizeof(osmv_rmpp_recv_ctx_t)); + + st = osmv_rmpp_recv_ctx_init(p_txn->rmpp_txfr.p_rmpp_recv_ctx, + p_txn->p_log); + + return st; +} 
+ +/* + * NAME + * osmv_txn_set_timeout_ev + * + * DESCRIPTION + * + * SEE ALSO + * + */ +ib_api_status_t +osmv_txn_set_timeout_ev(IN osm_bind_handle_t h_bind, + IN uint64_t key, IN uint64_t msec) +{ + osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; + cl_event_wheel_t *p_event_wheel = p_bo->txn_mgr.p_event_wheel; + cl_status_t status; + + status = cl_event_wheel_reg(p_event_wheel, key, cl_get_time_stamp() + 1000 * msec, /* TTL */ + __osmv_txn_timeout_cb, + p_bo /* The context */ ); + + return (ib_api_status_t) status; +} + +/* + * NAME + * osmv_txn_remove_timeout_ev + * + * DESCRIPTION + + * SEE ALSO + * + */ +void osmv_txn_remove_timeout_ev(IN osm_bind_handle_t h_bind, IN uint64_t key) +{ + cl_event_wheel_t *p_event_wheel = + ((osmv_bind_obj_t *) h_bind)->txn_mgr.p_event_wheel; + cl_event_wheel_unreg(p_event_wheel, key); +} + +void +osmv_txn_done(IN osm_bind_handle_t h_bind, + IN uint64_t key, IN boolean_t is_in_cb) +{ + osmv_txn_ctx_t *p_ctx; + osmv_bind_obj_t *const p_bo = (osmv_bind_obj_t *) h_bind; + + OSM_LOG_ENTER(p_bo->p_vendor->p_log); + + CL_ASSERT(h_bind); + + /* Cancel the (single) timeout possibly outstanding for this txn + * Don't do this if you are in the callback context, for 2 reasons: + * (1) The event wheel will remove the context itself. 
+ * (2) If we try to, there is a deadlock in the event wheel + */ + if (FALSE == is_in_cb) { + osmv_txn_remove_timeout_ev(h_bind, key); + } + + /* Remove from DB */ + if (IB_NOT_FOUND == + __osmv_txnmgr_remove_txn(&p_bo->txn_mgr, key, &p_ctx)) { + return; + } + + /* Destroy the transaction's RMPP contexts + * (can be more than one in the case of double sided transfer) + */ + + if (p_ctx->rmpp_txfr.p_rmpp_send_ctx) { + osmv_rmpp_send_ctx_done(p_ctx->rmpp_txfr.p_rmpp_send_ctx); + } + + if (p_ctx->rmpp_txfr.p_rmpp_recv_ctx) { + osmv_rmpp_recv_ctx_done(p_ctx->rmpp_txfr.p_rmpp_recv_ctx); + } + + free(p_ctx); + + OSM_LOG_EXIT(p_bo->p_vendor->p_log); +} + +ib_api_status_t +osmv_txn_lookup(IN osm_bind_handle_t h_bind, + IN uint64_t key, OUT osmv_txn_ctx_t ** pp_txn) +{ + return __osmv_txnmgr_lookup(&(((osmv_bind_obj_t *) h_bind)->txn_mgr), + key, pp_txn); +} + +void osmv_txn_abort_rmpp_txns(osm_bind_handle_t h_bind) +{ + osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; + cl_map_item_t *p_item; + cl_map_obj_t *p_obj; + osmv_txn_ctx_t *p_txn; + osmv_rmpp_send_ctx_t *p_send_ctx; + cl_qmap_t *p_map = p_bo->txn_mgr.p_txn_map; + + OSM_LOG_ENTER(p_bo->p_vendor->p_log); + + while (FALSE == cl_is_qmap_empty(p_map)) { + + p_item = cl_qmap_head(p_map); + p_obj = PARENT_STRUCT(p_item, cl_map_obj_t, item); + p_txn = (osmv_txn_ctx_t *) cl_qmap_obj(p_obj); + p_send_ctx = osmv_txn_get_rmpp_send_ctx(p_txn); + + if (NULL != p_send_ctx) { + + p_send_ctx->status = IB_INTERRUPTED; + + /* Wake up the sender thread to let it break out */ + cl_event_signal(&p_send_ctx->event); + } + + cl_qmap_remove_item(p_map, p_item); + } + + OSM_LOG_EXIT(p_bo->p_vendor->p_log); +} + +ib_api_status_t +osmv_txnmgr_init(IN osmv_txn_mgr_t * p_tx_mgr, + IN osm_log_t * p_log, IN cl_spinlock_t * p_lock) +{ + cl_status_t cl_st = CL_SUCCESS; + + p_tx_mgr->p_event_wheel = malloc(sizeof(cl_event_wheel_t)); + if (!p_tx_mgr->p_event_wheel) { + return IB_INSUFFICIENT_MEMORY; + } + + memset(p_tx_mgr->p_event_wheel, 0, 
sizeof(cl_event_wheel_t)); + + cl_event_wheel_construct(p_tx_mgr->p_event_wheel); + + /* NOTE! We are using an extended constructor. + * We tell the Event Wheel run in a non-protected manner in the reg/unreg calls, + * and acquire an external lock in the asynchronous callback. + */ + cl_st = cl_event_wheel_init_ex(p_tx_mgr->p_event_wheel, p_lock); + if (cl_st != CL_SUCCESS) { + free(p_tx_mgr->p_event_wheel); + return (ib_api_status_t) cl_st; + } + + p_tx_mgr->p_txn_map = malloc(sizeof(cl_qmap_t)); + if (!p_tx_mgr->p_txn_map) { + cl_event_wheel_destroy(p_tx_mgr->p_event_wheel); + free(p_tx_mgr->p_event_wheel); + return IB_INSUFFICIENT_MEMORY; + } + + memset(p_tx_mgr->p_txn_map, 0, sizeof(cl_qmap_t)); + + cl_qmap_init(p_tx_mgr->p_txn_map); + p_tx_mgr->p_log = p_log; + + return cl_st; +} + +void osmv_txnmgr_done(IN osm_bind_handle_t h_bind) +{ + osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; + + __osmv_txn_all_done(h_bind); + free(p_bo->txn_mgr.p_txn_map); + + cl_event_wheel_destroy(p_bo->txn_mgr.p_event_wheel); + free(p_bo->txn_mgr.p_event_wheel); +} + +ib_api_status_t +__osmv_txnmgr_lookup(IN osmv_txn_mgr_t * p_tx_mgr, + IN uint64_t key, OUT osmv_txn_ctx_t ** pp_txn) +{ + ib_api_status_t status = IB_SUCCESS; + cl_map_item_t *p_item; + cl_map_obj_t *p_obj; + + uint64_t tmp_key; + + OSM_LOG_ENTER(p_tx_mgr->p_log); + + CL_ASSERT(p_tx_mgr); + CL_ASSERT(pp_txn); + + osm_log(p_tx_mgr->p_log, OSM_LOG_DEBUG, + "__osmv_txnmgr_lookup: " + "Looking for key: 0x%016" PRIx64 " in map ptr:%p\n", key, + p_tx_mgr->p_txn_map); + + p_item = cl_qmap_head(p_tx_mgr->p_txn_map); + while (p_item != cl_qmap_end(p_tx_mgr->p_txn_map)) { + tmp_key = cl_qmap_key(p_item); + osm_log(p_tx_mgr->p_log, OSM_LOG_DEBUG, + "__osmv_txnmgr_lookup: " + "Found key 0x%016" PRIx64 "\n", tmp_key); + p_item = cl_qmap_next(p_item); + } + + p_item = cl_qmap_get(p_tx_mgr->p_txn_map, key); + if (cl_qmap_end(p_tx_mgr->p_txn_map) == p_item) { + status = IB_NOT_FOUND; + } else { + p_obj = PARENT_STRUCT(p_item, 
cl_map_obj_t, item); + *pp_txn = cl_qmap_obj(p_obj); + } + + OSM_LOG_EXIT(p_tx_mgr->p_log); + return status; +} + +ib_api_status_t +__osmv_txnmgr_insert_txn(IN osmv_txn_mgr_t * p_tx_mgr, + IN osmv_txn_ctx_t * p_txn, IN uint64_t key) +{ + cl_map_obj_t *p_obj = NULL; + cl_map_item_t *p_item; + uint64_t tmp_key; + + CL_ASSERT(p_tx_mgr); + CL_ASSERT(p_txn); + + key = osmv_txn_get_key(p_txn); + p_obj = malloc(sizeof(cl_map_obj_t)); + if (NULL == p_obj) + return IB_INSUFFICIENT_MEMORY; + + osm_log(p_tx_mgr->p_log, OSM_LOG_DEBUG, + "__osmv_txnmgr_insert_txn: " + "Inserting key: 0x%016" PRIx64 " to map ptr:%p\n", key, + p_tx_mgr->p_txn_map); + + memset(p_obj, 0, sizeof(cl_map_obj_t)); + + cl_qmap_set_obj(p_obj, p_txn); + /* assuming lookup with this key was made and the result was IB_NOT_FOUND */ + cl_qmap_insert(p_tx_mgr->p_txn_map, key, &p_obj->item); + + p_item = cl_qmap_head(p_tx_mgr->p_txn_map); + while (p_item != cl_qmap_end(p_tx_mgr->p_txn_map)) { + tmp_key = cl_qmap_key(p_item); + osm_log(p_tx_mgr->p_log, OSM_LOG_DEBUG, + "__osmv_txnmgr_insert_txn: " + "Found key 0x%016" PRIx64 "\n", tmp_key); + p_item = cl_qmap_next(p_item); + } + + return IB_SUCCESS; +} + +ib_api_status_t +__osmv_txnmgr_remove_txn(IN osmv_txn_mgr_t * p_tx_mgr, + IN uint64_t key, OUT osmv_txn_ctx_t ** pp_txn) +{ + cl_map_obj_t *p_obj; + cl_map_item_t *p_item; + + OSM_LOG_ENTER(p_tx_mgr->p_log); + + CL_ASSERT(p_tx_mgr); + CL_ASSERT(pp_txn); + + p_item = cl_qmap_remove(p_tx_mgr->p_txn_map, key); + + if (p_item == cl_qmap_end(p_tx_mgr->p_txn_map)) { + + osm_log(p_tx_mgr->p_log, OSM_LOG_ERROR, + "__osmv_txnmgr_remove_txn: ERR 6701: " + "Could not remove the transaction 0x%016" PRIx64 " - " + "something is really wrong!\n", key); + OSM_LOG_EXIT(p_tx_mgr->p_log); + return IB_NOT_FOUND; + } + + p_obj = PARENT_STRUCT(p_item, cl_map_obj_t, item); + *pp_txn = cl_qmap_obj(p_obj); + + free(p_obj); + + OSM_LOG_EXIT(p_tx_mgr->p_log); + return IB_SUCCESS; +} + +void __osmv_txn_all_done(osm_bind_handle_t h_bind) 
+{
+	/*
+	 * Tear down every transaction still present in the bind object's
+	 * transaction map.
+	 *
+	 * osmv_txn_done() removes the entry for the given key from the map
+	 * and releases both the cl_map_obj_t wrapping it and the transaction
+	 * context, so nothing is freed here: freeing p_obj again after the
+	 * call would release the same allocation twice.
+	 */
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+	cl_map_item_t *p_item;
+	cl_map_obj_t *p_obj;
+	osmv_txn_ctx_t *p_txn;
+
+	OSM_LOG_ENTER(p_bo->p_vendor->p_log);
+
+	p_item = cl_qmap_head(p_bo->txn_mgr.p_txn_map);
+	while (p_item != cl_qmap_end(p_bo->txn_mgr.p_txn_map)) {
+
+		p_obj = PARENT_STRUCT(p_item, cl_map_obj_t, item);
+		p_txn = (osmv_txn_ctx_t *) cl_qmap_obj(p_obj);
+		/* FALSE: we are not inside the timeout callback, so the
+		 * pending timeout event is unregistered as well */
+		osmv_txn_done(h_bind, osmv_txn_get_key(p_txn), FALSE);
+		/* assuming osmv_txn_done has removed the txn from the map */
+		p_item = cl_qmap_head(p_bo->txn_mgr.p_txn_map);
+	}
+
+	OSM_LOG_EXIT(p_bo->p_vendor->p_log);
+}
+
+/******************************************************************************/
+
+/*
+ * Acquire the bind object's spinlock, logging before and after at debug
+ * level.
+ */
+void osmv_txn_lock(IN osm_bind_handle_t h_bind)
+{
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+
+	osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+		"--> Acquiring lock %p on bind handle %p\n", &p_bo->lock, p_bo);
+
+	cl_spinlock_acquire(&p_bo->lock);
+
+	osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+		"--> Acquired lock %p on bind handle %p\n", &p_bo->lock, p_bo);
+}
+
+/*
+ * Release the bind object's spinlock, logging before and after at debug
+ * level. The log handle and lock address are copied to locals first so the
+ * second message does not have to read through p_bo after the release.
+ */
+void osmv_txn_unlock(IN osm_bind_handle_t h_bind)
+{
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind;
+	cl_spinlock_t *p_lock = &p_bo->lock;
+	osm_log_t *p_log = p_bo->p_vendor->p_log;
+
+	osm_log(p_log, OSM_LOG_DEBUG,
+		"<-- Releasing lock %p on bind handle %p\n", p_lock, p_bo);
+
+	cl_spinlock_release(&p_bo->lock);
+
+	/* We'll use the saved ptrs, since now the p_bo can be destroyed already */
+	osm_log(p_log, OSM_LOG_DEBUG,
+		"<-- Released lock %p on bind handle %p\n", p_lock, p_bo);
+
+}
+
+/*
+ * Timeout handler registered with the event wheel for the transaction
+ * identified by key; cb_context is the owning bind object and num_regs is
+ * compared against OSM_DEFAULT_RETRY_COUNT to decide whether to retry.
+ * The return value goes back to the event wheel: the code uses 0 for
+ * "no re-registration" and a cl_get_time_stamp() based value for the next
+ * expiry.
+ */
+static uint64_t
+__osmv_txn_timeout_cb(IN uint64_t key,
+		      IN uint32_t num_regs, IN void *cb_context)
+{
+	osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) cb_context;
+	uint64_t ret = 0;
+	osmv_txn_ctx_t *p_txn;
+	osmv_rmpp_send_ctx_t *p_send_ctx;
+	osm_madw_t *p_madw = NULL;
+	ib_mad_t *p_mad;
+	osm_mad_addr_t *p_mad_addr;
+	boolean_t invoke_err_cb = FALSE;
+
+	OSM_LOG_ENTER(p_bo->p_vendor->p_log);
+
+	/* 
Don't try to acquire a lock on the Bind Object -
+	 * it's taken by the mechanism that drives the timeout based events!
+	 * (Recall the special constructor that the Event Wheel is applied with)
+	 */
+	if (p_bo->is_closing) {
+		goto txn_done;
+	}
+
+	/* The lookup status is deliberately NOT stored in ret: ret is the
+	 * value returned to the event wheel, and a non-zero status code
+	 * would be handed back as if it were the next expiry time.
+	 */
+	if (IB_NOT_FOUND == osmv_txn_lookup(p_bo, key, &p_txn)) {
+		/* Prevent a race - the transaction is already destroyed */
+		goto txn_done;
+	}
+
+	p_madw = p_txn->p_madw;
+
+	switch (osmv_txn_get_rmpp_state(p_txn)) {
+
+	case OSMV_TXN_RMPP_NONE:
+		if (num_regs <= OSM_DEFAULT_RETRY_COUNT) {
+			/* We still did not exceed the limit of retransmissions.
+			 * Set the next timeout's value.
+			 */
+			osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+				"__osmv_txn_timeout_cb: "
+				"The transaction request (tid=0x%016" PRIx64 ")"
+				" timed out %d times. Retrying the send.\n",
+				osmv_txn_get_tid(p_txn), num_regs);
+
+			/* resend this mad */
+			ret = osmv_simple_send_madw((osm_bind_handle_t *) p_bo,
+						    p_madw, p_txn, TRUE);
+			if (ret != IB_SUCCESS) {
+				/* p_txn is still valid here; it is freed by
+				 * the osmv_txn_done() call below */
+				osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
+					"__osmv_txn_timeout_cb: "
+					"Fail to send retry for transaction "
+					"request (tid=0x%016" PRIx64 ").\n",
+					osmv_txn_get_tid(p_txn));
+
+				osmv_txn_done((osm_bind_handle_t) p_bo, key,
+					      TRUE /*in timeout callback */ );
+
+				/* This is a requester. Always apply the callback */
+				invoke_err_cb = TRUE;
+			} else {
+				uint64_t next_timeout_ms;
+				/* back-off grows with the square of the attempt number */
+				next_timeout_ms =
+				    p_bo->p_vendor->resp_timeout * (num_regs +
+								    1) *
+				    (num_regs + 1);
+				/* when do we need to timeout again */
+				ret =
+				    cl_get_time_stamp() +
+				    (uint64_t) (1000 * next_timeout_ms);
+
+				osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+					"__osmv_txn_timeout_cb: "
+					"Retry request timout in : %" PRIu64
+					" [msec].\n", next_timeout_ms);
+			}
+		} else {
+			osm_log(p_bo->p_vendor->p_log, OSM_LOG_ERROR,
+				"__osmv_txn_timeout_cb: ERR 6702: "
+				"The transaction request (0x%016" PRIx64 ") "
+				"timed out (after %d retries). "
+				"Invoking the error callback.\n",
+				osmv_txn_get_tid(p_txn), num_regs);
+
+			osmv_txn_done((osm_bind_handle_t) p_bo, key,
+				      TRUE /*in timeout callback */ );
+
+			/* This is a requester. Always apply the callback */
+			invoke_err_cb = TRUE;
+		}
+		break;
+
+	case OSMV_TXN_RMPP_SENDER:
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+			"RMPP sender (tid=0x%016" PRIx64 ") did not receive ACK "
+			"on every segment in the current send window.\n",
+			osmv_txn_get_tid(p_txn));
+
+		p_send_ctx = osmv_txn_get_rmpp_send_ctx(p_txn);
+		if (num_regs <= OSM_DEFAULT_RETRY_COUNT) {
+			/* We still did not exceed the limit of retransmissions.
+			 * Set the next timeout's value.
+			 */
+			ret =
+			    cl_get_time_stamp() +
+			    1000 * p_bo->p_vendor->resp_timeout;
+		} else {
+			p_send_ctx->status = IB_TIMEOUT;
+
+			p_mad = osm_madw_get_mad_ptr(p_madw);
+			p_mad_addr = osm_madw_get_mad_addr_ptr(p_madw);
+
+			/* Send an ABORT to the other side */
+			osmv_rmpp_send_nak((osm_bind_handle_t) p_bo, p_mad,
+					   p_mad_addr, IB_RMPP_TYPE_ABORT,
+					   IB_RMPP_STATUS_T2L);
+		}
+
+		/* Wake the RMPP sender thread up */
+		cl_event_signal(&p_send_ctx->event);
+		break;
+
+	case OSMV_TXN_RMPP_RECEIVER:
+		osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG,
+			"Transaction timeout on an RMPP receiver "
+			"(tid=0x%016" PRIx64 "). Dropping the transaction.\n",
+			osmv_txn_get_tid(p_txn));
+
+		/* Read the flag BEFORE dropping the transaction:
+		 * osmv_txn_done() frees p_txn, so it must not be touched
+		 * afterwards.
+		 */
+		if (FALSE == osmv_txn_is_rmpp_init_by_peer(p_txn)) {
+			/* This is a requester, still waiting for the reply. Apply the callback */
+			invoke_err_cb = TRUE;
+		}
+
+		osmv_txn_done((osm_bind_handle_t) p_bo, key,
+			      TRUE /*in timeout callback */ );
+
+		break;
+
+	default:
+		CL_ASSERT(FALSE);
+	}
+
+	if (TRUE == invoke_err_cb) {
+		CL_ASSERT(NULL != p_madw);
+		/* update the status in the p_madw */
+		p_madw->status = IB_TIMEOUT;
+		p_bo->send_err_cb(p_bo->cb_context, p_madw);
+		/* no re-registration */
+		ret = 0;
+	}
+
+txn_done:
+	OSM_LOG_EXIT(p_bo->p_vendor->p_log);
+	return ret;
+}
diff --git a/libvendor/osm_vendor_mtl.c b/libvendor/osm_vendor_mtl.c
new file mode 100644
index 0000000..ede3c71
--- /dev/null
+++ b/libvendor/osm_vendor_mtl.c
@@ -0,0 +1,1098 @@
+/*
+ * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#if HAVE_CONFIG_H
+#  include
+#endif				/* HAVE_CONFIG_H */
+
+#ifdef OSM_VENDOR_INTF_MTL
+
+#include
+#include
+#include
+#include
+/* HACK - I do not know how to prevent complib from loading kernel H files */
+#undef __init
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+  Since a race can occur on requests - meaning a response is received before
+  the send_callback is called - we save both the madw_p and the fact
+  whether or not it is a response. A race can occur only on requests that did
+  not fail, and then the madw_p will be put back in the pool before the callback.
+
+  Encoding: the wrapper pointer is copied into a 64-bit work-request id,
+  shifted left by one bit, and the freed low bit carries the result of
+  ib_mad_is_response() for the wrapped MAD.
+*/
+uint64_t __osm_set_wrid_by_p_madw(IN osm_madw_t * p_madw)
+{
+	uint64_t wrid = 0;
+
+	CL_ASSERT(p_madw->p_mad);
+
+	memcpy(&wrid, &p_madw, sizeof(osm_madw_t *));
+	wrid = (wrid << 1) | ib_mad_is_response(p_madw->p_mad);
+	return wrid;
+}
+
+/*
+  Inverse of __osm_set_wrid_by_p_madw(): the low bit of wrid is returned in
+  *is_resp and the remaining bits, shifted back right by one, are copied
+  into *pp_madw as the wrapper pointer.
+*/
+void
+__osm_set_p_madw_and_resp_by_wrid(IN uint64_t wrid,
+				  OUT uint8_t * is_resp,
+				  OUT osm_madw_t ** pp_madw)
+{
+	*is_resp = wrid & 0x0000000000000001;
+	wrid = wrid >> 1;
+	memcpy(pp_madw, &wrid, sizeof(osm_madw_t *));
+}
+
+/**********************************************************************
+ * IB_MGT to OSM ADDRESS VECTOR
+ *
+ * Fills *p_mad_addr from the IB_MGT receive descriptor. is_smi selects
+ * which member of the addr_type union is written (smi when non-zero,
+ * gsi otherwise). p_vend is not used by this function.
+ **********************************************************************/
+void
+__osm_mtl_conv_ibmgt_rcv_desc_to_osm_addr(IN osm_vendor_t * const p_vend,
+					  IN IB_MGT_mad_rcv_desc_t * p_rcv_desc,
+					  IN uint8_t is_smi,
+					  OUT osm_mad_addr_t * p_mad_addr)
+{
+	/*  p_mad_addr->dest_lid = p_osm->subn.sm_base_lid; - for resp we use the dest lid ... */
+	p_mad_addr->dest_lid = cl_hton16(p_rcv_desc->remote_lid);
+	p_mad_addr->static_rate = 0;	/*  HACK - we do not know the rate ! */
+	p_mad_addr->path_bits = p_rcv_desc->local_path_bits;
+	if (is_smi) {
+		/* SMI */
+		p_mad_addr->addr_type.smi.source_lid =
+		    cl_hton16(p_rcv_desc->remote_lid);
+		p_mad_addr->addr_type.smi.port_num = 99;	/*  HACK - if used - should fail */
+	} else {
+		/* GSI */
+		/* seems to me there is a IBMGT bug reversing the QPN ... */
+		/* Does IBMGT supposed to provide the QPN is network or HOST ? */
+		p_mad_addr->addr_type.gsi.remote_qp = cl_hton32(p_rcv_desc->qp);
+
+		p_mad_addr->addr_type.gsi.remote_qkey = IB_QP1_WELL_KNOWN_Q_KEY;
+		/*  we do have the p_mad_addr->pkey_ix but how to get the PKey by index ? */
+		/*  the only way seems to be to use VAPI_query_hca_pkey_tbl and obtain */
+		/*  the full PKey table - than go by the index. */
+		/*  since this does not seem reasonable to me I simply use the default */
+		/*  There is a TAVOR limitation that only one P_KEY is supported per  */
+		/*  QP - so QP1 must use IB_DEFAULT_PKEY */
+		p_mad_addr->addr_type.gsi.pkey_ix = 0;
+		p_mad_addr->addr_type.gsi.service_level = p_rcv_desc->sl;
+
+		p_mad_addr->addr_type.gsi.global_route = p_rcv_desc->grh_flag;
+		/* copy the GRH data if relevant */
+		if (p_mad_addr->addr_type.gsi.global_route) {
+			p_mad_addr->addr_type.gsi.grh_info.ver_class_flow =
+			    ib_grh_set_ver_class_flow(p_rcv_desc->grh.IP_version,
+						      p_rcv_desc->grh.traffic_class,
+						      p_rcv_desc->grh.flow_label);
+			p_mad_addr->addr_type.gsi.grh_info.hop_limit =
+			    p_rcv_desc->grh.hop_limit;
+			/*
+			 * NOTE(review): only sizeof(ib_net64_t) bytes of each
+			 * GID are copied. If a GID is wider than that here,
+			 * this keeps just its leading part - confirm whether
+			 * the full GID was intended.
+			 */
+			memcpy(&p_mad_addr->addr_type.gsi.grh_info.src_gid.raw,
+			       &p_rcv_desc->grh.sgid, sizeof(ib_net64_t));
+			memcpy(&p_mad_addr->addr_type.gsi.grh_info.dest_gid.raw,
+			       p_rcv_desc->grh.dgid, sizeof(ib_net64_t));
+		}
+	}
+}
+
+/**********************************************************************
+ * OSM ADDR VECTOR TO IB_MGT
+ *
+ * Zeroes *p_av and fills it from *p_mad_addr. For SMI (is_smi non-zero)
+ * only path bits, rate and DLID are set and SL stays 0; for GSI the SL
+ * and, when global_route is set, the GRH fields are copied as well.
+ **********************************************************************/
+void
+__osm_mtl_conv_osm_addr_to_ibmgt_addr(IN osm_mad_addr_t * p_mad_addr,
+				      IN uint8_t is_smi, OUT IB_ud_av_t * p_av)
+{
+
+	/* For global destination or Multicast address: */
+	u_int8_t ver;
+
+	memset(p_av, 0, sizeof(IB_ud_av_t));
+
+	p_av->src_path_bits = p_mad_addr->path_bits;
+	p_av->static_rate = p_mad_addr->static_rate;
+	p_av->dlid = cl_ntoh16(p_mad_addr->dest_lid);
+
+	if (is_smi) {
+		p_av->sl = 0;	/*  Just to note we use 0 here. */
+	} else {
+		p_av->sl = p_mad_addr->addr_type.gsi.service_level;
+		p_av->grh_flag = p_mad_addr->addr_type.gsi.global_route;
+
+		if (p_mad_addr->addr_type.gsi.global_route) {
+			/* ver is extracted but not used afterwards */
+			ib_grh_get_ver_class_flow(p_mad_addr->addr_type.gsi.
+						  grh_info.ver_class_flow, &ver,
+						  &p_av->traffic_class,
+						  &p_av->flow_label);
+			p_av->hop_limit =
+			    p_mad_addr->addr_type.gsi.grh_info.hop_limit;
+			p_av->sgid_index = 0;	/*  we always use source GID 0 */
+			/*
+			 * NOTE(review): same sizeof(ib_net64_t) copy length as
+			 * in the receive-side conversion - confirm it covers
+			 * the whole destination GID.
+			 */
+			memcpy(&p_av->dgid,
+			       &p_mad_addr->addr_type.gsi.grh_info.dest_gid.raw,
+			       sizeof(ib_net64_t));
+
+		}
+	}
+}
+
+/*
+ * Clear the IS_SM attribute on the HCA port recorded in the bind object
+ * by calling VAPI_modify_hca_attr() with is_sm = FALSE. A failure is
+ * only logged; nothing is returned to the caller.
+ */
+void __osm_vendor_clear_sm(IN osm_bind_handle_t h_bind)
+{
+	osm_mtl_bind_info_t *p_bind = (osm_mtl_bind_info_t *) h_bind;
+	osm_vendor_t *p_vend = p_bind->p_vend;
+	VAPI_ret_t status;
+	VAPI_hca_attr_t attr_mod;
+	VAPI_hca_attr_mask_t attr_mask;
+
+	OSM_LOG_ENTER(p_vend->p_log);
+
+	memset(&attr_mod, 0, sizeof(attr_mod));
+	memset(&attr_mask, 0, sizeof(attr_mask));
+
+	attr_mod.is_sm = FALSE;
+	attr_mask = HCA_ATTR_IS_SM;
+
+	status =
+	    VAPI_modify_hca_attr(p_bind->hca_hndl, p_bind->port_num, &attr_mod,
+				 &attr_mask);
+	if (status != VAPI_OK) {
+		osm_log(p_vend->p_log, OSM_LOG_ERROR,
+			"__osm_vendor_clear_sm: ERR 3C21: "
+			"Unable set 'IS_SM' bit in port attributes (%d).\n",
+			status);
+	}
+
+	OSM_LOG_EXIT(p_vend->p_log);
+}
+
+/**********************************************************************
+ * ANY CONSTRUCTION OF THE osm_vendor_t OBJECT
+ *
+ * Zero-fills the object, which leaves h_al NULL so that a later
+ * osm_vendor_destroy() skips the unbind section.
+ **********************************************************************/
+void osm_vendor_construct(IN osm_vendor_t * const p_vend)
+{
+	memset(p_vend, 0, sizeof(*p_vend));
+}
+
+/**********************************************************************
+ * DEALLOCATE osm_vendor_t
+ *
+ * Unbinds and releases the GSI and SMI IB_MGT handles that were marked
+ * initialized, destroys the transaction manager and finally frees the
+ * management-bind bookkeeping struct stored in h_al. The osm_vendor_t
+ * itself is not freed here (see osm_vendor_delete).
+ * Unbind/release failures are logged and otherwise ignored.
+ **********************************************************************/
+void osm_vendor_destroy(IN osm_vendor_t * const p_vend)
+{
+	osm_vendor_mgt_bind_t *vendor_mgt_bind_p;
+	IB_MGT_ret_t mgt_ret;
+	OSM_LOG_ENTER(p_vend->p_log);
+
+	if (p_vend->h_al != NULL) {
+		vendor_mgt_bind_p = (osm_vendor_mgt_bind_t *) p_vend->h_al;
+		if (vendor_mgt_bind_p->gsi_init) {
+
+			/* un register the class */
+			/* HACK WE ASSUME WE ONLY GOT SA CLASS REGISTERD ON GSI !!! */
+			mgt_ret =
+			    IB_MGT_unbind_gsi_class(vendor_mgt_bind_p->
+						    gsi_mads_hdl,
+						    IB_MCLASS_SUBN_ADM);
+			if (mgt_ret != IB_MGT_OK) {
+				osm_log(p_vend->p_log, OSM_LOG_ERROR,
+					"osm_vendor_destroy: ERR 3C03: "
+					"Fail to unbind the SA class.\n");
+			}
+
+			/* un bind the handle */
+			if (IB_MGT_release_handle
+			    (vendor_mgt_bind_p->gsi_mads_hdl) != IB_MGT_OK) {
+				osm_log(p_vend->p_log, OSM_LOG_ERROR,
+					"osm_vendor_destroy: ERR 3C02: "
+					"Fail to unbind the SA GSI handle.\n");
+			}
+			osm_log(p_vend->p_log, OSM_LOG_DEBUG,
+				"osm_vendor_destroy: DBG 1002: "
+				"Unbind the GSI handles.\n");
+		}
+		if (vendor_mgt_bind_p->smi_init) {
+			/* first - clear the IS_SM in the capability mask */
+			__osm_vendor_clear_sm((osm_bind_handle_t)
+					      (vendor_mgt_bind_p->smi_p_bind));
+
+			/* un register the class */
+			mgt_ret =
+			    IB_MGT_unbind_sm(vendor_mgt_bind_p->smi_mads_hdl);
+			if (mgt_ret != IB_MGT_OK) {
+				osm_log(p_vend->p_log, OSM_LOG_ERROR,
+					"osm_vendor_destroy: ERR 3C04: "
+					"Fail to unbind the SM class.\n");
+			}
+
+			/* un bind the handle */
+			if (IB_MGT_release_handle
+			    (vendor_mgt_bind_p->smi_mads_hdl) != IB_MGT_OK) {
+				osm_log(p_vend->p_log, OSM_LOG_ERROR,
+					"osm_vendor_destroy: ERR 3C05: "
+					"Fail to unbind the SMI handle.\n");
+			}
+			osm_log(p_vend->p_log, OSM_LOG_DEBUG,
+				"osm_vendor_destroy: DBG 1003: "
+				"Unbind the SMI handles.\n");
+
+		}
+	}
+	osm_transaction_mgr_destroy(p_vend);
+	/*  __osm_mtl_destroy_tid_mad_map( p_vend ); */
+
+	/*
+	 * Release the bookkeeping struct that osm_vendor_init() allocated
+	 * and stored in h_al; it was previously leaked. Done last so that
+	 * nothing above can still reach it, and reset to NULL so a repeated
+	 * destroy is harmless.
+	 */
+	if (p_vend->h_al != NULL) {
+		free(p_vend->h_al);
+		p_vend->h_al = NULL;
+	}
+
+	OSM_LOG_EXIT(p_vend->p_log);
+}
+
+/**********************************************************************
+DEALLOCATE A POINTER TO osm_vendor_t
+
+Destroys and frees *pp_vend and resets it to NULL. A NULL *pp_vend is
+accepted and ignored.
+**********************************************************************/
+void osm_vendor_delete(IN osm_vendor_t ** const pp_vend)
+{
+	CL_ASSERT(pp_vend);
+
+	/* osm_vendor_destroy() dereferences its argument unconditionally */
+	if (*pp_vend == NULL)
+		return;
+
+	osm_vendor_destroy(*pp_vend);
+	free(*pp_vend);
+	*pp_vend = NULL;
+}
+
+/**********************************************************************
+ * This proc actually binds the handle to the lower level.
+ *
+ * We might have here as a result a casting of our struct to the ib_al_handle_t
+ *
+ * Q: Do we need 2 of those - one for SMI and one for GSI ?
+ * A: Yes! We should be able to do the SA too. So we need a struct!
+ *
+ **********************************************************************/
+
+ib_api_status_t
+osm_vendor_init(IN osm_vendor_t * const p_vend,
+		IN osm_log_t * const p_log, IN const uint32_t timeout)
+{
+	osm_vendor_mgt_bind_t *ib_mgt_hdl_p;
+	ib_api_status_t status = IB_SUCCESS;
+
+	OSM_LOG_ENTER(p_log);
+
+	p_vend->p_log = p_log;
+
+	/*
+	 * Start from a known state so that osm_vendor_destroy() is safe to
+	 * call even if the allocation below fails.
+	 */
+	p_vend->h_al = NULL;
+	p_vend->p_transaction_mgr = NULL;
+
+	/*
+	 * HACK: We need no handle. Assuming the driver is up.
+	 */
+	ib_mgt_hdl_p = (osm_vendor_mgt_bind_t *)
+	    malloc(sizeof(osm_vendor_mgt_bind_t));
+	if (ib_mgt_hdl_p == NULL) {
+		osm_log(p_vend->p_log, OSM_LOG_ERROR,
+			"osm_vendor_init: ERR 3C06: "
+			"Fail to allocate vendor mgt handle.\n");
+		/* previously fell through with IB_SUCCESS and h_al unset */
+		status = IB_INSUFFICIENT_RESOURCES;
+		goto Exit;
+	}
+
+	ib_mgt_hdl_p->smi_init = FALSE;
+	ib_mgt_hdl_p->gsi_init = FALSE;
+	/* cast it into the ib_al_handle_t h_al */
+	p_vend->h_al = (ib_al_handle_t) ib_mgt_hdl_p;
+	osm_transaction_mgr_init(p_vend);
+	/*  p_vend->madw_by_tid_map_p = NULL; */
+	/*  __osm_mtl_init_tid_mad_map( p_vend ); */
+	p_vend->timeout = timeout;
+
+Exit:
+	OSM_LOG_EXIT(p_log);
+	return (status);
+}
+
+/**********************************************************************
+ *  Create and Initialize osm_vendor_t Object
+ *
+ *  Returns the new object, or NULL when the allocation or
+ *  osm_vendor_init() fails (in the latter case the object is deleted
+ *  again before returning).
+ **********************************************************************/
+osm_vendor_t *osm_vendor_new(IN osm_log_t * const p_log,
+			     IN const uint32_t timeout)
+{
+	ib_api_status_t status;
+	osm_vendor_t *p_vend;
+
+	CL_ASSERT(p_log);
+
+	OSM_LOG_ENTER(p_log);
+
+	p_vend = malloc(sizeof(*p_vend));
+	if (p_vend != NULL) {
+		memset(p_vend, 0, sizeof(*p_vend));
+		status = osm_vendor_init(p_vend, p_log, timeout);
+		if (status != IB_SUCCESS) {
+			/* resets p_vend to NULL */
+			osm_vendor_delete(&p_vend);
+		}
+	} else {
+		/* p_vend is NULL here, so log through the caller's p_log */
+		osm_log(p_log, OSM_LOG_ERROR,
+			"osm_vendor_new: ERR 3C07: "
+			"Fail to allocate vendor object.\n");
+	}
+
+	OSM_LOG_EXIT(p_log);
+	return (p_vend);
+}
+
+/**********************************************************************
+ * IB_MGT RCV callback
+ *
+ * private_ctx_p is the osm_mtl_bind_info_t registered in osm_vendor_bind.
+ * For a response MAD the matching request wrapper is looked up by TID and
+ * its pre-allocated response wrapper is filled; for any other MAD a new
+ * wrapper is taken from the pool. MAD_BLOCK_SIZE bytes of payload are
+ * copied in and the user's rcv_callback is invoked.
+ **********************************************************************/
+void
+__osm_mtl_rcv_callback(IN IB_MGT_mad_hndl_t mad_hndl,
+		       IN void *private_ctx_p,
+		       IN void *payload_p,
+		       IN IB_MGT_mad_rcv_desc_t * rcv_remote_info_p)
+{
+	IB_MGT_ret_t status;
+	osm_mtl_bind_info_t *bind_info_p = private_ctx_p;
+	osm_madw_t *req_madw_p = NULL;
+	osm_madw_t *madw_p;
+	osm_vend_wrap_t *p_new_vw;
+	osm_mad_addr_t mad_addr;
+	ib_mad_t *mad_buf_p;
+	osm_log_t *const p_log = bind_info_p->p_vend->p_log;
+
+	OSM_LOG_ENTER(p_log);
+
+	/* if it is a response MAD we must be able to get the request */
+	if (ib_mad_is_response((ib_mad_t *) payload_p)) {
+		/* can we find a matching madw by this payload TID */
+		status =
+		    osm_transaction_mgr_get_madw_for_tid(bind_info_p->p_vend,
+							 (ib_mad_t *) payload_p,
+							 &req_madw_p);
+		if (status != IB_MGT_OK) {
+			osm_log(p_log, OSM_LOG_ERROR,
+				"__osm_mtl_rcv_callback: ERR 3C08: "
+				"Error obtaining request madw by TID (%d).\n",
+				status);
+			req_madw_p = NULL;
+		}
+
+		if (req_madw_p == NULL) {
+			osm_log(p_log, OSM_LOG_ERROR,
+				"__osm_mtl_rcv_callback: ERR 3C09: "
+				"Fail to obtain request madw for received MAD.(method=%X attr=%X) Aborting CB.\n",
+				((ib_mad_t *) payload_p)->method,
+				cl_ntoh16(((ib_mad_t *) payload_p)->attr_id)
+
+			    );
+			goto Exit;
+		}
+	}
+
+	/* do we have a request ??? */
+	if (req_madw_p == NULL) {
+
+		/* first arrange an address */
+		__osm_mtl_conv_ibmgt_rcv_desc_to_osm_addr(bind_info_p->p_vend,
+							  rcv_remote_info_p,
+							  (((ib_mad_t *)
+							    payload_p)->
+							   mgmt_class ==
+							   IB_MCLASS_SUBN_LID)
+							  || (((ib_mad_t *)
+							       payload_p)->
+							      mgmt_class ==
+							      IB_MCLASS_SUBN_DIR),
+							  &mad_addr);
+
+		/* logged at ERROR level in the original; kept as is */
+		osm_log(p_log, OSM_LOG_ERROR,
+			"__osm_mtl_rcv_callback: : "
+			"Received MAD from QP:%X.\n",
+			cl_ntoh32(mad_addr.addr_type.gsi.remote_qp)
+		    );
+
+		/* if not - get new osm_madw and arrange it. */
+		/* create the new madw in the pool */
+		madw_p = osm_mad_pool_get(bind_info_p->p_osm_pool,
+					  (osm_bind_handle_t) bind_info_p,
+					  MAD_BLOCK_SIZE, &mad_addr);
+		if (madw_p == NULL) {
+			osm_log(p_log, OSM_LOG_ERROR,
+				"__osm_mtl_rcv_callback: ERR 3C10: "
+				"Error request for a new madw.\n");
+			goto Exit;
+		}
+		/* HACK: we cast to avoid the const ??? */
+		mad_buf_p = (void *)madw_p->p_mad;
+	} else {
+		/* we have the madw defined during the send and stored in the vend_wrap */
+		/* we need to make sure the wrapper is correctly init there */
+		CL_ASSERT(req_madw_p->vend_wrap.p_resp_madw != 0);
+		madw_p = req_madw_p->vend_wrap.p_resp_madw;
+
+		/* HACK: we do not Support RMPP */
+		CL_ASSERT(madw_p->h_bind);
+		mad_buf_p =
+		    osm_vendor_get(madw_p->h_bind, MAD_BLOCK_SIZE,
+				   &madw_p->vend_wrap);
+
+		if (mad_buf_p == NULL) {
+			osm_log(p_log, OSM_LOG_ERROR,
+				"__osm_mtl_rcv_callback: ERR 3C11: "
+				"Unable to acquire wire MAD.\n");
+
+			goto Exit;
+		}
+
+		/*
+		   Finally, attach the wire MAD to this wrapper.
+		 */
+		osm_madw_set_mad(madw_p, mad_buf_p);
+
+		/* also we need to handle the size of the mad since we did not init ... */
+		madw_p->mad_size = MAD_BLOCK_SIZE;
+	}
+
+	/* init some fields of the vendor wrapper */
+	p_new_vw = osm_madw_get_vend_ptr(madw_p);
+	p_new_vw->h_bind = bind_info_p;
+	p_new_vw->size = MAD_BLOCK_SIZE;
+	p_new_vw->p_resp_madw = NULL;
+	p_new_vw->mad_buf_p = mad_buf_p;
+
+	/* HACK: We do not support RMPP in receiving MADS */
+	memcpy(p_new_vw->mad_buf_p, payload_p, MAD_BLOCK_SIZE);
+
+	/* attach the buffer to the wrapper */
+	madw_p->p_mad = mad_buf_p;
+
+	/* we can also make sure we marked the size and bind on the returned madw */
+	madw_p->h_bind = p_new_vw->h_bind;
+
+	/* call the CB */
+	(*bind_info_p->rcv_callback) (madw_p, bind_info_p->client_context,
+				      req_madw_p);
+
+Exit:
+	OSM_LOG_EXIT(p_log);
+}
+
+/**********************************************************************
+ * IB_MGT Send callback : invoked after each send
+ *
+ * wrid carries the wrapper pointer and the is-response bit as packed by
+ * __osm_set_wrid_by_p_madw(). Response wrappers are always returned to
+ * the pool here. For a failed request the pre-allocated response wrapper
+ * (if any) is returned to the pool and the user's send_err_callback is
+ * invoked; a successful request needs no action.
+ **********************************************************************/
+void
+__osm_mtl_send_callback(IN IB_MGT_mad_hndl_t mad_hndl,
+			IN u_int64_t wrid,
+			IN IB_comp_status_t status, IN void *private_ctx_p)
+{
+	osm_madw_t *madw_p;
+	osm_mtl_bind_info_t *bind_info_p =
+	    (osm_mtl_bind_info_t *) private_ctx_p;
+	osm_log_t *const p_log = bind_info_p->p_vend->p_log;
+	osm_vend_wrap_t *p_vw;
+	uint8_t is_resp;
+
+	OSM_LOG_ENTER(p_log);
+
+	/* obtain the madp from the wrid */
+	__osm_set_p_madw_and_resp_by_wrid(wrid, &is_resp, &madw_p);
+
+	osm_log(p_log, OSM_LOG_DEBUG,
+		"__osm_mtl_send_callback: INFO 1008: "
+		"Handling Send of MADW:%p Is Resp:%d.\n", madw_p, is_resp);
+
+	/* we need to handle requests and responses differently */
+	if (is_resp) {
+		if (status != IB_COMP_SUCCESS) {
+			osm_log(p_log, OSM_LOG_ERROR,
+				"__osm_mtl_send_callback: ERR 3C12: "
+				"Error Sending Response MADW:%p.\n", madw_p);
+		} else {
+			osm_log(p_log, OSM_LOG_DEBUG,
+				"__osm_mtl_send_callback: DBG 1008: "
+				"Completed Sending Response MADW:%p.\n",
+				madw_p);
+		}
+
+		/* if we are a response - we need to clean it up */
+		osm_mad_pool_put(bind_info_p->p_osm_pool, madw_p);
+	} else {
+
+		/* this call back is invoked on completion of send - error or not */
+		if (status != IB_COMP_SUCCESS) {
+
+			osm_log(p_log, OSM_LOG_ERROR,
+				"__osm_mtl_send_callback: ERR 3C13: "
+				"Received an Error from IB_MGT Send (%d).\n",
+				status);
+
+			p_vw = osm_madw_get_vend_ptr(madw_p);
+			CL_ASSERT(p_vw);
+
+			/*
+			   Return any wrappers to the pool that may have been
+			   pre-emptively allocated to handle a receive.
+			 */
+			if (p_vw->p_resp_madw) {
+				osm_mad_pool_put(bind_info_p->p_osm_pool,
+						 p_vw->p_resp_madw);
+				p_vw->p_resp_madw = NULL;
+			}
+
+			/* invoke the CB */
+			(*bind_info_p->send_err_callback) (bind_info_p->
+							   client_context,
+							   madw_p);
+		} else {
+			/* successful request send - do nothing - the response will need the
+			   out mad */
+			osm_log(p_log, OSM_LOG_DEBUG,
+				"__osm_mtl_send_callback: DBG 1008: "
+				"Completed Sending Request MADW:%p.\n", madw_p);
+		}
+	}
+
+	OSM_LOG_EXIT(p_log);
+}
+
+/**********************************************************************
+ * BINDs a callback (rcv and send error) for a given class and method
+ * defined by the given:  osm_bind_info_t
+ *
+ * Returns the new bind handle, or NULL on any failure (CA/port lookup,
+ * allocation, IB_MGT handle acquisition, class bind or callback
+ * registration). SUBN_LID and SUBN_DIR classes are bound as SMI, every
+ * other class as GSI; one IB_MGT handle per type is shared through the
+ * vendor's h_al bookkeeping struct.
+ **********************************************************************/
+osm_bind_handle_t
+osm_vendor_bind(IN osm_vendor_t * const p_vend,
+		IN osm_bind_info_t * const p_user_bind,
+		IN osm_mad_pool_t * const p_mad_pool,
+		IN osm_vend_mad_recv_callback_t mad_recv_callback,
+		IN osm_vend_mad_send_err_callback_t send_err_callback,
+		IN void *context)
+{
+	ib_net64_t port_guid;
+	osm_mtl_bind_info_t *p_bind = NULL;
+	VAPI_hca_hndl_t hca_hndl;
+	VAPI_hca_id_t hca_id;
+	IB_MGT_mad_type_t mad_type;
+	uint32_t port_num;
+	osm_vendor_mgt_bind_t *ib_mgt_hdl_p;
+	IB_MGT_ret_t mgt_ret;
+
+	OSM_LOG_ENTER(p_vend->p_log);
+
+	CL_ASSERT(p_user_bind);
+	CL_ASSERT(p_mad_pool);
+	CL_ASSERT(mad_recv_callback);
+	CL_ASSERT(send_err_callback);
+
+	/* cast back the AL handle to vendor mgt bind */
+	ib_mgt_hdl_p = (osm_vendor_mgt_bind_t *) p_vend->h_al;
+
+	port_guid = p_user_bind->port_guid;
+
+	osm_log(p_vend->p_log, OSM_LOG_INFO,
+		"osm_vendor_bind: "
+		"Binding to port 0x%" PRIx64 ".\n", cl_ntoh64(port_guid));
+
+	/* obtain the hca name and port num from the guid */
+	osm_log(p_vend->p_log, OSM_LOG_DEBUG,
+		"osm_vendor_bind: "
+		"Finding CA and Port that owns port guid 0x%" PRIx64 ".\n",
+		port_guid);
+
+	mgt_ret =
+	    osm_vendor_get_guid_ca_and_port(p_vend, port_guid, &hca_hndl,
+					    &hca_id, &port_num);
+	if (mgt_ret != IB_MGT_OK) {
+		/* the format has a %d; the matching argument was missing */
+		osm_log(p_vend->p_log, OSM_LOG_ERROR,
+			"osm_vendor_bind: ERR 3C14: "
+			"Unable to obtain CA and port (%d).\n", mgt_ret);
+		goto Exit;
+	}
+
+	/* create the bind object tracking this binding */
+	p_bind = (osm_mtl_bind_info_t *) malloc(sizeof(osm_mtl_bind_info_t));
+	if (p_bind == NULL) {
+		osm_log(p_vend->p_log, OSM_LOG_ERROR,
+			"osm_vendor_bind: ERR 3C15: "
+			"Unable to allocate internal bind object.\n");
+		goto Exit;
+	}
+	/* only touch the object once the allocation is known to be good */
+	memset(p_bind, 0, sizeof(osm_mtl_bind_info_t));
+
+	/* track this bind request info */
+	/*
+	 * NOTE(review): hca_hndl returned above is never stored in
+	 * p_bind->hca_hndl, although the IS_SM helpers in this file pass
+	 * p_bind->hca_hndl to VAPI_modify_hca_attr(). Confirm whether it is
+	 * assigned elsewhere.
+	 */
+	memcpy(p_bind->hca_id, hca_id, sizeof(VAPI_hca_id_t));
+	p_bind->port_num = port_num;
+	p_bind->p_vend = p_vend;
+	p_bind->client_context = context;
+	p_bind->rcv_callback = mad_recv_callback;
+	p_bind->send_err_callback = send_err_callback;
+	p_bind->p_osm_pool = p_mad_pool;
+
+	CL_ASSERT(p_bind->port_num);
+
+	/*
+	 * Get the proper CLASS
+	 */
+
+	switch (p_user_bind->mad_class) {
+	case IB_MCLASS_SUBN_LID:
+	case IB_MCLASS_SUBN_DIR:
+		mad_type = IB_MGT_SMI;
+		break;
+
+	case IB_MCLASS_SUBN_ADM:
+	default:
+		mad_type = IB_MGT_GSI;
+		break;
+	}
+
+	/* we split here - based on the type of MADS GSI / SMI */
+	/* HACK: we only support one class registration per SMI/GSI !!! */
+	if (mad_type == IB_MGT_SMI) {
+		/*
+		 *  SMI CASE
+		 */
+
+		/* we do not need to bind the handle if already available */
+		if (ib_mgt_hdl_p->smi_init == FALSE) {
+
+			/* First we have to reg and get the handle for the mad */
+			osm_log(p_vend->p_log, OSM_LOG_ERROR,
+				"osm_vendor_bind: "
+				"Binding to IB_MGT SMI of %s port %u\n", hca_id,
+				port_num);
+
+			mgt_ret =
+			    IB_MGT_get_handle(hca_id, port_num, IB_MGT_SMI,
+					      &(ib_mgt_hdl_p->smi_mads_hdl));
+			if (IB_MGT_OK != mgt_ret) {
+				free(p_bind);
+				p_bind = NULL;
+				osm_log(p_vend->p_log, OSM_LOG_ERROR,
+					"osm_vendor_bind: ERR 3C16: "
+					"Error obtaining IB_MGT handle to SMI.\n");
+				goto Exit;
+			}
+
+			/* bind it */
+			mgt_ret = IB_MGT_bind_sm(ib_mgt_hdl_p->smi_mads_hdl);
+			if (IB_MGT_OK != mgt_ret) {
+				free(p_bind);
+				p_bind = NULL;
+				osm_log(p_vend->p_log, OSM_LOG_ERROR,
+					"osm_vendor_bind: ERR 3C17: "
+					"Error binding IB_MGT handle to SM.\n");
+				goto Exit;
+			}
+
+			ib_mgt_hdl_p->smi_init = TRUE;
+
+		}
+
+		/* attach to this bind info */
+		p_bind->mad_hndl = ib_mgt_hdl_p->smi_mads_hdl;
+		/*
+		 * NOTE(review): smi_p_bind is recorded before the callback
+		 * registration below; if that registration fails p_bind is
+		 * freed while smi_p_bind still points at it.
+		 */
+		ib_mgt_hdl_p->smi_p_bind = p_bind;
+
+		/* now register the callback */
+		mgt_ret = IB_MGT_reg_cb(p_bind->mad_hndl,
+					&__osm_mtl_rcv_callback,
+					p_bind,
+					&__osm_mtl_send_callback,
+					p_bind,
+					IB_MGT_RCV_CB_MASK |
+					IB_MGT_SEND_CB_MASK);
+
+	} else {
+		/*
+		 *  GSI CASE
+		 */
+
+		if (ib_mgt_hdl_p->gsi_init == FALSE) {
+			osm_log(p_vend->p_log, OSM_LOG_ERROR,
+				"osm_vendor_bind: " "Binding to IB_MGT GSI\n");
+
+			/* First we have to reg and get the handle for the mad */
+			mgt_ret =
+			    IB_MGT_get_handle(hca_id, port_num, IB_MGT_GSI,
+					      &(ib_mgt_hdl_p->gsi_mads_hdl));
+			if (IB_MGT_OK != mgt_ret) {
+				free(p_bind);
+				p_bind = NULL;
+				osm_log(p_vend->p_log, OSM_LOG_ERROR,
+					"osm_vendor_bind: ERR 3C20: "
+					"Error obtaining IB_MGT handle to GSI.\n");
+				goto Exit;
+			}
+
+			/* bind it */
+			mgt_ret =
+			    IB_MGT_bind_gsi_class(ib_mgt_hdl_p->gsi_mads_hdl,
+						  p_user_bind->mad_class);
+			if (IB_MGT_OK != mgt_ret) {
+				free(p_bind);
+				p_bind = NULL;
+				osm_log(p_vend->p_log, OSM_LOG_ERROR,
+					"osm_vendor_bind: ERR 3C22: "
+					"Error binding IB_MGT handle to GSI.\n");
+				goto Exit;
+			}
+
+			ib_mgt_hdl_p->gsi_init = TRUE;
+
+			/* attach to this bind info */
+			p_bind->mad_hndl = ib_mgt_hdl_p->gsi_mads_hdl;
+
+			/* now register the callback */
+			mgt_ret = IB_MGT_reg_cb(p_bind->mad_hndl,
+						&__osm_mtl_rcv_callback,
+						p_bind,
+						&__osm_mtl_send_callback,
+						p_bind,
+						IB_MGT_RCV_CB_MASK |
+						IB_MGT_SEND_CB_MASK);
+
+		} else {
+			/* we can use the existing handle */
+			p_bind->mad_hndl = ib_mgt_hdl_p->gsi_mads_hdl;
+			mgt_ret = IB_MGT_OK;
+		}
+
+	}
+
+	if (IB_MGT_OK != mgt_ret) {
+		free(p_bind);
+		p_bind = NULL;
+		osm_log(p_vend->p_log, OSM_LOG_ERROR,
+			"osm_vendor_bind: ERR 3C23: "
+			"Error binding IB_MGT CB (%d).\n", mgt_ret);
+		goto Exit;
+	}
+
+	/* HACK: Do we need to initialize an address vector ???? */
+
+Exit:
+	OSM_LOG_EXIT(p_vend->p_log);
+	return ((osm_bind_handle_t) p_bind);
+}
+
+/**********************************************************************
+Get a mad from the lower level.
+The osm_vend_wrap_t is a wrapper used to connect the mad to the response.
+**********************************************************************/
+ib_mad_t *osm_vendor_get(IN osm_bind_handle_t h_bind,
+			 IN const uint32_t mad_size,
+			 IN osm_vend_wrap_t * const p_vw)
+{
+	osm_mtl_bind_info_t *const p_bind_info = (osm_mtl_bind_info_t *) h_bind;
+	osm_vendor_t *const p_owner = p_bind_info->p_vend;
+	ib_mad_t *p_buf;
+
+	OSM_LOG_ENTER(p_owner->p_log);
+
+	CL_ASSERT(p_vw);
+	/* IB_MGT cannot carry anything larger than one MAD block */
+	CL_ASSERT(mad_size <= MAD_BLOCK_SIZE);
+
+	/* the buffer is always a full block, whatever mad_size was asked for */
+	p_vw->size = MAD_BLOCK_SIZE;
+
+	p_buf = (ib_mad_t *) malloc(p_vw->size);
+	if (p_buf != NULL) {
+		memset(p_buf, 0, p_vw->size);
+
+		/* remember the buffer and its owner in the wrapper */
+		p_vw->mad_buf_p = p_buf;
+		p_vw->h_bind = h_bind;
+		p_vw->p_resp_madw = NULL;
+
+		if (osm_log_get_level(p_owner->p_log) >= OSM_LOG_DEBUG) {
+			osm_log(p_owner->p_log, OSM_LOG_DEBUG,
+				"osm_vendor_get: "
+				"Acquired MAD %p, size = %u.\n", p_buf,
+				p_vw->size);
+		}
+	} else {
+		/* allocation failed: report it and hand back NULL */
+		osm_log(p_owner->p_log, OSM_LOG_ERROR,
+			"osm_vendor_get: ERR 3C24: "
+			"Error Obtaining MAD buffer.\n");
+	}
+
+	OSM_LOG_EXIT(p_owner->p_log);
+	return (p_buf);
+}
+
+/**********************************************************************
+ * Release the wire MAD buffer held by a wrapper.
+ * Only the buffer is freed; the wrapper lives inside its osm_madw_t,
+ * whose p_mad pointer is cleared as well.
+ **********************************************************************/
+void
+osm_vendor_put(IN osm_bind_handle_t h_bind, IN osm_vend_wrap_t * const p_vw)
+{
+	osm_mtl_bind_info_t *const p_bind_info = (osm_mtl_bind_info_t *) h_bind;
+	osm_vendor_t *const p_owner = p_bind_info->p_vend;
+	osm_madw_t *p_container;
+
+	OSM_LOG_ENTER(p_owner->p_log);
+
+	CL_ASSERT(p_vw);
+	CL_ASSERT(p_vw->mad_buf_p);
+
+	if (osm_log_get_level(p_owner->p_log) >= OSM_LOG_DEBUG) {
+		osm_log(p_owner->p_log, OSM_LOG_DEBUG,
+			"osm_vendor_put: " "Retiring MAD %p.\n",
+			p_vw->mad_buf_p);
+	}
+
+	/*
+	 * The transaction is no longer removed here; that happens right
+	 * after it is looked up.
+	 */
+
+	/* drop the buffer, then clear both references to it */
+	free(p_vw->mad_buf_p);
+	p_vw->mad_buf_p = NULL;
+
+	p_container = PARENT_STRUCT(p_vw, osm_madw_t, vend_wrap);
+	p_container->p_mad = NULL;
+
+	OSM_LOG_EXIT(p_owner->p_log);
+}
+
+/**********************************************************************
+Hand one MAD to IB_MGT.
+
+Shared by osm_vendor_send and by the transaction manager when it retries.
+Returns IB_SUCCESS when IB_MGT accepted the MAD and IB_ERROR otherwise;
+on failure a pre-allocated response wrapper, if present, goes back to
+the pool.
+**********************************************************************/
+ib_api_status_t
+osm_mtl_send_mad(IN osm_mtl_bind_info_t * p_bind, IN osm_madw_t * const p_madw)
+{
+	osm_vendor_t *const p_owner = p_bind->p_vend;
+	osm_vend_wrap_t *const p_wrap = osm_madw_get_vend_ptr(p_madw);
+	osm_mad_addr_t *const p_addr = osm_madw_get_mad_addr_ptr(p_madw);
+	ib_mad_t *const p_wire = osm_madw_get_mad_ptr(p_madw);
+	const int is_dir_route = (p_wire->mgmt_class == IB_MCLASS_SUBN_DIR);
+	const int is_lid_route = (p_wire->mgmt_class == IB_MCLASS_SUBN_LID);
+	ib_api_status_t result;
+	IB_MGT_ret_t send_rc;
+	IB_ud_av_t addr_vec;
+	uint64_t wr_id;
+	uint32_t dest_qpn;
+
+	OSM_LOG_ENTER(p_owner->p_log);
+
+	if (is_dir_route) {
+		/*
+		 * Directed route: build a permissive address by hand. The
+		 * port number is not needed, it is implied by the mad_hndl.
+		 */
+		memset(&addr_vec, 0, sizeof(addr_vec));
+		addr_vec.dlid = IB_LID_PERMISSIVE;
+	} else {
+		/* every other class gets its address vector from the madw */
+		__osm_mtl_conv_osm_addr_to_ibmgt_addr(p_addr, is_lid_route,
+						      &addr_vec);
+	}
+
+	/* the wrapper pointer travels as the work-request id */
+	wr_id = __osm_set_wrid_by_p_madw(p_madw);
+
+	if (is_dir_route || is_lid_route) {
+
+		/* SMI path */
+		if (osm_log_is_active(p_owner->p_log, OSM_LOG_DEBUG)) {
+			/*
+			 * NOTE(review): dlid is byte-swapped again for this
+			 * message only; the GSI message below prints it as is.
+			 */
+			osm_log(p_owner->p_log, OSM_LOG_DEBUG,
+				"osm_mtl_send_mad: "
+				"av.dlid 0x%X, "
+				"av.static_rate %d, "
+				"av.path_bits %d.\n",
+				cl_ntoh16(addr_vec.dlid), addr_vec.static_rate,
+				addr_vec.src_path_bits);
+		}
+
+		send_rc = IB_MGT_send_mad(p_bind->mad_hndl, p_wire,	/*  payload */
+					  &addr_vec,	/*  address vector */
+					  wr_id,	/*  wrapper pointer, for the callbacks */
+					  p_owner->timeout);
+
+	} else {
+		/* GSI path - the destination QP is passed explicitly */
+
+		/*
+		 * Original remark: IBMGT has a bug when sending to a QP
+		 * other than 1 - the QPN must be in network order except
+		 * when it is QPN 1. The value handed over is
+		 * cl_ntoh32(remote_qp).
+		 */
+		dest_qpn = cl_ntoh32(p_addr->addr_type.gsi.remote_qp);
+
+		if (osm_log_is_active(p_owner->p_log, OSM_LOG_DEBUG)) {
+			osm_log(p_owner->p_log, OSM_LOG_DEBUG,
+				"osm_mtl_send_mad: "
+				"av.dlid 0x%X, av.static_rate %d, "
+				"av.path_bits %d, remote qp: 0x%06X \n",
+				addr_vec.dlid,
+				addr_vec.static_rate,
+				addr_vec.src_path_bits, dest_qpn);
+		}
+
+		send_rc = IB_MGT_send_mad_to_qp(p_bind->mad_hndl, p_wire,	/*  payload */
+						&addr_vec,	/*  address vector */
+						wr_id,	/*  wrapper pointer, for the callbacks */
+						p_owner->timeout, dest_qpn);
+	}
+
+	if (send_rc == IB_MGT_OK) {
+		result = IB_SUCCESS;
+	} else {
+		osm_log(p_owner->p_log, OSM_LOG_ERROR,
+			"osm_mtl_send_mad: ERR 3C26: "
+			"Error sending mad (%d).\n", send_rc);
+		/*
+		 * NOTE(review): the wrapper pointer is left set after being
+		 * returned to the pool - confirm no later path reuses it.
+		 */
+		if (p_wrap->p_resp_madw)
+			osm_mad_pool_put(p_bind->p_osm_pool,
+					 p_wrap->p_resp_madw);
+		result = IB_ERROR;
+	}
+
+	OSM_LOG_EXIT(p_owner->p_log);
+	return (result);
+}
+
+/**********************************************************************
+Public send entry point.
+
+When resp_expected is TRUE a raw response wrapper is reserved up front
+and the request is registered with the transaction manager by TID; then
+the MAD is passed to osm_mtl_send_mad, whose status is returned. If the
+wrapper cannot be reserved, IB_INSUFFICIENT_RESOURCES is returned and
+nothing is sent.
+**********************************************************************/
+ib_api_status_t
+osm_vendor_send(IN osm_bind_handle_t h_bind,
+		IN osm_madw_t * const p_madw, IN boolean_t const resp_expected)
+{
+	osm_mtl_bind_info_t *const p_bind_info = (osm_mtl_bind_info_t *) h_bind;
+	osm_vendor_t *const p_owner = p_bind_info->p_vend;
+	osm_vend_wrap_t *const p_wrap = osm_madw_get_vend_ptr(p_madw);
+	ib_api_status_t result;
+
+	OSM_LOG_ENTER(p_owner->p_log);
+
+	if (resp_expected != TRUE) {
+		/* one-way send: no response wrapper to track */
+		p_wrap->p_resp_madw = NULL;
+	} else {
+		/*
+		 * Reserve the response wrapper now: failing here is simpler
+		 * to unwind than failing once the wire MAD has arrived.
+		 */
+		p_wrap->p_resp_madw =
+		    osm_mad_pool_get_wrapper_raw(p_bind_info->p_osm_pool);
+		if (p_wrap->p_resp_madw == NULL) {
+			osm_log(p_owner->p_log, OSM_LOG_ERROR,
+				"osm_vendor_send: ERR 3C27: "
+				"Unable to allocate MAD wrapper.\n");
+			result = IB_INSUFFICIENT_RESOURCES;
+			goto Done;
+		}
+
+		/* the receive path needs to know which bind it belongs to */
+		((osm_madw_t *) (p_wrap->p_resp_madw))->h_bind = h_bind;
+
+		/* register the request so the response can be matched by TID */
+		result = osm_transaction_mgr_insert_madw((osm_bind_handle_t)
+							 p_bind_info, p_madw);
+		if (result != IB_SUCCESS) {
+			/* logged only; the send below is still attempted */
+			osm_log(p_owner->p_log, OSM_LOG_ERROR,
+				"osm_vendor_send: ERR 3C25: "
+				"Error inserting request madw by TID (%d).\n",
+				result);
+		}
+	}
+
+	/* push it to the wire */
+	result = osm_mtl_send_mad(p_bind_info, p_madw);
+
+Done:
+	OSM_LOG_EXIT(p_owner->p_log);
+	return (result);
+}
+
+/**********************************************************************
+ * the idea here is to change the content of the bind such that it
+ * will hold the local address used for sending directed route by the SMA.
+ **********************************************************************/
+ib_api_status_t osm_vendor_local_lid_change(IN osm_bind_handle_t h_bind)
+{
+	osm_mtl_bind_info_t *const p_bind_info = (osm_mtl_bind_info_t *) h_bind;
+	osm_vendor_t *const p_owner = p_bind_info->p_vend;
+
+	/* nothing is updated in this implementation; the event is only logged */
+	OSM_LOG_ENTER(p_owner->p_log);
+	osm_log(p_owner->p_log, OSM_LOG_DEBUG,
+		"osm_vendor_local_lid_change: DEBUG 2202: " "Change of LID.\n");
+	OSM_LOG_EXIT(p_owner->p_log);
+
+	return (IB_SUCCESS);
+}
+
+/*
+ * Set the IS_SM attribute of the bound HCA port to is_sm_val through
+ * VAPI_modify_hca_attr(). A failure is logged; nothing is returned.
+ */
+void osm_vendor_set_sm(IN osm_bind_handle_t h_bind, IN boolean_t is_sm_val)
+{
+	osm_mtl_bind_info_t *const p_bind_info = (osm_mtl_bind_info_t *) h_bind;
+	osm_vendor_t *const p_owner = p_bind_info->p_vend;
+	VAPI_hca_attr_t new_attr;
+	VAPI_hca_attr_mask_t which_attr;
+	VAPI_ret_t rc;
+
+	OSM_LOG_ENTER(p_owner->p_log);
+
+	/* only the is_sm field is requested to change */
+	memset(&new_attr, 0, sizeof(new_attr));
+	new_attr.is_sm = is_sm_val;
+
+	memset(&which_attr, 0, sizeof(which_attr));
+	which_attr = HCA_ATTR_IS_SM;
+
+	rc = VAPI_modify_hca_attr(p_bind_info->hca_hndl, p_bind_info->port_num,
+				  &new_attr, &which_attr);
+	if (rc != VAPI_OK) {
+		osm_log(p_owner->p_log, OSM_LOG_ERROR,
+			"osm_vendor_set_sm: ERR 3C28: "
+			"Unable set 'IS_SM' bit to:%u in port attributes (%d).\n",
+			is_sm_val, rc);
+	}
+
+	OSM_LOG_EXIT(p_owner->p_log);
+}
+
+/* Debug level hook: intentionally a no-op for this vendor layer. */
+void osm_vendor_set_debug(IN osm_vendor_t * const p_vend, IN int32_t level)
+{
+
+}
+
+#endif				/* OSM_VENDOR_INTF_MTL */
diff --git a/libvendor/osm_vendor_mtl_hca_guid.c b/libvendor/osm_vendor_mtl_hca_guid.c
new file mode 100644
index 0000000..0b0b23c
--- /dev/null
+++ b/libvendor/osm_vendor_mtl_hca_guid.c
@@ -0,0 +1,631 @@
+/*
+ * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#if defined(OSM_VENDOR_INTF_MTL) | defined(OSM_VENDOR_INTF_TS) +#undef IN +#undef OUT +#include +#include +#include +#include +#include +#include + +/******************************************************************************** + * + * Provide the functionality for selecting an HCA Port and Obtaining it's guid. 
+ * + ********************************************************************************/ + +/********************************************************************** + * Convert the given GID to GUID by copy of it's upper 8 bytes + * + * + **********************************************************************/ + +ib_api_status_t +__osm_vendor_gid_to_guid(IN u_int8_t * gid, OUT VAPI_gid_t * guid) +{ + memcpy(guid, gid + 8, 8); + return (IB_SUCCESS); +} + +/****f* OpenSM: CA Info/osm_ca_info_get_pi_ptr + * NAME + * osm_ca_info_get_pi_ptr + * + * DESCRIPTION + * Returns a pointer to the port attribute of the specified port + * owned by this CA. + * + * SYNOPSIS + */ +static ib_port_attr_t *__osm_ca_info_get_port_attr_ptr(IN const osm_ca_info_t * + const p_ca_info, + IN const uint8_t index) +{ + return (&p_ca_info->p_attr->p_port_attr[index]); +} + +/* + * PARAMETERS + * p_ca_info + * [in] Pointer to a CA Info object. + * + * index + * [in] Port "index" for which to retrieve the port attribute. + * The index is the offset into the ca's internal array + * of port attributes. + * + * RETURN VALUE + * Returns a pointer to the port attribute of the specified port + * owned by this CA. + * + * NOTES + * + * SEE ALSO + *********/ + +/******************************************************************************** + * get the CA names ava`ilable on the system + * NOTE: user of this function needs to deallocate p_hca_ids after usage. + ********************************************************************************/ +static ib_api_status_t +__osm_vendor_get_ca_ids(IN osm_vendor_t * const p_vend, + IN VAPI_hca_id_t ** const p_hca_ids, + IN uint32_t * const p_num_guids) +{ + ib_api_status_t status; + VAPI_ret_t vapi_res; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_hca_ids); + CL_ASSERT(p_num_guids); + + /* first call is just to get the number */ + vapi_res = EVAPI_list_hcas(0, p_num_guids, NULL); + + /* fail ? 
*/ + if (vapi_res == VAPI_EINVAL_PARAM) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_vendor_get_ca_ids: ERR 7101: " + "Bad parameter in calling: EVAPI_list_hcas. (%d)\n", + vapi_res); + status = IB_ERROR; + goto Exit; + } + + /* NO HCA ? */ + if (*p_num_guids == 0) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_vendor_get_ca_ids: ERR 7102: " + "No available channel adapters.\n"); + status = IB_INSUFFICIENT_RESOURCES; + goto Exit; + } + + /* allocate and really call - user of this function needs to deallocate it */ + *p_hca_ids = + (VAPI_hca_id_t *) malloc(*p_num_guids * sizeof(VAPI_hca_id_t)); + + /* now call it really */ + vapi_res = EVAPI_list_hcas(*p_num_guids, p_num_guids, *p_hca_ids); + + /* too many ? */ + if (vapi_res == VAPI_EAGAIN) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_vendor_get_ca_ids: ERR 7103: " + "More CA GUIDs than allocated array (%d).\n", + *p_num_guids); + status = IB_ERROR; + goto Exit; + } + + /* fail ? */ + if (vapi_res != VAPI_OK) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_vendor_get_ca_ids: ERR 7104: " + "Bad parameter in calling: EVAPI_list_hcas.\n"); + status = IB_ERROR; + goto Exit; + } + + if (osm_log_is_active(p_vend->p_log, OSM_LOG_DEBUG)) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__osm_vendor_get_ca_ids: " + "Detected %u local channel adapters.\n", *p_num_guids); + } + + status = IB_SUCCESS; + +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +/********************************************************************** + * Initialize an Info Struct for the Given HCA by its Id + **********************************************************************/ +static ib_api_status_t +__osm_ca_info_init(IN osm_vendor_t * const p_vend, + IN VAPI_hca_id_t ca_id, OUT osm_ca_info_t * const p_ca_info) +{ + ib_api_status_t status = IB_ERROR; + VAPI_ret_t vapi_res; + VAPI_hca_hndl_t hca_hndl; + VAPI_hca_vendor_t hca_vendor; + VAPI_hca_cap_t hca_cap; + VAPI_hca_port_t hca_port; + uint8_t port_num; + IB_gid_t 
*p_port_gid; + uint16_t maxNumGids; + + OSM_LOG_ENTER(p_vend->p_log); + + /* get the HCA handle */ + vapi_res = EVAPI_get_hca_hndl(ca_id, &hca_hndl); + if (vapi_res != VAPI_OK) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: ERR 7105: " + "Fail to get HCA handle (%u).\n", vapi_res); + goto Exit; + } + + if (osm_log_is_active(p_vend->p_log, OSM_LOG_DEBUG)) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__osm_ca_info_init: " "Querying CA %s.\n", ca_id); + } + + /* query and get the HCA capability */ + vapi_res = VAPI_query_hca_cap(hca_hndl, &hca_vendor, &hca_cap); + if (vapi_res != VAPI_OK) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: ERR 7106: " + "Fail to get HCA Capabilities (%u).\n", vapi_res); + goto Exit; + } + + /* get the guid of the HCA */ + memcpy(&(p_ca_info->guid), hca_cap.node_guid, 8 * sizeof(u_int8_t)); + p_ca_info->attr_size = 1; + p_ca_info->p_attr = (ib_ca_attr_t *) malloc(sizeof(ib_ca_attr_t)); + memcpy(&(p_ca_info->p_attr->ca_guid), hca_cap.node_guid, + 8 * sizeof(u_int8_t)); + + /* now obtain the attributes of the ports */ + p_ca_info->p_attr->num_ports = hca_cap.phys_port_num; + p_ca_info->p_attr->p_port_attr = + (ib_port_attr_t *) malloc(hca_cap.phys_port_num * + sizeof(ib_port_attr_t)); + + for (port_num = 0; port_num < p_ca_info->p_attr->num_ports; port_num++) { + + /* query the port attributes */ + vapi_res = + VAPI_query_hca_port_prop(hca_hndl, port_num + 1, &hca_port); + if (vapi_res != VAPI_OK) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_ca_info_init: ERR 7107: " + "Fail to get HCA Port Attributes (%d).\n", + vapi_res); + goto Exit; + } + + /* first call to know the size of the gid table */ + vapi_res = + VAPI_query_hca_gid_tbl(hca_hndl, port_num + 1, 0, + &maxNumGids, NULL); + p_port_gid = (IB_gid_t *) malloc(maxNumGids * sizeof(IB_gid_t)); + + vapi_res = + VAPI_query_hca_gid_tbl(hca_hndl, port_num + 1, maxNumGids, + &maxNumGids, p_port_gid); + if (vapi_res != VAPI_OK) { + osm_log(p_vend->p_log, 
OSM_LOG_ERROR, + "__osm_ca_info_init: ERR 7108: " + "Fail to get HCA Port GID (%d).\n", vapi_res); + goto Exit; + } + + __osm_vendor_gid_to_guid(p_port_gid[0], + (IB_gid_t *) & p_ca_info->p_attr-> + p_port_attr[port_num].port_guid); + p_ca_info->p_attr->p_port_attr[port_num].lid = hca_port.lid; + p_ca_info->p_attr->p_port_attr[port_num].link_state = + hca_port.state; + p_ca_info->p_attr->p_port_attr[port_num].sm_lid = + hca_port.sm_lid; + + free(p_port_gid); + } + + status = IB_SUCCESS; +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +void +osm_ca_info_destroy(IN osm_vendor_t * const p_vend, + IN osm_ca_info_t * const p_ca_info) +{ + OSM_LOG_ENTER(p_vend->p_log); + + if (p_ca_info->p_attr) { + if (p_ca_info->p_attr->num_ports) { + free(p_ca_info->p_attr->p_port_attr); + } + free(p_ca_info->p_attr); + } + + free(p_ca_info); + + OSM_LOG_EXIT(p_vend->p_log); +} + +/********************************************************************** + * Fill in the array of port_attr with all available ports on ALL the + * avilable CAs on this machine. + * ALSO - + * UPDATE THE VENDOR OBJECT LIST OF CA_INFO STRUCTS + **********************************************************************/ +ib_api_status_t +osm_vendor_get_all_port_attr(IN osm_vendor_t * const p_vend, + IN ib_port_attr_t * const p_attr_array, + IN uint32_t * const p_num_ports) +{ + ib_api_status_t status; + + uint32_t ca; + uint32_t ca_count; + uint32_t port_count = 0; + uint8_t port_num; + uint32_t total_ports = 0; + VAPI_hca_id_t *p_ca_ids = NULL; + osm_ca_info_t *p_ca_info; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vend); + + /* + * 1) Determine the number of CA's + * 2) Allocate an array big enough to hold the ca info objects. + * 3) Call again to retrieve the guids. 
+ */ + status = __osm_vendor_get_ca_ids(p_vend, &p_ca_ids, &ca_count); + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_all_port_attr: ERR 7109: " + "Fail to get CA Ids.\n"); + goto Exit; + } + + /* we keep track of all the CAs in this info array */ + p_vend->p_ca_info = malloc(ca_count * sizeof(*p_vend->p_ca_info)); + if (p_vend->p_ca_info == NULL) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_all_port_attr: ERR 7110: " + "Unable to allocate CA information array.\n"); + goto Exit; + } + + memset(p_vend->p_ca_info, 0, ca_count * sizeof(*p_vend->p_ca_info)); + p_vend->ca_count = ca_count; + + /* + * For each CA, retrieve the CA info attributes + */ + for (ca = 0; ca < ca_count; ca++) { + p_ca_info = &p_vend->p_ca_info[ca]; + + status = __osm_ca_info_init(p_vend, p_ca_ids[ca], p_ca_info); + + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_all_port_attr: ERR 7111: " + "Unable to initialize CA Info object (%s).\n", + ib_get_err_str(status)); + } + + total_ports += osm_ca_info_get_num_ports(p_ca_info); + + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_vendor_get_all_port_attr: " + "osm_vendor_get_all_port_attr: %u got %u ports total:%u\n", + ca, osm_ca_info_get_num_ports(p_ca_info), total_ports); + + } + + /* + * If the user supplied enough storage, return the port guids, + * otherwise, return the appropriate error. 
+ */ + if (*p_num_ports >= total_ports) { + for (ca = 0; ca < ca_count; ca++) { + uint32_t num_ports; + + p_ca_info = &p_vend->p_ca_info[ca]; + + num_ports = osm_ca_info_get_num_ports(p_ca_info); + + for (port_num = 0; port_num < num_ports; port_num++) { + p_attr_array[port_count] = + *__osm_ca_info_get_port_attr_ptr(p_ca_info, + port_num); + port_count++; + } + } + } else { + status = IB_INSUFFICIENT_MEMORY; + goto Exit; + } + + status = IB_SUCCESS; + +Exit: + *p_num_ports = total_ports; + + if (p_ca_ids) + free(p_ca_ids); + + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +/********************************************************************** + * Given the vendor obj and a guid + * return the ca id and port number that have that guid + **********************************************************************/ + +ib_api_status_t +osm_vendor_get_guid_ca_and_port(IN osm_vendor_t * const p_vend, + IN ib_net64_t const guid, + OUT VAPI_hca_hndl_t * p_hca_hndl, + OUT VAPI_hca_id_t * p_hca_id, + OUT uint32_t * p_port_num) +{ + + ib_api_status_t status; + VAPI_hca_id_t *p_ca_ids = NULL; + VAPI_ret_t vapi_res; + VAPI_hca_hndl_t hca_hndl; + VAPI_hca_vendor_t hca_vendor; + VAPI_hca_cap_t hca_cap; + IB_gid_t *p_port_gid = NULL; + uint16_t maxNumGids; + ib_net64_t port_guid; + uint32_t ca, portIdx, ca_count; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vend); + + /* + * 1) Determine the number of CA's + * 2) Allocate an array big enough to hold the ca info objects. + * 3) Call again to retrieve the guids. 
+ */ + status = __osm_vendor_get_ca_ids(p_vend, &p_ca_ids, &ca_count); + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_guid_ca_and_port: ERR 7112: " + "Fail to get CA Ids.\n"); + goto Exit; + } + + /* + * For each CA, retrieve the CA info attributes + */ + for (ca = 0; ca < ca_count; ca++) { + /* get the HCA handle */ + vapi_res = EVAPI_get_hca_hndl(p_ca_ids[ca], &hca_hndl); + if (vapi_res != VAPI_OK) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_guid_ca_and_port: ERR 7113: " + "Fail to get HCA handle (%u).\n", vapi_res); + goto Exit; + } + + /* get the CA attributes - to know how many ports it has: */ + if (osm_log_is_active(p_vend->p_log, OSM_LOG_DEBUG)) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_vendor_get_guid_ca_and_port: " + "Querying CA %s.\n", p_ca_ids[ca]); + } + + /* query and get the HCA capability */ + vapi_res = VAPI_query_hca_cap(hca_hndl, &hca_vendor, &hca_cap); + if (vapi_res != VAPI_OK) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_guid_ca_and_port: ERR 7114: " + "Fail to get HCA Capabilities (%u).\n", + vapi_res); + goto Exit; + } + + /* go over all ports - to obtail their guids */ + for (portIdx = 0; portIdx < hca_cap.phys_port_num; portIdx++) { + vapi_res = + VAPI_query_hca_gid_tbl(hca_hndl, portIdx + 1, 0, + &maxNumGids, NULL); + p_port_gid = + (IB_gid_t *) malloc(maxNumGids * sizeof(IB_gid_t)); + + /* get the port guid */ + vapi_res = + VAPI_query_hca_gid_tbl(hca_hndl, portIdx + 1, + maxNumGids, &maxNumGids, + p_port_gid); + if (vapi_res != VAPI_OK) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_guid_ca_and_port: ERR 7115: " + "Fail to get HCA Port GID (%d).\n", + vapi_res); + goto Exit; + } + + /* convert to SF style */ + __osm_vendor_gid_to_guid(p_port_gid[0], + (VAPI_gid_t *) & port_guid); + + /* finally did we find it ? 
*/ + if (port_guid == guid) { + *p_hca_hndl = hca_hndl; + memcpy(p_hca_id, p_ca_ids[ca], + sizeof(VAPI_hca_id_t)); + *p_port_num = portIdx + 1; + status = IB_SUCCESS; + goto Exit; + } + + free(p_port_gid); + p_port_gid = NULL; + } /* ALL PORTS */ + } /* all HCAs */ + + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get_guid_ca_and_port: ERR 7116: " + "Fail to find HCA and Port for Port Guid 0x%" PRIx64 "\n", + cl_ntoh64(guid)); + status = IB_INVALID_GUID; + +Exit: + if (p_ca_ids != NULL) + free(p_ca_ids); + if (p_port_gid != NULL) + free(p_port_gid); + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +#ifdef __TEST_HCA_GUID__ + +#define GUID_ARRAY_SIZE 64 + +#include + +ib_net64_t get_port_guid() +{ + uint32_t i; + uint32_t choice = 0; + boolean_t done_flag = FALSE; + ib_api_status_t status; + uint32_t num_ports = GUID_ARRAY_SIZE; + ib_port_attr_t attr_array[GUID_ARRAY_SIZE]; + VAPI_hca_id_t ca_id; + uint32_t portNum; + osm_vendor_t vend; + osm_vendor_t *p_vend; + osm_log_t *p_osm_log, tlog; + + p_osm_log = &tlog; + + status = osm_log_init(p_osm_log, FALSE); + if (status != IB_SUCCESS) + return (status); + + osm_log(p_osm_log, OSM_LOG_FUNCS, "get_port_guid: [\n"); + + p_vend = &vend; + p_vend->p_log = p_osm_log; + + /* + * Call the transport layer for a list of local port + * GUID values. + */ + status = osm_vendor_get_all_port_attr(p_vend, attr_array, &num_ports); + if (status != IB_SUCCESS) { + printf("\nError from osm_opensm_init (%x)\n", status); + return (0); + } + + if (num_ports == 0) { + printf("\nNo local ports detected!\n"); + return (0); + } + + while (done_flag == FALSE) { + printf("\nChoose a local port number with which to bind:\n\n"); + for (i = 0; i < num_ports; i++) { + /* + * Print the index + 1 since by convention, port numbers + * start with 1 on host channel adapters. 
+ */ + + printf("\t%u: GUID = 0x%8" PRIx64 + ", lid = 0x%04X, state = %s\n", i + 1, + cl_ntoh64(attr_array[i].port_guid), + cl_ntoh16(attr_array[i].lid), + ib_get_port_state_str(attr_array[i].link_state)); + } + + printf("\nEnter choice (1-%u): ", i); + fflush(stdout); + scanf("%u", &choice); + if (choice > num_ports) + printf("\nError: Lame choice!\n"); + else + done_flag = TRUE; + } + + status = + osm_vendor_get_guid_ca_and_port(p_vend, + attr_array[choice - 1].port_guid, + &ca_id, &portNum); + if (status != IB_SUCCESS) { + printf("Error obtaining back the HCA and Port\n"); + return (0); + } + + printf("Selected: CA:%s Port:%d\n", ca_id, portNum); + + return (attr_array[choice - 1].port_guid); +} + +int main(int argc, char **argv) +{ + get_port_guid(); + return (0); +} + +#endif + +#endif diff --git a/libvendor/osm_vendor_mtl_transaction_mgr.c b/libvendor/osm_vendor_mtl_transaction_mgr.c new file mode 100644 index 0000000..5ddfe84 --- /dev/null +++ b/libvendor/osm_vendor_mtl_transaction_mgr.c @@ -0,0 +1,546 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include + +#if defined(OSM_VENDOR_INTF_MTL) | defined(OSM_VENDOR_INTF_TS) + +#include +#ifdef OSM_VENDOR_INTF_MTL +#include +#endif + +/* this is the callback function of the timer */ +void __osm_transaction_mgr_callback(IN void *context) +{ + osm_transaction_mgr_t *trans_mgr_p; + osm_vendor_t *p_vend = (osm_vendor_t *) context; + cl_list_item_t *p_list_item; + cl_list_item_t *p_list_next_item; + osm_madw_req_t *osm_madw_req_p; + uint64_t current_time; /* [usec] */ + uint32_t new_timeout; /* [msec] */ + cl_status_t cl_status; + ib_mad_t *p_mad; +#ifdef OSM_VENDOR_INTF_MTL + osm_mtl_bind_info_t *p_bind; +#else + osm_ts_bind_info_t *p_bind; +#endif + cl_list_t tmp_madw_p_list; /* this list will include all the madw_p that should be removed. */ + cl_list_t retry_madw_p_list; /* this list will include all the madw_p that were retried and need to be removed. 
*/ + osm_madw_t *madw_p; + + OSM_LOG_ENTER(p_vend->p_log); + + trans_mgr_p = (osm_transaction_mgr_t *) p_vend->p_transaction_mgr; + + /* initialize the tmp_madw_p_list */ + cl_list_construct(&tmp_madw_p_list); + cl_status = cl_list_init(&tmp_madw_p_list, 50); + if (cl_status != CL_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_transaction_mgr_callback : ERROR 1000: " + "Failed to create tmp_madw_p_list\n"); + } + + cl_list_construct(&retry_madw_p_list); + cl_status = cl_list_init(&retry_madw_p_list, 50); + if (cl_status != CL_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_transaction_mgr_callback : ERROR 1000: " + "Failed to create retry_madw_p_list\n"); + } + + current_time = cl_get_time_stamp(); + cl_spinlock_acquire(&(trans_mgr_p->transaction_mgr_lock)); + p_list_item = cl_qlist_head(trans_mgr_p->madw_reqs_list_p); + if (p_list_item == cl_qlist_end(trans_mgr_p->madw_reqs_list_p)) { + /* the list is empty - nothing to do */ + cl_spinlock_release(&trans_mgr_p->transaction_mgr_lock); + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__osm_transaction_mgr_callback : Nothing to do\n"); + goto Exit; + } + + /* non empty list: */ + + /* get the osm_madw_req_p */ + osm_madw_req_p = PARENT_STRUCT(p_list_item, osm_madw_req_t, list_item); + + while (osm_madw_req_p->waking_time <= current_time) { + /* this object was supposed to have gotten a response */ + /* we need to decide if we need to retry or done with it. 
*/ + if (osm_madw_req_p->retry_cnt > 0) { + /* add to the list of the retrys : */ + cl_list_insert_tail(&retry_madw_p_list, osm_madw_req_p); + + /* update wakeup time and retry count */ + osm_madw_req_p->waking_time = + p_vend->timeout * 1000 + cl_get_time_stamp(); + osm_madw_req_p->retry_cnt--; + + /* make sure we will get some timer call if not earlier */ + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__osm_transaction_mgr_callback : Timer restart:%u\n", + p_vend->timeout); + + cl_status = + cl_timer_start(&trans_mgr_p->madw_list_timer, + p_vend->timeout); + + /* go to the next object and check if it also needs to be removed - didn't receive response */ + /* we need to do it before we move current item to the end of the list */ + p_list_next_item = cl_qlist_next(p_list_item); + + /* remove from the head */ + cl_qlist_remove_item(trans_mgr_p->madw_reqs_list_p, + &(osm_madw_req_p->list_item)); + + /* insert the object to the qlist and the qmap */ + cl_qlist_insert_tail(trans_mgr_p->madw_reqs_list_p, + &(osm_madw_req_p->list_item)); + + } else { + /* go to the next object and check if it also needs to be removed - didn't receive response */ + p_list_next_item = cl_qlist_next(p_list_item); + + /* remove from the head */ + cl_qlist_remove_item(trans_mgr_p->madw_reqs_list_p, + &(osm_madw_req_p->list_item)); + + /* add it to the tmp_madw_p_list to be removed */ + cl_list_insert_tail(&tmp_madw_p_list, + osm_madw_req_p->p_madw); + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__osm_transaction_mgr_callback : Found failed transaction madw: %p\n", + osm_madw_req_p->p_madw); + } + + /* Advance */ + p_list_item = p_list_next_item; + if (p_list_item == cl_qlist_end(trans_mgr_p->madw_reqs_list_p)) { + /* the list is empty - nothing to do */ + break; + } + + /* get the osm_madw_req_p */ + osm_madw_req_p = + PARENT_STRUCT(p_list_item, osm_madw_req_t, list_item); + } + + /* look at the current p_list_item. 
If it is not the end item - then we need to */ + /* re-start the timer */ + if (p_list_item != cl_qlist_end(trans_mgr_p->madw_reqs_list_p)) { + /* get the osm_madw_req_p */ + osm_madw_req_p = + PARENT_STRUCT(p_list_item, osm_madw_req_t, list_item); + + /* we have the object that still didn't get response - re-start the timer */ + /* start the timer to the timeout (in miliseconds) */ + new_timeout = + (osm_madw_req_p->waking_time - cl_get_time_stamp()) / 1000 + + 1; + cl_status = + cl_timer_start(&trans_mgr_p->madw_list_timer, new_timeout); + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__osm_transaction_mgr_callback : Timer restart:%u\n", + new_timeout); + + if (cl_status != CL_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_transaction_mgr_callback : ERROR 1000: " + "Failed to start timer\n"); + } + } + /* if not empty - retry on retry list: */ + if (!cl_is_list_empty(&retry_madw_p_list)) { + + /* remove all elements that were retried: */ + osm_madw_req_p = + (osm_madw_req_t + *) (cl_list_remove_head(&retry_madw_p_list)); + while (osm_madw_req_p != NULL) { + + /* resend: */ + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "__osm_transaction_mgr_callback : " + "Retry %d of madw %p\n", + OSM_DEFAULT_RETRY_COUNT - + osm_madw_req_p->retry_cnt, + osm_madw_req_p->p_madw); + + /* actually send it */ +#ifdef OSM_VENDOR_INTF_MTL + osm_mtl_send_mad((osm_mtl_bind_info_t *) + osm_madw_req_p->p_bind, + osm_madw_req_p->p_madw); +#else + ib_api_status_t + osm_ts_send_mad(osm_ts_bind_info_t * p_bind, + osm_madw_t * const p_madw); + osm_ts_send_mad((osm_ts_bind_info_t *) osm_madw_req_p-> + p_bind, osm_madw_req_p->p_madw); +#endif + /* next one */ + osm_madw_req_p = + (osm_madw_req_t + *) (cl_list_remove_head(&retry_madw_p_list)); + } + } + + /* if the tmp_madw_p_list has elements - need to call the send_err_callback */ + madw_p = (osm_madw_t *) (cl_list_remove_head(&tmp_madw_p_list)); + while (madw_p != NULL) { + /* need to remove it from pool */ + + /* obtain the madw_p stored 
as the wrid in the send call */ + p_mad = osm_madw_get_mad_ptr(madw_p); + p_bind = madw_p->h_bind; + /* + Return any wrappers to the pool that may have been + pre-emptively allocated to handle a receive. + */ + if (madw_p->vend_wrap.p_resp_madw) { +#ifdef OSM_VENDOR_INTF_MTL + osm_mad_pool_put(p_bind->p_osm_pool, + madw_p->vend_wrap.p_resp_madw); +#else + osm_mad_pool_put(p_bind->p_osm_pool, + madw_p->vend_wrap.p_resp_madw); +#endif + madw_p->vend_wrap.p_resp_madw = NULL; + } + + /* invoke the CB */ + (*(osm_vend_mad_send_err_callback_t) + (p_bind->send_err_callback)) (p_bind->client_context, madw_p); + madw_p = (osm_madw_t *) (cl_list_remove_head(&tmp_madw_p_list)); + } + +Exit: + OSM_LOG_EXIT(p_vend->p_log); + +} + +/* + * Construct and Initialize + */ + +void osm_transaction_mgr_init(IN osm_vendor_t * const p_vend) +{ + cl_status_t cl_status; + osm_transaction_mgr_t *trans_mgr_p; + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vend->p_transaction_mgr == NULL); + + (osm_transaction_mgr_t *) p_vend->p_transaction_mgr = + (osm_transaction_mgr_t *) malloc(sizeof(osm_transaction_mgr_t)); + + trans_mgr_p = (osm_transaction_mgr_t *) p_vend->p_transaction_mgr; + + /* construct lock object */ + cl_spinlock_construct(&(trans_mgr_p->transaction_mgr_lock)); + CL_ASSERT(cl_spinlock_init(&(trans_mgr_p->transaction_mgr_lock)) == + CL_SUCCESS); + + /* initialize the qlist */ + trans_mgr_p->madw_reqs_list_p = + (cl_qlist_t *) malloc(sizeof(cl_qlist_t)); + cl_qlist_init(trans_mgr_p->madw_reqs_list_p); + + /* initialize the qmap */ + trans_mgr_p->madw_by_tid_map_p = + (cl_qmap_t *) malloc(sizeof(cl_qmap_t)); + cl_qmap_init(trans_mgr_p->madw_by_tid_map_p); + + /* create the timer used by the madw_req_list */ + cl_timer_construct(&(trans_mgr_p->madw_list_timer)); + + /* init the timer with timeout. 
*/ + cl_status = cl_timer_init(&trans_mgr_p->madw_list_timer, + __osm_transaction_mgr_callback, p_vend); + + if (cl_status != CL_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_transaction_mgr_init : ERROR 1000: " + "Failed to initialize madw_reqs_list timer\n"); + } + OSM_LOG_EXIT(p_vend->p_log); +} + +void osm_transaction_mgr_destroy(IN osm_vendor_t * const p_vend) +{ + osm_transaction_mgr_t *trans_mgr_p; + cl_list_item_t *p_list_item; + cl_map_item_t *p_map_item; + osm_madw_req_t *osm_madw_req_p; + + OSM_LOG_ENTER(p_vend->p_log); + + trans_mgr_p = (osm_transaction_mgr_t *) p_vend->p_transaction_mgr; + + if (p_vend->p_transaction_mgr != NULL) { + /* we need to get a lock */ + cl_spinlock_acquire(&trans_mgr_p->transaction_mgr_lock); + + /* go over all the items in the list and remove them */ + p_list_item = + cl_qlist_remove_head(trans_mgr_p->madw_reqs_list_p); + while (p_list_item != + cl_qlist_end(trans_mgr_p->madw_reqs_list_p)) { + osm_madw_req_p = (osm_madw_req_t *) p_list_item; + + if (osm_madw_req_p->p_madw->p_mad) + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_transaction_mgr_destroy: " + "Found outstanding MADW:%p TID:<0x%" + PRIx64 ">.\n", osm_madw_req_p->p_madw, + osm_madw_req_p->p_madw->p_mad-> + trans_id); + else + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_transaction_mgr_destroy: " + "Found outstanding MADW:%p TID:UNDEFINED.\n", + osm_madw_req_p->p_madw); + + /* each item - remove it from the map */ + p_map_item = &(osm_madw_req_p->map_item); + cl_qmap_remove_item(trans_mgr_p->madw_by_tid_map_p, + p_map_item); + /* free the item */ + free(osm_madw_req_p); + p_list_item = + cl_qlist_remove_head(trans_mgr_p->madw_reqs_list_p); + } + /* free the qlist and qmap */ + free(trans_mgr_p->madw_reqs_list_p); + free(trans_mgr_p->madw_by_tid_map_p); + /* reliease and destroy the lock */ + cl_spinlock_release(&trans_mgr_p->transaction_mgr_lock); + cl_spinlock_destroy(&(trans_mgr_p->transaction_mgr_lock)); + /* destroy the timer */ + 
cl_timer_trim(&trans_mgr_p->madw_list_timer, 1); + cl_timer_destroy(&trans_mgr_p->madw_list_timer); + /* free the transaction_manager object */ + free(trans_mgr_p); + trans_mgr_p = NULL; + } + + OSM_LOG_EXIT(p_vend->p_log); +} + +ib_api_status_t +osm_transaction_mgr_insert_madw(IN osm_bind_handle_t * const p_bind, + IN osm_madw_t * p_madw) +{ +#ifdef OSM_VENDOR_INTF_MTL + osm_vendor_t *const p_vend = ((osm_mtl_bind_info_t *) p_bind)->p_vend; +#else + osm_vendor_t *const p_vend = ((osm_ts_bind_info_t *) p_bind)->p_vend; +#endif + osm_transaction_mgr_t *trans_mgr_p; + osm_madw_req_t *osm_madw_req_p; + uint64_t timeout; + uint64_t waking_time; + cl_status_t cl_status; + uint64_t key; + const ib_mad_t *mad_p = p_madw->p_mad; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(mad_p); + + trans_mgr_p = (osm_transaction_mgr_t *) p_vend->p_transaction_mgr; + + timeout = (uint64_t) (p_vend->timeout) * 1000; /* change the miliseconds value of timeout to microseconds. */ + waking_time = timeout + cl_get_time_stamp(); + + osm_madw_req_p = (osm_madw_req_t *) malloc(sizeof(osm_madw_req_t)); + + osm_madw_req_p->p_madw = p_madw; + osm_madw_req_p->waking_time = waking_time; + osm_madw_req_p->retry_cnt = OSM_DEFAULT_RETRY_COUNT; + osm_madw_req_p->p_bind = p_bind; + + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_transaction_mgr_insert_madw: " + "Inserting MADW:%p with waking_time: <0x%" PRIx64 "> TID:<0x%" + PRIx64 ">.\n", p_madw, waking_time, p_madw->p_mad->trans_id); + + /* Get the lock on the manager */ + cl_spinlock_acquire(&(trans_mgr_p->transaction_mgr_lock)); + /* If the list is empty - need to start the timer with timer of timeout (in miliseconds) */ + if (cl_is_qlist_empty(trans_mgr_p->madw_reqs_list_p)) { + /* stop the timer if it is running */ + cl_timer_stop(&trans_mgr_p->madw_list_timer); + + /* start the timer to the timeout (in miliseconds) */ + cl_status = cl_timer_start(&trans_mgr_p->madw_list_timer, + p_vend->timeout); + if (cl_status != CL_SUCCESS) { + 
osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_transaction_mgr_insert_madw : ERROR 1000: " + "Failed to start timer\n"); + } + } + + /* insert the object to the qlist and the qmap */ + cl_qlist_insert_tail(trans_mgr_p->madw_reqs_list_p, + &(osm_madw_req_p->list_item)); + /* get the key */ + key = (uint64_t) mad_p->trans_id; + cl_qmap_insert(trans_mgr_p->madw_by_tid_map_p, key, + &(osm_madw_req_p->map_item)); + cl_spinlock_release(&trans_mgr_p->transaction_mgr_lock); + + OSM_LOG_EXIT(p_vend->p_log); + + return (IB_SUCCESS); +} + +ib_api_status_t +osm_transaction_mgr_erase_madw(IN osm_vendor_t * const p_vend, + IN ib_mad_t * p_mad) +{ + osm_transaction_mgr_t *trans_mgr_p; + osm_madw_req_t *osm_madw_req_p; + uint64_t key; + cl_map_item_t *p_map_item; + OSM_LOG_ENTER(p_vend->p_log); + + trans_mgr_p = (osm_transaction_mgr_t *) p_vend->p_transaction_mgr; + + key = (uint64_t) p_mad->trans_id; + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_transaction_mgr_erase_madw: " + "Removing TID:<0x%" PRIx64 ">.\n", p_mad->trans_id); + + cl_spinlock_acquire(&trans_mgr_p->transaction_mgr_lock); + p_map_item = cl_qmap_get(trans_mgr_p->madw_by_tid_map_p, key); + if (p_map_item != cl_qmap_end(trans_mgr_p->madw_by_tid_map_p)) { + /* we found such an item. 
*/ + /* get the osm_madw_req_p */ + osm_madw_req_p = + PARENT_STRUCT(p_map_item, osm_madw_req_t, map_item); + + /* remove the item from the qlist */ + cl_qlist_remove_item(trans_mgr_p->madw_reqs_list_p, + &(osm_madw_req_p->list_item)); + /* remove the item from the qmap */ + cl_qmap_remove_item(trans_mgr_p->madw_by_tid_map_p, + &(osm_madw_req_p->map_item)); + + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_transaction_mgr_erase_madw: " + "Removed TID:<0x%" PRIx64 ">.\n", p_mad->trans_id); + + /* free the item */ + free(osm_madw_req_p); + } else { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_transaction_mgr_erase_madw: " + "osm_transaction_mgr_erase_madw:<0x%" PRIx64 + "> NOT FOUND.\n", p_mad->trans_id); + } + cl_spinlock_release(&trans_mgr_p->transaction_mgr_lock); + OSM_LOG_EXIT(p_vend->p_log); + + return (IB_SUCCESS); +} + +ib_api_status_t +osm_transaction_mgr_get_madw_for_tid(IN osm_vendor_t * const p_vend, + IN ib_mad_t * const p_mad, + OUT osm_madw_t ** req_madw_p) +{ + osm_transaction_mgr_t *trans_mgr_p; + osm_madw_req_t *osm_madw_req_p; + cl_map_item_t *p_map_item; + uint64_t key; + OSM_LOG_ENTER(p_vend->p_log); + + trans_mgr_p = (osm_transaction_mgr_t *) p_vend->p_transaction_mgr; + + *req_madw_p = NULL; + + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_transaction_mgr_get_madw_for_tid: " + "Looking for TID:<0x%" PRIx64 ">.\n", p_mad->trans_id); + + key = (uint64_t) p_mad->trans_id; + cl_spinlock_acquire(&(trans_mgr_p->transaction_mgr_lock)); + p_map_item = cl_qmap_get(trans_mgr_p->madw_by_tid_map_p, key); + if (p_map_item != cl_qmap_end(trans_mgr_p->madw_by_tid_map_p)) { + /* we found such an item. 
*/ + /* get the osm_madw_req_p */ + osm_madw_req_p = + PARENT_STRUCT(p_map_item, osm_madw_req_t, map_item); + + /* Since the Transaction was looked up and provided for */ + /* processing we retire it */ + cl_qlist_remove_item(trans_mgr_p->madw_reqs_list_p, + &(osm_madw_req_p->list_item)); + /* remove the item from the qmap */ + cl_qmap_remove_item(trans_mgr_p->madw_by_tid_map_p, + &(osm_madw_req_p->map_item)); + + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_transaction_mgr_get_madw_for_tid: " + "Removed TID:<0x%" PRIx64 ">.\n", p_mad->trans_id); + + *req_madw_p = osm_madw_req_p->p_madw; + } + + cl_spinlock_release(&(trans_mgr_p->transaction_mgr_lock)); + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_transaction_mgr_get_madw_for_tid: " + "Got MADW:%p.\n", *req_madw_p); + OSM_LOG_EXIT(p_vend->p_log); + return (IB_SUCCESS); +} + +#endif diff --git a/libvendor/osm_vendor_test.c b/libvendor/osm_vendor_test.c new file mode 100644 index 0000000..9f7b104 --- /dev/null +++ b/libvendor/osm_vendor_test.c @@ -0,0 +1,258 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of vendor specific transport interface. + * This is the "Test" vendor which allows compilation and some + * testing without a real vendor interface. + * These objects are part of the opensm family of objects. + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#ifdef OSM_VENDOR_INTF_TEST + +#include +#include +#include +#include +#include + +void osm_vendor_construct(IN osm_vendor_t * const p_vend) +{ + memset(p_vend, 0, sizeof(*p_vend)); +} + +void osm_vendor_destroy(IN osm_vendor_t * const p_vend) +{ + UNUSED_PARAM(p_vend); +} + +void osm_vendor_delete(IN osm_vendor_t ** const pp_vend) +{ + CL_ASSERT(pp_vend); + + osm_vendor_destroy(*pp_vend); + free(*pp_vend); + *pp_vend = NULL; +} + +ib_api_status_t +osm_vendor_init(IN osm_vendor_t * const p_vend, + IN osm_log_t * const p_log, IN const uint32_t timeout) +{ + OSM_LOG_ENTER(p_log); + + CL_ASSERT(p_vend); + CL_ASSERT(p_log); + + p_vend->p_log = p_log; + p_vend->timeout = timeout; + OSM_LOG_EXIT(p_log); + return (IB_SUCCESS); +} + +osm_vendor_t *osm_vendor_new(IN osm_log_t * const p_log, + IN const uint32_t timeout) +{ + ib_api_status_t status; + osm_vendor_t *p_vend; + OSM_LOG_ENTER(p_log); + + CL_ASSERT(p_log); + + p_vend = 
malloc(sizeof(*p_vend)); + if (p_vend != NULL) { + memset(p_vend, 0, sizeof(*p_vend)); + + status = osm_vendor_init(p_vend, p_log, timeout); + if (status != IB_SUCCESS) { + osm_vendor_delete(&p_vend); + } + } + + OSM_LOG_EXIT(p_log); + return (p_vend); +} + +ib_mad_t *osm_vendor_get(IN osm_bind_handle_t h_bind, + IN const uint32_t size, + IN osm_vend_wrap_t * const p_vend_wrap) +{ + osm_vendor_t *p_vend; + ib_mad_t *p_mad; + OSM_LOG_ENTER(h_bind->p_vend->p_log); + + UNUSED_PARAM(p_vend_wrap); + + p_vend = h_bind->p_vend; + + /* + Simply malloc the MAD off the heap. + */ + p_mad = (ib_mad_t *) malloc(size); + + osm_log(p_vend->p_log, OSM_LOG_VERBOSE, + "osm_vendor_get: " "MAD %p.\n", p_mad); + + if (p_mad) + memset(p_mad, 0, size); + + OSM_LOG_EXIT(p_vend->p_log); + return (p_mad); +} + +void +osm_vendor_put(IN osm_bind_handle_t h_bind, + IN osm_vend_wrap_t * const p_vend_wrap, + IN ib_mad_t * const p_mad) +{ + osm_vendor_t *p_vend; + + OSM_LOG_ENTER(h_bind->p_vend->p_log); + + UNUSED_PARAM(p_vend_wrap); + + p_vend = h_bind->p_vend; + + osm_log(p_vend->p_log, OSM_LOG_VERBOSE, + "osm_vendor_put: " "MAD %p.\n", p_mad); + + /* + Return the MAD to the heap. 
+ */ + free(p_mad); + + OSM_LOG_EXIT(p_vend->p_log); +} + +ib_api_status_t +osm_vendor_send(IN osm_bind_handle_t h_bind, + IN osm_vend_wrap_t * const p_vend_wrap, + IN osm_mad_addr_t * const p_mad_addr, + IN ib_mad_t * const p_mad, + IN void *transaction_context, IN boolean_t const resp_expected) +{ + osm_vendor_t *p_vend = h_bind->p_vend; + + OSM_LOG_ENTER(p_vend->p_log); + + UNUSED_PARAM(p_vend_wrap); + UNUSED_PARAM(p_mad_addr); + UNUSED_PARAM(transaction_context); + UNUSED_PARAM(resp_expected); + + osm_log(p_vend->p_log, OSM_LOG_VERBOSE, + "osm_vendor_send: " "MAD %p.\n", p_mad); + + OSM_LOG_EXIT(p_vend->p_log); + return (IB_SUCCESS); +} + +osm_bind_handle_t +osm_vendor_bind(IN osm_vendor_t * const p_vend, + IN osm_bind_info_t * const p_bind_info, + IN osm_mad_pool_t * const p_mad_pool, + IN osm_vend_mad_recv_callback_t mad_recv_callback, + IN void *context) +{ + osm_bind_handle_t h_bind; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vend); + CL_ASSERT(p_bind_info); + CL_ASSERT(p_mad_pool); + CL_ASSERT(mad_recv_callback); + CL_ASSERT(context); + + UNUSED_PARAM(p_vend); + UNUSED_PARAM(p_mad_pool); + UNUSED_PARAM(mad_recv_callback); + UNUSED_PARAM(context); + + h_bind = (osm_bind_handle_t) malloc(sizeof(*h_bind)); + if (h_bind != NULL) { + memset(h_bind, 0, sizeof(*h_bind)); + h_bind->p_vend = p_vend; + h_bind->port_guid = p_bind_info->port_guid; + h_bind->mad_class = p_bind_info->mad_class; + h_bind->class_version = p_bind_info->class_version; + h_bind->is_responder = p_bind_info->is_responder; + h_bind->is_trap_processor = p_bind_info->is_trap_processor; + h_bind->is_report_processor = p_bind_info->is_report_processor; + h_bind->send_q_size = p_bind_info->send_q_size; + h_bind->recv_q_size = p_bind_info->recv_q_size; + } + + OSM_LOG_EXIT(p_vend->p_log); + return (h_bind); +} + +ib_api_status_t +osm_vendor_get_ports(IN osm_vendor_t * const p_vend, + IN ib_net64_t * const p_guids, + IN uint32_t * const num_guids) +{ + OSM_LOG_ENTER(p_vend->p_log); + + *p_guids 
= CL_NTOH64(0x0000000000001234); + *num_guids = 1; + + OSM_LOG_EXIT(p_vend->p_log); + return (IB_SUCCESS); +} + +ib_api_status_t osm_vendor_local_lid_change(IN osm_bind_handle_t h_bind) +{ + osm_vendor_t *p_vend = h_bind->p_vend; + + OSM_LOG_ENTER(p_vend->p_log); + + OSM_LOG_EXIT(p_vend->p_log); + + return (IB_SUCCESS); +} + +void osm_vendor_set_debug(IN osm_vendor_t * const p_vend, IN int32_t level) +{ + +} + +#endif /* OSM_VENDOR_INTF_TEST */ diff --git a/libvendor/osm_vendor_ts.c b/libvendor/osm_vendor_ts.c new file mode 100644 index 0000000..f4f1df1 --- /dev/null +++ b/libvendor/osm_vendor_ts.c @@ -0,0 +1,897 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#undef __init +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include + +/* + A race can occur on requests: a response may be received before the + send_callback is called. To cope with it the wrid encodes both the madw_p + (shifted left by one bit) and, in bit 0, whether the MAD is a response. + The race can occur only on requests that did not fail, and then the + madw_p will be put back in the pool before the callback. +*/ +uint64_t __osm_set_wrid_by_p_madw(IN osm_madw_t * p_madw) +{ + uint64_t wrid = 0; + + CL_ASSERT(p_madw->p_mad); + + memcpy(&wrid, &p_madw, sizeof(osm_madw_t *)); + wrid = (wrid << 1) | + ib_mad_is_response(p_madw->p_mad); + return wrid; +} + +void +__osm_set_p_madw_and_resp_by_wrid(IN uint64_t wrid, + OUT uint8_t * is_resp, + OUT osm_madw_t ** pp_madw) +{ + *is_resp = wrid & 0x0000000000000001; + wrid = wrid >> 1; + memcpy(pp_madw, &wrid, sizeof(osm_madw_t *)); +} + +/********************************************************************** + * TS MAD to OSM ADDRESS VECTOR + **********************************************************************/ +void +__osm_ts_conv_mad_rcv_desc_to_osm_addr(IN osm_vendor_t * const p_vend, + IN struct ib_mad *p_mad, + IN uint8_t is_smi, + OUT osm_mad_addr_t * p_mad_addr) +{ + p_mad_addr->dest_lid = cl_hton16(p_mad->slid); + p_mad_addr->static_rate = 0; /* HACK - we do not know the rate ! 
*/ + p_mad_addr->path_bits = 0; /* HACK - no way to know in TS */ + if (is_smi) { + /* SMI */ + p_mad_addr->addr_type.smi.source_lid = cl_hton16(p_mad->slid); + p_mad_addr->addr_type.smi.port_num = p_mad->port; + } else { + /* GSI */ + p_mad_addr->addr_type.gsi.remote_qp = p_mad->sqpn; + p_mad_addr->addr_type.gsi.remote_qkey = IB_QP1_WELL_KNOWN_Q_KEY; + p_mad_addr->addr_type.gsi.pkey_ix = p_mad->pkey_index; + p_mad_addr->addr_type.gsi.service_level = 0; /* HACK no way to know */ + + p_mad_addr->addr_type.gsi.global_route = FALSE; /* HACK no way to know */ + /* copy the GRH data if relevant */ + /* + if (p_mad_addr->addr_type.gsi.global_route) + { + p_mad_addr->addr_type.gsi.grh_info.ver_class_flow = + ib_grh_set_ver_class_flow(p_rcv_desc->grh.IP_version, + p_rcv_desc->grh.traffic_class, + p_rcv_desc->grh.flow_label); + p_mad_addr->addr_type.gsi.grh_info.hop_limit = p_rcv_desc->grh.hop_limit; + memcpy(&p_mad_addr->addr_type.gsi.grh_info.src_gid.raw, + &p_rcv_desc->grh.sgid, sizeof(ib_net64_t)); + memcpy(&p_mad_addr->addr_type.gsi.grh_info.dest_gid.raw, + p_rcv_desc->grh.dgid, sizeof(ib_net64_t)); + } + */ + } +} + +/********************************************************************** + * OSM ADDR VECTOR TO TS MAD: + **********************************************************************/ +void +__osm_ts_conv_osm_addr_to_ts_addr(IN osm_mad_addr_t * p_mad_addr, + IN uint8_t is_smi, OUT struct ib_mad *p_mad) +{ + + /* For global destination or Multicast address: */ + p_mad->dlid = cl_ntoh16(p_mad_addr->dest_lid); + p_mad->sl = 0; + if (is_smi) { + p_mad->sqpn = 0; + p_mad->dqpn = 0; + } else { + p_mad->sqpn = 1; + p_mad->dqpn = p_mad_addr->addr_type.gsi.remote_qp; + } +} + +void __osm_vendor_clear_sm(IN osm_bind_handle_t h_bind) +{ + osm_ts_bind_info_t *p_bind = (osm_ts_bind_info_t *) h_bind; + osm_vendor_t *p_vend = p_bind->p_vend; + VAPI_ret_t status; + VAPI_hca_attr_t attr_mod; + VAPI_hca_attr_mask_t attr_mask; + + OSM_LOG_ENTER(p_vend->p_log); + + 
memset(&attr_mod, 0, sizeof(attr_mod)); + memset(&attr_mask, 0, sizeof(attr_mask)); + + attr_mod.is_sm = FALSE; + attr_mask = HCA_ATTR_IS_SM; + + status = + VAPI_modify_hca_attr(p_bind->hca_hndl, p_bind->port_num, &attr_mod, + &attr_mask); + if (status != VAPI_OK) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "__osm_vendor_clear_sm: ERR 5021: " + "Unable set 'IS_SM' bit in port attributes (%d).\n", + status); + } + + OSM_LOG_EXIT(p_vend->p_log); +} + +/********************************************************************** + * ANY CONSTRUCTION OF THE osm_vendor_t OBJECT + **********************************************************************/ +void osm_vendor_construct(IN osm_vendor_t * const p_vend) +{ + memset(p_vend, 0, sizeof(*p_vend)); + cl_thread_construct(&(p_vend->smi_bind.poller)); + cl_thread_construct(&(p_vend->gsi_bind.poller)); +} + +/********************************************************************** + * DEALOCATE osm_vendor_t + **********************************************************************/ +void osm_vendor_destroy(IN osm_vendor_t * const p_vend) +{ + OSM_LOG_ENTER(p_vend->p_log); + osm_transaction_mgr_destroy(p_vend); + + /* Destroy the poller threads */ + /* HACK: can you destroy an un-initialized thread ? 
*/ + pthread_cancel(p_vend->smi_bind.poller.osd.id); + pthread_cancel(p_vend->gsi_bind.poller.osd.id); + cl_thread_destroy(&(p_vend->smi_bind.poller)); + cl_thread_destroy(&(p_vend->gsi_bind.poller)); + OSM_LOG_EXIT(p_vend->p_log); +} + +/********************************************************************** +DEALLOCATE A POINTER TO osm_vendor_t +**********************************************************************/ +void osm_vendor_delete(IN osm_vendor_t ** const pp_vend) +{ + CL_ASSERT(pp_vend); + + osm_vendor_destroy(*pp_vend); + free(*pp_vend); + *pp_vend = NULL; +} + +/********************************************************************** + Initializes the vendor: +**********************************************************************/ + +ib_api_status_t +osm_vendor_init(IN osm_vendor_t * const p_vend, + IN osm_log_t * const p_log, IN const uint32_t timeout) +{ + ib_api_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(p_log); + + p_vend->p_log = p_log; + p_vend->p_transaction_mgr = NULL; + osm_transaction_mgr_init(p_vend); + p_vend->timeout = timeout; + + /* we use the file handle to track the binding */ + p_vend->smi_bind.ul_dev_fd = -1; + p_vend->gsi_bind.ul_dev_fd = -1; + + OSM_LOG_EXIT(p_log); + return (status); +} + +/********************************************************************** + * Create and Initialize osm_vendor_t Object (NULL if allocation or init fails) + **********************************************************************/ +osm_vendor_t *osm_vendor_new(IN osm_log_t * const p_log, + IN const uint32_t timeout) +{ + ib_api_status_t status; + osm_vendor_t *p_vend; + + OSM_LOG_ENTER(p_log); + + CL_ASSERT(p_log); + + p_vend = malloc(sizeof(*p_vend)); + if (p_vend != NULL) { + memset(p_vend, 0, sizeof(*p_vend)); + + status = osm_vendor_init(p_vend, p_log, timeout); + if (status != IB_SUCCESS) { + osm_vendor_delete(&p_vend); + } + } else { /* allocation failed: p_vend is NULL here, so log through the p_log argument */ + osm_log(p_log, OSM_LOG_ERROR, + "osm_vendor_new: ERR 5007: " + "Fail to allocate vendor object.\n"); + } + + OSM_LOG_EXIT(p_log); + 
return (p_vend); +} + +/********************************************************************** + * TS RCV Thread callback + * HACK: - we need to make this support arbitrary size mads. + **********************************************************************/ +void +__osm_ts_rcv_callback(IN osm_ts_bind_info_t * p_bind, + IN osm_mad_addr_t * p_mad_addr, + IN uint32_t mad_size, IN void *p_mad) +{ + ib_api_status_t status; + osm_madw_t *p_req_madw = NULL; + osm_madw_t *p_madw; + osm_vend_wrap_t *p_new_vw; + ib_mad_t *p_mad_buf; + osm_log_t *const p_log = p_bind->p_vend->p_log; + + OSM_LOG_ENTER(p_log); + + /* if it is a response MAD we mustbe able to get the request */ + if (ib_mad_is_response((ib_mad_t *) p_mad)) { + /* can we find a matching madw by this payload TID */ + status = + osm_transaction_mgr_get_madw_for_tid(p_bind->p_vend, + (ib_mad_t *) p_mad, + &p_req_madw); + if (status != IB_SUCCESS) { + osm_log(p_log, OSM_LOG_ERROR, + "__osm_ts_rcv_callback: ERR 5008: " + "Error obtaining request madw by TID (%d).\n", + status); + p_req_madw = NULL; + } + + if (p_req_madw == NULL) { + osm_log(p_log, OSM_LOG_ERROR, + "__osm_ts_rcv_callback: ERR 5009: " + "Fail to obtain request madw for receined MAD. Aborting CB.\n"); + goto Exit; + } + } + + /* do we have a request ??? */ + if (p_req_madw == NULL) { + + /* if not - get new osm_madw and arrange it. */ + /* create the new madw in the pool */ + p_madw = osm_mad_pool_get(p_bind->p_osm_pool, + (osm_bind_handle_t) p_bind, + mad_size, p_mad_addr); + if (p_madw == NULL) { + osm_log(p_log, OSM_LOG_ERROR, + "__osm_ts_rcv_callback: ERR 5010: " + "Error request for a new madw.\n"); + goto Exit; + } + /* HACK: we cust to avoid the const ??? 
*/ + p_mad_buf = (void *)p_madw->p_mad; + } else { + /* we have the madw defined during the send and stored in the vend_wrap */ + /* we need to make sure the wrapper is correctly init there */ + CL_ASSERT(p_req_madw->vend_wrap.p_resp_madw != 0); + p_madw = p_req_madw->vend_wrap.p_resp_madw; + + CL_ASSERT(p_madw->h_bind); + p_mad_buf = + osm_vendor_get(p_madw->h_bind, mad_size, + &p_madw->vend_wrap); + + if (p_mad_buf == NULL) { + osm_log(p_log, OSM_LOG_ERROR, + "__osm_ts_rcv_callback: ERR 5011: " + "Unable to acquire wire MAD.\n"); + + goto Exit; + } + + /* + Finally, attach the wire MAD to this wrapper. + */ + osm_madw_set_mad(p_madw, p_mad_buf); + } + + /* init some fields of the vendor wrapper */ + p_new_vw = osm_madw_get_vend_ptr(p_madw); + p_new_vw->h_bind = p_bind; + p_new_vw->size = mad_size; + p_new_vw->p_resp_madw = NULL; + p_new_vw->p_mad_buf = p_mad_buf; + + memcpy(p_new_vw->p_mad_buf, p_mad, mad_size); + + /* attach the buffer to the wrapper */ + p_madw->p_mad = p_mad_buf; + + /* we can also make sure we marked the size and bind on the returned madw */ + p_madw->h_bind = p_new_vw->h_bind; + + /* call the CB */ + (*(osm_vend_mad_recv_callback_t) p_bind->rcv_callback) + (p_madw, p_bind->client_context, p_req_madw); + +Exit: + OSM_LOG_EXIT(p_log); +} + +/********************************************************************** + * TS Send callback : invoked after each send + * + **********************************************************************/ +void +__osm_ts_send_callback(IN osm_ts_bind_info_t * bind_info_p, + IN boolean_t is_resp, + IN osm_madw_t * madw_p, IN IB_comp_status_t status) +{ + osm_log_t *const p_log = bind_info_p->p_vend->p_log; + osm_vend_wrap_t *p_vw; + + OSM_LOG_ENTER(p_log); + + osm_log(p_log, OSM_LOG_DEBUG, + "__osm_ts_send_callback: INFO 1008: " + "Handling Send of MADW:%p Is Resp:%d.\n", madw_p, is_resp); + + /* we need to handle requests and responses differently */ + if (is_resp) { + if (status != IB_COMP_SUCCESS) { + 
osm_log(p_log, OSM_LOG_ERROR, + "__osm_ts_send_callback: ERR 5012: " + "Error Sending Response MADW:%p.\n", madw_p); + } else { + osm_log(p_log, OSM_LOG_DEBUG, + "__osm_ts_send_callback: DBG 1008: " + "Completed Sending Response MADW:%p.\n", + madw_p); + } + + /* if we are a response - we need to clean it up */ + osm_mad_pool_put(bind_info_p->p_osm_pool, madw_p); + } else { + + /* this call back is invoked on completion of send - error or not */ + if (status != IB_COMP_SUCCESS) { + + osm_log(p_log, OSM_LOG_ERROR, + "__osm_ts_send_callback: ERR 5013: " + "Received an Error from IB_MGT Send (%d).\n", + status); + + p_vw = osm_madw_get_vend_ptr(madw_p); + CL_ASSERT(p_vw); + + /* + Return any wrappers to the pool that may have been + pre-emptively allocated to handle a receive. + */ + if (p_vw->p_resp_madw) { + osm_mad_pool_put(bind_info_p->p_osm_pool, + p_vw->p_resp_madw); + p_vw->p_resp_madw = NULL; + } + + /* invoke the CB */ + (*(osm_vend_mad_send_err_callback_t) bind_info_p-> + send_err_callback) + (bind_info_p->client_context, madw_p); + } else { + /* successful request send - do nothing - the response will need the + out mad */ + osm_log(p_log, OSM_LOG_DEBUG, + "__osm_ts_send_callback: DBG 1008: " + "Completed Sending Request MADW:%p.\n", madw_p); + } + } + + OSM_LOG_EXIT(p_log); +} + +/********************************************************************** + * Poller thread: + * Always receive 256byte mads from the devcie file + **********************************************************************/ +void __osm_vendor_ts_poller(IN void *p_ptr) +{ + int ts_ret_code; + struct ib_mad mad; + osm_mad_addr_t mad_addr; + osm_ts_bind_info_t *const p_bind = (osm_ts_bind_info_t *) p_ptr; + + OSM_LOG_ENTER(p_bind->p_vend->p_log); + /* we set the type of cancelation for this thread */ + pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); + + while (1) { + /* we read one mad at a time and pass it to the read callback function */ + ts_ret_code = read(p_bind->ul_dev_fd, 
&mad, sizeof(mad)); + if (ts_ret_code != sizeof(mad)) { + osm_log(p_bind->p_vend->p_log, OSM_LOG_ERROR, + "__osm_vendor_ts_poller: ERR 5003: " + "error with read, bytes = %d, errno = %d\n", + ts_ret_code, errno); + } else { + osm_log(p_bind->p_vend->p_log, OSM_LOG_DEBUG, + "__osm_vendor_ts_poller: " + "MAD QPN:%d SLID:0x%04x class:0x%02x " + "__osm_vendor_ts_poller:0x%02x attr:0x%04x status:0x%04x " + "__osm_vendor_ts_poller:0x%016" PRIx64 "\n", + cl_ntoh32(mad.dqpn), + cl_ntoh16(mad.slid), + mad.mgmt_class, + mad.r_method, + cl_ntoh16(mad.attribute_id), + cl_ntoh16(mad.status), + cl_ntoh64(mad.transaction_id)); + + /* first arrange an address */ + __osm_ts_conv_mad_rcv_desc_to_osm_addr(p_bind->p_vend, + &mad, + (((ib_mad_t *) & + mad)-> + mgmt_class == + IB_MCLASS_SUBN_LID) + || + (((ib_mad_t *) & + mad)-> + mgmt_class == + IB_MCLASS_SUBN_DIR), + &mad_addr); + + /* call the receiver callback */ + /* HACK: this should be replaced with a call to the RMPP Assembly ... */ + __osm_ts_rcv_callback(p_bind, &mad_addr, 256, &mad); + } + } + + OSM_LOG_EXIT(p_bind->p_vend->p_log); +} + +/********************************************************************** + * BINDs a callback (rcv and send error) for a given class and method + * defined by the given: osm_bind_info_t + **********************************************************************/ +osm_bind_handle_t +osm_vendor_bind(IN osm_vendor_t * const p_vend, + IN osm_bind_info_t * const p_user_bind, + IN osm_mad_pool_t * const p_mad_pool, + IN osm_vend_mad_recv_callback_t mad_recv_callback, + IN osm_vend_mad_send_err_callback_t send_err_callback, + IN void *context) +{ + ib_net64_t port_guid; + osm_ts_bind_info_t *p_bind = NULL; + VAPI_hca_hndl_t hca_hndl; + VAPI_hca_id_t hca_id; + uint32_t port_num; + ib_api_status_t status; + int device_fd; + char device_file[16]; + osm_ts_user_mad_filter filter; + int ts_ioctl_ret; + int qpn; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_mad_pool); + + port_guid = 
p_user_bind->port_guid; + + osm_log(p_vend->p_log, OSM_LOG_INFO, + "osm_vendor_bind: " + "Binding to port 0x%" PRIx64 ".\n", cl_ntoh64(port_guid)); + + switch (p_user_bind->mad_class) { + case IB_MCLASS_SUBN_LID: + case IB_MCLASS_SUBN_DIR: + p_bind = &(p_vend->smi_bind); + qpn = 0; + break; + + case IB_MCLASS_SUBN_ADM: + default: + p_bind = &(p_vend->gsi_bind); + qpn = 1; + break; + } + + /* Make sure we did not previously opened the file */ + if (p_bind->ul_dev_fd >= 0) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_bind: ERR 5004: " + "Already binded to port %u\n", p_bind->port_num); + goto Exit; + } + + /* + We need to figure out what is the TS file name to attach to. + I guess it is following the index of the port in the table of + ports. + */ + + /* obtain the hca name and port num from the guid */ + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_vendor_bind: " + "Finding CA and Port that owns port guid 0x%" PRIx64 ".\n", + cl_ntoh64(port_guid)); + status = + osm_vendor_get_guid_ca_and_port(p_vend, port_guid, &hca_hndl, + &hca_id, &port_num); + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_bind: ERR 5005: " + "Fail to find port number of port guid:0x%016" PRIx64 + "\n", port_guid); + goto Exit; + } + + /* the file name is just /dev/ts_ua0: */ + strcpy(device_file, "/dev/ts_ua0"); + + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_bind: " "Opening TS UL dev file:%s\n", device_file); + + /* Open the file ... 
*/ + device_fd = open(device_file, O_RDWR); + if (device_fd < 0) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_bind: ERR 5006: " + "Fail to open TS UL dev file:%s\n", device_file); + goto Exit; + } + + /* record the bind request; ul_dev_fd is only set once the MAD filter is registered */ + p_bind->port_num = port_num; + p_bind->p_vend = p_vend; + p_bind->client_context = context; + p_bind->rcv_callback = mad_recv_callback; + p_bind->send_err_callback = send_err_callback; + p_bind->p_osm_pool = p_mad_pool; + p_bind->hca_hndl = hca_hndl; + + /* Create the MAD filter on this file handle. */ + filter.port = port_num; + filter.qpn = qpn; + filter.mgmt_class = p_user_bind->mad_class; + filter.direction = TS_IB_MAD_DIRECTION_IN; + filter.mask = + TS_IB_MAD_FILTER_DIRECTION | + TS_IB_MAD_FILTER_PORT | + TS_IB_MAD_FILTER_QPN | TS_IB_MAD_FILTER_MGMT_CLASS; + + ts_ioctl_ret = ioctl(device_fd, TS_IB_IOCSMADFILTADD, &filter); + if (ts_ioctl_ret < 0) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_bind: ERR 5014: " + "Fail to register MAD filter with err:%d\n", + ts_ioctl_ret); + close(device_fd); /* do not leak the descriptor; ul_dev_fd was not set, so the bind stays unbound */ + goto Exit; + } + + /* Initialize the listener thread for this port; the poller reads from ul_dev_fd */ + p_bind->ul_dev_fd = device_fd; + status = cl_thread_init(&p_bind->poller, + __osm_vendor_ts_poller, p_bind, + "osm ts poller"); + if (status != IB_SUCCESS) { + close(device_fd); /* no poller owns the descriptor: release it */ + p_bind->ul_dev_fd = -1; /* let a later bind attempt pass the "already bound" check */ + } + +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return ((osm_bind_handle_t) p_bind); +} + +/********************************************************************** +Get a mad from the lower level. +The osm_vend_wrap_t is a wrapper used to connect the mad to the response. 
+**********************************************************************/ +ib_mad_t *osm_vendor_get(IN osm_bind_handle_t h_bind, + IN const uint32_t mad_size, + IN osm_vend_wrap_t * const p_vw) +{ + ib_mad_t *p_mad; + osm_ts_bind_info_t *p_bind = (osm_ts_bind_info_t *) h_bind; + osm_vendor_t *p_vend = p_bind->p_vend; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vw); + + p_vw->size = mad_size; + + /* allocate it */ + p_mad = (ib_mad_t *) malloc(p_vw->size); + if (p_mad == NULL) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_get: ERR 5022: " + "Error Obtaining MAD buffer.\n"); + goto Exit; + } + + memset(p_mad, 0, p_vw->size); + + /* track locally */ + p_vw->p_mad_buf = p_mad; + p_vw->h_bind = h_bind; + p_vw->p_resp_madw = NULL; + + if (osm_log_get_level(p_vend->p_log) >= OSM_LOG_DEBUG) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_vendor_get: " + "Acquired MAD %p, size = %u.\n", p_mad, p_vw->size); + } + +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return (p_mad); +} + +/********************************************************************** + * Return a MAD by providing it's wrapper object. + **********************************************************************/ +void +osm_vendor_put(IN osm_bind_handle_t h_bind, IN osm_vend_wrap_t * const p_vw) +{ + osm_ts_bind_info_t *p_bind = (osm_ts_bind_info_t *) h_bind; + osm_vendor_t *p_vend = p_bind->p_vend; + osm_madw_t *p_madw; + + OSM_LOG_ENTER(p_vend->p_log); + + CL_ASSERT(p_vw); + CL_ASSERT(p_vw->p_mad_buf); + + if (osm_log_get_level(p_vend->p_log) >= OSM_LOG_DEBUG) { + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_vendor_put: " "Retiring MAD %p.\n", + p_vw->p_mad_buf); + } + + /* + * We moved the removal of the transaction to immediatly after + * it was looked up. 
+ */ + + /* free the mad but the wrapper is part of the madw object */ + free(p_vw->p_mad_buf); + p_vw->p_mad_buf = NULL; + p_madw = PARENT_STRUCT(p_vw, osm_madw_t, vend_wrap); + p_madw->p_mad = NULL; + + OSM_LOG_EXIT(p_vend->p_log); +} + +/********************************************************************** +Actually Send a MAD + +MADs are buffers of type: struct ib_mad - so they are limited by size. +This is for internal use by osm_vendor_send and the transaction mgr +retry too. +**********************************************************************/ +ib_api_status_t +osm_ts_send_mad(IN osm_ts_bind_info_t * p_bind, IN osm_madw_t * const p_madw) +{ + osm_vendor_t *const p_vend = p_bind->p_vend; + osm_mad_addr_t *const p_mad_addr = osm_madw_get_mad_addr_ptr(p_madw); + ib_mad_t *const p_mad = osm_madw_get_mad_ptr(p_madw); + struct ib_mad ts_mad; + int ret; + ib_api_status_t status; + + OSM_LOG_ENTER(p_vend->p_log); + + /* + * Copy the MAD over to the sent mad + */ + memcpy(&ts_mad, p_mad, 256); + + /* + * For all sends other than directed route SM MADs, + * acquire an address vector for the destination. 
+ */ + if (p_mad->mgmt_class != IB_MCLASS_SUBN_DIR) { + __osm_ts_conv_osm_addr_to_ts_addr(p_mad_addr, + p_mad->mgmt_class == + IB_MCLASS_SUBN_LID, &ts_mad); + } else { + /* is a directed route - we need to construct a permissive address */ + /* we do not need port number since it is part of the mad_hndl */ + ts_mad.dlid = IB_LID_PERMISSIVE; + ts_mad.slid = IB_LID_PERMISSIVE; + } + if ((p_mad->mgmt_class == IB_MCLASS_SUBN_DIR) || + (p_mad->mgmt_class == IB_MCLASS_SUBN_LID)) { + ts_mad.sqpn = 0; + ts_mad.dqpn = 0; + } else { + ts_mad.sqpn = 1; + ts_mad.dqpn = 1; + } + ts_mad.port = p_bind->port_num; + + /* send it */ + ret = write(p_bind->ul_dev_fd, &ts_mad, sizeof(ts_mad)); + + if (ret != sizeof(ts_mad)) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_ts_send_mad: ERR 5026: " + "Error sending mad (%d).\n", ret); + status = IB_ERROR; + goto Exit; + } + + status = IB_SUCCESS; + +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +/********************************************************************** +Send a MAD through. + +What is unclear to me is the need for the setting of all the MAD Wrapper +fields. Seems like the OSM uses these values during it's processing... +**********************************************************************/ +ib_api_status_t +osm_vendor_send(IN osm_bind_handle_t h_bind, + IN osm_madw_t * const p_madw, IN boolean_t const resp_expected) +{ + osm_ts_bind_info_t *p_bind = (osm_ts_bind_info_t *) h_bind; + osm_vendor_t *const p_vend = p_bind->p_vend; + osm_vend_wrap_t *const p_vw = osm_madw_get_vend_ptr(p_madw); + ib_api_status_t status; + + OSM_LOG_ENTER(p_vend->p_log); + + /* + * If a response is expected to this MAD, then preallocate + * a mad wrapper to contain the wire MAD received in the + * response. Allocating a wrapper here allows for easier + * failure paths than after we already received the wire mad. 
+ */ + if (resp_expected == TRUE) { + /* we track it in the vendor wrapper */ + p_vw->p_resp_madw = + osm_mad_pool_get_wrapper_raw(p_bind->p_osm_pool); + if (p_vw->p_resp_madw == NULL) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_send: ERR 5024: " + "Unable to allocate MAD wrapper.\n"); + status = IB_INSUFFICIENT_RESOURCES; + goto Exit; + } + + /* put some minimal info on that wrapper */ + ((osm_madw_t *) (p_vw->p_resp_madw))->h_bind = h_bind; + + /* we also want to track it in the TID based map */ + status = osm_transaction_mgr_insert_madw((osm_bind_handle_t *) + p_bind, p_madw); + if (status != IB_SUCCESS) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_send: ERR 5025: " + "Error inserting request madw by TID (%d).\n", + status); + } + } else + p_vw->p_resp_madw = NULL; + + /* do the actual send */ + /* HACK: to be replaced by call to RMPP Segmentation */ + status = osm_ts_send_mad(p_bind, p_madw); + + /* we do not get an async callback, so call it ourselves */ + /* it is told "response" when no reply is expected, and handles all cleanup if necessary */ + __osm_ts_send_callback(p_bind, !resp_expected, p_madw, status); + +Exit: + OSM_LOG_EXIT(p_vend->p_log); + return (status); +} + +/********************************************************************** + * the idea here is to change the content of the bind such that it + * will hold the local address used for sending directed route by the SMA. 
+ **********************************************************************/ +ib_api_status_t osm_vendor_local_lid_change(IN osm_bind_handle_t h_bind) +{ + osm_vendor_t *p_vend = ((osm_ts_bind_info_t *) h_bind)->p_vend; + + OSM_LOG_ENTER(p_vend->p_log); + + osm_log(p_vend->p_log, OSM_LOG_DEBUG, + "osm_vendor_local_lid_change: DEBUG 2202: " "Change of LID.\n"); + + OSM_LOG_EXIT(p_vend->p_log); + + return (IB_SUCCESS); +} + +void osm_vendor_set_sm(IN osm_bind_handle_t h_bind, IN boolean_t is_sm_val) +{ + osm_ts_bind_info_t *p_bind = (osm_ts_bind_info_t *) h_bind; + osm_vendor_t *p_vend = p_bind->p_vend; + VAPI_ret_t status; + VAPI_hca_attr_t attr_mod; + VAPI_hca_attr_mask_t attr_mask; + + OSM_LOG_ENTER(p_vend->p_log); + + memset(&attr_mod, 0, sizeof(attr_mod)); + memset(&attr_mask, 0, sizeof(attr_mask)); + + attr_mod.is_sm = is_sm_val; + attr_mask = HCA_ATTR_IS_SM; + + status = + VAPI_modify_hca_attr(p_bind->hca_hndl, p_bind->port_num, &attr_mod, + &attr_mask); + if (status != VAPI_OK) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_set_sm: ERR 5027: " + "Unable set 'IS_SM' bit to:%u in port attributes (%d).\n", + is_sm_val, status); + } + + OSM_LOG_EXIT(p_vend->p_log); +} + +void osm_vendor_set_debug(IN osm_vendor_t * const p_vend, IN int32_t level) +{ + +} diff --git a/libvendor/osm_vendor_umadt.c b/libvendor/osm_vendor_umadt.c new file mode 100644 index 0000000..b4d707d --- /dev/null +++ b/libvendor/osm_vendor_umadt.c @@ -0,0 +1,1107 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_req_t. + * This object represents the generic attribute requester. + * This object is part of the opensm family of objects. 
+ *
+ */
+
+/*
+ Next available error code: 0x300
+*/
+
+/*
+ * NOTE(review): every bare "#include" below has lost its header name in
+ * this copy of the patch; restore the names from the upstream file before
+ * attempting to build.
+ */
+#if HAVE_CONFIG_H
+# include
+#endif /* HAVE_CONFIG_H */
+
+#ifdef OSM_VENDOR_INTF_UMADT
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+/* GEN1 includes */
+#include "umadt_so.h"
+#include "ibt.h"
+#include "statustext.h"
+
+/* //////////////////////////////////////////////////////////////////////// */
+/* //////////////////////////////////////////////////////////////////////// */
+/* //////////////////////////////////////////////////////////////////////// */
+/* //////////////////////////////////////////////////////////////////////// */
+/* //////////////////////////////////////////////////////////////////////// */
+/* */
+/* VENDOR_MAD_INTF */
+/* */
+/* //////////////////////////////////////////////////////////////////////// */
+/* //////////////////////////////////////////////////////////////////////// */
+/* //////////////////////////////////////////////////////////////////////// */
+/* //////////////////////////////////////////////////////////////////////// */
+/* //////////////////////////////////////////////////////////////////////// */
+
+/* //////////////////// */
+/* Globals // */
+/* //////////////////// */
+/*
+ * View of a MAD buffer used by this file when it fragments or inspects
+ * MADs: osm_vendor_send() and __mad_recv_processor() cast the IBMad
+ * buffer to this type to reach seg_num, payload_len and frag_flag.
+ * The field order is the buffer layout, so it must not be rearranged.
+ */
+typedef struct _ib_sa_mad_vM3 {
+	uint8_t base_ver;
+	uint8_t mgmt_class;
+	uint8_t class_ver;
+	uint8_t method;
+	ib_net16_t status;
+	ib_net16_t resv;
+	ib_net64_t trans_id;
+	ib_net16_t attr_id;
+	ib_net16_t resv1;
+	ib_net32_t attr_mod;
+	ib_net64_t resv2;
+	ib_net64_t sm_key;
+
+	/* Fragment number; osm_vendor_send() stores it via cl_ntoh32(). */
+	ib_net32_t seg_num;
+	/* Payload byte count; read and written through cl_ntoh32(). */
+	ib_net32_t payload_len;
+	/*
+	 * Set by osm_vendor_send(): 1 = first fragment, 4 = last fragment,
+	 * 5 = single-fragment MAD, 0 = middle fragment.
+	 * __mad_recv_processor() drops MADs with bit 0x20 set ("ACK packet").
+	 */
+	uint8_t frag_flag;
+	uint8_t edit_mod;
+	ib_net16_t window;
+	ib_net16_t attr_offset;
+	ib_net16_t resv3;
+
+	ib_net64_t comp_mask;
+
+	uint8_t data[IB_SA_DATA_SIZE];
+} ib_sa_mad_t_vM3;
+/* Interval handed to cl_timer_start() for the per-bind timeout timer. */
+#define DEFAULT_TIMER_INTERVAL_MSEC 500 /* 500msec timer interval */
+
+/* Receive worker; osm_vendor_bind() passes it to cl_thread_init(). */
+void __mad_recv_processor(void *context);
+
+/* TRUE when the bind info is on its vendor object's register_list. */
+boolean_t __valid_mad_handle(IN mad_bind_info_t * p_mad_bind_info);
+
+cl_status_t +__match_tid_context(const cl_list_item_t * const p_list_item, void *context); +void __osm_vendor_timer_callback(IN void *context); + +osm_vendor_t *osm_vendor_new(IN osm_log_t * const p_log, + IN const uint32_t timeout) +{ + ib_api_status_t status; + umadt_obj_t *p_umadt_obj; + + OSM_LOG_ENTER(p_log); + + p_umadt_obj = malloc(sizeof(umadt_obj_t)); + if (p_umadt_obj) { + memset(p_umadt_obj, 0, sizeof(umadt_obj_t)); + + status = osm_vendor_init((osm_vendor_t *) p_umadt_obj, p_log, + timeout); + if (status != IB_SUCCESS) { + osm_vendor_delete((osm_vendor_t **) & p_umadt_obj); + } + } else { + printf + ("osm_vendor_construct: ERROR! Unable to create Umadt object!\n"); + } + + OSM_LOG_EXIT(p_log); + + return ((osm_vendor_t *) p_umadt_obj); +} + +void osm_vendor_delete(IN osm_vendor_t ** const pp_vend) +{ + umadt_obj_t *p_umadt_obj = (umadt_obj_t *) * pp_vend; + cl_list_item_t *p_list_item; + uint32_t count, i; + mad_bind_info_t *p_mad_bind_info; + + OSM_LOG_ENTER(p_umadt_obj->p_log); + + cl_spinlock_acquire(&p_umadt_obj->register_lock); + p_mad_bind_info = + (mad_bind_info_t *) cl_qlist_head(&p_umadt_obj->register_list); + count = cl_qlist_count(&p_umadt_obj->register_list); + cl_spinlock_release(&p_umadt_obj->register_lock); + for (i = 0; i < count; i++) { + cl_spinlock_acquire(&p_umadt_obj->register_lock); + p_list_item = cl_qlist_next(&p_mad_bind_info->list_item); + cl_spinlock_release(&p_umadt_obj->register_lock); + /* Unbind this handle */ + /* osm_vendor_ubind also removesd the item from the list */ + /* osm_vendor_unbind takes the list lock so release it here */ + osm_vendor_unbind((osm_bind_handle_t) p_mad_bind_info); + p_mad_bind_info = (mad_bind_info_t *) p_list_item; + } + dlclose(p_umadt_obj->umadt_handle); + free(p_umadt_obj); + *pp_vend = NULL; + + OSM_LOG_EXIT(p_umadt_obj->p_log); +} + +/* //////////////////////////////////////////////////////////////////////// */ +/* See VendorAbstractMadIntf.h for info */ +/* 
//////////////////////////////////////////////////////////////////////// */ +/* */ +ib_api_status_t +osm_vendor_init(IN osm_vendor_t * const p_vend, + IN osm_log_t * const p_log, IN const uint32_t timeout) +{ + FSTATUS Status; + PUMADT_GET_INTERFACE uMadtGetInterface; + char *error; + umadt_obj_t *p_umadt_obj = (umadt_obj_t *) p_vend; + + OSM_LOG_ENTER(p_log); + + p_umadt_obj->p_log = p_log; + p_umadt_obj->timeout = timeout; + + p_umadt_obj->umadt_handle = dlopen("libibt.so", RTLD_NOW); + + if (!p_umadt_obj->umadt_handle) { + printf("Could not load libibt.so <%s>\n", dlerror()); + return IB_ERROR; + } + uMadtGetInterface = + dlsym(p_umadt_obj->umadt_handle, "uMadtGetInterface"); + if ((error = dlerror()) != NULL) { + printf("Could not resolve symbol uMadtGetInterface ERROR<%s>\n", + error); + return IB_ERROR; + } + + Status = (*uMadtGetInterface) (&p_umadt_obj->uMadtInterface); + if (Status != FSUCCESS) { + printf(" Error in getting uMADT interface ERROR<%d>\n", Status); + return IB_ERROR; + } + + /* Initialize the register list and register list lock */ + cl_qlist_init(&p_umadt_obj->register_list); + + cl_spinlock_construct(&p_umadt_obj->register_lock); + CL_ASSERT(cl_spinlock_init(&p_umadt_obj->register_lock) == CL_SUCCESS); + p_umadt_obj->init_done = TRUE; + printf("*****SUCCESS*****\n"); + + OSM_LOG_EXIT(p_log); + return IB_SUCCESS; + +} + +/* //////////////////////////////////////////////////////////////////////// */ +/* See VendorAbstractMadIntf.h for info */ +/* //////////////////////////////////////////////////////////////////////// */ +ib_api_status_t +osm_vendor_get_ports(IN osm_vendor_t * const p_vend, + IN ib_net64_t * const p_guids, + IN uint32_t * const p_num_guids) +{ + char *error = NULL; + PIBT_GET_INTERFACE pfnIbtGetInterface; + PIBT_INIT pfnIbtInitFunc; + + FSTATUS Status; + uint32_t caCount, caGuidCount; + IB_CA_ATTRIBUTES caAttributes; + IB_HANDLE caHandle; + uint32_t i; + IB_PORT_ATTRIBUTES *pPortAttributesList; + EUI64 CaGuidArray[8]; + void 
*context; + uint64_t *p_port_guid; + uint32_t free_guids; + + umadt_obj_t *p_umadt_obj = (umadt_obj_t *) p_vend; + + OSM_LOG_ENTER(p_umadt_obj->p_log); + + CL_ASSERT(p_guids); + CL_ASSERT(p_num_guids); + + pfnIbtInitFunc = + (PIBT_INIT) dlsym(p_umadt_obj->umadt_handle, "IbtInit"); + + if (!pfnIbtInitFunc) { + printf("Error getting IbtInit function address.\n"); + return IB_ERROR; + } + + (*pfnIbtInitFunc) (); + + pfnIbtGetInterface = + (PIBT_GET_INTERFACE) dlsym(p_umadt_obj->umadt_handle, + "IbtGetInterface"); + + if (!pfnIbtGetInterface || (error = dlerror()) != NULL) { + printf("Error getting IbtGetInterface function address.<%s>\n", + error); + return FALSE; + } + (*pfnIbtGetInterface) (&p_umadt_obj->IbtInterface); + + caGuidCount = 8; + Status = + p_umadt_obj->IbtInterface.GetCaGuidArray(&caGuidCount, + &CaGuidArray[0]); + + if ((Status != FSUCCESS) || (caGuidCount == 0)) { + return FALSE; + } + + free_guids = *p_num_guids; + p_port_guid = p_guids; + + /* query each ca & copy its info into callers buffer */ + for (caCount = 0; caCount < caGuidCount; caCount++) { + memset(&caAttributes, 0, sizeof(IB_CA_ATTRIBUTES)); + + /* Open the CA */ + Status = p_umadt_obj->IbtInterface.Vpi.OpenCA(CaGuidArray[caCount], NULL, /* CACompletionCallback */ + NULL, /* AsyncEventCallback */ + NULL, &caHandle); + if (Status != FSUCCESS) { + return IB_ERROR; + } + + Status = p_umadt_obj->IbtInterface.Vpi.QueryCA(caHandle, + &caAttributes, + &context); + + if (Status != FSUCCESS) { + p_umadt_obj->IbtInterface.Vpi.CloseCA(caHandle); + return IB_ERROR; + } + + if (caAttributes.Ports > free_guids) { + *p_num_guids = 0; + memset(p_guids, 0, (*p_num_guids) * sizeof(uint64_t)); + return IB_INSUFFICIENT_MEMORY; + } + + pPortAttributesList = + (IB_PORT_ATTRIBUTES *) malloc(caAttributes. 
+ PortAttributesListSize); + + if (pPortAttributesList == NULL) { + p_umadt_obj->IbtInterface.Vpi.CloseCA(caHandle); + *p_num_guids = 0; + memset(p_guids, 0, (*p_num_guids) * sizeof(uint64_t)); + return IB_INSUFFICIENT_MEMORY; + } + + memset(pPortAttributesList, 0, + caAttributes.PortAttributesListSize); + + caAttributes.PortAttributesList = pPortAttributesList; + + Status = p_umadt_obj->IbtInterface.Vpi.QueryCA(caHandle, + &caAttributes, + &context); + + if (Status != FSUCCESS) { + p_umadt_obj->IbtInterface.Vpi.CloseCA(caHandle); + *p_num_guids = 0; + memset(p_guids, 0, (*p_num_guids) * sizeof(uint64_t)); + return IB_ERROR; + } + + pPortAttributesList = caAttributes.PortAttributesList; + + for (i = 0; i < caAttributes.Ports; i++) { + *(p_port_guid) = + cl_hton64((uint64_t) pPortAttributesList->GUID); + pPortAttributesList = pPortAttributesList->Next; + p_port_guid++; + } + free(caAttributes.PortAttributesList); + p_umadt_obj->IbtInterface.Vpi.CloseCA(caHandle); + + free_guids = free_guids - caAttributes.Ports; + + } + *p_num_guids = *p_num_guids - free_guids; + return IB_SUCCESS; +} + +/* //////////////////////////////////////////////////////////////////////// */ +/* See VendorAbstractMadIntf.h for info */ +/* //////////////////////////////////////////////////////////////////////// */ +ib_mad_t *osm_vendor_get(IN osm_bind_handle_t h_bind, + IN const uint32_t mad_size, + IN osm_vend_wrap_t * p_vend_wrap) +{ + /* FSTATUS Status; */ + /* uint32_t mad_count = 0; */ + /* MadtStruct *p_madt_struct; */ + mad_bind_info_t *p_mad_bind_info = (mad_bind_info_t *) h_bind; + umadt_obj_t *p_umadt_obj = p_mad_bind_info->p_umadt_obj; + ib_mad_t *p_mad; + OSM_LOG_ENTER(p_umadt_obj->p_log); + + CL_ASSERT(h_bind); + + p_umadt_obj = p_mad_bind_info->p_umadt_obj; + + /* Sanity check */ + CL_ASSERT(p_umadt_obj->init_done); + CL_ASSERT(p_vend_wrap); + CL_ASSERT(__valid_mad_handle(p_mad_bind_info)); + +#if 0 + mad_count = 1; + Status = + 
p_umadt_obj->uMadtInterface.uMadtGetSendMad(p_mad_bind_info-> + umadt_handle, + &mad_count, + &p_madt_struct); + + if (Status != FSUCCESS || p_madt_struct == NULL) { + p_vend_wrap->p_madt_struct = NULL; + return NULL; + } + p_vend_wrap->p_madt_struct = p_madt_struct; + p_vend_wrap->direction = SEND; + return ((ib_mad_t *) & p_madt_struct->IBMad); +#endif /* 0 */ + p_mad = (ib_mad_t *) malloc(mad_size); + if (!p_mad) { + p_vend_wrap->p_madt_struct = NULL; + return NULL; + } + + memset(p_mad, 0, mad_size); + + p_vend_wrap->p_madt_struct = NULL; + p_vend_wrap->direction = SEND; + p_vend_wrap->size = mad_size; + return (p_mad); + +} + +/* //////////////////////////////////////////////////////////////////////// */ +/* See VendorAbstractMadIntf.h for info */ +/* //////////////////////////////////////////////////////////////////////// */ +void +osm_vendor_put(IN osm_bind_handle_t h_bind, + IN osm_vend_wrap_t * const p_vend_wrap, + IN ib_mad_t * const p_mad) +{ + + FSTATUS Status; + + mad_bind_info_t *p_mad_bind_info; + umadt_obj_t *p_umadt_obj; + + /* */ + /* Validate the vendor mad transport handle */ + /* */ + CL_ASSERT(h_bind); + p_mad_bind_info = (mad_bind_info_t *) h_bind; + p_umadt_obj = p_mad_bind_info->p_umadt_obj; + + /* sanity check */ + CL_ASSERT(p_umadt_obj->init_done); + CL_ASSERT(h_bind); + CL_ASSERT(__valid_mad_handle(p_mad_bind_info)); + CL_ASSERT(p_vend_wrap); + /* CL_ASSERT( (ib_mad_t*)&p_vend_wrap->p_madt_struct->IBMad == p_mad ); */ + + /* Release the MAD based on the direction of the MAD */ + if (p_vend_wrap->direction == SEND) { + /* */ + /* For a send the PostSend released the MAD with Umadt. Simply dealloacte the */ + /* local memory that was allocated on the osm_vendor_get() call. */ + /* */ + free(p_mad); +#if 0 + Status = + p_umadt_obj->uMadtInterface. 
+ uMadtReleaseSendMad(p_mad_bind_info->umadt_handle, + p_vend_wrap->p_madt_struct); + if (Status != FSUCCESS) { + /* printf("uMadtReleaseSendMad: Status = <%d>\n", Status); */ + return; + } +#endif + } else if (p_vend_wrap->direction == RECEIVE) { + CL_ASSERT((ib_mad_t *) & p_vend_wrap->p_madt_struct->IBMad == + p_mad); + Status = + p_umadt_obj->uMadtInterface. + uMadtReleaseRecvMad(p_mad_bind_info->umadt_handle, + p_vend_wrap->p_madt_struct); + if (Status != FSUCCESS) { + /* printf("uMadtReleaseRecvMad Status=<%d>\n", Status); */ + return; + } + } else { + return; + } + return; +} + +/* //////////////////////////////////////////////////////////////////////// */ +/* See VendorAbstractMadIntf.h for info */ +/* //////////////////////////////////////////////////////////////////////// */ +ib_api_status_t +osm_vendor_send(IN osm_bind_handle_t h_bind, + IN osm_vend_wrap_t * const p_vend_wrap, + IN osm_mad_addr_t * const p_mad_addr, + IN ib_mad_t * const p_mad, + IN void *transaction_context, IN boolean_t const resp_expected) +{ + FSTATUS Status; + + MadAddrStruct destAddr = { 0 }; + + mad_bind_info_t *p_mad_bind_info; + trans_context_t *p_trans_context; + + umadt_obj_t *p_umadt_obj = NULL; + + uint32_t mad_count = 0; + MadtStruct *p_madt_struct = NULL; + uint32_t i; + uint32_t num_mads = 0; + uint32_t seg_num = 0; + uint8_t *p_frag_data = NULL; + ib_sa_mad_t_vM3 *p_sa_mad = NULL; + + CL_ASSERT(h_bind); + p_mad_bind_info = (mad_bind_info_t *) h_bind; + p_umadt_obj = p_mad_bind_info->p_umadt_obj; + + /* sanity check */ + CL_ASSERT(p_umadt_obj); + CL_ASSERT(p_umadt_obj->init_done); + CL_ASSERT(__valid_mad_handle(p_mad_bind_info)); + CL_ASSERT(p_vend_wrap); + CL_ASSERT(p_mad_addr); + CL_ASSERT(p_mad); + /* CL_ASSERT( (ib_mad_t*)&p_vend_wrap->p_madt_struct->IBMad == p_mad ); */ + + /* */ + /* based on the class, fill out the address info */ + /* */ + destAddr.DestLid = p_mad_addr->dest_lid; + destAddr.PathBits = p_mad_addr->path_bits; + destAddr.StaticRate = 
p_mad_addr->static_rate; + + if (p_mad_bind_info->umadt_reg_class.ClassId == IB_MCLASS_SUBN_LID || + p_mad_bind_info->umadt_reg_class.ClassId == IB_MCLASS_SUBN_DIR) { + CL_ASSERT(p_mad_addr->addr_type.smi.source_lid); + destAddr.AddrType.Smi.SourceLid = + p_mad_addr->addr_type.smi.source_lid; + } else { + destAddr.AddrType.Gsi.RemoteQpNumber = + p_mad_addr->addr_type.gsi.remote_qp; + destAddr.AddrType.Gsi.RemoteQkey = + p_mad_addr->addr_type.gsi.remote_qkey; + destAddr.AddrType.Gsi.PKey = OSM_DEFAULT_PKEY; + destAddr.AddrType.Gsi.ServiceLevel = + p_mad_addr->addr_type.gsi.service_level; + destAddr.AddrType.Gsi.GlobalRoute = + p_mad_addr->addr_type.gsi.global_route; + /* destAddr.AddrType.Gsi.GRHInfo = p_mad_addr->addr_type.gsi.grh_info; */ + } + p_mad->trans_id = cl_ntoh64(p_mad->trans_id) << 24; + + /* */ + /* Create a transaction context for this send and save the TID and client context. */ + /* */ + + if (resp_expected) { + p_trans_context = malloc(sizeof(trans_context_t)); + CL_ASSERT(p_trans_context); + + memset(p_trans_context, 0, sizeof(trans_context_t)); + p_trans_context->trans_id = p_mad->trans_id; + p_trans_context->context = transaction_context; + p_trans_context->sent_time = cl_get_time_stamp(); + + cl_spinlock_acquire(&p_mad_bind_info->trans_ctxt_lock); + cl_qlist_insert_tail(&p_mad_bind_info->trans_ctxt_list, + &p_trans_context->list_item); + cl_spinlock_release(&p_mad_bind_info->trans_ctxt_lock); + } + + if (p_mad_bind_info->umadt_reg_class.ClassId == IB_MCLASS_SUBN_LID || + p_mad_bind_info->umadt_reg_class.ClassId == IB_MCLASS_SUBN_DIR) { + /* Get one mad from uMadt */ + mad_count = 1; + Status = + p_umadt_obj->uMadtInterface. 
+ uMadtGetSendMad(p_mad_bind_info->umadt_handle, &mad_count, + &p_madt_struct); + + if (Status != FSUCCESS || p_madt_struct == NULL) { + return IB_ERROR; + } + + /* No Segmentation required */ + memcpy(&p_madt_struct->IBMad, p_mad, MAD_BLOCK_SIZE); + + /* Post the MAD */ + + Status = + p_umadt_obj->uMadtInterface.uMadtPostSend(p_mad_bind_info-> + umadt_handle, + p_madt_struct, + &destAddr); + if (Status != FSUCCESS) { + printf("uMadtPostSendMad: Status = <%d>\n", Status); + return IB_ERROR; + } + + /* Release send MAD */ + Status = + p_umadt_obj->uMadtInterface. + uMadtReleaseSendMad(p_mad_bind_info->umadt_handle, + p_madt_struct); + if (Status != FSUCCESS) { + printf("uMadtReleaseSendMad: Status = <%d>\n", Status); + return IB_ERROR; + } + } else { + + /* */ + /* Segment the MAD, get the required send mads from uMadt and post the MADs. */ + /* */ + uint32_t payload_len; + + payload_len = + cl_ntoh32(((ib_sa_mad_t_vM3 *) p_mad)->payload_len); + num_mads = payload_len / IB_SA_DATA_SIZE; + if (payload_len % IB_SA_DATA_SIZE != 0) { + num_mads++; /* Get one additional mad for the remainder */ + } + for (i = 0; i < num_mads; i++) { + /* Get one mad from uMadt */ + mad_count = 1; + Status = + p_umadt_obj->uMadtInterface. + uMadtGetSendMad(p_mad_bind_info->umadt_handle, + &mad_count, &p_madt_struct); + + if (Status != FSUCCESS || p_madt_struct == NULL) { + return IB_ERROR; + } + /* Copy client MAD into uMadt's MAD. 
*/ + if (i == 0) { /* First Packet */ + /* Since this is the first MAD, copy the entire MAD_SIZE */ + memcpy(&p_madt_struct->IBMad, p_mad, + MAD_BLOCK_SIZE); + + p_frag_data = + (uint8_t *) p_mad + MAD_BLOCK_SIZE; + + p_sa_mad = + (ib_sa_mad_t_vM3 *) & p_madt_struct->IBMad; + if (num_mads == 1) { /* Only one Packet */ + p_sa_mad->seg_num = 0; + p_sa_mad->frag_flag = 5; /* Set bit 0 for first pkt and b4 for last pkt */ + /* the payload length gets copied with the mad header above */ + } else { /* More than one packet in this response */ + + seg_num = 1; + p_sa_mad->seg_num = + cl_ntoh32(seg_num++); + p_sa_mad->frag_flag = 1; /* Set bit 0 for first pkt */ + /* the payload length gets copied with the mad header above */ + } + + } else if (i < num_mads - 1) { /* Not last packet */ + /* First copy only the header */ + memcpy(&p_madt_struct->IBMad, p_mad, + IB_SA_MAD_HDR_SIZE); + /* Set the relevant fields in the SA_MAD_HEADER */ + p_sa_mad = + (ib_sa_mad_t_vM3 *) & p_madt_struct->IBMad; + p_sa_mad->payload_len = + cl_ntoh32(IB_SA_DATA_SIZE); + p_sa_mad->seg_num = cl_ntoh32(seg_num++); + p_sa_mad->frag_flag = 0; + /* Now copy the fragmented data */ + memcpy(((uint8_t *) & p_madt_struct->IBMad) + + IB_SA_MAD_HDR_SIZE, p_frag_data, + IB_SA_DATA_SIZE); + p_frag_data = p_frag_data + IB_SA_DATA_SIZE; + + } else if (i == num_mads - 1) { /* Last packet */ + /* First copy only the header */ + memcpy(&p_madt_struct->IBMad, p_mad, + IB_SA_MAD_HDR_SIZE); + /* Set the relevant fields in the SA_MAD_HEADER */ + p_sa_mad = + (ib_sa_mad_t_vM3 *) & p_madt_struct->IBMad; + p_sa_mad->seg_num = cl_ntoh32(seg_num++); + p_sa_mad->frag_flag = 4; /* Set Bit 2 for last pkt */ + p_sa_mad->payload_len = + cl_ntoh32(cl_ntoh32 + (((ib_sa_mad_t_vM3 *) p_mad)-> + payload_len) % IB_SA_DATA_SIZE); + /* Now copy the fragmented data */ + memcpy((((uint8_t *) & p_madt_struct->IBMad)) + + IB_SA_MAD_HDR_SIZE, p_frag_data, + cl_ntoh32(p_sa_mad->payload_len)); + p_frag_data = p_frag_data + IB_SA_DATA_SIZE; + + 
} + /* Post the MAD */ + Status = + p_umadt_obj->uMadtInterface. + uMadtPostSend(p_mad_bind_info->umadt_handle, + p_madt_struct, &destAddr); + if (Status != FSUCCESS) { + printf("uMadtPostSendMad: Status = <%d>\n", + Status); + return IB_ERROR; + } + + /* Release send MAD */ + Status = + p_umadt_obj->uMadtInterface. + uMadtReleaseSendMad(p_mad_bind_info->umadt_handle, + p_madt_struct); + if (Status != FSUCCESS) { + printf("uMadtReleaseSendMad: Status = <%d>\n", + Status); + return IB_ERROR; + } + } + } + return (IB_SUCCESS); +} + +/* //////////////////////////////////////////////////////////////////////// */ +/* See VendorAbstractMadIntf.h for info */ +/* //////////////////////////////////////////////////////////////////////// */ + +osm_bind_handle_t +osm_vendor_bind(IN osm_vendor_t * const p_vend, + IN osm_bind_info_t * const p_osm_bind_info, + IN osm_mad_pool_t * const p_mad_pool, + IN osm_vend_mad_recv_callback_t mad_recv_callback, + IN void *context) +{ + cl_status_t cl_status; + FSTATUS Status; /* GEN1 Status for Umadt */ + + mad_bind_info_t *p_mad_bind_info; + RegisterClassStruct *p_umadt_reg_class; + + umadt_obj_t *p_umadt_obj; + OSM_LOG_ENTER(((umadt_obj_t *) p_vend)->p_log); + + CL_ASSERT(p_vend); + + p_umadt_obj = (umadt_obj_t *) p_vend; + + /* Sanity check */ + CL_ASSERT(p_umadt_obj->init_done); + CL_ASSERT(p_osm_bind_info); + CL_ASSERT(p_mad_pool); + CL_ASSERT(mad_recv_callback); + + /* Allocate memory for registering the handle. 
*/ + p_mad_bind_info = (mad_bind_info_t *) malloc(sizeof(*p_mad_bind_info)); + if (p_mad_bind_info) { + memset(p_mad_bind_info, 0, sizeof(*p_mad_bind_info)); + p_umadt_reg_class = &p_mad_bind_info->umadt_reg_class; + } + p_umadt_reg_class->PortGuid = cl_ntoh64(p_osm_bind_info->port_guid); + p_umadt_reg_class->ClassId = p_osm_bind_info->mad_class; + p_umadt_reg_class->ClassVersion = p_osm_bind_info->class_version; + p_umadt_reg_class->isResponder = p_osm_bind_info->is_responder; + p_umadt_reg_class->isTrapProcessor = p_osm_bind_info->is_trap_processor; + p_umadt_reg_class->isReportProcessor = + p_osm_bind_info->is_report_processor; + p_umadt_reg_class->SendQueueSize = p_osm_bind_info->send_q_size; + p_umadt_reg_class->RecvQueueSize = p_osm_bind_info->recv_q_size; + p_umadt_reg_class->NotifySendCompletion = TRUE; + + p_mad_bind_info->p_umadt_obj = p_umadt_obj; + p_mad_bind_info->p_mad_pool = p_mad_pool; + p_mad_bind_info->mad_recv_callback = mad_recv_callback; + p_mad_bind_info->client_context = context; + + /* register with Umadt for MAD interface */ + Status = p_umadt_obj->uMadtInterface.uMadtRegister(p_umadt_reg_class, + &p_mad_bind_info-> + umadt_handle); + if (Status != FSUCCESS) { + free(p_mad_bind_info); + OSM_LOG_EXIT(p_umadt_obj->p_log); + return (OSM_BIND_INVALID_HANDLE); + } + CL_ASSERT(p_mad_bind_info->umadt_handle); + /* */ + /* Start a worker thread to process receives. 
*/ + /* */ + cl_thread_construct(&p_mad_bind_info->recv_processor_thread); + cl_status = cl_thread_init(&p_mad_bind_info->recv_processor_thread, + __mad_recv_processor, + (void *)p_mad_bind_info, "mad_recv_worker"); + CL_ASSERT(cl_status == CL_SUCCESS); + + cl_qlist_init(&p_mad_bind_info->trans_ctxt_list); + cl_spinlock_construct(&p_mad_bind_info->trans_ctxt_lock); + cl_spinlock_init(&p_mad_bind_info->trans_ctxt_lock); + cl_spinlock_construct(&p_mad_bind_info->timeout_list_lock); + cl_spinlock_init(&p_mad_bind_info->timeout_list_lock); + + cl_status = cl_timer_init(&p_mad_bind_info->timeout_timer, + __osm_vendor_timer_callback, + (void *)p_mad_bind_info); + CL_ASSERT(cl_status == CL_SUCCESS); + cl_qlist_init(&p_mad_bind_info->timeout_list); + /* */ + /* Insert the mad_reg_struct in list and return pointer to it as the handle */ + /* */ + cl_spinlock_acquire(&p_umadt_obj->register_lock); + + cl_qlist_insert_head(&p_umadt_obj->register_list, + &p_mad_bind_info->list_item); + + cl_spinlock_release(&p_umadt_obj->register_lock); + + /* + A timeout value of 0 means disable timeouts. 
+ */ + if (p_umadt_obj->timeout) { + cl_timer_start(&p_mad_bind_info->timeout_timer, + DEFAULT_TIMER_INTERVAL_MSEC); + } + + OSM_LOG_EXIT(p_umadt_obj->p_log); + return ((osm_bind_handle_t) p_mad_bind_info); +} + +void osm_vendor_unbind(IN osm_bind_handle_t h_bind) +{ + mad_bind_info_t *p_mad_bind_info; + umadt_obj_t *p_umadt_obj; + cl_list_item_t *p_list_item, *p_next_list_item; + + CL_ASSERT(h_bind); + p_mad_bind_info = (mad_bind_info_t *) h_bind; + p_umadt_obj = p_mad_bind_info->p_umadt_obj; + + /* sanity check */ + CL_ASSERT(p_umadt_obj); + CL_ASSERT(p_umadt_obj->init_done); + CL_ASSERT(__valid_mad_handle(p_mad_bind_info)); + + p_umadt_obj->uMadtInterface.uMadtDestroy(&p_mad_bind_info-> + umadt_handle); + cl_timer_destroy(&p_mad_bind_info->timeout_timer); + cl_thread_destroy(&p_mad_bind_info->recv_processor_thread); + + cl_spinlock_acquire(&p_mad_bind_info->trans_ctxt_lock); + p_list_item = cl_qlist_head(&p_mad_bind_info->trans_ctxt_list); + while (p_list_item != cl_qlist_end(&p_mad_bind_info->trans_ctxt_list)) { + p_next_list_item = cl_qlist_next(p_list_item); + cl_qlist_remove_item(&p_mad_bind_info->trans_ctxt_list, + p_list_item); + free(p_list_item); + p_list_item = p_next_list_item; + } + cl_spinlock_release(&p_mad_bind_info->trans_ctxt_lock); + + cl_spinlock_acquire(&p_mad_bind_info->timeout_list_lock); + p_list_item = cl_qlist_head(&p_mad_bind_info->timeout_list); + while (p_list_item != cl_qlist_end(&p_mad_bind_info->timeout_list)) { + p_next_list_item = cl_qlist_next(p_list_item); + cl_qlist_remove_item(&p_mad_bind_info->timeout_list, + p_list_item); + free(p_list_item); + p_list_item = p_next_list_item; + } + cl_spinlock_release(&p_mad_bind_info->timeout_list_lock); + + free(p_mad_bind_info); +} + +void __mad_recv_processor(IN void *context) +{ + mad_bind_info_t *p_mad_bind_info = (mad_bind_info_t *) context; + umadt_obj_t *p_umadt_obj; + osm_madw_t *p_osm_madw = NULL; + osm_vend_wrap_t *p_vend_wrap = NULL; + osm_mad_addr_t osm_mad_addr = { 0 }; + 
cl_list_item_t *p_list_item; + void *transaction_context; + + FSTATUS Status; + MadtStruct *pRecvMad = NULL; + MadWorkCompletion *pRecvCmp = NULL; + + CL_ASSERT(context); + + p_mad_bind_info = (mad_bind_info_t *) context; + p_umadt_obj = p_mad_bind_info->p_umadt_obj; + /* PollFor a completion */ + /* if FNOTFOND, then wait for a completion then again poll and return the MAD */ + while (1) { + Status = + p_umadt_obj->uMadtInterface. + uMadtPollForRecvCompletion(p_mad_bind_info->umadt_handle, + &pRecvMad, &pRecvCmp); + if (Status != FSUCCESS) { + if (Status == FNOT_FOUND) { + /* Wait for a completion */ + Status = p_umadt_obj->uMadtInterface.uMadtWaitForAnyCompletion(p_mad_bind_info->umadt_handle, RECV_COMPLETION, 0x5000); /* 5 sec timeout */ + + if (Status == FTIMEOUT) { + continue; + } + CL_ASSERT(Status == FSUCCESS); + + Status = + p_umadt_obj->uMadtInterface. + uMadtPollForRecvCompletion(p_mad_bind_info-> + umadt_handle, + &pRecvMad, + &pRecvCmp); + if (Status != FSUCCESS) { + printf + (" mad_recv_worker: Error in PollForRecv returning <%x>\n", + Status); + CL_ASSERT(0); + } + } else { + printf + ("uMadtPollForRecvCompletion Status=<%x>\n", + Status); + CL_ASSERT(0); + } + } + CL_ASSERT(pRecvMad); + CL_ASSERT(pRecvCmp); + + if (((ib_sa_mad_t_vM3 *) (&pRecvMad->IBMad))->frag_flag & 0x20) { + /* Ignore the ACK packet */ + Status = + p_umadt_obj->uMadtInterface. 
+ uMadtReleaseRecvMad(p_mad_bind_info->umadt_handle, + pRecvMad); + continue; + } + /* */ + /* Extract the return address to pass it on to the client */ + /* */ + osm_mad_addr.dest_lid = pRecvCmp->AddressInfo.DestLid; + osm_mad_addr.path_bits = pRecvCmp->AddressInfo.PathBits; + osm_mad_addr.static_rate = pRecvCmp->AddressInfo.StaticRate; + + if (p_mad_bind_info->umadt_reg_class.ClassId == + IB_MCLASS_SUBN_LID + || p_mad_bind_info->umadt_reg_class.ClassId == + IB_MCLASS_SUBN_DIR) { + osm_mad_addr.addr_type.smi.source_lid = + pRecvCmp->AddressInfo.AddrType.Smi.SourceLid; + /* osm_mad_addr.addr_type.smi.port_num = pRecvCmp->AddressInfo.AddrType.Smi.PortNumber; */ + } else { + osm_mad_addr.addr_type.gsi.remote_qp = + pRecvCmp->AddressInfo.AddrType.Gsi.RemoteQpNumber; + osm_mad_addr.addr_type.gsi.remote_qkey = + pRecvCmp->AddressInfo.AddrType.Gsi.RemoteQkey; + osm_mad_addr.addr_type.gsi.pkey_ix = 0; + osm_mad_addr.addr_type.gsi.service_level = + pRecvCmp->AddressInfo.AddrType.Gsi.ServiceLevel; + osm_mad_addr.addr_type.gsi.global_route = + pRecvCmp->AddressInfo.AddrType.Gsi.GlobalRoute; + /* osm_mad_addr.addr_type.gsi.grh_info = pRecvCmp->AddressInfo.AddrType.Gsi.GRHInfo; */ + } + p_osm_madw = + osm_mad_pool_get_wrapper(p_mad_bind_info->p_mad_pool, + p_mad_bind_info, MAD_BLOCK_SIZE, + (ib_mad_t *) & pRecvMad->IBMad, + &osm_mad_addr); + CL_ASSERT(p_osm_madw); + p_vend_wrap = osm_madw_get_vend_ptr(p_osm_madw); + CL_ASSERT(p_vend_wrap); + p_vend_wrap->p_madt_struct = pRecvMad; + p_vend_wrap->direction = RECEIVE; + + osm_log(p_mad_bind_info->p_umadt_obj->p_log, OSM_LOG_DEBUG, + "__mad_recv_processor: " + "Received data p_osm_madw[0x%p].\n", p_osm_madw); + + /* */ + /* Do TID Processing. 
*/ + /* */ + /* If R bit is set swap the TID */ + + cl_spinlock_acquire(&p_mad_bind_info->trans_ctxt_lock); + p_list_item = + cl_qlist_find_from_head(&p_mad_bind_info->trans_ctxt_list, + __match_tid_context, + &p_osm_madw->p_mad->trans_id); + + if (p_list_item == + cl_qlist_end(&p_mad_bind_info->trans_ctxt_list)) { + transaction_context = NULL; + } else { + transaction_context = + ((trans_context_t *) p_list_item)->context; + cl_qlist_remove_item(&p_mad_bind_info->trans_ctxt_list, + p_list_item); + free(p_list_item); + } + cl_spinlock_release(&p_mad_bind_info->trans_ctxt_lock); + ((ib_mad_t *) p_osm_madw->p_mad)->trans_id = + cl_ntoh64(p_osm_madw->p_mad->trans_id >> 24); + osm_log(p_mad_bind_info->p_umadt_obj->p_log, OSM_LOG_DEBUG, + "__mad_recv_processor: " + "Received data p_osm_madw [0x%p]" "\n\t\t\t\tTID[0x%" + PRIx64 ", context[%p]. \n", p_osm_madw, + ((ib_mad_t *) p_osm_madw->p_mad)->trans_id, + transaction_context); + + (*(p_mad_bind_info->mad_recv_callback)) (p_osm_madw, + p_mad_bind_info-> + client_context, + transaction_context); + + } +} + +cl_status_t +__match_tid_context(const cl_list_item_t * const p_list_item, void *context) +{ + if (((trans_context_t *) p_list_item)->trans_id == + *((uint64_t *) context)) + return CL_SUCCESS; + return CL_NOT_FOUND; +} + +boolean_t __valid_mad_handle(IN mad_bind_info_t * p_mad_bind_info) +{ + + umadt_obj_t *p_umadt_obj; + + p_umadt_obj = p_mad_bind_info->p_umadt_obj; + + cl_spinlock_acquire(&p_umadt_obj->register_lock); + if (!cl_is_item_in_qlist(&p_umadt_obj->register_list, + &p_mad_bind_info->list_item)) { + cl_spinlock_release(&p_umadt_obj->register_lock); + return FALSE; + } + cl_spinlock_release(&p_umadt_obj->register_lock); + return TRUE; +} + +void __osm_vendor_timer_callback(IN void *context) +{ + uint64_t current_time; + mad_bind_info_t *p_mad_bind_info; + umadt_obj_t *p_umadt_obj; + uint32_t timeout; + + cl_list_item_t *p_list_item, *p_next_list_item; + + CL_ASSERT(context); + + p_mad_bind_info = 
(mad_bind_info_t *) context; + p_umadt_obj = p_mad_bind_info->p_umadt_obj; + timeout = p_umadt_obj->timeout * 1000; + + current_time = cl_get_time_stamp(); + + cl_spinlock_acquire(&p_mad_bind_info->trans_ctxt_lock); + + p_list_item = cl_qlist_head(&p_mad_bind_info->trans_ctxt_list); + while (p_list_item != cl_qlist_end(&p_mad_bind_info->trans_ctxt_list)) { + + p_next_list_item = cl_qlist_next(p_list_item); + + /* DEFAULT_PKT_TIMEOUT is in milli seconds */ + if (current_time - ((trans_context_t *) p_list_item)->sent_time + > timeout) { + /* Add this transaction to the timeout_list */ + cl_qlist_remove_item(&p_mad_bind_info->trans_ctxt_list, + p_list_item); + cl_qlist_insert_tail(&p_mad_bind_info->timeout_list, + p_list_item); + } + + p_list_item = p_next_list_item; + } + + cl_spinlock_release(&p_mad_bind_info->trans_ctxt_lock); + + p_list_item = cl_qlist_head(&p_mad_bind_info->timeout_list); + while (p_list_item != cl_qlist_end(&p_mad_bind_info->timeout_list)) { + osm_log(p_mad_bind_info->p_umadt_obj->p_log, OSM_LOG_DEBUG, + "__osm_vendor_timer_callback: " + "Timing out transaction context [0x%p].\n", + ((trans_context_t *) p_list_item)->context); + + (*(p_mad_bind_info->mad_recv_callback)) (NULL, + p_mad_bind_info-> + client_context, + ((trans_context_t *) + p_list_item)-> + context); + + p_next_list_item = cl_qlist_next(p_list_item); + cl_qlist_remove_item(&p_mad_bind_info->timeout_list, + p_list_item); + free(p_list_item); + p_list_item = p_next_list_item; + } + + cl_timer_start(&p_mad_bind_info->timeout_timer, + DEFAULT_TIMER_INTERVAL_MSEC); + +} + +#endif /* OSM_VENDOR_INTF_UMADT */ diff --git a/man/opensm.8.in b/man/opensm.8.in new file mode 100644 index 0000000..2b6696d --- /dev/null +++ b/man/opensm.8.in @@ -0,0 +1,1708 @@ +.TH OPENSM 8 "Sept 15, 2014" "OpenIB" "OpenIB Management" + +.SH NAME +opensm \- InfiniBand subnet manager and administration (SM/SA) + +.SH SYNOPSIS +.B opensm +[\-\-version]] +[\-F | \-\-config ] +[\-c(reate-config) ] +[\-g(uid) ] 
+[\-l(mc) ] +[\-p(riority) ] +[\-\-subnet_prefix ] +[\-\-smkey ] +[\-\-sm_sl ] +[\-r(eassign_lids)] +[\-R | \-\-routing_engine ] +[\-\-do_mesh_analysis] +[\-\-lash_start_vl ] +[\-\-nue_max_num_vls ] +[\-A | \-\-ucast_cache] +[\-z | \-\-connect_roots] +[\-M | \-\-lid_matrix_file ] +[\-U | \-\-lfts_file ] +[\-S | \-\-sadb_file ] +[\-a | \-\-root_guid_file ] +[\-u | \-\-cn_guid_file ] +[\-G | \-\-io_guid_file ] +[\-\-port\-shifting] +[\-\-scatter\-ports ] +[\-H | \-\-max_reverse_hops ] +[\-X | \-\-guid_routing_order_file ] +[\-m | \-\-ids_guid_file ] +[\-o(nce)] +[\-s(weep) ] +[\-t(imeout) ] +[\-\-retries ] +[\-\-maxsmps ] +[\-\-console [off | local | socket | loopback]] +[\-\-console-port ] +[\-i | \-\-ignore_guids ] +[\-w | \-\-hop_weights_file ] +[\-O | \-\-port_search_ordering_file ] +[\-O | \-\-dimn_ports_file ] (DEPRECATED) +[\-\-dump_files_dir ] +[\-f | \-\-log_file ] +[\-L | \-\-log_limit ] [\-e(rase_log_file)] +[\-P(config) ] +[\-N | \-\-no_part_enforce] (DEPRECATED) +[\-Z | \-\-part_enforce [both | in | out | off]] +[\-W | \-\-allow_both_pkeys] +[\-Q | \-\-qos [\-Y | \-\-qos_policy_file ]] +[\-\-congestion_control] +[\-\-cc_key ] +[\-y | \-\-stay_on_fatal] +[\-B | \-\-daemon] +[\-J | \-\-pidfile ] +[\-I | \-\-inactive] +[\-\-perfmgr] +[\-\-perfmgr_sweep_time_s ] +[\-\-prefix_routes_file ] +[\-\-consolidate_ipv6_snm_req] +[\-\-log_prefix ] +[\-\-torus_config ] +[\-v(erbose)] [\-V] [\-D ] [\-d(ebug) ] +[\-h(elp)] [\-?] + +.SH DESCRIPTION +.PP +opensm is an InfiniBand compliant Subnet Manager and Administration, +and runs on top of OpenIB. + +opensm provides an implementation of an InfiniBand Subnet Manager and +Administration. Such a software entity is required to run in order +to initialize the InfiniBand hardware (at least one per +InfiniBand subnet). + +opensm also now contains an experimental version of a performance +manager as well. + +opensm defaults were designed to meet the common case usage on clusters with up to a few hundred nodes.
Thus, in this default mode, opensm will scan the IB +fabric, initialize it, and sweep occasionally for changes. + +opensm attaches to a specific IB port on the local machine and configures only +the fabric connected to it. (If the local machine has other IB ports, +opensm will ignore the fabrics connected to those other ports). If no port is +specified, it will select the first "best" available port. + +opensm can present the available ports and prompt for a port number to +attach to. + +By default, the run is logged to two files: /var/log/messages and /var/log/opensm.log. +The first file will register only general major events, whereas the second +will include details of reported errors. All errors reported in this second +file should be treated as indicators of IB fabric health issues. +(Note that when a fatal and non-recoverable error occurs, opensm will exit.) +Both log files should include the message "SUBNET UP" if opensm was able to +set up the subnet correctly. + +.SH OPTIONS + +.PP +.TP +\fB\-\-version\fR +Prints OpenSM version and exits. +.TP +\fB\-F\fR, \fB\-\-config\fR +The name of the OpenSM config file. When not specified +\fB\%@OPENSM_CONFIG_DIR@/@OPENSM_CONFIG_FILE@\fP will be used (if it exists). +.TP +\fB\-c\fR, \fB\-\-create-config\fR +OpenSM will dump its configuration to the specified file and exit. +This is a way to generate an OpenSM configuration file template. +.TP +\fB\-g\fR, \fB\-\-guid\fR +This option specifies the local port GUID value +with which OpenSM should bind. OpenSM may be +bound to 1 port at a time. +If the GUID given is 0, OpenSM displays a list +of possible port GUIDs and waits for user input. +Without -g, OpenSM tries to use the default port. +.TP +\fB\-l\fR, \fB\-\-lmc\fR +This option specifies the subnet's LMC value. +The number of LIDs assigned to each port is 2^LMC. +The LMC value must be in the range 0-7. +LMC values > 0 allow multiple paths between ports.
+LMC values > 0 should only be used if the subnet +topology actually provides multiple paths between +ports, i.e. multiple interconnects between switches. +Without -l, OpenSM defaults to LMC = 0, which allows +one path between any two ports. +.TP +\fB\-p\fR, \fB\-\-priority\fR +This option specifies the SM\'s PRIORITY. +This will affect the handover cases, where master +is chosen by priority and GUID. Range goes from 0 +(default and lowest priority) to 15 (highest). +.TP +\fB\-\-subnet_prefix\fR +This option specifies the subnet prefix to use +on the fabric. The default prefix is +0xfe80000000000000. +.TP +\fB\-\-smkey\fR +This option specifies the SM\'s SM_Key (64 bits). +This will affect SM authentication. +Note that OpenSM version 3.2.1 and below used the default value '1' +in host byte order; this is fixed now, but you may need this option to +interoperate with old OpenSM running on a little endian machine. +.TP +\fB\-\-sm_sl\fR +This option sets the SL to use for communication with the SM/SA. +Defaults to 0. +.TP +\fB\-r\fR, \fB\-\-reassign_lids\fR +This option causes OpenSM to reassign LIDs to all +end nodes. Specifying -r on a running subnet +may disrupt subnet traffic. +Without -r, OpenSM attempts to preserve existing +LID assignments resolving multiple use of same LID. +.TP +\fB\-R\fR, \fB\-\-routing_engine\fR +This option chooses routing engine(s) to use instead of Min Hop +algorithm (default). Multiple routing engines can be specified +separated by commas so that specific ordering of routing algorithms +will be tried if earlier routing engines fail. If all configured +routing engines fail, OpenSM will always attempt to route with Min Hop +unless 'no_fallback' is included in the list of routing engines. +Supported engines: minhop, updn, dnup, file, ftree, lash, dor, torus-2QoS, +nue, dfsssp, sssp.
+.TP +\fB\-\-do_mesh_analysis\fR +This option enables additional analysis for the lash routing engine to +precondition switch port assignments in regular cartesian meshes which +may reduce the number of SLs required to give a deadlock free routing. +.TP +\fB\-\-lash_start_vl\fR +This option sets the starting VL to use for the lash routing algorithm. +Defaults to 0. +.TP +\fB\-\-nue_max_num_vls\fR +This option sets the maximum number of VLs to use for the Nue routing engine. +Every number greater or equal to 0 is allowed, and the default is 1 to enforce +deadlock-freedom even if QoS is not enabled. If set to 0, then Nue routing will +automatically determine and choose maximum supported by the fabric. And if set +to any integer >= 1, then Nue uses min(max_supported,nue_max_num_vls). +Rule of thumb is: higher nue_max_num_vls results in better path balancing. +.TP +\fB\-A\fR, \fB\-\-ucast_cache\fR +This option enables unicast routing cache and prevents routing +recalculation (which is a heavy task in a large cluster) when +there was no topology change detected during the heavy sweep, or +when the topology change does not require new routing calculation, +e.g. when one or more CAs/RTRs/leaf switches going down, or one or +more of these nodes coming back after being down. +A very common case that is handled by the unicast routing cache +is host reboot, which otherwise would cause two full routing +recalculations: one when the host goes down, and the other when +the host comes back online. +.TP +\fB\-z\fR, \fB\-\-connect_roots\fR +This option enforces routing engines (up/down and +fat-tree) to make connectivity between root switches and in +this way to be fully IBA compliant. In many cases this can +violate "pure" deadlock free algorithm, so use it carefully. +.TP +\fB\-M\fR, \fB\-\-lid_matrix_file\fR +This option specifies the name of the lid matrix dump file +from where switch lid matrices (min hops tables) will be +loaded. 
+.TP +\fB\-U\fR, \fB\-\-lfts_file\fR +This option specifies the name of the LFTs file +from where switch forwarding tables will be loaded when using "file" routing +engine. +.TP +\fB\-S\fR, \fB\-\-sadb_file\fR +This option specifies the name of the SA DB dump file +from where SA database will be loaded. +.TP +\fB\-a\fR, \fB\-\-root_guid_file\fR +Set the root nodes for the Up/Down or Fat-Tree routing +algorithm to the guids provided in the given file (one to a line). +.TP +\fB\-u\fR, \fB\-\-cn_guid_file\fR +Set the compute nodes for the Fat-Tree or DFSSSP/SSSP routing algorithms +to the port GUIDs provided in the given file (one to a line). +.TP +\fB\-G\fR, \fB\-\-io_guid_file\fR +Set the I/O nodes for the Fat-Tree or DFSSSP/SSSP routing algorithms +to the port GUIDs provided in the given file (one to a line). +.br +In the case of Fat-Tree routing: +.br +I/O nodes are non-CN nodes allowed to use up to max_reverse_hops switches +the wrong way around to improve connectivity. +.br +In the case of (DF)SSSP routing: +.br +Providing guids of compute and/or I/O nodes will ensure that paths towards +those nodes are as much separated as possible within their node category, +i.e., I/O traffic will not share the same link if multiple links are available. +.TP +\fB\-\-port\-shifting\fR +This option enables a feature called \fBport shifting\fR. In some +fabrics, particularly cluster environments, routes commonly align and +congest with other routes due to algorithmically unchanging traffic +patterns. This routing option will "shift" routing around in an +attempt to alleviate this problem. +.TP +\fB\-\-scatter\-ports\fR +This option is used to randomize port selection in routing rather than +using a round-robin algorithm (which is the default). Value supplied +with option is used as a random seed. If value is 0, +which is the default, the scatter ports option is disabled. 
+.TP +\fB\-H\fR, \fB\-\-max_reverse_hops\fR +Set the maximum number of reverse hops an I/O node is allowed +to make. A reverse hop is the use of a switch the wrong way around. +.TP +\fB\-m\fR, \fB\-\-ids_guid_file\fR +Name of the map file with set of the IDs which will be used +by Up/Down routing algorithm instead of node GUIDs +(format: per line). +.TP +\fB\-X\fR, \fB\-\-guid_routing_order_file\fR +Set the order port guids will be routed for the MinHop +and Up/Down routing algorithms to the guids provided in the +given file (one to a line). +.TP +\fB\-o\fR, \fB\-\-once\fR +This option causes OpenSM to configure the subnet +once, then exit. Ports remain in the ACTIVE state. +.TP +\fB\-s\fR, \fB\-\-sweep\fR +This option specifies the number of seconds between +subnet sweeps. Specifying -s 0 disables sweeping. +Without -s, OpenSM defaults to a sweep interval of +10 seconds. +.TP +\fB\-t\fR, \fB\-\-timeout\fR +This option specifies the time in milliseconds +used for transaction timeouts. +Timeout values should be > 0. +Without -t, OpenSM defaults to a timeout value of +200 milliseconds. +.TP +\fB\-\-retries\fR +This option specifies the number of retries used +for transactions. +Without --retries, OpenSM defaults to 3 retries +for transactions. +.TP +\fB\-\-maxsmps\fR +This option specifies the number of VL15 SMP MADs +allowed on the wire at any one time. +Specifying \-\-maxsmps 0 allows unlimited outstanding +SMPs. +Without \-\-maxsmps, OpenSM defaults to a maximum of +4 outstanding SMPs. +.TP +\fB\-\-console [off | local | loopback | socket]\fR +This option brings up the OpenSM console (default off). Note, loopback and +socket open a socket which can be connected to WITHOUT CREDENTIALS. Loopback +is safer if access to your SM host is controlled. tcp_wrappers +(hosts.[allow|deny]) is used with loopback and socket. 
loopback and socket +will only be available if OpenSM was built with --enable-console-loopback +(default yes) and --enable-console-socket (default no) respectively. +.TP +\fB\-\-console-port\fR +Specify an alternate telnet port for the socket console (default 10000). +Note that this option only appears if OpenSM was built with +--enable-console-socket. +.TP +\fB\-i\fR, \fB\-\-ignore_guids\fR +This option provides the means to define a set of ports +(by node guid and port number) that will be ignored by the link load +equalization algorithm. +.TP +\fB\-w\fR, \fB\-\-hop_weights_file\fR +This option provides weighting factors per port representing a hop cost in +computing the lid matrix. The file consists of lines containing a switch port +GUID (specified as a 64 bit hex number, with leading 0x), output port number, +and weighting factor. Any port not listed in the file defaults to a weighting +factor of 1. Lines starting with # are comments. Weights affect only the +output route from the port, so many useful configurations will require weights +to be specified in pairs. +.TP +\fB\-O\fR, \fB\-\-port_search_ordering_file\fR +This option tweaks the routing. It is suitable for two cases: +1. When using the DOR routing algorithm. +This option provides a mapping between hypercube dimensions and ports +on a per switch basis for the DOR routing engine. The file consists +of lines containing a switch node GUID (specified as a 64 bit hex +number, with leading 0x) followed by a list of non-zero port numbers, +separated by spaces, one switch per line. The order for the port +numbers is in one to one correspondence to the dimensions. Ports not +listed on a line are assigned to the remaining dimensions, in port +order. Anything after a # is a comment. +2. When using a general routing algorithm. +This option provides the order of the ports that would be chosen for routing, +from each switch rather than searching for an appropriate port from port 1 to N.
+The file consists of lines containing a switch node GUID (specified as a 64 bit +hex number, with leading 0x) followed by a list of non-zero port numbers, +separated by spaces, one switch per line. In case of DOR, the order for the +port numbers is in one to one correspondence to the dimensions. Ports not +listed on a line are assigned to the remaining dimensions, in port +order. Anything after a # is a comment. +.TP +\fB\-O\fR, \fB\-\-dimn_ports_file\fR \fB(DEPRECATED)\fR +This is a deprecated flag. Please use \fB\-\-port_search_ordering_file\fR instead. +This option provides a mapping between hypercube dimensions and ports +on a per switch basis for the DOR routing engine. The file consists +of lines containing a switch node GUID (specified as a 64 bit hex +number, with leading 0x) followed by a list of non-zero port numbers, +separated by spaces, one switch per line. The order for the port +numbers is in one to one correspondence to the dimensions. Ports not +listed on a line are assigned to the remaining dimensions, in port +order. Anything after a # is a comment. +.TP +\fB\-x\fR, \fB\-\-honor_guid2lid\fR +This option forces OpenSM to honor the guid2lid file, +when it comes out of Standby state, if such file exists +under OSM_CACHE_DIR, and is valid. +By default, this is FALSE. +.TP +\fB\-\-dump_files_dir\fR +This option will set the directory to hold the file dumps. +.TP +\fB\-f\fR, \fB\-\-log_file\fR +This option defines the log to be the given file. +By default, the log goes to /var/log/opensm.log. +For the log to go to standard output use -f stdout. +.TP +\fB\-L\fR, \fB\-\-log_limit\fR +This option defines maximal log file size in MB. When +specified the log file will be truncated upon reaching +this limit. +.TP +\fB\-e\fR, \fB\-\-erase_log_file\fR +This option will cause deletion of the log file +(if it already exists). By default, the log file +is accumulative.
+.TP +\fB\-P\fR, \fB\-\-Pconfig\fR +This option defines the optional partition configuration file. +The default name is \fB\%@OPENSM_CONFIG_DIR@/@PARTITION_CONFIG_FILE@\fP. +.TP +\fB\-\-prefix_routes_file\fR +Prefix routes control how the SA responds to path record queries for +off-subnet DGIDs. By default, the SA fails such queries. The +.B PREFIX ROUTES +section below describes the format of the configuration file. +The default path is \fB\%@OPENSM_CONFIG_DIR@/prefix\-routes.conf\fP. +.TP +\fB\-Q\fR, \fB\-\-qos\fR +This option enables QoS setup. It is disabled by default. +.TP +\fB\-Y\fR, \fB\-\-qos_policy_file\fR +This option defines the optional QoS policy file. The default +name is \fB\%@OPENSM_CONFIG_DIR@/@QOS_POLICY_FILE@\fP. See +QoS_management_in_OpenSM.txt in opensm doc for more information on +configuring QoS policy via this file. +.TP +\fB\-\-congestion_control\fR +(EXPERIMENTAL) This option enables congestion control configuration. +It is disabled by default. See config file for congestion control +configuration options. +\fB\-\-cc_key\fR +(EXPERIMENTAL) This option configures the CCkey to use when configuring +congestion control. Note that this option does not configure a new +CCkey into switches and CAs. Defaults to 0. +.TP +\fB\-N\fR, \fB\-\-no_part_enforce\fR \fB(DEPRECATED)\fR +This is a deprecated flag. Please use \fB\-\-part_enforce\fR instead. +This option disables partition enforcement on switch external ports. +.TP +\fB\-Z\fR, \fB\-\-part_enforce\fR [both | in | out | off] +This option indicates the partition enforcement type (for switches). +Enforcement type can be inbound only (in), outbound only (out), +both or disabled (off). Default is both. +.TP +\fB\-W\fR, \fB\-\-allow_both_pkeys\fR +This option indicates whether both full and limited membership on the +same partition can be configured in the PKeyTable. Default is not +to allow both pkeys. 
+.TP +\fB\-y\fR, \fB\-\-stay_on_fatal\fR +This option will cause SM not to exit on fatal initialization +issues: if SM discovers duplicated guids or a 12x link with +lane reversal badly configured. +By default, the SM will exit on these errors. +.TP +\fB\-B\fR, \fB\-\-daemon\fR +Run in daemon mode - OpenSM will run in the background. +.TP +\fB\-J\fR, \fB\-\-pidfile \fR +Makes the SM write its own PID to the specified file when started in daemon +mode. +.TP +\fB\-I\fR, \fB\-\-inactive\fR +Start SM in inactive rather than init SM state. This +option can be used in conjunction with the perfmgr so as to +run a standalone performance manager without SM/SA. However, +this is NOT currently implemented in the performance manager. +.TP +\fB\-\-perfmgr\fR +Enable the perfmgr. Only takes effect if --enable-perfmgr was specified at +configure time. See performance-manager-HOWTO.txt in opensm doc for +more information on running perfmgr. +.TP +\fB\-\-perfmgr_sweep_time_s\fR +Specify the sweep time for the performance manager in seconds +(default is 180 seconds). Only takes +effect if --enable-perfmgr was specified at configure time. +.TP +.BI --consolidate_ipv6_snm_req +Use shared MLID for IPv6 Solicited Node Multicast groups per MGID scope +and P_Key. +.TP +\fB\-\-log_prefix\fR +This option specifies the prefix to the syslog messages from OpenSM. +A suitable prefix can be used to identify the IB subnet in syslog messages +when two or more instances of OpenSM run in a single node to manage multiple +fabrics. For example, in a dual-fabric (or dual-rail) IB cluster, the prefix +for the first fabric could be "mpi" and the other fabric could be "storage". +.TP +\fB\-\-torus_config\fR +This option defines the file name for the extra configuration +information needed for the torus-2QoS routing engine. The default +name is \fB\%@OPENSM_CONFIG_DIR@/@TORUS2QOS_CONF_FILE@\fP +.TP +\fB\-v\fR, \fB\-\-verbose\fR +This option increases the log verbosity level. 
+The -v option may be specified multiple times +to further increase the verbosity level. +See the -D option for more information about +log verbosity. +.TP +\fB\-V\fR +This option sets the maximum verbosity level and +forces log flushing. +The -V option is equivalent to \'-D 0xFF -d 2\'. +See the -D option for more information about +log verbosity. +.TP +\fB\-D\fR +This option sets the log verbosity level. +A flags field must follow the -D option. +A bit set/clear in the flags enables/disables a +specific log level as follows: + + BIT LOG LEVEL ENABLED + ---- ----------------- + 0x01 - ERROR (error messages) + 0x02 - INFO (basic messages, low volume) + 0x04 - VERBOSE (interesting stuff, moderate volume) + 0x08 - DEBUG (diagnostic, high volume) + 0x10 - FUNCS (function entry/exit, very high volume) + 0x20 - FRAMES (dumps all SMP and GMP frames) + 0x40 - ROUTING (dump FDB routing information) + 0x80 - SYS (syslog at LOG_INFO level in addition to OpenSM logging) + +Without -D, OpenSM defaults to ERROR + INFO (0x3). +Specifying -D 0 disables all messages. +Specifying -D 0xFF enables all messages (see -V). +High verbosity levels may require increasing +the transaction timeout with the -t option. +.TP +\fB\-d\fR, \fB\-\-debug\fR +This option specifies a debug option. +These options are not normally needed. +The number following -d selects the debug +option to enable as follows: + + OPT Description + --- ----------------- + -d0 - Ignore other SM nodes + -d1 - Force single threaded dispatching + -d2 - Force log flushing after each log message + -d3 - Disable multicast support +.TP +\fB\-h\fR, \fB\-\-help\fR +Display this usage info then exit. +.TP +\fB\-?\fR +Display this usage info then exit. + +.SH ENVIRONMENT VARIABLES +.PP +The following environment variables control opensm behavior: + +OSM_TMP_DIR - controls the directory in which the temporary files generated by +opensm are created. These files are: opensm-subnet.lst, opensm.fdbs, and +opensm.mcfdbs. 
By default, this directory is /var/log. Note that +\-\-dump_files_dir command line option or dump_files_dir option in +option/config file takes precedence over this environment variable. + +OSM_CACHE_DIR - opensm stores certain data to the disk such that subsequent +runs are consistent. The default directory used is /var/cache/opensm. +The following files are included in it: + + guid2lid - stores the LID range assigned to each GUID + guid2mkey - stores the MKey previously assigned to each GUID + neighbors - stores a map of the GUIDs at either end of each link + in the fabric + +.SH NOTES +.PP +When opensm receives a HUP signal, it starts a new heavy sweep as if a trap was received or a topology change was found. +.PP +Also, SIGUSR1 can be used to trigger a reopen of /var/log/opensm.log for +logrotate purposes. + +.SH PARTITION CONFIGURATION +.PP +The default name of OpenSM partitions configuration file is +\fB\%@OPENSM_CONFIG_DIR@/@PARTITION_CONFIG_FILE@\fP. The default may be changed +by using the --Pconfig (-P) option with OpenSM. + +The default partition will be created by OpenSM unconditionally even +when partition configuration file does not exist or cannot be accessed. + +The default partition has P_Key value 0x7fff. OpenSM\'s port will always +have full membership in default partition. All other end ports will have +full membership if the partition configuration file is not found or cannot +be accessed, or limited membership if the file exists and can be accessed +but there is no rule for the Default partition. + +Effectively, this amounts to the same as if one of the rules +below appeared in the partition configuration file. + +In the case of no rule for the Default partition: + +Default=0x7fff : ALL=limited, SELF=full ; + +In the case of no partition configuration file or file cannot be accessed: + +Default=0x7fff : ALL=full ; + + +File Format + +Comments: + +Line content following a \'#\' character is a comment and is ignored by +the parser.
+ +General file format: + +<Partition Definition>:[<newline>]<Partition Properties>; + + Partition Definition: +.nf + [PartitionName][=PKey][,indx0][,ipoib_bc_flags][,defmember=full|limited] +.fi + + PartitionName - string, will be used with logging. When + omitted, empty string will be used. + PKey - P_Key value for this partition. Only low 15 + bits will be used. When omitted will be + autogenerated. + indx0 - indicates that this pkey should be inserted in + block 0 index 0. + ipoib_bc_flags - used to indicate/specify IPoIB capability of + this partition. + + defmember=full|limited|both - specifies default membership for + port guid list. Default is limited. + + ipoib_bc_flags: + ipoib_flag|[mgroup_flag]* + + ipoib_flag: + ipoib - indicates that this partition may be used for + IPoIB, as a result the IPoIB broadcast group will + be created with the mgroup_flag flags given, + if any. + + Partition Properties: + [<Port list>|<MCast Group>]* | <Port list> + + Port list: + <Port Specifier>[,<Port Specifier>] + + Port Specifier: + <PortGUID>[=[full|limited|both]] + + PortGUID - GUID of partition member EndPort. + Hexadecimal numbers should start from + 0x, decimal numbers are accepted too. + full, limited, both - indicates full and/or limited membership for + this port. When omitted (or unrecognized) + limited membership is assumed. Both + indicates both full and limited membership + for this port. + + MCast Group: + mgid=gid[,mgroup_flag]*<newline> + + - gid specified is verified to be a Multicast + address. IP groups are verified to match + the rate and mtu of the broadcast group. + The P_Key bits of the mgid for IP groups are + verified to either match the P_Key specified + by "Partition Definition" or if they are + 0x0000 the P_Key will be copied into those + bits. + + mgroup_flag: + rate=<val> - specifies rate for this MC group + (default is 3 (10Gbps)) + mtu=<val> - specifies MTU for this MC group + (default is 4 (2048)) + sl=<val> - specifies SL for this MC group + (default is 0) + scope=<val> - specifies scope for this MC group + (default is 2 (link local)). Multiple scope + settings are permitted for a partition.
+ NOTE: This overwrites the scope nibble of the + specified mgid. Furthermore specifying + multiple scope settings will result in + multiple MC groups being created. + Q_Key=<val> - specifies the Q_Key for this MC group + (default: 0x0b1b for IP groups, 0 for other + groups) + WARNING: changing this for the broadcast + group may break IPoIB on client + nodes!! + TClass=<val> - specifies tclass for this MC group + (default is 0) + FlowLabel=<val> - specifies FlowLabel for this MC group + (default is 0) + NOTE: All mgroup_flag flags MUST be separated by comma (,). + +Note that values for rate, mtu, and scope, for both partitions and multicast +groups, should be specified as defined in the IBTA specification (for example, +mtu=4 for 2048). + +There are several useful keywords for PortGUID definition: + + - 'ALL' means all end ports in this subnet. + - 'ALL_CAS' means all Channel Adapter end ports in this subnet. + - 'ALL_SWITCHES' means all Switch end ports in this subnet. + - 'ALL_ROUTERS' means all Router end ports in this subnet. + - 'SELF' means subnet manager's port. + +Empty list means no ports in this partition. + +Notes: + +White space is permitted between delimiters ('=', ',',':',';'). + +PartitionName does not need to be unique; PKey does need to be unique. +If PKey is repeated then those partition configurations will be merged +and first PartitionName will be used (see also next note). + +It is possible to split partition configuration in more than one +definition, but then PKey should be explicitly specified (otherwise +different PKey values will be generated for those definitions).
+ +Examples: + + Default=0x7fff : ALL, SELF=full ; + Default=0x7fff : ALL, ALL_SWITCHES=full, SELF=full ; + + NewPartition , ipoib : 0x123456=full, 0x3456789034=limi, 0x2134af2306 ; + + YetAnotherOne = 0x300 : SELF=full ; + YetAnotherOne = 0x300 : ALL=limited ; + + ShareIO = 0x80 , defmember=full : 0x123451, 0x123452; + # 0x123453, 0x123454 will be limited + ShareIO = 0x80 : 0x123453, 0x123454, 0x123455=full; + # 0x123456, 0x123457 will be limited + ShareIO = 0x80 , defmember=limited : 0x123456, 0x123457, 0x123458=full; + ShareIO = 0x80 , defmember=full : 0x123459, 0x12345a; + ShareIO = 0x80 , defmember=full : 0x12345b, 0x12345c=limited, 0x12345d; + + # multicast groups added to default + Default=0x7fff,ipoib: + mgid=ff12:401b::0707,sl=1 # random IPv4 group + mgid=ff12:601b::16 # MLDv2-capable routers + mgid=ff12:401b::16 # IGMP + mgid=ff12:601b::2 # All routers + mgid=ff12::1,sl=1,Q_Key=0xDEADBEEF,rate=3,mtu=2 # random group + ALL=full; + + +Note: + +The following rule is equivalent to how OpenSM used to run prior to the +partition manager: + + Default=0x7fff,ipoib:ALL=full; + +.SH QOS CONFIGURATION +.PP +There is a set of QoS related low-level configuration parameters. +All these parameter names are prefixed by the "qos_" string. Here is a full +list of these parameters: + + qos_max_vls - The maximum number of VLs that will be on the subnet + qos_high_limit - The limit of High Priority component of VL + Arbitration table (IBA 7.6.9) + qos_vlarb_low - Low priority VL Arbitration table (IBA 7.6.9) + template + qos_vlarb_high - High priority VL Arbitration table (IBA 7.6.9) + template + Both VL arbitration templates are pairs of + VL and weight + qos_sl2vl - SL2VL Mapping table (IBA 7.6.6) template.
It is + a list of VLs corresponding to SLs 0-15 (Note + that VL15 used here means drop this SL) + +Typical default values (hard-coded in OpenSM initialization) are: + + qos_max_vls 15 + qos_high_limit 0 +.nf + qos_vlarb_low 0:0,1:4,2:4,3:4,4:4,5:4,6:4,7:4,8:4,9:4,10:4,11:4,12:4,13:4,14:4 + qos_vlarb_high 0:4,1:0,2:0,3:0,4:0,5:0,6:0,7:0,8:0,9:0,10:0,11:0,12:0,13:0,14:0 +.fi + qos_sl2vl 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 + +The syntax is compatible with the rest of the OpenSM configuration options and +values may be stored in the OpenSM config file (cached options file). + +In addition to the above, we may define separate QoS configuration +parameter sets for various target types. As targets, we currently support +CAs, routers, switch external ports, and switch's enhanced port 0. The +names of such specialized parameters are prefixed by the "qos_<type>_" +string. Here is a full list of the currently supported sets: + + qos_ca_ - QoS configuration parameters set for CAs. + qos_rtr_ - parameters set for routers. + qos_sw0_ - parameters set for switches' port 0. + qos_swe_ - parameters set for switches' external ports. + +Examples: + qos_sw0_max_vls=2 + qos_ca_sl2vl=0,1,2,3,5,5,5,12,12,0, + qos_swe_high_limit=0 + +.SH PREFIX ROUTES +.PP +Prefix routes control how the SA responds to path record queries for +off-subnet DGIDs. By default, the SA fails such queries. +Note that IBA does not specify how the SA should obtain off-subnet path +record information. +The prefix routes configuration is meant as a stop-gap until the +specification is completed. +.PP +Each line in the configuration file is a 64-bit prefix followed by a +64-bit GUID, separated by white space. +The GUID specifies the router port on the local subnet that will +handle the prefix. +Blank lines are ignored, as is anything between a \fB#\fP character +and the end of the line. +The prefix and GUID are both in hex; the leading 0x is optional.
+Either, or both, can be wild-carded by specifying an +asterisk instead of an explicit prefix or GUID. +.PP +When responding to a path record query for an off-subnet DGID, +opensm searches for the first prefix match in the configuration file. +Therefore, the order of the lines in the configuration file is important: +a wild-carded prefix at the beginning of the configuration file renders +all subsequent lines useless. +If there is no match, then opensm fails the query. +It is legal to repeat prefixes in the configuration file, +opensm will return the path to the first available matching router. +A configuration file with a single line where both prefix and GUID +are wild-carded means that a path record query specifying any +off-subnet DGID should return a path to the first available router. +This configuration yields the same behavior formerly achieved by +compiling opensm with -DROUTER_EXP which has been obsoleted. + +.SH MKEY CONFIGURATION +.PP +OpenSM supports configuring a single management key (MKey) for use across +the subnet. + +The following configuration options are available: + + m_key - the 64-bit MKey to be used on the subnet + (IBA 14.2.4) + m_key_protection_level - the numeric value of the MKey ProtectBits + (IBA 14.2.4.1) + m_key_lease_period - the number of seconds a CA will wait for a + response from the SM before resetting the + protection level to 0 (IBA 14.2.4.2). + +OpenSM will configure all ports with the MKey specified by m_key, defaulting +to a value of 0. A m_key value of 0 disables MKey protection on the subnet. +Switches and HCAs with a non-zero MKey will not accept requests to change +their configuration unless the request includes the proper MKey. + +MKey Protection Levels + +MKey protection levels modify how switches and CAs respond to SMPs lacking +a valid MKey. +OpenSM will configure each port's ProtectBits to support the level defined by +the m_key_protection_level parameter. 
If no parameter is specified, OpenSM +defaults to operating at protection level 0. + +There are currently 4 protection levels defined by the IBA: + + 0 - Queries return valid data, including MKey. Configuration changes + are not allowed unless the request contains a valid MKey. + 1 - Like level 0, but the MKey is set to 0 (0x00000000) in queries, + unless the request contains a valid MKey. + 2 - Neither queries nor configuration changes are allowed, unless the + request contains a valid MKey. + 3 - Identical to 2. Maintained for backwards compatibility. + +MKey Lease Period + +InfiniBand supports a MKey lease timeout, which is intended to allow +administrators or a new SM to recover/reset lost MKeys on a fabric. + +If MKeys are enabled on the subnet and a switch or CA receives a request that +requires a valid MKey but does not contain one, it warns the SM by sending a trap +(Bad M_Key, Trap 256). If the MKey lease period is non-zero, it also starts a +countdown timer for the time specified by the lease period. +If a SM (or other agent) responds with the correct MKey, the timer is stopped +and reset. Should the timer reach zero, the switch or CA will reset its MKey +protection level to 0, exposing the MKey and allowing recovery. + +OpenSM will initialize all ports to use a mkey lease period of the number of +seconds specified in the config file. If no mkey_lease_period is specified, +a default of 0 will be used. + +OpenSM normally quickly responds to all Bad_M_Key traps, resetting the lease +timers. Additionally, OpenSM's subnet sweeps will also cancel +any running timers. For maximum protection against accidentally-exposed MKeys, +the MKey lease time should be a few multiples of the subnet sweep time. +If OpenSM detects at startup that your sweep interval is greater than your +MKey lease period, it will reset the lease period to be greater than the +sweep interval. 
Similarly, if sweeping is disabled at startup, it will be +re-enabled with an interval less than the Mkey lease period. + +If OpenSM is required to recover a subnet for which it is missing mkeys, +it must do so one switch level at a time. As such, the total time to +recover the subnet may be as long as the mkey lease period multiplied by +the maximum number of hops between the SM and an endpoint, plus one. + +MKey Effects on Diagnostic Utilities + +Setting a MKey may have a detrimental effect on diagnostic software run on +the subnet, unless your diagnostic software is able to retrieve MKeys from the +SA or can be explicitly configured with the proper MKey. This is particularly +true at protection level 2, where CAs will ignore queries for management +information that do not contain the proper MKey. + +.SH ROUTING +.PP +OpenSM now offers ten routing engines: + +1. Min Hop Algorithm - based on the minimum hops to each node where the +path length is optimized. + +2. UPDN Unicast routing algorithm - also based on the minimum hops to each +node, but it is constrained to ranking rules. This algorithm should be chosen +if the subnet is not a pure Fat Tree, and deadlock may occur due to a +loop in the subnet. + +3. DNUP Unicast routing algorithm - similar to UPDN but allows routing in +fabrics which have some CA nodes attached closer to the roots than some switch +nodes. + +4. Fat Tree Unicast routing algorithm - this algorithm optimizes routing +for congestion-free "shift" communication pattern. +It should be chosen if a subnet is a symmetrical or almost symmetrical +fat-tree of various types, not just K-ary-N-Trees: non-constant K, not +fully staffed, any Constant Bisectional Bandwidth (CBB) ratio. +Similar to UPDN, Fat Tree routing is constrained to ranking rules. + +5. LASH unicast routing algorithm - uses InfiniBand virtual layers +(SL) to provide deadlock-free shortest-path routing while also +distributing the paths between layers. 
LASH is an alternative +deadlock-free topology-agnostic routing algorithm to the non-minimal +UPDN algorithm avoiding the use of a potentially congested root node. + +6. DOR Unicast routing algorithm - based on the Min Hop algorithm, but +avoids port equalization except for redundant links between the same +two switches. This provides deadlock free routes for hypercubes when +the fabric is cabled as a hypercube and for meshes when cabled as a +mesh (see details below). + +7. Torus-2QoS unicast routing algorithm - a DOR-based routing algorithm +specialized for 2D/3D torus topologies. Torus-2QoS provides deadlock-free +routing while supporting two quality of service (QoS) levels. In addition +it is able to route around multiple failed fabric links or a single failed +fabric switch without introducing deadlocks, and without changing path SL +values granted before the failure. + +8. DFSSSP unicast routing algorithm - a deadlock-free +single-source-shortest-path routing, which uses the SSSP algorithm +(see algorithm 9.) as the base to optimize link utilization and uses +InfiniBand virtual lanes (SL) to provide deadlock-freedom. + +9. SSSP unicast routing algorithm - a single-source-shortest-path routing +algorithm, which globally balances the number of routes per link to +optimize link utilization. This routing algorithm has no restrictions +in terms of the underlying topology. + +10. Nue unicast routing algorithm - a 100%-applicable and deadlock-free routing +which can be used for any arbitrary or faulty network topology and any number +of virtual lanes (this includes the absence of VLs as well). Paths are globally +balanced w.r.t the number of routes per link, and are kept as short as possible +while enforcing deadlock-freedom within the VL constraint. + +OpenSM also supports a file method which +can load routes from a table. See \'Modular Routing Engine\' for more +information on this. + +The basic routing algorithm is comprised of two stages: + +1. 
MinHop matrix calculation + How many hops are required to get from each port to each LID ? + The algorithm to fill these tables is different if you run standard +(min hop) or Up/Down. + For standard routing, a "relaxation" algorithm is used to propagate +min hop from every destination LID through neighbor switches + For Up/Down routing, a BFS from every target is used. The BFS tracks link +direction (up or down) and avoid steps that will perform up after a down +step was used. + +2. Once MinHop matrices exist, each switch is visited and for each target LID a +decision is made as to what port should be used to get to that LID. + This step is common to standard and Up/Down routing. Each port has a +counter counting the number of target LIDs going through it. + When there are multiple alternative ports with same MinHop to a LID, +the one with less previously assigned LIDs is selected. + If LMC > 0, more checks are added: Within each group of LIDs assigned to +same target port, + a. use only ports which have same MinHop + b. first prefer the ones that go to different systemImageGuid (then +the previous LID of the same LMC group) + c. if none - prefer those which go through another NodeGuid + d. fall back to the number of paths method (if all go to same node). + +Effect of Topology Changes + +OpenSM will preserve existing routing in any case where there is no change in +the fabric switches unless the -r (--reassign_lids) option is specified. + +-r +.br +--reassign_lids + This option causes OpenSM to reassign LIDs to all + end nodes. Specifying -r on a running subnet + may disrupt subnet traffic. + Without -r, OpenSM attempts to preserve existing + LID assignments resolving multiple use of same LID. + +If a link is added or removed, OpenSM does not recalculate +the routes that do not have to change. A route has to change +if the port is no longer UP or no longer the MinHop. When routing changes +are performed, the same algorithm for balancing the routes is invoked. 
+ +In the case of using the file based routing, any topology changes are +currently ignored. The 'file' routing engine just loads the LFTs from the file +specified, with no reaction to real topology. Obviously, this will not be able +to recheck LIDs (by GUID) for disconnected nodes, and LFTs for non-existent +switches will be skipped. Multicast is not affected by 'file' routing engine +(this uses min hop tables). + + +Min Hop Algorithm + +The Min Hop algorithm is invoked by default if no routing algorithm is +specified. It can also be invoked by specifying '-R minhop'. + +The Min Hop algorithm is divided into two stages: computation of +min-hop tables on every switch and LFT output port assignment. Link +subscription is also equalized with the ability to override based on +port GUID. The latter is supplied by: + +-i +.br +\-\-ignore_guids + This option provides the means to define a set of ports + (by guid) that will be ignored by the link load + equalization algorithm. Note that only endports (CA, + switch port 0, and router ports) and not switch external + ports are supported. + +LMC awareness routes based on (remote) system or switch basis. + + +Purpose of UPDN Algorithm + +The UPDN algorithm is designed to prevent deadlocks from occurring in loops +of the subnet. A loop-deadlock is a situation in which it is no longer +possible to send data between any two hosts connected through the loop. As +such, the UPDN routing algorithm should be used if the subnet is not a pure +Fat Tree, and one of its loops may experience a deadlock (due, for example, +to high pressure). + +The UPDN algorithm is based on the following main stages: + +1. Auto-detect root nodes - based on the CA hop length from any switch in +the subnet, a statistical histogram is built for each switch (hop num vs +number of occurrences). If the histogram reflects a specific column (higher +than others) for a certain node, then it is marked as a root node.
Since +the algorithm is statistical, it may not find any root nodes. The list of +the root nodes found by this auto-detect stage is used by the ranking +process stage. + + Note 1: The user can override the node list manually. + Note 2: If this stage cannot find any root nodes, and the user did + not specify a guid list file, OpenSM defaults back to the + Min Hop routing algorithm. + +2. Ranking process - All root switch nodes (found in stage 1) are assigned +a rank of 0. Using the BFS algorithm, the rest of the switch nodes in the +subnet are ranked incrementally. This ranking aids in the process of enforcing +rules that ensure loop-free paths. + +3. Min Hop Table setting - after ranking is done, a BFS algorithm is run from +each (CA or switch) node in the subnet. During the BFS process, the FDB table +of each switch node traversed by BFS is updated, in reference to the starting +node, based on the ranking rules and guid values. + +At the end of the process, the updated FDB tables ensure loop-free paths +through the subnet. + +Note: Up/Down routing does not allow LID routing communication between +switches that are located inside spine "switch systems". +The reason is that there is no way to allow a LID route between them +that does not break the Up/Down rule. +One ramification of this is that you cannot run SM on switches other +than the leaf switches of the fabric. + + +UPDN Algorithm Usage + +Activation through OpenSM + +Use '-R updn' option (instead of old '-u') to activate the UPDN algorithm. +Use '-a ' for adding an UPDN guid file that contains the +root nodes for ranking. +If the `-a' option is not used, OpenSM uses its auto-detect root nodes +algorithm. + +Notes on the guid list file: + +1. A valid guid file specifies one guid in each line. Lines with an invalid +format will be discarded. +.br +2. The user should specify the root switch guids. 
However, it is also +possible to specify CA guids; OpenSM will use the guid of the switch (if +it exists) that connects the CA to the subnet as a root node. + +Purpose of DNUP Algorithm + +The DNUP algorithm is designed to serve a similar purpose to UPDN. However +it is intended to work in network topologies which are unsuited to +UPDN due to nodes being connected closer to the roots than some of +the switches. An example would be a fabric which contains nodes and +uplinks connected to the same switch. The operation of DNUP is the +same as UPDN with the exception of the ranking process. In DNUP all +switch nodes are ranked based solely on their distance from CA Nodes, +all switch nodes directly connected to at least one CA are assigned a +value of 1; all other switch nodes are assigned a value of one more than +the minimum rank of all neighbor switch nodes. + +Fat-tree Routing Algorithm + +The fat-tree algorithm optimizes routing for "shift" communication pattern. +It should be chosen if a subnet is a symmetrical or almost symmetrical +fat-tree of various types. +It supports not just K-ary-N-Trees, by handling for non-constant K, +cases where not all leafs (CAs) are present, any CBB ratio. +As in UPDN, fat-tree also prevents credit-loop-deadlocks. + +If the root guid file is not provided ('-a' or '--root_guid_file' options), +the topology has to be pure fat-tree that complies with the following rules: + - Tree rank should be between two and eight (inclusively) + - Switches of the same rank should have the same number + of UP-going port groups*, unless they are root switches, + in which case they shouldn't have UP-going ports at all. + - Switches of the same rank should have the same number + of DOWN-going port groups, unless they are leaf switches. + - Switches of the same rank should have the same number + of ports in each UP-going port group. + - Switches of the same rank should have the same number + of ports in each DOWN-going port group.
+ - All the CAs have to be at the same tree level (rank). + +If the root guid file is provided, the topology doesn't have to be pure +fat-tree, and it should only comply with the following rules: + - Tree rank should be between two and eight (inclusively) + - All the Compute Nodes** have to be at the same tree level (rank). + Note that non-compute node CAs are allowed here to be at different + tree ranks. + +* ports that are connected to the same remote switch are referenced as +\'port group\'. + +** list of compute nodes (CNs) can be specified by \'-u\' or \'--cn_guid_file\' +OpenSM options. + +Topologies that do not comply cause a fallback to min hop routing. +Note that this can also occur on link failures which cause the topology +to no longer be "pure" fat-tree. + +Note that although fat-tree algorithm supports trees with non-integer CBB +ratio, the routing will not be as balanced as in case of integer CBB ratio. +In addition to this, although the algorithm allows leaf switches to have any +number of CAs, the closer the tree is to be fully populated, the more +effective the "shift" communication pattern will be. +In general, even if the root list is provided, the closer the topology to a +pure and symmetrical fat-tree, the more optimal the routing will be. + +The algorithm also dumps compute node ordering file (opensm-ftree-ca-order.dump) +in the same directory where the OpenSM log resides. This ordering file provides +the CN order that may be used to create efficient communication pattern, that +will match the routing tables. + +Routing between non-CN nodes + +The use of the cn_guid_file option allows non-CN nodes to be located on different levels in the fat tree. +In such case, it is not guaranteed that the Fat Tree algorithm will route between two non-CN nodes. +To solve this problem, a list of non-CN nodes can be specified by \'-G\' or \'--io_guid_file\' option. 
+These nodes will be allowed to use switches the wrong way round a specific number of times (specified by \'-H\' or \'--max_reverse_hops\'). +With the proper max_reverse_hops and io_guid_file values, you can ensure full connectivity in the Fat Tree. + +Please note that using max_reverse_hops creates routes that use the switch in a counter-stream way. +This option should never be used to connect nodes with high bandwidth traffic between them ! It should only be used +to allow connectivity for HA purposes or similar. +Also having routes the other way around can in theory cause credit loops. + +Use these options with extreme care ! + +Activation through OpenSM + +Use '-R ftree' option to activate the fat-tree algorithm. +Use '-a <root_guid_file>' to provide root nodes for ranking. If the `-a' option +is not used, routing algorithm will detect roots automatically. +Use '-u <cn_guid_file>' to provide the list of compute nodes. If the `-u' option +is not used, all the CAs are considered as compute nodes. + +Note: LMC > 0 is not supported by fat-tree routing. If this is +specified, the default routing algorithm is invoked instead. + + +LASH Routing Algorithm + +LASH is an acronym for LAyered SHortest Path Routing. It is a +deterministic shortest path routing algorithm that enables topology +agnostic deadlock-free routing within communication networks. + +When computing the routing function, LASH analyzes the network +topology for the shortest-path routes between all pairs of sources / +destinations and groups these paths into virtual layers in such a way +as to avoid deadlock. + +Note LASH analyzes routes and ensures deadlock freedom between switch +pairs. The link between HCA and switch does not need virtual +layers as deadlock will not arise between switch and HCA. + +In more detail, the algorithm works as follows: + +1) LASH determines the shortest-path between all pairs of source / +destination switches.
Note, LASH ensures the same SL is used for all +SRC/DST - DST/SRC pairs and there is no guarantee that the return +path for a given DST/SRC will be the reverse of the route SRC/DST. + +2) LASH then begins an SL assignment process where a route is assigned +to a layer (SL) if the addition of that route does not cause deadlock +within that layer. This is achieved by maintaining and analysing a +channel dependency graph for each layer. Once the potential addition +of a path could lead to deadlock, LASH opens a new layer and continues +the process. + +3) Once this stage has been completed, it is highly likely that the +first layers processed will contain more paths than the latter ones. +To better balance the use of layers, LASH moves paths from one layer +to another so that the number of paths in each layer averages out. + +Note, the implementation of LASH in opensm attempts to use as few layers +as possible. This number can be less than the number of actual layers +available. + +In general LASH is a very flexible algorithm. It can, for example, +reduce to Dimension Order Routing in certain topologies, it is topology +agnostic and fares well in the face of faults. + +It has been shown that for both regular and irregular topologies, LASH +outperforms Up/Down. The reason for this is that LASH distributes the +traffic more evenly through a network, avoiding the bottleneck issues +related to a root node and always routes shortest-path. + +The algorithm was developed by Simula Research Laboratory. + + +Use '-R lash -Q ' option to activate the LASH algorithm. + +Note: QoS support has to be turned on in order that SL/VL mappings are +used. + +Note: LMC > 0 is not supported by the LASH routing. If this is +specified, the default routing algorithm is invoked instead. + +For open regular cartesian meshes the DOR algorithm is the ideal +routing algorithm. For toroidal meshes on the other hand there +are routing loops that can cause deadlocks. 
LASH can be used to +route these cases. The performance of LASH can be improved by +preconditioning the mesh in cases where there are multiple links +connecting switches and also in cases where the switches are not +cabled consistently. An option exists for LASH to do this. To +invoke this use '-R lash -Q --do_mesh_analysis'. This will +add an additional phase that analyses the mesh to try to determine +the dimension and size of a mesh. If it determines that the mesh +looks like an open or closed cartesian mesh it reorders the ports +in dimension order before the rest of the LASH algorithm runs. + +DOR Routing Algorithm + +The Dimension Order Routing algorithm is based on the Min Hop +algorithm and so uses shortest paths. Instead of spreading traffic +out across different paths with the same shortest distance, it chooses +among the available shortest paths based on an ordering of dimensions. +Each port must be consistently cabled to represent a hypercube +dimension or a mesh dimension. Alternatively, the -O option can be +used to assign a custom mapping between the ports on a given switch, +and the associated dimension. Paths are grown from a destination back +to a source using the lowest dimension (port) of available paths at +each step. This provides the ordering necessary to avoid deadlock. +When there are multiple links between any two switches, they still +represent only one dimension and traffic is balanced across them +unless port equalization is turned off. In the case of hypercubes, +the same port must be used throughout the fabric to represent the +hypercube dimension and match on both ends of the cable, or the -O +option used to accomplish the alignment. In the case of meshes, the +dimension should consistently use the same pair of ports, one port on +one end of the cable, and the other port on the other end, continuing +along the mesh dimension, or the -O option used as an override. + +Use '-R dor' option to activate the DOR algorithm. 
+ +DFSSSP and SSSP Routing Algorithm + +The (Deadlock-Free) Single-Source-Shortest-Path routing algorithm is +designed to optimize link utilization thru global balancing of routes, +while supporting arbitrary topologies. The DFSSSP routing algorithm +uses InfiniBand virtual lanes (SL) to provide deadlock-freedom. + +The DFSSSP algorithm consists of five major steps: +.br +1) It discovers the subnet and models the subnet as a directed +multigraph in which each node represents a node of the physical +network and each edge represents one direction of the full-duplex +links used to connect the nodes. +.br +2) A loop, which iterates over all CA and switches of the subnet, will +perform three steps to generate the linear forwarding tables for +each switch: +.br +2.1) use Dijkstra's algorithm to find the shortest path from all +nodes to the current selected destination; +.br +2.2) update the edge weights in the graph, i.e. add the number of +routes, which use a link to reach the destination, +to the link/edge; +.br +2.3) update the LFT of each switch with the outgoing port which was +used in the current step to route the traffic to the +destination node. +.br +3) After the number of available virtual lanes or layers in the subnet +is detected and a channel dependency graph is initialized for each +layer, the algorithm will put each possible route of the subnet into +the first layer. +.br +4) A loop iterates over all channel dependency graphs (CDG) and performs +the following substeps: +.br +4.1) search for a cycle in the current CDG; +.br +4.2) when a cycle is found, i.e. a possible deadlock is present, +one edge is selected and all routes, which induced this edge, +are moved to the "next higher" virtual layer (CDG[i+1]); +.br +4.3) the cycle search is continued until all cycles are broken and +routes are moved "up". 
+.br +5) When the number of needed layers does not exceed the number of +available SL/VL to remove all cycles in all CDGs, the routing is +deadlock-free and a relation table is generated, which contains +the assignment of routes from source to destination to a SL. + +Note on SSSP: +.br +This algorithm does not perform the steps 3)-5) and can not be +considered to be deadlock-free for all topologies. But on the one +hand, you can choose this algorithm for really large networks +(5,000+ CAs and deadlock-free by design) to reduce +the runtime of the algorithm. On the other hand, you might use +the SSSP routing algorithm as an alternative, when all deadlock-free +routing algorithms fail to route the network for whatever reason. +In the last case, SSSP was designed to deliver an equal or higher +bandwidth due to better congestion avoidance than the Min Hop +routing algorithm. + +Notes for usage: +.br +a) running DFSSSP: '-R dfsssp -Q' +.br +a.1) QoS has to be configured to equally spread the load on the +available SL or virtual lanes +.br +a.2) applications must perform a path record query to get path SL for +each route, which the application will use to transmit packets +.br +b) running SSSP: '-R sssp' +.br +c) both algorithms support LMC > 0 + +Hints for optimizing I/O traffic: +.br +Having more nodes (I/O and compute) connected to a switch than incoming links +can result in a 'bad' routing of the I/O traffic as long as (DF)SSSP routing +is not aware of the dedicated I/O nodes, i.e., in the following network +configuration CN1-CN3 might send all I/O traffic via Link2 to IO1,IO2: + + CN1 Link1 IO1 +.br + \\ /----\\ / +.br + CN2 -- Switch1 Switch2 -- CN4 +.br + / \\----/ \\ +.br + CN3 Link2 IO2 + +To prevent this from happening (DF)SSSP can use both the compute node guid +file and the I/O guid file specified by the \'-u\' or \'--cn_guid_file\' and +\'-G\' or \'--io_guid_file\' options (similar to the Fat-Tree routing).
+This ensures that traffic towards compute nodes and I/O nodes is balanced +separately and therefore distributed as much as possible across the available +links. Port GUIDs, as listed by ibstat, must be specified (not Node GUIDs). +.br +The priority for the optimization is as follows: +.br + compute nodes -> I/O nodes -> other nodes +.br +Possible use case scenarios: +.br +a) neither \'-u\' nor \'-G\' are specified: all nodes are treated as \'other nodes\' +and therefore balanced equally; +.br +b) \'-G\' is specified: traffic towards I/O nodes will be balanced optimally; +.br +c) the system has three node types, such as login/admin, compute and I/O, +but the balancing focus should be I/O, then one has to use \'-u\' and \'-G\' +with I/O guids listed in cn_guid_file and compute node guids listed in +io_guid_file; +.br +d) ... + +Torus-2QoS Routing Algorithm + +Torus-2QoS is a routing algorithm designed for large-scale 2D/3D torus fabrics; +see torus-2QoS(8) for full documentation. + +Use '-R torus-2QoS -Q' or '-R torus-2QoS,no_fallback -Q' +to activate the torus-2QoS algorithm. + +Nue Routing Algorithm + +Use either `-R nue' or `-R nue -Q --nue_max_num_vls <int>' to activate Nue. + +Note: if `--nue_max_num_vls' is specified and unequal to 1, then QoS support +must be turned on, so that SL2VL mappings are valid and applications comply with +suggested SLs to avoid credit-loops. For more details on QoS and Nue see below. + +The implementation of Nue routing for OpenSM is a 100%-applicable, balanced, and +deadlock-free unicast routing engine (which also configures multicast tables, +see 'Note on multicast' below).
The key points of this algorithm are the +following: + - 100% fault-tolerant, oblivious routing strategy + - topology-agnostic, i.e., applicable to every topology (no matter if topology + is regular, irregular after faults, or random) + - 100% deadlock-free routing within the resource limits (i.e., it never + exceeds the given number of available virtual lanes, and it does not + necessarily require virtual lanes) for every topology + - very good path balancing and therefore high throughput (even better when + using METIS, see notes below) + - QoS (via SLs/VLs) + deadlock-freedom can be combined (since both rely on + VLs), e.g., using VL0-3 for Nue's deadlock-freedom (and 1. QoS level) and + VL4-7 as second QoS level + - forwarding tables are fast to calculate: O(n^2 * log n), however slightly + slower compared to topology-aware routings (for obvious reasons), and + - the path-to-VL mapping only depends on the destination, which may be useful + for scalable, efficient path resolution and caching mechanisms. +.br +From a very high level perspective, Nue routing is similar to DFSSSP (see above) +in the sense that both use Dijkstra and edge weight updates for path balancing, +and paths are mapped to virtual layers assuming a 1:1 mapping of SL2VL tables. +However, the fundamental difference is that Nue routing doesn't perform the path +calculation on the graph representing the real fabric, and instead routes +directly within the channel dependency graph. This approach allows Nue routing +to place routing restrictions (to avoid any credit-loops) in an on-demand +manner, which overcomes the problem of all other good VL-based algorithms. +Meaning, the competitors cannot control or limit the use of VLs, and might run +out of them and have to give up. 
On the flip side, Nue may have to use detours +for a few routes, and hence cannot really be considered "shortest-path" routing, +because it is impossible to accomplish deadlock-free, shortest-path routing with +a limited number of available virtual lanes for arbitrary network topologies. + +Note on the use of METIS library with Nue: +.br +Nue routing may have to separate the LIDs into multiple subsets, one for every +virtual layer, if multiple layers are used. Nue has two options to perform this +partitioning (not to be confused with IB partitions); the first is a fairly +simple semi-random assignment of LIDs to layers/subsets, and the second +partitioning uses the METIS library to partition the network graph into k +approximately equal sized parts. The latter approach has shown better results +in terms of path balancing and avoidance of using fallback paths, and hence +it is HIGHLY advised to install/use the METIS library with OpenSM (enforced +via `--enable-metis' configure flag when building OpenSM). For the rare case, +that METIS isn't packaged with the Linux distro, here is a link to the official +website to download and install METIS 5.1.0 manually: +.br + http://glaros.dtc.umn.edu/gkhome/metis/metis/overview +.br +OpenSM's configure script also provides options in case METIS header and library +aren't found in the default path. + +Runtime options for Nue: +.br +The behavior of Nue routing can be directly influenced by the osm.conf parameter +(which is also available as command line option): + - nue_max_num_vls: controls/limits the number of virtual lanes/layers which + Nue is allowed to use (detailed explanation in osm.conf file). +.br +Furthermore, Nue supports TRUE and FALSE settings of avoid_throttled_links, +use_ucast_cache, and qos (more on this hereafter); and lmc > 0. + +Notes on Quality of Service (QoS): +.br +The advantage of Nue is that it works with AND without QoS being enabled, i.e., +the usage of SLs/VLs for deadlock-freedom can be avoided.
Here are the three +possible usage scenarios: + - neither setting `--nue_max_num_vls <int>' nor `-Q': Nue assumes that only 1 + virtual layer (identical to physical network; or OperVLs equal to VL0) is + usable and all paths are to be calculated within this one layer. Hence, + there is no need for special SL2VL mappings in the network and the use of + specific SLs by applications. + - setting `-Q' but not `--nue_max_num_vls <int>': This combination works like + the previous one, meaning the SL returned for path record requests is not + defined by Nue, since all paths are deadlock-free without using VLs. + However, any separate QoS settings may influence the SL returned to + applications. + - setting `-Q --nue_max_num_vls <int>' with int != 1: In this configuration, + applications have to query and obey the SL for path records as returned + by Nue because otherwise the deadlock-freedom cannot be guaranteed + anymore. Furthermore, errors in the fabric may require applications to + repath to avoid message deadlocks. Since Nue operates on virtual layers, + admins should configure the SL2VL mapping tables in a homogeneous 1:1 + manner across the entire subnet to separate the layers. +.br +As an additional note, using more VLs for Nue usually improves the overall +network throughput, so there are trade offs admins may have to consider when +configuring the subnet manager with Nue routing. + +Note on multicast: +.br +The Nue routing engine configures multicast forwarding tables by utilizing a +spanning tree calculation rooted at a subnet switch suggested by OpenSM. This +spanning tree for a mcast group will try to use the least overloaded links +(w.r.t the ucast paths-per-link metric/weight) in the fabric. However, Nue +routing currently does not guarantee deadlock-freedom for the set of multicast +routes on all topologies, nor for the combination of deadlock-free unicast +routes with additional multicast routes.
Assuming, for a given topology the +calculated mcast routes are dl-free, then an admin may fix the latter problem +by separating the VLs, e.g., using VL0-6 for unicast routing by specifying +`--nue_max_num_vls 7' and utilizing VL7 for multicast. + + +Routing References + +To learn more about deadlock-free routing, see the article +"Deadlock Free Message Routing in Multiprocessor Interconnection Networks" +by William J Dally and Charles L Seitz (1985). + +To learn more about the up/down algorithm, see the article +"Effective Strategy to Compute Forwarding Tables for InfiniBand Networks" +by Jose Carlos Sancho, Antonio Robles, and Jose Duato at the +Universidad Politecnica de Valencia. + +To learn more about LASH and the flexibility behind it, the requirement +for layers, performance comparisons to other algorithms, see the +following articles: + +"Layered Routing in Irregular Networks", Lysne et al, IEEE +Transactions on Parallel and Distributed Systems, VOL.16, No12, +December 2005. + +"Routing for the ASI Fabric Manager", Solheim et al. IEEE +Communications Magazine, Vol.44, No.7, July 2006. + +"Layered Shortest Path (LASH) Routing in Irregular System Area +Networks", Skeie et al. IEEE Computer Society Communication +Architecture for Clusters 2002. + +To learn more about the DFSSSP and SSSP routing algorithm, +see the articles: +.br +J. Domke, T. Hoefler and W. Nagel: Deadlock-Free Oblivious Routing +for Arbitrary Topologies, In Proceedings of the 25th IEEE International +Parallel & Distributed Processing Symposium (IPDPS 2011) +.br +T. Hoefler, T. Schneider and A. Lumsdaine: Optimized Routing for +Large-Scale InfiniBand Networks, In 17th Annual IEEE Symposium on High +Performance Interconnects (HOTI 2009) + +To learn more about the Nue routing algorithm, see the article "Routing on the +Dependency Graph: A New Approach to Deadlock-Free High-Performance Routing" by +J. Domke, T. Hoefler and S. Matsuoka (published in HPDC'16). 
+ +Modular Routing Engine + +Modular routing engine structure allows for the ease of +"plugging" new routing modules. + +Currently, only unicast callbacks are supported. Multicast +can be added later. + +One existing routing module is up-down "updn", which may be +activated with '-R updn' option (instead of old '-u'). + +General usage is: +$ opensm -R 'module-name' + +There is also a trivial routing module which is able +to load LFT tables from a file. + +Main features: + + - this will load switch LFTs and/or LID matrices (min hops tables) + - this will load switch LFTs according to the path entries introduced + in the file + - no additional checks will be performed (such as "is port connected", + etc.) + - in case when fabric LIDs were changed this will try to reconstruct + LFTs correctly if endport GUIDs are represented in the file + (in order to disable this, GUIDs may be removed from the file + or zeroed) + +The file format is compatible with output of 'ibroute' util and for +whole fabric can be generated with dump_lfts.sh script. + +To activate file based routing module, use: + + opensm -R file -U /path/to/lfts_file + +If the lfts_file is not found or is in error, the default routing +algorithm is utilized. + +The ability to dump switch lid matrices (aka min hops tables) to file and +later to load these is also supported. + +The usage is similar to unicast forwarding tables loading from a lfts +file (introduced by 'file' routing engine), but new lid matrix file +name should be specified by -M or --lid_matrix_file option. For example: + + opensm -R file -M ./opensm-lid-matrix.dump + +The dump file is named \'opensm-lid-matrix.dump\' and will be generated +in standard opensm dump directory (/var/log by default) when +OSM_LOG_ROUTING logging flag is set. + +When routing engine 'file' is activated, but the lfts file is not specified +or cannot be opened, the default lid matrix algorithm will be used.
+ +There is also a switch forwarding tables dumper which generates +a file compatible with dump_lfts.sh output. This file can be used +as input for forwarding tables loading by 'file' routing engine. +Both or one of options -U and -M can be specified together with \'-R file\'. + +.SH PER MODULE LOGGING CONFIGURATION +.PP +To enable per module logging, configure per_module_logging_file to +the per module logging config file name in the opensm options +file. To disable, configure per_module_logging_file to (null) +there. + +The per module logging config file format is a set of lines with module +name and logging level as follows: + + <module name><separator><logging level> + + <module name> is the file name including .c + <separator> is either = , space, or tab + <logging level> is the same levels as used in the coarse/overall + logging as follows: + + BIT LOG LEVEL ENABLED + ---- ----------------- + 0x01 - ERROR (error messages) + 0x02 - INFO (basic messages, low volume) + 0x04 - VERBOSE (interesting stuff, moderate volume) + 0x08 - DEBUG (diagnostic, high volume) + 0x10 - FUNCS (function entry/exit, very high volume) + 0x20 - FRAMES (dumps all SMP and GMP frames) + 0x40 - ROUTING (dump FDB routing information) + 0x80 - SYS (syslog at LOG_INFO level in addition to OpenSM logging) + +.SH FILES +.TP +.B @OPENSM_CONFIG_DIR@/@OPENSM_CONFIG_FILE@ +default OpenSM config file. + +.TP +.B @OPENSM_CONFIG_DIR@/@NODENAMEMAPFILE@ +default node name map file. See ibnetdiscover for more information on format.
+ +.TP +.B @OPENSM_CONFIG_DIR@/@PARTITION_CONFIG_FILE@ +default partition config file + +.TP +.B @OPENSM_CONFIG_DIR@/@QOS_POLICY_FILE@ +default QOS policy config file + +.TP +.B @OPENSM_CONFIG_DIR@/@PREFIX_ROUTES_FILE@ +default prefix routes file + +.TP +.B @OPENSM_CONFIG_DIR@/@PER_MOD_LOGGING_FILE@ +default per module logging config file + +.TP +.B @OPENSM_CONFIG_DIR@/@TORUS2QOS_CONF_FILE@ +default torus-2QoS config file + +.SH AUTHORS +.TP +Hal Rosenstock +.RI < hal@mellanox.com > +.TP +Sasha Khapyorsky +.RI < sashak@voltaire.com > +.TP +Eitan Zahavi +.RI < eitan@mellanox.co.il > +.TP +Yevgeny Kliteynik +.RI < kliteyn@mellanox.co.il > +.TP +Thomas Sodring +.RI < tsodring@simula.no > +.TP +Ira Weiny +.RI < weiny2@llnl.gov > +.TP +Dale Purdy +.RI < purdy@sgi.com > + +.SH SEE ALSO +torus-2QoS(8), torus-2QoS.conf(5). diff --git a/man/osmtest.8 b/man/osmtest.8 new file mode 100644 index 0000000..ed848b8 --- /dev/null +++ b/man/osmtest.8 @@ -0,0 +1,192 @@ +.TH OSMTEST 8 "March 9, 2013" "OpenIB" "OpenIB Management" + +.SH NAME +osmtest \- InfiniBand subnet manager and administration (SM/SA) test program + +.SH SYNOPSIS +.B osmtest +[\-f(low) ] [\-w(ait) ] [\-d(ebug) ] +[\-m(ax_lid) ] [\-g(uid)[=]] [-p(ort)] +[\-i(nventory) ] [\-s(tress)] [\-M(ulticast_Mode)] +[\-t(imeout) ] [\-l | \-\-log_file] [\-v] [\-vf ] +[\-h(elp)] + +.SH DESCRIPTION +.PP +osmtest is a test program to validate InfiniBand subnet manager and +administration (SM/SA). + +Default is to run all flows with the exception of the QoS flow. + +osmtest provides a test suite for opensm. + +osmtest has the following capabilities and testing flows: + +It creates an inventory file of all available Nodes, Ports, and PathRecords, +including all their fields. +It verifies the existing inventory, with all the object fields, and matches it +to a pre-saved one. +A Multicast Compliancy test. +An Event Forwarding test. +A Service Record registration test. +An RMPP stress test. +A Small SA Queries stress test. 
+ +It is recommended that after installing opensm, the user should run +"osmtest -f c" to generate the inventory file, and +immediately afterwards run "osmtest -f a" to test OpenSM. + +Another recommendation for osmtest usage is to create the inventory when the +IB fabric is stable, and occasionally +run "osmtest -v" to verify that nothing has changed. + +.SH OPTIONS + +.PP +.TP +\fB\-f\fR, \fB\-\-flow\fR +This option directs osmtest to run a specific flow: + FLOW DESCRIPTION + c = create an inventory file with all nodes, ports and paths + a = run all validation tests (expecting an input inventory) + v = only validate the given inventory file + s = run service registration, deregistration, and lease test + e = run event forwarding test + f = flood the SA with queries according to the stress mode + m = multicast flow + q = QoS info: dump VLArb and SLtoVL tables + t = run trap 64/65 flow (this flow requires running of external tool) + (default is all flows except QoS) +.TP +\fB\-w\fR, \fB\-\-wait\fR +This option specifies the wait time for trap 64/65 in seconds +It is used only when running -f t - the trap 64/65 flow +(default to 10 sec) +.TP +\fB\-d\fR, \fB\-\-debug\fR +This option specifies a debug option. +These options are not normally needed. +The number following -d selects the debug +option to enable as follows: + + OPT Description + --- ----------------- + -d0 - Ignore other SM nodes + -d1 - Force single threaded dispatching + -d2 - Force log flushing after each log message + -d3 - Disable multicast support + -d4 - Use full world path record queries +.TP +\fB\-m\fR, \fB\-\-max_lid\fR +This option specifies the maximal LID number to be searched +for during inventory file build (default to 100) +.TP +\fB\-g\fR, \fB\-\-guid\fR +This option specifies the local port GUID value +with which OpenSM should bind. OpenSM may be +bound to 1 port at a time. +If GUID given is 0, OpenSM displays a list +of possible port GUIDs and waits for user input. 
+Without -g, OpenSM tries to use the default port. +.TP +\fB\-p\fR, \fB\-\-port\fR +This option displays a menu of possible local port GUID values +with which osmtest could bind +.TP +\fB\-i\fR, \fB\-\-inventory\fR +This option specifies the name of the inventory file. +Normally, osmtest expects to find an inventory file, +which osmtest uses to validate real-time information +received from the SA during testing +If -i is not specified, osmtest defaults to the file +\'osmtest.dat\' +See -f c option for related information +.TP +\fB\-s\fR, \fB\-\-stress\fR +This option runs the specified stress test instead +of the normal test suite +Stress test options are as follows: + + OPT Description + --- ----------------- + -s1 - Single-MAD (RMPP) response SA queries + -s2 - Multi-MAD (RMPP) response SA queries + -s3 - Multi-MAD (RMPP) Path Record SA queries + -s4 - Single-MAD (non RMPP) get Path Record SA queries + +Without -s, stress testing is not performed +.TP +\fB\-M\fR, \fB\-\-Multicast_Mode\fR +This option specifies the length of the Multicast test: + + OPT Description + --- ----------------- + -M1 - Short Multicast Flow (default) - single mode + -M2 - Short Multicast Flow - multiple mode + -M3 - Long Multicast Flow - single mode + -M4 - Long Multicast Flow - multiple mode + +Single mode - Osmtest is tested alone, with no other +apps that interact with OpenSM MC + +Multiple mode - Could be run with other apps using MC with +OpenSM. Without -M, default flow testing is performed +.TP +\fB\-t\fR, \fB\-\-timeout\fR +This option specifies the time in milliseconds +used for transaction timeouts. +Specifying -t 0 disables timeouts. +Without -t, OpenSM defaults to a timeout value of +200 milliseconds. +.TP +\fB\-l\fR, \fB\-\-log_file\fR +This option defines the log to be the given file. +By default the log goes to stdout. +.TP +\fB\-v\fR, \fB\-\-verbose\fR +This option increases the log verbosity level.
+The -v option may be specified multiple times +to further increase the verbosity level. +See the -vf option for more information about +log verbosity. +.TP +\fB\-V\fR +This option sets the maximum verbosity level and +forces log flushing. +The -V is equivalent to '-vf 0xFF -d 2'. +See the -vf option for more information about +log verbosity. +.TP +\fB\-vf\fR +This option sets the log verbosity level. +A flags field must follow the -vf option. +A bit set/clear in the flags enables/disables a +specific log level as follows: + + BIT LOG LEVEL ENABLED + ---- ----------------- + 0x01 - ERROR (error messages) + 0x02 - INFO (basic messages, low volume) + 0x04 - VERBOSE (interesting stuff, moderate volume) + 0x08 - DEBUG (diagnostic, high volume) + 0x10 - FUNCS (function entry/exit, very high volume) + 0x20 - FRAMES (dumps all SMP and GMP frames) + 0x40 - ROUTING (dump FDB routing information) + 0x80 - currently unused. + +Without -vf, osmtest defaults to ERROR + INFO (0x3) +Specifying -vf 0 disables all messages +Specifying -vf 0xFF enables all messages (see -V) +High verbosity levels may require increasing +the transaction timeout with the -t option +.TP +\fB\-h\fR, \fB\-\-help\fR +Display this usage info then exit. + +.SH AUTHORS +.TP +Hal Rosenstock +.RI < hal.rosenstock@gmail.com > +.TP +Eitan Zahavi +.RI < eitan@mellanox.co.il > diff --git a/man/torus-2QoS.8.in b/man/torus-2QoS.8.in new file mode 100644 index 0000000..2122a00 --- /dev/null +++ b/man/torus-2QoS.8.in @@ -0,0 +1,485 @@ +.TH TORUS\-2QOS 8 "November 10, 2010" "OpenIB" "OpenIB Management" +. +.SH NAME +torus\-2QoS \- Routing engine for OpenSM subnet manager +. +.SH DESCRIPTION +. +Torus-2QoS is a routing algorithm designed for large-scale 2D/3D torus fabrics.
+The torus-2QoS routing engine can provide the following functionality on +a 2D/3D torus: +.br +\" roff illiteracy leads to following brain-dead list implementation +\" +.na \" otherwise line space adjustment can add spaces between dash and text +.in +2m +\[en] +'in +2m +Routing that is free of credit loops. +.in +\[en] +'in +2m +Two levels of Quality of Service (QoS), assuming switches support eight +data VLs and channel adapters support two data VLs. +.in +\[en] +'in +2m +The ability to route around a single failed switch, and/or multiple failed +links, without +.in +.in +2m +\[en] +'in +2 +introducing credit loops, or +.in +\[en] +'in +2m +changing path SL values. +.in -4m +\[en] +'in +2m +Very short run times, with good scaling properties as fabric size increases. +.ad +. +.SH UNICAST ROUTING +. +Unicast routing in torus-2QoS is based on Dimension Order Routing (DOR). +It avoids the deadlocks that would otherwise occur in a DOR-routed +torus using the concept of a dateline for each torus dimension. +It encodes into a path SL which datelines the path crosses, as follows: +\f(CR +.P +.nf + sl = 0; + for (d = 0; d < torus_dimensions; d++) { + /* path_crosses_dateline(d) returns 0 or 1 */ + sl |= path_crosses_dateline(d) << d; + } +.fi +\fR +.P +On a 3D torus this consumes three SL bits, leaving one SL bit unused. +Torus-2QoS uses this SL bit to implement two QoS levels. +.P +Torus-2QoS also makes use of the output port +dependence of switch SL2VL maps to encode into one VL bit the +information encoded in three SL bits. 
+It computes in which torus coordinate direction each inter-switch link +"points", and writes SL2VL maps for such ports as follows: +\f(CR +.P +.nf + for (sl = 0; sl < 16; sl++) { + /* cdir(port) computes which torus coordinate direction + * a switch port "points" in; returns 0, 1, or 2 + */ + sl2vl(iport,oport,sl) = 0x1 & (sl >> cdir(oport)); + } +.fi +\fR +.P +Thus, on a pristine 3D torus, +\fIi.e.\fR, +in the absence of failed fabric switches, +torus-2QoS consumes eight SL values (SL bits 0-2) and +two VL values (VL bit 0) per QoS level to provide deadlock-free routing. +.P +Torus-2QoS routes around link failure by "taking the long way around" any +1D ring interrupted by link failure. For example, consider the 2D 6x5 +torus below, where switches are denoted by [+a-zA-Z]: +. +. +\# define macros to start and end ascii art, assuming Roman font. +\# the start macro takes an argument which is the width in ems of +\# the ascii art, and is used to center it. +\# +.de ascii_art +.nop \f(CR +.nr indent_in_ems ((((\\n[.ll] - \\n[.i]) / \\w'm') - \\$1)/2) +.in +\\n[indent_in_ems]m +.nf +.. +.de end_ascii_art +.fi +.in +.nop \fR +.. +\# end of macro definitions +. +. +.ascii_art 36 + | | | | | | + 4 --+----+----+----+----+----+-- + | | | | | | + 3 --+----+----+----D----+----+-- + | | | | | | + 2 --+----+----I----r----+----+-- + | | | | | | + 1 --m----S----n----T----o----p-- + | | | | | | +y=0 --+----+----+----+----+----+-- + | | | | | | + + x=0 1 2 3 4 5 +.end_ascii_art +.P +For a pristine fabric the path from S to D would be S-n-T-r-D. +In the event that either link S-n or n-T has failed, torus-2QoS would +use the path S-m-p-o-T-r-D. +Note that it can do this without changing the path SL +value; once the 1D ring m-S-n-T-o-p-m has been broken by failure, path +segments using it cannot contribute to deadlock, and the x-direction +dateline (between, say, x=5 and x=0) can be ignored for path segments on +that ring. 
+.P +One result of this is that torus-2QoS can route around many simultaneous +link failures, as long as no 1D ring is broken into disjoint segments. +For example, if links n-T and T-o have both failed, that ring has been broken +into two disjoint segments, T and o-p-m-S-n. +Torus-2QoS checks for such +issues, reports if they are found, and refuses to route such fabrics. +.P +Note that in the case where there are multiple parallel links between a +pair of switches, torus-2QoS will allocate routes across such links +in a round-robin fashion, based on ports at the path destination switch that +are active and not used for inter-switch links. +Should a link that is one of several such parallel links fail, routes +are redistributed across the remaining links. +When the last of such a set of parallel links fails, traffic is rerouted +as described above. +.P +Handling a failed switch under DOR requires introducing into a path at +least one turn that would be otherwise "illegal", +\fIi.e.\fR, +not allowed by DOR rules. +Torus-2QoS will introduce such a turn as close as possible to the +failed switch in order to route around it. +.P +In the above example, suppose switch T has failed, and consider the path +from S to D. +Torus-2QoS will produce the path S-n-I-r-D, rather than the +S-n-T-r-D path for a pristine torus, by introducing an early turn at n. +Normal DOR rules will cause traffic arriving at switch I to be forwarded +to switch r; for traffic arriving from I due to the "early" turn at n, +this will generate an "illegal" turn at I. +.P +Torus-2QoS will also use the input port dependence of SL2VL maps to set VL +bit 1 (which would be otherwise unused) for y-x, z-x, and z-y turns, +\fIi.e.\fR, +those turns that are illegal under DOR. +This causes the first hop after any such turn to use a separate set of +VL values, and prevents deadlock in the presence of a single failed switch. 
+.P +For any given path, only the hops after a turn that is illegal under DOR +can contribute to a credit loop that leads to deadlock. So in the example +above with failed switch T, the location of the illegal turn at I in the +path from S to D requires that any credit loop caused by that turn must +encircle the failed switch at T. Thus the second and later hops after the +illegal turn at I (\fIi.e.\fR, hop r-D) cannot contribute to a credit loop +because they cannot be used to construct a loop encircling T. The hop I-r +uses a separate VL, so it cannot contribute to a credit loop encircling T. +.P +Extending this argument shows that in addition to being capable of routing +around a single switch failure without introducing deadlock, torus-2QoS can +also route around multiple failed switches on the condition they are +adjacent in the last dimension routed by DOR. For example, consider the +following case on a 6x6 2D torus: +. +.ascii_art 36 + | | | | | | + 5 --+----+----+----+----+----+-- + | | | | | | + 4 --+----+----+----D----+----+-- + | | | | | | + 3 --+----+----I----u----+----+-- + | | | | | | + 2 --+----+----q----R----+----+-- + | | | | | | + 1 --m----S----n----T----o----p-- + | | | | | | +y=0 --+----+----+----+----+----+-- + | | | | | | + + x=0 1 2 3 4 5 +.end_ascii_art +.P +Suppose switches T and R have failed, and consider the path from S to D. +Torus-2QoS will generate the path S-n-q-I-u-D, with an illegal turn at +switch I, and with hop I-u using a VL with bit 1 set. +.P +As a further example, consider a case that torus-2QoS cannot route without +deadlock: two failed switches adjacent in a dimension that is not the last +dimension routed by DOR; here the failed switches are O and T: +. 
+.ascii_art 36 + | | | | | | + 5 --+----+----+----+----+----+-- + | | | | | | + 4 --+----+----+----+----+----+-- + | | | | | | + 3 --+----+----+----+----D----+-- + | | | | | | + 2 --+----+----I----q----r----+-- + | | | | | | + 1 --m----S----n----O----T----p-- + | | | | | | +y=0 --+----+----+----+----+----+-- + | | | | | | + + x=0 1 2 3 4 5 +.end_ascii_art +.P +In a pristine fabric, torus-2QoS would generate the path from S to D as +S-n-O-T-r-D. With failed switches O and T, torus-2QoS will generate the +path S-n-I-q-r-D, with illegal turn at switch I, and with hop I-q using a +VL with bit 1 set. In contrast to the earlier examples, the second hop +after the illegal turn, q-r, can be used to construct a credit loop +encircling the failed switches. +. +.SH MULTICAST ROUTING +. +Since torus-2QoS uses all four available SL bits, and the three data VL +bits that are typically available in current switches, there is no way +to use SL/VL values to separate multicast traffic from unicast traffic. +Thus, torus-2QoS must generate multicast routing such that credit loops +cannot arise from a combination of multicast and unicast path segments. +.P +It turns out that it is possible to construct spanning trees for multicast +routing that have that property. For the 2D 6x5 torus example above, here +is the full-fabric spanning tree that torus-2QoS will construct, where "x" +is the root switch and each "+" is a non-root switch: +. +.ascii_art 36 + 4 + + + + + + + | | | | | | + 3 + + + + + + + | | | | | | + 2 +----+----+----x----+----+ + | | | | | | + 1 + + + + + + + | | | | | | +y=0 + + + + + + + + x=0 1 2 3 4 5 +.end_ascii_art +.P +For multicast traffic routed from root to tip, every turn in the above +spanning tree is a legal DOR turn. +.P +For traffic routed from tip to root, and some traffic routed through the +root, turns are not legal DOR turns. 
However, to construct a credit loop, +the union of multicast routing on this spanning tree with DOR unicast +routing can only provide 3 of the 4 turns needed for the loop. +.P +In addition, if none of the above spanning tree branches crosses a dateline +used for unicast credit loop avoidance on a torus, and if multicast traffic +is confined to SL 0 or SL 8 (recall that torus-2QoS uses SL bit 3 to +differentiate QoS level), then multicast traffic also cannot contribute to +the "ring" credit loops that are otherwise possible in a torus. +.P +Torus-2QoS uses these ideas to create a master spanning tree. Every +multicast group spanning tree will be constructed as a subset of the master +tree, with the same root as the master tree. +.P +Such multicast group spanning trees will in general not be optimal for +groups which are a subset of the full fabric. However, this compromise must +be made to enable support for two QoS levels on a torus while preventing +credit loops. +.P +In the presence of link or switch failures that result in a fabric for +which torus-2QoS can generate credit-loop-free unicast routes, it is also +possible to generate a master spanning tree for multicast that retains the +required properties. For example, consider that same 2D 6x5 torus, with +the link from (2,2) to (3,2) failed. Torus-2QoS will generate the following +master spanning tree: +. +.ascii_art 36 + 4 + + + + + + + | | | | | | + 3 + + + + + + + | | | | | | + 2 --+----+----+ x----+----+-- + | | | | | | + 1 + + + + + + + | | | | | | +y=0 + + + + + + + + x=0 1 2 3 4 5 +.end_ascii_art +.P +Two things are notable about this master spanning tree. First, assuming +the x dateline was between x=5 and x=0, this spanning tree has a branch +that crosses the dateline. However, just as for unicast, crossing a +dateline on a 1D ring (here, the ring for y=2) that is broken by a failure +cannot contribute to a torus credit loop. 
+.P +Second, this spanning tree is no longer optimal even for multicast groups +that encompass the entire fabric. That, unfortunately, is a compromise that +must be made to retain the other desirable properties of torus-2QoS routing. +.P +In the event that a single switch fails, torus-2QoS will generate a master +spanning tree that has no "extra" turns by appropriately selecting a root +switch. +In the 2D 6x5 torus example, assume now that the switch at (3,2), +\fIi.e.\fR, the root for a pristine fabric, fails. +Torus-2QoS will generate the +following master spanning tree for that case: +. +.ascii_art 36 + | + 4 + + + + + + + | | | | | | + 3 + + + + + + + | | | | | + 2 + + + + + + | | | | | + 1 +----+----x----+----+----+ + | | | | | | +y=0 + + + + + + + | + + x=0 1 2 3 4 5 +.end_ascii_art +.P +Assuming the y dateline was between y=4 and y=0, this spanning tree has +a branch that crosses a dateline. However, again this cannot contribute +to credit loops as it occurs on a 1D ring (the ring for x=3) that is +broken by a failure, as in the above example. +. +.SH TORUS TOPOLOGY DISCOVERY +. +The algorithm used by torus-2QoS to construct the torus topology from +the undirected graph representing the fabric requires that the radix of +each dimension be configured via torus-2QoS.conf. +It also requires that the torus topology be "seeded"; for a 3D torus this +requires configuring four switches that define the three coordinate +directions of the torus. +.P +Given this starting information, the algorithm is to examine the +cube formed by the eight switch locations bounded by the corners +(x,y,z) and (x+1,y+1,z+1). +Based on switches already placed into the torus topology at some of these +locations, the algorithm examines 4-loops of inter-switch links to find the +one that is consistent with a face of the cube of switch locations, +and adds its switches to the discovered topology in the correct locations.
+.P +Because the algorithm is based on examining the topology of 4-loops of links, +a torus with one or more radix-4 dimensions requires extra initial +seed configuration. +See torus-2QoS.conf(5) for details. +Torus-2QoS will detect and report when it has insufficient configuration +for a torus with radix-4 dimensions. +.P +In the event the torus is significantly degraded, \fIi.e.\fR, there are +many missing switches or links, it may happen that torus-2QoS is unable +to place into the torus some switches and/or links that were discovered +in the fabric, and will generate a warning in that case. +A similar condition occurs if torus-2QoS is misconfigured, \fIi.e.\fR, +the radix of a torus dimension as configured does not match the radix +of that torus dimension as wired, and many switches/links in the fabric +will not be placed into the torus. +. +.SH QUALITY OF SERVICE CONFIGURATION +. +OpenSM will not program switches and channel adapters with +SL2VL maps or VL arbitration configuration unless it is invoked with -Q. +Since torus-2QoS depends on such functionality for correct operation, +always invoke OpenSM with -Q when torus-2QoS is in the list of routing +engines. +.P +Any quality of service configuration method supported by OpenSM will +work with torus-2QoS, subject to the following limitations and +considerations. +.P +For all routing engines supported by OpenSM except torus-2QoS, +there is a one-to-one correspondence between QoS level and SL. +Torus-2QoS can only support two quality of service levels, so only +the high-order bit of any SL value used for unicast QoS configuration +will be honored by torus-2QoS. +.P +For multicast QoS configuration, only SL values 0 and 8 should be used +with torus-2QoS. +.P +Since SL to VL map configuration must be under the complete control of +torus-2QoS, any configuration via qos_sl2vl, qos_swe_sl2vl, +\fIetc.\fR, must and will be ignored, and a warning will be generated. 
+.P +For inter-switch links, Torus-2QoS uses VL values 0-3 to implement one of +its supported QoS levels, and VL values 4-7 to implement the other. For +endport links (CA, router, switch management port), Torus-2QoS uses VL +value 0 for one of its supported QoS levels and VL value 1 to implement +the other. Hard-to-diagnose application issues may arise if traffic is +not delivered fairly across each of these two VL ranges. For +inter-switch links, Torus-2QoS will detect and warn if VL arbitration is +configured unfairly across VLs in the range 0-3, and also in the range +4-7. Note that the default OpenSM VL arbitration configuration does +not meet this constraint, so all torus-2QoS users should configure VL +arbitration via qos_ca_vlarb_high, qos_swe_vlarb_high, qos_ca_vlarb_low, +qos_swe_vlarb_low, \fIetc.\fR +.P +Note that torus-2QoS maps SL values to VL values differently +for inter-switch and endport links. This is why qos_vlarb_high and +qos_vlarb_low should not be used, as using them may result in +VL arbitration for a QoS level being different across inter-switch +links vs. across endport links. +. +.SH OPERATIONAL CONSIDERATIONS +. +Any routing algorithm for a torus IB fabric must employ path +SL values to avoid credit loops. +As a result, all applications run over such fabrics must perform a +path record query to obtain the correct path SL for connection setup. +Applications that use \fBrdma_cm\fR for connection setup will automatically +meet this requirement. +.P +If a change in fabric topology causes changes in path SL values required +to route without credit loops, in general all applications would need +to repath to avoid message deadlock. Since torus-2QoS has the ability +to reroute after a single switch failure without changing path SL values, +repathing by running applications is not required when the fabric +is routed with torus-2QoS. 
+.P +Torus-2QoS can provide unchanging path SL values in the presence of +subnet manager failover provided that all OpenSM instances have the +same idea of dateline location. See torus-2QoS.conf(5) for details. +.P +Torus-2QoS will detect configurations of failed switches and links +that prevent routing that is free of credit loops, and will +log warnings and refuse to route. If "no_fallback" was configured in the +list of OpenSM routing engines, then no other routing engine +will attempt to route the fabric. In that case all paths that +do not transit the failed components will continue to work, and +the subset of paths that are still operational will continue to remain +free of credit loops. +OpenSM will continue to attempt to route the fabric after every sweep +interval, and after any change (such as a link up) in the fabric topology. +When the fabric components are repaired, full functionality will be +restored. +.P +In the event OpenSM was configured to allow some other engine to +route the fabric if torus-2QoS fails, then credit loops and message +deadlock are likely if torus-2QoS had previously routed +the fabric successfully. +Even if the other engine is capable of routing a torus +without credit loops, applications that built connections with +path SL values granted under torus-2QoS will likely experience +message deadlock under routing generated by a different engine, +unless they repath. +.P +To verify that a torus fabric is routed free of credit loops, +use \fBibdmchk\fR to analyze data collected via \fBibdiagnet -vlr\fR. +. +.SH FILES +.TP +.B @OPENSM_CONFIG_DIR@/@OPENSM_CONFIG_FILE@ +default OpenSM config file. +.TP +.B @OPENSM_CONFIG_DIR@/@QOS_POLICY_FILE@ +default QoS policy config file. +.TP +.B @OPENSM_CONFIG_DIR@/@TORUS2QOS_CONF_FILE@ +default torus-2QoS config file. +. +.SH SEE ALSO +. +opensm(8), torus-2QoS.conf(5), ibdiagnet(1), ibdmchk(1), rdma_cm(7). 
diff --git a/man/torus-2QoS.conf.5.in b/man/torus-2QoS.conf.5.in new file mode 100644 index 0000000..1ab0d2e --- /dev/null +++ b/man/torus-2QoS.conf.5.in @@ -0,0 +1,219 @@ +.TH TORUS\-2QOS.CONF 5 "January 4, 2013" "OpenIB" "OpenIB Management" +. +.SH NAME +torus\-2QoS.conf \- Torus-2QoS configuration for OpenSM subnet manager +. +.SH DESCRIPTION +. +The file +.B torus-2QoS.conf +contains configuration information that is specific to the OpenSM +routing engine torus-2QoS. +Blank lines and lines where the first non-whitespace character is +"#" are ignored. +A token is any contiguous group of non-whitespace characters. +Any tokens on a line following the recognized configuration tokens described +below are ignored. +. +.P +\fR[\fBtorus\fR|\fBmesh\fR] +\fIx_radix\fR[\fBm\fR|\fBM\fR|\fBt\fR|\fBT\fR] +\fIy_radix\fR[\fBm\fR|\fBM\fR|\fBt\fR|\fBT\fR] +\fIz_radix\fR[\fBm\fR|\fBM\fR|\fBt\fR|\fBT\fR] +.RS +Either \fBtorus\fR or \fBmesh\fR must be the first keyword in the +configuration, and sets the topology +that torus-2QoS will try to construct. +A 2D topology can be configured by specifying one of +\fIx_radix\fR, \fIy_radix\fR, or \fIz_radix\fR as 1. +An individual dimension can be configured as mesh (open) or torus +(looped) by suffixing its radix specification with one of +\fBm\fR, \fBM\fR, \fBt\fR, or \fBT\fR. Thus, "mesh 3T 4 5" and +"torus 3 4M 5M" both specify the same topology. +.P +Note that although torus-2QoS can route mesh fabrics, its ability to +route around failed components is severely compromised on such fabrics. +A failed fabric component is very likely to cause a disjoint ring; +see \fBUNICAST ROUTING\fR in torus-2QoS(8). +.RE +. +.P +\fBxp_link +\fIsw0_GUID sw1_GUID +.br +.ns +\fByp_link +\fIsw0_GUID sw1_GUID +.br +.ns +\fBzp_link +\fIsw0_GUID sw1_GUID +.br +.ns +\fBxm_link +\fIsw0_GUID sw1_GUID +.br +.ns +\fBym_link +\fIsw0_GUID sw1_GUID +.br +.ns +\fBzm_link +\fIsw0_GUID sw1_GUID +\fR +.RS +These keywords are used to seed the torus/mesh topology. 
+For example, "xp_link 0x2000 0x2001" specifies that a link from +the switch with node GUID 0x2000 to the switch with node GUID 0x2001 +would point in the positive x direction, +while "xm_link 0x2000 0x2001" specifies that a link from +the switch with node GUID 0x2000 to the switch with node GUID 0x2001 +would point in the negative x direction. All the link keywords for +a given seed must specify the same "from" switch. +.P +In general, it is not necessary to configure both the positive and +negative directions for a given coordinate; either is sufficient. +However, the algorithm used for topology discovery needs extra information +for torus dimensions of radix four (see \fBTOPOLOGY DISCOVERY\fR in +torus-2QoS(8)). For such cases both the positive and negative coordinate +directions must be specified. +.P +Based on the topology specified via the \fBtorus\fR/\fBmesh\fR keyword, +torus-2QoS will detect and log when it has insufficient seed configuration. +.RE +. +.P +\fBx_dateline +\fIposition +.br +.ns +\fBy_dateline +\fIposition +.br +.ns +\fBz_dateline +\fIposition +\fR +.RS +In order for torus-2QoS to provide the guarantee that path SL values +do not change under any conditions for which it can still route the fabric, +its idea of dateline position must not change relative to physical switch +locations. The dateline keywords provide the means to configure such +behavior. +.P +The dateline for a torus dimension is always between the switch with +coordinate 0 and the switch with coordinate radix-1 for that dimension. +By default, the common switch in a torus seed is taken as the origin of +the coordinate system used to describe switch location. +The \fIposition\fR parameter for a dateline keyword moves the origin +(and hence the dateline) the specified amount relative to the common +switch in a torus seed. +.RE +. 
+.P +\fBnext_seed +\fR +.RS +If any of the switches used to specify a seed were to fail, torus-2QoS +would be unable to complete topology discovery successfully. +The \fBnext_seed\fR keyword specifies that the following link and dateline +keywords apply to a new seed specification. +.P +For maximum resiliency, no seed specification should share a switch +with any other seed specification. +Multiple seed specifications should use dateline configuration to +ensure that torus-2QoS can grant path SL values that are constant, +regardless of which seed was used to initiate topology discovery. +.RE +. +.P +\fBportgroup_max_ports +\fImax_ports +\fR +.RS +This keyword specifies the maximum number of parallel inter-switch +links, and also the maximum number of host ports per switch, that +torus-2QoS can accommodate. +The default value is 16. +Torus-2QoS will log an error message during topology discovery if this +parameter needs to be increased. +If this keyword appears multiple times, the last instance prevails. +.P +Note that the switch management port (switch port 0) gets put into +the same port group with the host ports, so if you have 16 host ports per +switch, portgroup_max_ports would need to be at least 17. +.RE +. +.P +\fBport_order +\fIp1 p2 p3 ... +\fR +.RS +This keyword specifies the order in which CA ports on a destination switch +are visited when computing routes. When the fabric contains switches connected +with multiple parallel links, routes are distributed in a round-robin fashion +across such links, and so changing the order in which CA ports are visited changes +the distribution of routes across such links. This may be advantageous for +some specific traffic patterns. +.P +The default is to visit CA ports in increasing port +order on destination switches. +.P +Duplicate values in the list will be ignored. +.RE +. +.P +\fBmax_changes +\fImax +\fR +.RS +This keyword specifies the maximum number of torus changes reported. +The default value is 32. +.RE +.
+.SH EXAMPLE +. +\f(RC +.nf +# Look for a 2D (since x radix is one) 4x5 torus. +torus 1 4 5 + +# y is a radix-4 torus dimension, need both +# ym_link and yp_link configuration. +yp_link 0x200000 0x200005 # sw @ y=0,z=0 -> sw @ y=1,z=0 +ym_link 0x200000 0x20000f # sw @ y=0,z=0 -> sw @ y=3,z=0 + +# z is not a radix-4 torus dimension, only need one of +# zm_link or zp_link configuration. +zp_link 0x200000 0x200001 # sw @ y=0,z=0 -> sw @ y=0,z=1 + +next_seed + +yp_link 0x20000b 0x200010 # sw @ y=2,z=1 -> sw @ y=3,z=1 +ym_link 0x20000b 0x200006 # sw @ y=2,z=1 -> sw @ y=1,z=1 +zp_link 0x20000b 0x20000c # sw @ y=2,z=1 -> sw @ y=2,z=2 + +y_dateline -2 # Move the dateline for this seed +z_dateline -1 # back to its original position. + +# If OpenSM failover is configured, for maximum resiliency +# one instance should run on a host attached to a switch +# from the first seed, and another instance should run +# on a host attached to a switch from the second seed. +# Both instances should use this torus-2QoS.conf to ensure +# path SL values do not change in the event of SM failover. + +# port_order defines the order in which the ports would be +# chosen for routing. +port_order 7 10 8 11 9 12 25 28 26 29 27 30 +.fi +\fR +. +.SH FILES +.TP +.B @OPENSM_CONFIG_DIR@/@TORUS2QOS_CONF_FILE@ +Default torus-2QoS config file. +. +.SH SEE ALSO +. +opensm(8), torus-2QoS(8).
diff --git a/opensm.spec.in b/opensm.spec.in new file mode 100644 index 0000000..470e63f --- /dev/null +++ b/opensm.spec.in @@ -0,0 +1,165 @@ +%define RELEASE @RELEASE@ +%define rel %{?CUSTOM_RELEASE}%{!?CUSTOM_RELEASE:%RELEASE} +%if %{?_with_console_socket:1}%{!?_with_console_socket:0} +%define _enable_console_socket --enable-console-socket +%endif +%if %{?_without_console_socket:1}%{!?_without_console_socket:0} +%define _disable_console_socket --disable-console-socket +%endif + +%if %{?_with_perf_mgr:1}%{!?_with_perf_mgr:0} +%define _enable_perf_mgr --enable-perf-mgr +%endif +%if %{?_without_perf_mgr:1}%{!?_without_perf_mgr:0} +%define _disable_perf_mgr --disable-perf-mgr +%endif + +%if %{?_with_event_plugin:1}%{!?_with_event_plugin:0} +%define _enable_event_plugin --enable-default-event-plugin +%endif +%if %{?_without_event_plugin:1}%{!?_without_event_plugin:0} +%define _disable_event_plugin --disable-default-event-plugin +%endif + +%if %{?_with_node_name_map:1}%{!?_with_node_name_map:0} +%define _enable_node_name_map --with-node-name-map%{?_with_node_name_map} +%endif + +Summary: InfiniBand subnet manager and administration +Name: opensm +Version: @VERSION@ +Release: %rel%{?dist} +License: GPLv2 or BSD +Group: System Environment/Daemons +URL: https://github.com/linux-rdma/opensm +Source: https://github.com/linux-rdma/opensm/releases/download/@VERSION@/@TARBALL@ +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) +BuildRequires: libibumad-devel, libtool, gcc, bison, flex +Requires: %{name}-libs = %{version}-%{release}, logrotate + +%description +OpenSM provides an implementation of an InfiniBand Subnet Manager and +Administration. Such a software entity is required to run in order to +initialize the InfiniBand hardware (at least one per each +InfiniBand subnet).
+ +%package libs +Summary: Libraries from the opensm package +Group: System Environment/Libraries +Requires(post): /sbin/ldconfig +Requires(postun): /sbin/ldconfig +Obsoletes: libopensm, libosmcomp, libosmvendor + +%description libs +Shared libraries that are part of the opensm package but are also used by +other applications. If you don't need opensm itself installed, these +libraries can be installed to satisfy dependencies of other applications. + +%package devel +Summary: Development files for OpenSM +Group: System Environment/Libraries +Requires: %{name}-libs = %{version}-%{release} libibumad-devel +Obsoletes: libopensm-devel, libosmcomp-devel, libosmvendor-devel + +%description devel +Header files for OpenSM. + +%package static +Summary: Static version of the opensm libraries +Group: System Environment/Libraries +Requires: %{name}-libs = %{version}-%{release} libibumad-devel + +%description static +Static version of the opensm libraries + +%prep +%setup -q + +%build +%configure \ + %{?_enable_console_socket} \ + %{?_disable_console_socket} \ + %{?_enable_perf_mgr} \ + %{?_disable_perf_mgr} \ + %{?_enable_event_plugin} \ + %{?_disable_event_plugin} \ + %{?_enable_node_name_map} +make %{?_smp_mflags} + +%install +rm -rf $RPM_BUILD_ROOT +make DESTDIR=$RPM_BUILD_ROOT install +rm -f $RPM_BUILD_ROOT%{_libdir}/*.la +etc=$RPM_BUILD_ROOT%{_sysconfdir} +mkdir -p ${RPM_BUILD_ROOT}/var/cache/opensm +if [ -f /etc/redhat-release -o -s /etc/redhat-release ]; then + REDHAT="redhat-" +else + REDHAT="" +fi +mkdir -p $etc/{init.d,logrotate.d} $etc/@OPENSM_CONFIG_SUB_DIR@ +install -m 755 scripts/${REDHAT}opensm.init $etc/init.d/opensmd +install -D -m 644 scripts/opensm.logrotate $etc/logrotate.d/opensm +install -m 755 scripts/sldd.sh $RPM_BUILD_ROOT%{_sbindir}/sldd.sh + +%clean +rm -rf $RPM_BUILD_ROOT + +%post +if [ $1 = 1 ]; then + if [ -e /sbin/chkconfig ]; then + /sbin/chkconfig --add opensmd + elif [ -e /usr/sbin/update-rc.d ]; then + /usr/sbin/update-rc.d opensmd defaults + 
else + /usr/lib/lsb/install_initd /etc/init.d/opensmd + fi + if type systemctl >/dev/null 2>&1; then + systemctl --system daemon-reload + fi +else + /etc/init.d/opensmd condrestart +fi + +%preun +if [ $1 = 0 ]; then + /etc/init.d/opensmd stop + if [ -e /sbin/chkconfig ]; then + /sbin/chkconfig --del opensmd + elif [ -e /usr/sbin/update-rc.d ]; then + /usr/sbin/update-rc.d -f opensmd remove + else + /usr/lib/lsb/remove_initd /etc/init.d/opensmd + fi + rm -f /var/cache/opensm/* +fi + +%post libs -p /sbin/ldconfig +%postun libs -p /sbin/ldconfig + +%files +%defattr(-,root,root,-) +%{_sbindir}/opensm +%{_sbindir}/osmtest +%{_mandir}/man8/* +%{_mandir}/man5/* +%doc AUTHORS COPYING README doc/performance-manager-HOWTO.txt doc/QoS_management_in_OpenSM.txt doc/partition-config.txt doc/opensm-sriov.txt doc/current-routing.txt doc/opensm_release_notes-3.3.txt +%{_sysconfdir}/init.d/opensmd +%{_sbindir}/sldd.sh +%config(noreplace) %{_sysconfdir}/logrotate.d/opensm +%dir /var/cache/opensm +%dir %{_sysconfdir}/@OPENSM_CONFIG_SUB_DIR@ + +%files libs +%defattr(-,root,root,-) +%{_libdir}/*.so.* + +%files devel +%defattr(-,root,root,-) +%{_includedir}/infiniband/* +%{_libdir}/*.so + +%files static +%defattr(-,root,root,-) +%{_libdir}/*.a + diff --git a/opensm/ChangeLog b/opensm/ChangeLog new file mode 100644 index 0000000..97eb67a --- /dev/null +++ b/opensm/ChangeLog @@ -0,0 +1,115 @@ +2007-07-11 Hal Rosenstock + + * configure.in: Bump to version 2.2.1 + +2007-06-20 Hal Rosenstock + + * osm_helper.c: Add 3LeafNetworks and Xsigo to osm_get_manufacturer_str + +2007-06-15 Sasha Khapyorsky + + * osm_helper.c: Fix PortInfo:CapMask printing when CapMask is 0 + +2007-06-11 Sasha Khapyorsky + + * osm_helper.c: Remove OSM_SM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED + from __osm_sm_mgr_signal_str + +2007-06-06 Sasha Khapyorsky + + * osm_helper.c: More optimally deal with manufacturer strings + +2007-06-06 Hal Rosenstock + + * osm_helper.c: Add Sun to osm_get_manufacturer_str + +2007-06-04 Hal 
Rosenstock + + * osm_helper.c: Add 8x to __osm_lwa_str_fixed_width + +2007-05-07 Sasha Khapyorsky + + * osm_helper.c: Remove repeated strlen() calls + +2007-04-27 Ira K. Weiny + + * osm_helper.c: In osm_dump_notice, use ib_get_producer_type_str + for printing producer type + +2007-04-26 Hal Rosenstock + + * osm_helper.c: Clarify the proper usage of + osm_get_node_type_str_fixed_width to take uint8_t rather + than uint32_t for node_type argument + +2007-04-25 Yevgeny Kliteynik + + * osm_helper.c: Fix problematic usage of sprintf() when + source and destination strings overlap. + +2007-04-24 Albert L. Chu + + * osm_helper.c: In osm_get_node_type_str_fixed_width, fix + both range limit and endian of node type check + +2007-03-29 Hal Rosenstock + + * configure.in: Bump version to 2.2.0 + +2007-03-21 Sasha Khapyorsky + + * osm_log.c: Changed to support daemon mode + +2007-03-01 Hal Rosenstock + + * configure.in: Bump version to 2.1.2 + + * osm_helper.c: Eliminate extraneous comma in __osm_disp_msg_ string + for OSM_MSG_MAD_PORT_INFO + +2007-02-26 Sasha Khapyorsky + + * osm_log.c: Minor optimization to previous change to osm_log + for also flushing on OSM_LOG_SYS + +2007-02-26 Yevgeny Kliteynik + + * osm_log.c: In osm_log, flush log on OSM_LOG_SYS (as well + as OSM_LOG_ERROR) + +2007-02-20 Hal Rosenstock + + * configure.in: Bump version to 2.1.1 + + * osm_helper.c: In osm_dbg_get_capabilities_str, only display + Capability Mask if there are capabilities present + +2007-01-22 Hal Rosenstock + + * osm_helper.c: Change DR path format from [%X] to %d, + +2007-01-08 Sasha Khapyorsky + + * osm_log.c: Add osm_log_reopen_file API + +2006-12-22 Hal Rosenstock + + * osm_helper.c: Add osm_dump_switch_info_record API + +2006-11-03 Sasha Khapyorsky + + * osm_log.c: Add osm_log_printf API + +2006-10-30 Sasha Khapyorsky + + * osm_helper.c: Fix seg fault with strings which + might not be null terminated + +2006-10-18 Yevgeny Kliteynik + + * osm_log.c: Windows porting changes + 
+2006-09-19 Yevgeny Kliteynik + + * osm_log.c: Windows porting changes + diff --git a/opensm/Makefile.am b/opensm/Makefile.am new file mode 100644 index 0000000..8aa6827 --- /dev/null +++ b/opensm/Makefile.am @@ -0,0 +1,121 @@ + +AM_CPPFLAGS = $(OSMV_INCLUDES) $(METIS_INCLUDES) + +AM_CFLAGS = -Wall -Wwrite-strings $(DBGFLAGS) -D_XOPEN_SOURCE=600 -D_GNU_SOURCE=1 + +if DEBUG +DBGFLAGS = -ggdb -D_DEBUG_ +else +DBGFLAGS = -g +endif + +sbin_PROGRAMS = opensm +opensm_LDFLAGS = -rdynamic +opensm_SOURCES = main.c osm_console_io.c osm_console.c osm_db_files.c \ + osm_db_pack.c osm_drop_mgr.c osm_guid_info_rcv.c \ + osm_guid_mgr.c osm_inform.c osm_lid_mgr.c osm_lin_fwd_rcv.c \ + osm_link_mgr.c osm_mcast_fwd_rcv.c \ + osm_mcast_mgr.c osm_mcast_tbl.c \ + osm_mcm_port.c osm_mesh.c osm_mtree.c osm_multicast.c osm_node.c \ + osm_node_desc_rcv.c osm_node_info_rcv.c \ + osm_opensm.c osm_pkey.c osm_pkey_mgr.c osm_pkey_rcv.c \ + osm_port.c osm_port_info_rcv.c osm_mlnx_ext_port_info_rcv.c \ + osm_remote_sm.c osm_req.c \ + osm_resp.c osm_sa.c osm_sa_class_port_info.c \ + osm_sa_informinfo.c osm_sa_lft_record.c osm_sa_mft_record.c \ + osm_sa_link_record.c osm_sa_mad_ctrl.c \ + osm_sa_mcmember_record.c osm_sa_node_record.c \ + osm_sa_path_record.c osm_sa_pkey_record.c \ + osm_sa_portinfo_record.c osm_sa_guidinfo_record.c \ + osm_sa_multipath_record.c \ + osm_sa_service_record.c osm_sa_slvl_record.c \ + osm_sa_sminfo_record.c osm_sa_vlarb_record.c \ + osm_sa_sw_info_record.c osm_service.c \ + osm_slvl_map_rcv.c osm_sm.c osm_sminfo_rcv.c \ + osm_sm_mad_ctrl.c osm_sm_state_mgr.c osm_state_mgr.c \ + osm_subnet.c osm_sw_info_rcv.c osm_switch.c \ + osm_prtn.c osm_prtn_config.c osm_qos.c osm_router.c \ + osm_trap_rcv.c osm_ucast_mgr.c osm_ucast_updn.c \ + osm_ucast_lash.c osm_ucast_file.c osm_ucast_ftree.c \ + osm_torus.c osm_ucast_dnup.c \ + osm_ucast_nue.c osm_ucast_dfsssp.c osm_vl15intf.c \ + osm_vl_arb_rcv.c st.c osm_perfmgr.c osm_perfmgr_db.c \ + osm_event_plugin.c osm_dump.c 
osm_ucast_cache.c \ + osm_qos_parser_y.y osm_qos_parser_l.l osm_qos_policy.c \ + osm_congestion_control.c + +AM_YFLAGS:= -d + +# we need to be able to load libraries from local build subtree before make install +# we always give precedence to local tree libs and then use the pre-installed ones. +opensm_LDADD = -L../complib -losmcomp -L../libopensm -lopensm -L../libvendor -losmvendor $(OSMV_LDADD) $(METIS_LDADD) + +opensmincludedir = $(includedir)/infiniband/opensm + +opensminclude_HEADERS = \ + $(srcdir)/../include/opensm/osm_base.h \ + $(srcdir)/../include/opensm/osm_console.h \ + $(srcdir)/../include/opensm/osm_console_io.h \ + $(srcdir)/../include/opensm/osm_db.h \ + $(srcdir)/../include/opensm/osm_db_pack.h \ + $(srcdir)/../include/opensm/osm_event_plugin.h \ + $(srcdir)/../include/opensm/osm_errors.h \ + $(srcdir)/../include/opensm/osm_file_ids.h \ + $(srcdir)/../include/opensm/osm_guid.h \ + $(srcdir)/../include/opensm/osm_helper.h \ + $(srcdir)/../include/opensm/osm_inform.h \ + $(srcdir)/../include/opensm/osm_ucast_lash.h \ + $(srcdir)/../include/opensm/osm_lid_mgr.h \ + $(srcdir)/../include/opensm/osm_log.h \ + $(srcdir)/../include/opensm/osm_mad_pool.h \ + $(srcdir)/../include/opensm/osm_madw.h \ + $(srcdir)/../include/opensm/osm_mcast_tbl.h \ + $(srcdir)/../include/opensm/osm_mcm_port.h \ + $(srcdir)/../include/opensm/osm_mesh.h \ + $(srcdir)/../include/opensm/osm_mtree.h \ + $(srcdir)/../include/opensm/osm_multicast.h \ + $(srcdir)/../include/opensm/osm_msgdef.h \ + $(srcdir)/../include/opensm/osm_node.h \ + $(srcdir)/../include/opensm/osm_opensm.h \ + $(srcdir)/../include/opensm/osm_partition.h \ + $(srcdir)/../include/opensm/osm_path.h \ + $(srcdir)/../include/opensm/osm_perfmgr.h \ + $(srcdir)/../include/opensm/osm_perfmgr_db.h \ + $(srcdir)/../include/opensm/osm_pkey.h \ + $(srcdir)/../include/opensm/osm_port.h \ + $(srcdir)/../include/opensm/osm_port_profile.h \ + $(srcdir)/../include/opensm/osm_prefix_route.h \ + 
$(srcdir)/../include/opensm/osm_qos_policy.h \ + $(srcdir)/../include/opensm/osm_congestion_control.h \ + $(srcdir)/../include/opensm/osm_remote_sm.h \ + $(srcdir)/../include/opensm/osm_router.h \ + $(srcdir)/../include/opensm/osm_sa.h \ + $(srcdir)/../include/opensm/osm_sa_mad_ctrl.h \ + $(srcdir)/../include/opensm/osm_service.h \ + $(srcdir)/../include/opensm/osm_sm.h \ + $(srcdir)/../include/opensm/osm_sm_mad_ctrl.h \ + $(srcdir)/../include/opensm/st.h \ + $(srcdir)/../include/opensm/osm_stats.h \ + $(srcdir)/../include/opensm/osm_subnet.h \ + $(srcdir)/../include/opensm/osm_switch.h \ + $(srcdir)/../include/opensm/osm_ucast_mgr.h \ + $(srcdir)/../include/opensm/osm_mcast_mgr.h \ + $(srcdir)/../include/opensm/osm_ucast_cache.h \ + $(srcdir)/../include/opensm/osm_vl15intf.h \ + $(top_builddir)/include/opensm/osm_version.h \ + $(top_builddir)/include/opensm/osm_config.h + +BUILT_SOURCES = osm_version osm_qos_parser_y.h +osm_version: + if [ -x $(top_srcdir)/gen_ver.sh ] ; then \ + ver_file=$(top_builddir)/include/opensm/osm_version.h ; \ + osm_ver=`cat $$ver_file | sed -ne '/#define OSM_VERSION /s/^.*\"OpenSM \(.*\)\"$$/\1/p'` ; \ + ver=`$(top_srcdir)/gen_ver.sh $(PACKAGE)` ; \ + if [ $$ver != $$osm_ver ] ; then \ + cat $$ver_file | sed -e '/#define OSM_VERSION /s/\"OpenSM .*\"/\"OpenSM '$$ver'\"/' > tmp_new_version ; \ + cat tmp_new_version > $$ver_file && rm -f tmp_new_version ; \ + fi ; \ + fi + +# files distributed as part of the srcdir +EXTRA_DIST = $(srcdir)/ChangeLog diff --git a/opensm/main.c b/opensm/main.c new file mode 100644 index 0000000..da849b5 --- /dev/null +++ b/opensm/main.c @@ -0,0 +1,1305 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * Copyright (c) 2009 System Fabric Works, Inc. All rights reserved. 
+ * Copyright (c) 2009-2011 ZIH, TU Dresden, Federal Republic of Germany. All rights reserved. + * Copyright (C) 2012-2017 Tokyo Institute of Technology. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Command line interface for opensm. 
+ */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_MAIN_C +#include +#include +#include +#include +#include +#include + +volatile unsigned int osm_exit_flag = 0; + +static volatile unsigned int osm_hup_flag = 0; +static volatile unsigned int osm_usr1_flag = 0; +static char *pidfile; + +#define MAX_LOCAL_IBPORTS 64 +#define INVALID_GUID (0xFFFFFFFFFFFFFFFFULL) + +static void mark_exit_flag(int signum) +{ + if (!osm_exit_flag) + printf("OpenSM: Got signal %d - exiting...\n", signum); + osm_exit_flag = 1; +} + +static void mark_hup_flag(int signum) +{ + osm_hup_flag = 1; +} + +static void mark_usr1_flag(int signum) +{ + osm_usr1_flag = 1; +} + +static sigset_t saved_sigset; + +static void block_signals() +{ + sigset_t set; + + sigemptyset(&set); + sigaddset(&set, SIGINT); + sigaddset(&set, SIGTERM); + sigaddset(&set, SIGHUP); +#ifndef HAVE_OLD_LINUX_THREADS + sigaddset(&set, SIGUSR1); +#endif + pthread_sigmask(SIG_SETMASK, &set, &saved_sigset); +} + +static void setup_signals() +{ + struct sigaction act; + + sigemptyset(&act.sa_mask); + act.sa_handler = mark_exit_flag; + act.sa_flags = 0; + sigaction(SIGINT, &act, NULL); + sigaction(SIGTERM, &act, NULL); + act.sa_handler = mark_hup_flag; + sigaction(SIGHUP, &act, NULL); + sigaction(SIGCONT, &act, NULL); +#ifndef HAVE_OLD_LINUX_THREADS + act.sa_handler = mark_usr1_flag; + sigaction(SIGUSR1, &act, NULL); +#endif + pthread_sigmask(SIG_SETMASK, &saved_sigset, NULL); +} + +static void show_usage(void) +{ + printf("\n------- OpenSM - Usage and options ----------------------\n"); + printf("Usage: opensm [options]\n"); + printf("Options:\n"); + printf("--version\n Prints OpenSM version and exits.\n\n"); + printf("--config, -F \n" + " The name of the OpenSM config file. 
When not specified\n" + " " OSM_DEFAULT_CONFIG_FILE + " will be used (if exists).\n\n"); + printf("--create-config, -c \n" + " OpenSM will dump its configuration to the specified file and exit.\n" + " This is a way to generate OpenSM configuration file template.\n\n"); + printf("--guid, -g \n" + " This option specifies the local port GUID value\n" + " with which OpenSM should bind. OpenSM may be\n" + " bound to 1 port at a time.\n" + " If GUID given is 0, OpenSM displays a list\n" + " of possible port GUIDs and waits for user input.\n" + " Without -g, OpenSM tries to use the default port.\n\n"); + printf("--lmc, -l \n" + " This option specifies the subnet's LMC value.\n" + " The number of LIDs assigned to each port is 2^LMC.\n" + " The LMC value must be in the range 0-7.\n" + " LMC values > 0 allow multiple paths between ports.\n" + " LMC values > 0 should only be used if the subnet\n" + " topology actually provides multiple paths between\n" + " ports, i.e. multiple interconnects between switches.\n" + " Without -l, OpenSM defaults to LMC = 0, which allows\n" + " one path between any two ports.\n\n"); + printf("--priority, -p \n" + " This option specifies the SM's PRIORITY.\n" + " This will effect the handover cases, where master\n" + " is chosen by priority and GUID. Range goes\n" + " from 0 (lowest priority) to 15 (highest).\n\n"); + printf("--subnet_prefix \n" + " Set the subnet prefix to something other than the\n" + " default value of 0xfe80000000000000\n\n"); + printf("--smkey, -k \n" + " This option specifies the SM's SM_Key (64 bits).\n" + " This will effect SM authentication.\n" + " Note that OpenSM version 3.2.1 and below used the\n" + " default value '1' in a host byte order, it is fixed\n" + " now but you may need this option to interoperate\n" + " with old OpenSM running on a little endian machine.\n\n"); + printf("--reassign_lids, -r\n" + " This option causes OpenSM to reassign LIDs to all\n" + " end nodes. 
Specifying -r on a running subnet\n" + " may disrupt subnet traffic.\n" + " Without -r, OpenSM attempts to preserve existing\n" + " LID assignments resolving multiple use of same LID.\n\n"); + printf("--routing_engine, -R \n" + " This option chooses routing engine(s) to use instead of default\n" + " Min Hop algorithm. Multiple routing engines can be specified\n" + " separated by commas so that specific ordering of routing\n" + " algorithms will be tried if earlier routing engines fail.\n" + " If all configured routing engines fail, OpenSM will always\n" + " attempt to route with Min Hop unless 'no_fallback' is\n" + " included in the list of routing engines.\n" + " Supported engines: updn, dnup, file, ftree, lash, dor,\n" + " torus-2QoS, nue, dfsssp, sssp\n\n"); + printf("--do_mesh_analysis\n" + " This option enables additional analysis for the lash\n" + " routing engine to precondition switch port assignments\n" + " in regular cartesian meshes which may reduce the number\n" + " of SLs required to give a deadlock free routing\n\n"); + printf("--lash_start_vl \n" + " Sets the starting VL to use for the lash routing algorithm.\n" + " Defaults to 0.\n\n"); + printf("--sm_sl \n" + " Sets the SL to use to communicate with the SM/SA. Defaults to 0.\n\n"); + printf("--nue_max_num_vls \n" + " Sets the maximum number of VLs to be used by Nue routing.\n" + " Defaults to 1 to enforce deadlock-freedom even if QoS is not\n" + " enabled. Set to 0 if Nue should automatically determine and\n" + " choose maximum supported by the fabric, or any integer >= 1.\n\n"); + printf("--connect_roots, -z\n" + " This option enforces routing engines (up/down and \n" + " fat-tree) to make connectivity between root switches\n" + " and in this way be IBA compliant. 
In many cases,\n" + " this can violate \"pure\" deadlock free algorithm, so\n" + " use it carefully.\n\n"); + printf("--ucast_cache, -A\n" + " This option enables unicast routing cache to prevent\n" + " routing recalculation (which is a heavy task in a\n" + " large cluster) when there was no topology change\n" + " detected during the heavy sweep, or when the topology\n" + " change does not require new routing calculation,\n" + " e.g. in case of host reboot.\n" + " This option becomes very handy when the cluster size\n" + " is thousands of nodes.\n\n"); + printf("--lid_matrix_file, -M \n" + " This option specifies the name of the lid matrix dump file\n" + " from where switch lid matrices (min hops tables will be\n" + " loaded.\n\n"); + printf("--lfts_file, -U \n" + " This option specifies the name of the LFTs file\n" + " from where switch forwarding tables will be loaded when using \"file\"\n" + " routing engine.\n\n"); + printf("--sadb_file, -S \n" + " This option specifies the name of the SA DB dump file\n" + " from where SA database will be loaded.\n\n"); + printf("--root_guid_file, -a \n" + " Set the root nodes for the Up/Down or Fat-Tree routing\n" + " algorithm to the guids provided in the given file (one\n" + " to a line)\n" "\n"); + printf("--cn_guid_file, -u \n" + " Set the compute nodes for the Fat-Tree or DFSSSP/SSSP routing algorithms\n" + " to the port GUIDs provided in the given file (one to a line)\n\n"); + printf("--io_guid_file, -G \n" + " Set the I/O nodes for the Fat-Tree or DFSSSP/SSSP routing algorithms\n" + " to the port GUIDs provided in the given file (one to a line)\n\n"); + printf("--port-shifting\n" + " Attempt to shift port routes around to remove alignment problems\n" + " in routing tables\n\n"); + printf("--scatter-ports \n" + " Randomize best port chosen for a route\n" + " Assign ports in a random order instead of round-robin\n" + " If zero disable (default), otherwise use the value as a random seed\n\n"); + printf("--max_reverse_hops, 
-H \n" + " Set the max number of hops the wrong way around\n" + " an I/O node is allowed to do (connectivity for I/O nodes on top switches)\n\n"); + printf("--ids_guid_file, -m \n" + " Name of the map file with set of the IDs which will be used\n" + " by Up/Down routing algorithm instead of node GUIDs\n" + " (format: per line)\n\n"); + printf("--guid_routing_order_file, -X \n" + " Set the order port guids will be routed for the MinHop\n" + " and Up/Down routing algorithms to the guids provided in the\n" + " given file (one to a line)\n\n"); + printf("--torus_config \n" + " This option defines the file name for the extra configuration\n" + " info needed for the torus-2QoS routing engine. The default\n" + " name is \'"OSM_DEFAULT_TORUS_CONF_FILE"\'\n\n"); + printf("--once, -o\n" + " This option causes OpenSM to configure the subnet\n" + " once, then exit. Ports remain in the ACTIVE state.\n\n"); + printf("--sweep, -s \n" + " This option specifies the number of seconds between\n" + " subnet sweeps. 
Specifying -s 0 disables sweeping.\n" + " Without -s, OpenSM defaults to a sweep interval of\n" + " 10 seconds.\n\n"); + printf("--timeout, -t \n" + " This option specifies the time in milliseconds\n" + " used for transaction timeouts.\n" + " Timeout values should be > 0.\n" + " Without -t, OpenSM defaults to a timeout value of\n" + " 200 milliseconds.\n\n"); + printf("--retries \n" + " This option specifies the number of retries used\n" + " for transactions.\n" + " Without --retries, OpenSM defaults to %u retries\n" + " for transactions.\n\n", OSM_DEFAULT_RETRY_COUNT); + printf("--maxsmps, -n \n" + " This option specifies the number of VL15 SMP MADs\n" + " allowed on the wire at any one time.\n" + " Specifying --maxsmps 0 allows unlimited outstanding\n" + " SMPs.\n" + " Without --maxsmps, OpenSM defaults to a maximum of\n" + " 4 outstanding SMPs.\n\n"); + printf("--console, -q [off|local" +#ifdef ENABLE_OSM_CONSOLE_LOOPBACK + "|loopback" +#endif +#ifdef ENABLE_OSM_CONSOLE_SOCKET + "|socket" +#endif + "]\n This option activates the OpenSM console (default off).\n\n"); +#ifdef ENABLE_OSM_CONSOLE_LOOPBACK + printf("--console-port, -C \n" + " Specify an alternate telnet port for the console (default %d).\n\n", + OSM_DEFAULT_CONSOLE_PORT); +#endif + printf("--ignore_guids, -i \n" + " This option provides the means to define a set of ports\n" + " (by guid) that will be ignored by the link load\n" + " equalization algorithm.\n\n"); + printf("--hop_weights_file, -w \n" + " This option provides the means to define a weighting\n" + " factor per port for customizing the least weight\n" + " hops for the routing.\n\n"); + printf("--port_search_ordering_file, -O \n" + " This option provides the means to define a mapping\n" + " between ports and dimension (Order) for controlling\n" + " Dimension Order Routing (DOR).\n" + " Moreover this option provides the means to define non\n" + " default routing port order.\n\n"); + printf("--dimn_ports_file, -O (DEPRECATED)\n" + " Use 
--port_search_ordering_file instead.\n" + " This option provides the means to define a mapping\n" + " between ports and dimension (Order) for controlling\n" + " Dimension Order Routing (DOR).\n\n"); + printf("--honor_guid2lid, -x\n" + " This option forces OpenSM to honor the guid2lid file,\n" + " when it comes out of Standby state, if such file exists\n" + " under OSM_CACHE_DIR, and is valid. By default, this is FALSE.\n\n"); + printf("--dump_files_dir " + " The directory to hold the file dumps.\n"); + printf("--log_file, -f \n" + " This option defines the log to be the given file.\n" + " By default, the log goes to /var/log/opensm.log.\n" + " For the log to go to standard output use -f stdout.\n\n"); + printf("--log_limit, -L \n" + " This option defines maximal log file size in MB. When\n" + " specified the log file will be truncated upon reaching\n" + " this limit.\n\n"); + printf("--erase_log_file, -e\n" + " This option will cause deletion of the log file\n" + " (if it previously exists). By default, the log file\n" + " is accumulative.\n\n"); + printf("--Pconfig, -P \n" + " This option defines the optional partition configuration file.\n" + " The default name is \'" + OSM_DEFAULT_PARTITION_CONFIG_FILE "\'.\n\n"); + printf("--no_part_enforce, -N (DEPRECATED)\n" + " Use --part_enforce instead.\n" + " This option disables partition enforcement on switch external ports.\n\n"); + printf("--part_enforce, -Z [both, in, out, off]\n" + " This option indicates the partition enforcement type (for switches)\n" + " Enforcement type can be outbound only (out), inbound only (in), both or\n" + " disabled (off). 
Default is both.\n\n"); + printf("--allow_both_pkeys, -W\n" + " This option indicates whether both full and limited membership\n" + " on the same partition can be configured in the PKeyTable.\n" + " Default is not to allow both pkeys.\n\n"); + printf("--qos, -Q\n" " This option enables QoS setup.\n\n"); + printf("--qos_policy_file, -Y \n" + " This option defines the optional QoS policy file.\n" + " The default name is \'" OSM_DEFAULT_QOS_POLICY_FILE + "\'.\n\n"); + printf("--congestion_control\n" + " (EXPERIMENTAL) This option enables congestion control configuration.\n\n"); + printf("--cc_key \n" + " (EXPERIMENTAL) This option configures the CCkey to use when configuring\n" + " congestion control.\n\n"); + printf("--stay_on_fatal, -y\n" + " This option will cause SM not to exit on fatal initialization\n" + " issues: if SM discovers duplicated guids or 12x link with\n" + " lane reversal badly configured.\n" + " By default, the SM will exit on these errors.\n\n"); + printf("--daemon, -B\n" + " Run in daemon mode - OpenSM will run in the background.\n\n"); + printf("--inactive, -I\n" + " Start SM in inactive rather than normal init SM state.\n\n"); +#ifdef ENABLE_OSM_PERF_MGR + printf("--perfmgr\n" " Start with PerfMgr enabled.\n\n"); + printf("--perfmgr_sweep_time_s \n" + " PerfMgr sweep interval in seconds.\n\n"); +#endif + printf("--prefix_routes_file \n" + " This option specifies the prefix routes file.\n" + " Prefix routes control how the SA responds to path record\n" + " queries for off-subnet DGIDs. 
Default file is:\n" + " " OSM_DEFAULT_PREFIX_ROUTES_FILE "\n\n"); + printf("--consolidate_ipv6_snm_req\n" + " Use shared MLID for IPv6 Solicited Node Multicast groups\n" + " per MGID scope and P_Key.\n\n"); + printf("--guid_routing_order_no_scatter\n" + " Don't use scatter for ports defined in guid_routing_order file\n\n"); + printf("--log_prefix \n" + " Prefix to syslog messages from OpenSM.\n\n"); + printf("--verbose, -v\n" + " This option increases the log verbosity level.\n" + " The -v option may be specified multiple times\n" + " to further increase the verbosity level.\n" + " See the -D option for more information about\n" + " log verbosity.\n\n"); + printf("--V, -V\n" + " This option sets the maximum verbosity level and\n" + " forces log flushing.\n" + " The -V is equivalent to '-D 0xFF -d 2'.\n" + " See the -D option for more information about\n" + " log verbosity.\n\n"); + printf("--D, -D \n" + " This option sets the log verbosity level.\n" + " A flags field must follow the -D option.\n" + " A bit set/clear in the flags enables/disables a\n" + " specific log level as follows:\n" + " BIT LOG LEVEL ENABLED\n" + " ---- -----------------\n" + " 0x01 - ERROR (error messages)\n" + " 0x02 - INFO (basic messages, low volume)\n" + " 0x04 - VERBOSE (interesting stuff, moderate volume)\n" + " 0x08 - DEBUG (diagnostic, high volume)\n" + " 0x10 - FUNCS (function entry/exit, very high volume)\n" + " 0x20 - FRAMES (dumps all SMP and GMP frames)\n" + " 0x40 - ROUTING (dump FDB routing information)\n" + " 0x80 - currently unused.\n" + " Without -D, OpenSM defaults to ERROR + INFO (0x3).\n" + " Specifying -D 0 disables all messages.\n" + " Specifying -D 0xFF enables all messages (see -V).\n" + " High verbosity levels may require increasing\n" + " the transaction timeout with the -t option.\n\n"); + printf("--debug, -d \n" + " This option specifies a debug option.\n" + " These options are not normally needed.\n" + " The number following -d selects the debug\n" + " option to 
enable as follows:\n" + " OPT Description\n" + " --- -----------------\n" + " -d0 - Ignore other SM nodes\n" + " -d1 - Force single threaded dispatching\n" + " -d2 - Force log flushing after each log message\n" + " -d3 - Disable multicast support\n" + " -d10 - Put OpenSM in testability mode\n" + " Without -d, no debug options are enabled\n\n"); + printf("--help, -h, -?\n" + " Display this usage info then exit.\n\n"); + fflush(stdout); + exit(2); +} + +static ib_net64_t get_port_guid(IN osm_opensm_t * p_osm, uint64_t port_guid) +{ + ib_port_attr_t attr_array[MAX_LOCAL_IBPORTS]; + uint32_t num_ports = MAX_LOCAL_IBPORTS; + uint32_t i, choice = 0; + ib_api_status_t status; + + for (i = 0; i < num_ports; i++) { + attr_array[i].num_pkeys = 0; + attr_array[i].p_pkey_table = NULL; + attr_array[i].num_gids = 0; + attr_array[i].p_gid_table = NULL; + } + + /* Call the transport layer for a list of local port GUID values */ + status = osm_vendor_get_all_port_attr(p_osm->p_vendor, attr_array, + &num_ports); + if (status != IB_SUCCESS) { + printf("\nError from osm_vendor_get_all_port_attr (%x)\n", + status); + return 0; + } + + /* if num_ports is 0 - return 0 */ + if (num_ports == 0) { + printf("\nNo local ports detected!\n"); + return 0; + } + /* If num_ports is 1, then there is only one possible port to use. + * Use it. */ + if (num_ports == 1) { + printf("Using default GUID 0x%" PRIx64 "\n", + cl_hton64(attr_array[0].port_guid)); + return attr_array[0].port_guid; + } + /* If port_guid is 0 - use the first connected port */ + if (port_guid == 0) { + for (i = 0; i < num_ports; i++) + if (attr_array[i].link_state > IB_LINK_DOWN) + break; + if (i == num_ports) + i = 0; + printf("Using default GUID 0x%" PRIx64 "\n", + cl_hton64(attr_array[i].port_guid)); + return attr_array[i].port_guid; + } + + if (p_osm->subn.opt.daemon) + return 0; + + /* More than one possible port - list all ports and let the user + * to choose. 
*/ + while (1) { + printf("\nChoose a local port number with which to bind:\n\n"); + for (i = 0; i < num_ports; i++) + /* Print the index + 1 since by convention, port + * numbers start with 1 on host channel adapters. */ + printf("\t%u: GUID 0x%" PRIx64 ", lid %u, state %s\n", + i + 1, cl_ntoh64(attr_array[i].port_guid), + attr_array[i].lid, + ib_get_port_state_str(attr_array[i].link_state)); + printf("\n\t0: Exit\n"); + printf("\nEnter choice (0-%u): ", i); + fflush(stdout); + if (scanf("%u", &choice) <= 0) { + char junk[128]; + if (scanf("%127s", junk) <= 0) + printf("\nError: Cannot scan!\n"); + } else if (choice == 0) + return 0; + else if (choice <= num_ports) + break; + printf("\nError: Lame choice! Please try again.\n"); + } + choice--; + printf("Choice guid=0x%" PRIx64 "\n", + cl_ntoh64(attr_array[choice].port_guid)); + return attr_array[choice].port_guid; +} + +static void remove_pidfile(void) +{ + if (pidfile) + unlink(pidfile); +} + +static int daemonize(osm_opensm_t * osm) +{ + pid_t pid; + int fd; + FILE *f; + + fd = open("/dev/null", O_WRONLY); + if (fd < 0) { + perror("open"); + return -1; + } + + if ((pid = fork()) < 0) { + perror("fork"); + exit(-1); + } else if (pid > 0) + exit(0); + + setsid(); + + if ((pid = fork()) < 0) { + perror("fork"); + exit(-1); + } else if (pid > 0) + exit(0); + + if (pidfile) { + remove_pidfile(); + f = fopen(pidfile, "w"); + if (f) { + fprintf(f, "%d\n", getpid()); + fclose(f); + } else { + perror("fopen"); + exit(1); + } + } + + close(0); + close(1); + close(2); + + dup2(fd, 0); + dup2(fd, 1); + dup2(fd, 2); + + close(fd); + + return 0; +} + +int osm_manager_loop(osm_subn_opt_t * p_opt, osm_opensm_t * p_osm) +{ + int console_init_flag = 0; + + if (is_console_enabled(p_opt)) { + if (!osm_console_init(p_opt, &p_osm->console, &p_osm->log)) + console_init_flag = 1; + } + + /* + Sit here forever - dwell or do console i/o & cmds + */ + while (!osm_exit_flag) { + if (console_init_flag) { + if (osm_console(p_osm)) + 
console_init_flag = 0; + } else + cl_thread_suspend(10000); + + if (osm_usr1_flag) { + osm_usr1_flag = 0; + osm_log_reopen_file(&(p_osm->log)); + } + if (osm_hup_flag) { + osm_hup_flag = 0; + /* a HUP signal should only start a new heavy sweep */ + p_osm->subn.force_heavy_sweep = TRUE; + osm_opensm_sweep(p_osm); + } + } + if (is_console_enabled(p_opt)) + osm_console_exit(&p_osm->console, &p_osm->log); + return 0; +} + +#define SET_STR_OPT(opt, val) do { \ + opt = val ? strdup(val) : NULL ; \ +} while (0) + +int main(int argc, char *argv[]) +{ + osm_opensm_t osm; + osm_subn_opt_t opt; + ib_net64_t sm_key = 0; + ib_api_status_t status; + uint32_t temp, dbg_lvl; + boolean_t run_once_flag = FALSE; + int32_t vendor_debug = 0; + int next_option; + char *conf_template = NULL; + const char *config_file = NULL; + uint32_t val; + const char *const short_option = + "F:c:i:w:O:f:ed:D:g:l:L:s:t:a:u:m:X:R:zM:U:S:P:Y:ANZ:WBIQvVhoryxp:n:q:k:C:G:H:"; + + /* + In the array below, the 2nd parameter specifies the number + of arguments as follows: + 0: no arguments + 1: argument + 2: optional + */ + const struct option long_option[] = { + {"version", 0, NULL, 12}, + {"config", 1, NULL, 'F'}, + {"create-config", 1, NULL, 'c'}, + {"debug", 1, NULL, 'd'}, + {"guid", 1, NULL, 'g'}, + {"ignore_guids", 1, NULL, 'i'}, + {"hop_weights_file", 1, NULL, 'w'}, + {"dimn_ports_file", 1, NULL, 'O'}, + {"port_search_ordering_file", 1, NULL, 'O'}, + {"lmc", 1, NULL, 'l'}, + {"sweep", 1, NULL, 's'}, + {"timeout", 1, NULL, 't'}, + {"verbose", 0, NULL, 'v'}, + {"D", 1, NULL, 'D'}, + {"log_file", 1, NULL, 'f'}, + {"log_limit", 1, NULL, 'L'}, + {"erase_log_file", 0, NULL, 'e'}, + {"Pconfig", 1, NULL, 'P'}, + {"no_part_enforce", 0, NULL, 'N'}, + {"part_enforce", 1, NULL, 'Z'}, + {"allow_both_pkeys", 0, NULL, 'W'}, + {"qos", 0, NULL, 'Q'}, + {"qos_policy_file", 1, NULL, 'Y'}, + {"congestion_control", 0, NULL, 128}, + {"cc_key", 1, NULL, 129}, + {"maxsmps", 1, NULL, 'n'}, + {"console", 1, NULL, 'q'}, + {"V", 
0, NULL, 'V'}, + {"help", 0, NULL, 'h'}, + {"once", 0, NULL, 'o'}, + {"reassign_lids", 0, NULL, 'r'}, + {"priority", 1, NULL, 'p'}, + {"subnet_prefix", 1, NULL, 16}, + {"smkey", 1, NULL, 'k'}, + {"routing_engine", 1, NULL, 'R'}, + {"ucast_cache", 0, NULL, 'A'}, + {"connect_roots", 0, NULL, 'z'}, + {"lid_matrix_file", 1, NULL, 'M'}, + {"lfts_file", 1, NULL, 'U'}, + {"sadb_file", 1, NULL, 'S'}, + {"root_guid_file", 1, NULL, 'a'}, + {"cn_guid_file", 1, NULL, 'u'}, + {"io_guid_file", 1, NULL, 'G'}, + {"port-shifting", 0, NULL, 11}, + {"scatter-ports", 1, NULL, 14}, + {"max_reverse_hops", 1, NULL, 'H'}, + {"ids_guid_file", 1, NULL, 'm'}, + {"guid_routing_order_file", 1, NULL, 'X'}, + {"stay_on_fatal", 0, NULL, 'y'}, + {"honor_guid2lid", 0, NULL, 'x'}, +#ifdef ENABLE_OSM_CONSOLE_LOOPBACK + {"console-port", 1, NULL, 'C'}, +#endif + {"daemon", 0, NULL, 'B'}, + {"pidfile", 1, NULL, 'J'}, + {"inactive", 0, NULL, 'I'}, +#ifdef ENABLE_OSM_PERF_MGR + {"perfmgr", 0, NULL, 1}, + {"perfmgr_sweep_time_s", 1, NULL, 2}, +#endif + {"prefix_routes_file", 1, NULL, 3}, + {"consolidate_ipv6_snm_req", 0, NULL, 4}, + {"do_mesh_analysis", 0, NULL, 5}, + {"lash_start_vl", 1, NULL, 6}, + {"sm_sl", 1, NULL, 7}, + {"retries", 1, NULL, 8}, + {"log_prefix", 1, NULL, 9}, + {"torus_config", 1, NULL, 10}, + {"guid_routing_order_no_scatter", 0, NULL, 13}, + {"nue_max_num_vls", 1, NULL, 15}, + {"dump_files_dir", 1, NULL, 17}, + {NULL, 0, NULL, 0} /* Required at the end of the array */ + }; + + /* force stdout to be line-buffered */ + setvbuf(stdout, NULL, _IOLBF, BUFSIZ); + + /* Make sure that the opensm and complib were compiled using + same modes (debug/free) */ + if (osm_is_debug() != cl_is_debug()) { + fprintf(stderr, + "ERROR: OpenSM and Complib were compiled using different modes\n"); + fprintf(stderr, "ERROR: OpenSM debug:%d Complib debug:%d \n", + osm_is_debug(), cl_is_debug()); + exit(1); + } + + printf("-------------------------------------------------\n"); + printf("%s\n", OSM_VERSION); + + 
do { + next_option = getopt_long_only(argc, argv, short_option, + long_option, NULL); + switch (next_option) { + case 'F': + config_file = optarg; + printf("Config file is `%s`:\n", config_file); + break; + default: + break; + } + } while (next_option != -1); + + optind = 0; /* reset command line */ + + if (!config_file) + config_file = OSM_DEFAULT_CONFIG_FILE; + + osm_subn_set_default_opt(&opt); + + if (osm_subn_parse_conf_file(config_file, &opt) < 0) + printf("\nFail to parse config file \'%s\'\n", config_file); + + printf("Command Line Arguments:\n"); + do { + next_option = getopt_long_only(argc, argv, short_option, + long_option, NULL); + switch (next_option) { + case 12: /* --version - already printed above */ + exit(0); + break; + case 'F': + break; + case 'c': + conf_template = optarg; + printf(" Creating config file template \'%s\'.\n", + conf_template); + break; + case 'o': + /* + Run once option. + */ + run_once_flag = TRUE; + printf(" Run Once\n"); + break; + + case 'r': + /* + Reassign LIDs subnet option. + */ + opt.reassign_lids = TRUE; + printf(" Reassign LIDs\n"); + break; + + case 'i': + /* + Specifies ignore guids file. + */ + SET_STR_OPT(opt.port_prof_ignore_file, optarg); + printf(" Ignore Guids File = %s\n", + opt.port_prof_ignore_file); + break; + + case 'w': + SET_STR_OPT(opt.hop_weights_file, optarg); + printf(" Hop Weights File = %s\n", + opt.hop_weights_file); + break; + + case 'O': + SET_STR_OPT(opt.port_search_ordering_file, optarg); + printf(" Port Search Ordering/Dimension Ports File = %s\n", + opt.port_search_ordering_file); + break; + + case 'g': + /* + Specifies port guid with which to bind. 
+ */ + opt.guid = cl_hton64(strtoull(optarg, NULL, 16)); + if (!opt.guid) + /* If guid is 0 - need to display the + * guid list */ + opt.guid = INVALID_GUID; + else + printf(" Guid <0x%" PRIx64 ">\n", + cl_hton64(opt.guid)); + break; + + case 's': + val = strtol(optarg, NULL, 0); + /* Check that the number is not too large */ + if (((uint32_t) (val * 1000000)) / 1000000 != val) + fprintf(stderr, + "ERROR: sweep interval given is too large. Ignoring it.\n"); + else { + opt.sweep_interval = val; + printf(" sweep interval = %d\n", + opt.sweep_interval); + } + break; + + case 't': + val = strtoul(optarg, NULL, 0); + opt.transaction_timeout = strtoul(optarg, NULL, 0); + if (val == 0) + fprintf(stderr, "ERROR: timeout value 0 is invalid. Ignoring it.\n"); + else { + opt.transaction_timeout = val; + printf(" Transaction timeout = %u\n", + opt.transaction_timeout); + } + break; + + case 'n': + opt.max_wire_smps = strtoul(optarg, NULL, 0); + if (opt.max_wire_smps == 0 || + opt.max_wire_smps > 0x7FFFFFFF) + opt.max_wire_smps = 0x7FFFFFFF; + printf(" Max wire smp's = %d\n", opt.max_wire_smps); + break; + + case 'q': + /* + * OpenSM interactive console + */ + if (strcmp(optarg, OSM_DISABLE_CONSOLE) == 0 + || strcmp(optarg, OSM_LOCAL_CONSOLE) == 0 +#ifdef ENABLE_OSM_CONSOLE_SOCKET + || strcmp(optarg, OSM_REMOTE_CONSOLE) == 0 +#endif +#ifdef ENABLE_OSM_CONSOLE_LOOPBACK + || strcmp(optarg, OSM_LOOPBACK_CONSOLE) == 0 +#endif + ) + SET_STR_OPT(opt.console, optarg); + else + printf("-console %s option not understood\n", + optarg); + break; + +#ifdef ENABLE_OSM_CONSOLE_LOOPBACK + case 'C': + opt.console_port = strtol(optarg, NULL, 0); + break; +#endif + + case 'd': + dbg_lvl = strtol(optarg, NULL, 0); + printf(" d level = 0x%x\n", dbg_lvl); + if (dbg_lvl == 0) { + printf(" Debug mode: Ignore Other SMs\n"); + opt.ignore_other_sm = TRUE; + } else if (dbg_lvl == 1) { + printf(" Debug mode: Forcing Single Thread\n"); + opt.single_thread = TRUE; + } else if (dbg_lvl == 2) { + printf(" 
Debug mode: Force Log Flush\n"); + opt.force_log_flush = TRUE; + } else if (dbg_lvl == 3) { + printf + (" Debug mode: Disable multicast support\n"); + opt.disable_multicast = TRUE; + } + /* + * NOTE: Debug level 4 used to be used for memory + * tracking but this is now deprecated + */ + else if (dbg_lvl == 5) + vendor_debug++; + else + printf(" OpenSM: Unknown debug option %d" + " ignored\n", dbg_lvl); + break; + + case 'l': + temp = strtoul(optarg, NULL, 0); + if (temp > 7) { + fprintf(stderr, + "ERROR: LMC must be 7 or less.\n"); + return -1; + } + opt.lmc = (uint8_t) temp; + printf(" LMC = %d\n", temp); + break; + + case 'D': + opt.log_flags = strtol(optarg, NULL, 0); + printf(" verbose option -D = 0x%x\n", opt.log_flags); + break; + + case 'f': + SET_STR_OPT(opt.log_file, optarg); + break; + + case 'L': + opt.log_max_size = strtoul(optarg, NULL, 0); + printf(" Log file max size is %u MBytes\n", + opt.log_max_size); + break; + + case 'e': + opt.accum_log_file = FALSE; + printf(" Creating new log file\n"); + break; + + case 'J': + pidfile = optarg; + break; + + case 'P': + SET_STR_OPT(opt.partition_config_file, optarg); + break; + + case 'N': + opt.no_partition_enforcement = TRUE; + break; + + case 'Z': + if (strcmp(optarg, OSM_PARTITION_ENFORCE_BOTH) == 0 + || strcmp(optarg, OSM_PARTITION_ENFORCE_IN) == 0 + || strcmp(optarg, OSM_PARTITION_ENFORCE_OUT) == 0 + || strcmp(optarg, OSM_PARTITION_ENFORCE_OFF) == 0) { + SET_STR_OPT(opt.part_enforce, optarg); + if (strcmp(optarg, OSM_PARTITION_ENFORCE_BOTH) == 0) + opt.part_enforce_enum = OSM_PARTITION_ENFORCE_TYPE_BOTH; + else if (strcmp(optarg, OSM_PARTITION_ENFORCE_IN) == 0) + opt.part_enforce_enum = OSM_PARTITION_ENFORCE_TYPE_IN; + else if (strcmp(optarg, OSM_PARTITION_ENFORCE_OUT) == 0) + opt.part_enforce_enum = OSM_PARTITION_ENFORCE_TYPE_OUT; + else + opt.part_enforce_enum = OSM_PARTITION_ENFORCE_TYPE_OFF; + } else + printf("-part_enforce %s option not understood\n", + optarg); + break; + + case 'W': + 
opt.allow_both_pkeys = TRUE; + break; + + case 'Q': + opt.qos = TRUE; + break; + + case 'Y': + SET_STR_OPT(opt.qos_policy_file, optarg); + printf(" QoS policy file \'%s\'\n", optarg); + break; + + case 128: + opt.congestion_control = TRUE; + break; + + case 129: + opt.cc_key = strtoull(optarg, NULL, 0); + printf(" CC Key 0x%" PRIx64 "\n", opt.cc_key); + break; + + case 'y': + opt.exit_on_fatal = FALSE; + printf(" Staying on fatal initialization errors\n"); + break; + + case 'v': + opt.log_flags = (opt.log_flags << 1) | 1; + printf(" Verbose option -v (log flags = 0x%X)\n", + opt.log_flags); + break; + + case 'V': + opt.log_flags = 0xFF; + opt.force_log_flush = TRUE; + printf(" Big V selected\n"); + break; + + case 'p': + temp = strtoul(optarg, NULL, 0); + if (temp > 15) { + fprintf(stderr, + "ERROR: priority must be between 0 and 15\n"); + return -1; + } + opt.sm_priority = (uint8_t) temp; + printf(" Priority = %d\n", temp); + break; + + case 16: + opt.subnet_prefix = cl_hton64(strtoull(optarg, NULL, 16)); + printf(" Subnet_Prefix = <0x%" PRIx64 ">\n", cl_hton64(opt.subnet_prefix)); + break; + + case 'k': + sm_key = cl_hton64(strtoull(optarg, NULL, 16)); + printf(" SM Key <0x%" PRIx64 ">\n", cl_hton64(sm_key)); + opt.sm_key = sm_key; + break; + + case 'R': + SET_STR_OPT(opt.routing_engine_names, optarg); + printf(" Activate \'%s\' routing engine(s)\n", optarg); + break; + + case 'z': + opt.connect_roots = TRUE; + printf(" Connect roots option is on\n"); + break; + + case 'A': + opt.use_ucast_cache = TRUE; + printf(" Unicast routing cache option is on\n"); + break; + + case 'M': + SET_STR_OPT(opt.lid_matrix_dump_file, optarg); + printf(" Lid matrix dump file is \'%s\'\n", optarg); + break; + + case 'U': + SET_STR_OPT(opt.lfts_file, optarg); + printf(" LFTs file is \'%s\'\n", optarg); + break; + + case 'S': + SET_STR_OPT(opt.sa_db_file, optarg); + printf(" SA DB file is \'%s\'\n", optarg); + break; + + case 'a': + SET_STR_OPT(opt.root_guid_file, optarg); + printf(" 
Root Guid File: %s\n", opt.root_guid_file); + break; + + case 'u': + SET_STR_OPT(opt.cn_guid_file, optarg); + printf(" Compute Node Guid File: %s\n", + opt.cn_guid_file); + break; + + case 'G': + SET_STR_OPT(opt.io_guid_file, optarg); + printf(" I/O Node Guid File: %s\n", opt.io_guid_file); + break; + case 11: + opt.port_shifting = TRUE; + printf(" Port Shifting is on\n"); + break; + case 14: + opt.scatter_ports = strtol(optarg, NULL, 0); + printf(" Scatter Ports is on\n"); + break; + case 'H': + opt.max_reverse_hops = atoi(optarg); + printf(" Max Reverse Hops: %d\n", opt.max_reverse_hops); + break; + case 'm': + SET_STR_OPT(opt.ids_guid_file, optarg); + printf(" IDs Guid File: %s\n", opt.ids_guid_file); + break; + + case 'X': + SET_STR_OPT(opt.guid_routing_order_file, optarg); + printf(" GUID Routing Order File: %s\n", + opt.guid_routing_order_file); + break; + + case 'x': + opt.honor_guid2lid_file = TRUE; + printf(" Honor guid2lid file, if possible\n"); + break; + + case 'B': + opt.daemon = TRUE; + printf(" Daemon mode\n"); + break; + + case 'I': + opt.sm_inactive = TRUE; + printf(" SM started in inactive state\n"); + break; + +#ifdef ENABLE_OSM_PERF_MGR + case 1: + opt.perfmgr = TRUE; + break; + case 2: + opt.perfmgr_sweep_time_s = atoi(optarg); + break; +#endif /* ENABLE_OSM_PERF_MGR */ + + case 3: + SET_STR_OPT(opt.prefix_routes_file, optarg); + break; + case 4: + opt.consolidate_ipv6_snm_req = TRUE; + break; + case 5: + opt.do_mesh_analysis = TRUE; + break; + case 6: + temp = strtoul(optarg, NULL, 0); + if (temp >= IB_MAX_NUM_VLS) { + fprintf(stderr, + "ERROR: starting lash vl must be between 0 and 15\n"); + return -1; + } + opt.lash_start_vl = (uint8_t) temp; + printf(" LASH starting VL = %d\n", opt.lash_start_vl); + break; + case 7: + temp = strtoul(optarg, NULL, 0); + if (temp > 15) { + fprintf(stderr, + "ERROR: SM's SL must be between 0 and 15\n"); + return -1; + } + opt.sm_sl = (uint8_t) temp; + printf(" SMSL = %d\n", opt.sm_sl); + break; + case 8: + 
opt.transaction_retries = strtoul(optarg, NULL, 0); + printf(" Transaction retries = %u\n", + opt.transaction_retries); + break; + case 9: + SET_STR_OPT(opt.log_prefix, optarg); + printf("Log prefix = %s\n", opt.log_prefix); + break; + case 10: + SET_STR_OPT(opt.torus_conf_file, optarg); + printf("Torus-2QoS config file = %s\n", opt.torus_conf_file); + break; + case 13: + opt.guid_routing_order_no_scatter = TRUE; + break; + case 15: + temp = strtoul(optarg, NULL, 0); + if (temp >= IB_MAX_NUM_VLS) { + fprintf(stderr, + "ERROR: maximum #VLs for nue routing must be between 0 and %d\n", + IB_MAX_NUM_VLS); + return -1; + } + opt.nue_max_num_vls = (uint8_t) temp; + printf(" Nue maximum #VLs = %d\n", opt.nue_max_num_vls); + break; + case 17: + SET_STR_OPT(opt.dump_files_dir, optarg); + break; + case 'h': + case '?': + case ':': + show_usage(); + break; + + case -1: + break; /* done with option */ + default: /* something wrong */ + abort(); + } + } while (next_option != -1); + + if (opt.log_file != NULL) + printf(" Log File: %s\n", opt.log_file); + /* Done with options description */ + printf("-------------------------------------------------\n"); + + if (conf_template) { + status = osm_subn_write_conf_file(conf_template, &opt); + if (status) + printf("\nosm_subn_write_conf_file failed!\n"); + exit(status); + } + + osm_subn_verify_config(&opt); + + if (vendor_debug) + osm_vendor_set_debug(osm.p_vendor, vendor_debug); + + block_signals(); + + if (opt.daemon) { + if (INVALID_GUID == opt.guid) { + fprintf(stderr, + "ERROR: Invalid GUID specified; exiting because of daemon mode\n"); + return -1; + } + daemonize(&osm); + } + + if (complib_init_v2() != CL_SUCCESS) { + printf("\ncomplib_init_v2 error\n"); + return -1; + } + + status = osm_opensm_init(&osm, &opt); + if (status != IB_SUCCESS) { + const char *err_str = ib_get_err_str(status); + if (err_str == NULL) + err_str = "Unknown Error Type"; + printf("\nError from osm_opensm_init: %s.\n", err_str); + /* We will just exit, and 
not go to Exit, since we don't + want the destroy to be called. */ + complib_exit(); + return status; + } + + /* + If the user didn't specify a GUID on the command line, + then get a port GUID value with which to bind. + */ + if (opt.guid == 0 || cl_hton64(opt.guid) == CL_HTON64(INVALID_GUID)) + opt.guid = get_port_guid(&osm, opt.guid); + + if (opt.guid == 0) + goto Exit2; + + status = osm_opensm_init_finish(&osm, &opt); + if (status != IB_SUCCESS) { + const char *err_str = ib_get_err_str(status); + if (err_str == NULL) + err_str = "Unknown Error Type"; + printf("\nError from osm_opensm_init_finish: %s.\n", err_str); + goto Exit2; + } + + status = osm_opensm_bind(&osm, opt.guid); + if (status != IB_SUCCESS) { + printf("\nError from osm_opensm_bind (0x%X)\n", status); + printf + ("Perhaps another instance of OpenSM is already running\n"); + goto Exit; + } + + setup_signals(); + + osm_opensm_sweep(&osm); + + if (run_once_flag == TRUE) { + while (!osm_exit_flag) { + status = + osm_opensm_wait_for_subnet_up(&osm, + osm.subn.opt. + sweep_interval * + 1000000, TRUE); + if (!status) + osm_exit_flag = 1; + } + } else { + /* + * Sit here until signaled to exit + */ + osm_manager_loop(&opt, &osm); + } + + if (osm.mad_pool.mads_out) { + fprintf(stdout, + "There are still %u MADs out. Forcing the exit of the OpenSM application...\n", + osm.mad_pool.mads_out); +#ifdef HAVE_LIBPTHREAD + pthread_cond_signal(&osm.stats.cond); +#else + cl_event_signal(&osm.stats.event); +#endif + } + +Exit: + osm_opensm_destroy(&osm); +Exit2: + osm_opensm_destroy_finish(&osm); + complib_exit(); + remove_pidfile(); + + exit(0); +} diff --git a/opensm/osm_check b/opensm/osm_check new file mode 100755 index 0000000..3f30c3c --- /dev/null +++ b/opensm/osm_check @@ -0,0 +1,282 @@ +#!/usr/bin/perl -W +#!/usr/bin/perl -W +# +# +# Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. +# Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. 
+# Copyright (c) 1996-2003 Intel Corporation. All rights reserved. +# +# This software is available to you under a choice of one of two +# licenses. You may choose to be licensed under the terms of the GNU +# General Public License (GPL) Version 2, available from the file +# COPYING in the main directory of this source tree, or the +# OpenIB.org BSD license below: +# +# Redistribution and use in source and binary forms, with or +# without modification, are permitted provided that the following +# conditions are met: +# +# - Redistributions of source code must retain the above +# copyright notice, this list of conditions and the following +# disclaimer. +# +# - Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# +# +# Abstract: +# Perl script for simple source code error checking. +# +# Environment: +# Linux User Mode +# +# $Revision: 1.4 $ +# +# +# +# DESCRIPTION: +# +# This script performs some simple conformance checks on the +# OpenSM source code. It does NOT attempt to act like a full +# blown 'C' language parser, so it can be fooled. Something +# is better than nothing. Running the 'osm_indent' script before +# running this script will increase your chances of catching +# problems. 
#
#
# The following checks are performed:
# 1) Verify that the function name provided in a log statement
#    matches the name of the current function.
#
# 2) Verify that log statements are in the form that this script
#    can readily parse.  Improvements to the regular expressions
#    might make this unnecessary.
#
# 3) Verify that lower two digits of the error codes used in log
#    statements are unique within that file.
#
# 4) Verify that upper two digits of the error codes used in log
#    statements are not used by any other module.
#
# USAGE:
#
# In the OpenSM source directory, type:
# osm_check.pl *.c
#

# Do necessary upfront initialization
$verbose = 0;
$in_c_comment = 0;
# Name of the function currently being scanned; starts empty so that
# comparisons made before the first function is seen do not operate on
# an undefined value (the script runs with -W).
$current_func = "";

if( !exists $ARGV[0] )
{
    print "ERROR: You must specify the files on which to operate, such as '*.c'\n";
    osm_check_usage();
    exit;
}

# loop through all the command line options
do
{
    $doing_params = 0;

    # First, look for command line options.
    # The pattern is anchored so that only an argument that is exactly
    # -v or -V counts as the flag; a file name that merely contains
    # "-v" is left alone.
    if( $ARGV[0] =~ /^-[vV]$/ )
    {
        $verbose += 1;
        shift;
        print "Verbose mode on, level = $verbose.\n";
        $doing_params = 1;
    }

    if( !exists $ARGV[0] )
    {
        print "ERROR: You must specify the files on which to operate, such as '*.c'\n";
        osm_check_usage();
        exit;
    }
}while( $doing_params == 1 );

LINE: while( <> )
{
    # Skip C single line C style comments
    # This line must come before the multi-line C comment check!
    if( /\/\*.*\*\// )
    {
        $in_c_comment = 0;
        next LINE;
    }

    # skip multi-line C style comments
    if( /\/\*/ )
    {
        $in_c_comment = 1;
        next LINE;
    }

    # end skipping of multi-line C style comments
    if( /\*\// )
    {
        $in_c_comment = 0;
        next LINE;
    }

    # We're still in a C comment, so ignore input
    if( $in_c_comment == 1 )
    {
        next LINE;
    }


    # skip C++ style comment lines
    if( /^\s*\/\// )
    {
        next LINE;
    }

    # check for bad PRIx64 usage
    # It's a common mistake to forget the % before the PRIx64
    if( /[^%]\"\s*PRIx64/ )
    {
        print "No % sign before PRIx64!!: $ARGV $.\n";
    }

    # This simple script doesn't handle checking PRIx64 usage
    # when PRIx64 starts the line.  Just give a warning.
    if( /^\s*PRIx64/ )
    {
        print "Warning: PRIx64 at start of line.  $ARGV $.\n";
    }

    # Attempt to locate function names.
    # Function names must start on the beginning of the line.
    if( /^(\w+)\s*\(/ )
    {
        $current_func = $1;
        # Shown at every verbosity level, not only at exactly 1.
        if( $verbose >= 1 )
        {
            print "Processing $ARGV: $current_func\n";
        }
    }

    # Attempt to find OSM_LOG_ENTER entries.
    # When found, verify that the function name provided matches
    # the actual function.
    if( /OSM_LOG_ENTER\s*\(\s*([\-\.\>\w]+)\s*,\s*(\w+)\s*\)/ )
    {
        $log_func = $2;
        if( $current_func ne $log_func )
        {
            # print, not printf: the message embeds a file name and must
            # not be interpreted as a format string.
            print "MISMATCH!! $ARGV $.: $current_func != $log_func\n";
        }
    }

    # Check for non-conforming log statements.
    # Log statements must not start the log string on the same line
    # as the osm_log function itself.
    # Watch out for the #include "osm_log.h" statement as a false positive.
    if( /osm_log\s*\(.*\"/ )
    {
        print "NON-CONFORMING LOG STATEMENT!! $ARGV $.\n";
    }

    # Attempt to find osm_log entries.
    if( /^\s*\"(\w+):/ )
    {
        $log_func = $1;
        if( $current_func ne $log_func )
        {
            print "MISMATCHED LOG FUNCTION!! $ARGV $.: $current_func != $log_func\n";
        }
    }

    # Error logging must look like 'ERR 1234:'
    # The upper two digits are error range assigned to that module.
    # The lower two digits are the error code itself.
    # Error codes are in hexadecimal.
    if( /ERR(\s+)([0-9a-fA-F]{2})([0-9a-fA-F]{2})(..)/ )
    {
        # Save the captures right away so later code does not depend on
        # no other pattern match having happened in between.
        ($err_gap, $err_range, $err_code, $err_tail) = ($1, $2, $3, $4);

        # Check if we already established the error prefix for this module
        $err_prefix = $module_err_prefixes{$ARGV};
        if( $err_prefix )
        {
            if( $err_prefix ne $err_range )
            {
                print "BAD ERR RANGE IN LOG ENTRY!! $ARGV $.: $current_func\n";
                print "\tExpected $err_prefix but found $err_range\n";
            }
        }
        else
        {
            # Create a new prefix for this module.
            $module_err_prefixes{$ARGV} = $err_range;
        }

        $err_base = $module_err_bases{$err_code};
        if( $err_base )
        {
            print "DUPLICATE ERR NUMBER IN LOG ENTRY!! $ARGV $.: $current_func: $err_code\n";
            print "\tPrevious use on line $err_base.\n";
        }
        else
        {
            # Add this error code to the list used by this module
            # The data stored is the line number on which it is used.
            $module_err_bases{$err_code} = $.;
            if( $verbose > 1 )
            {
                # Report the full 4-digit code (range + code).
                print "Adding new error: $err_range$err_code in $ARGV.\n";
            }
        }

        if( $err_tail ne ": " )
        {
            print "MALFORMED LOG STATEMENT!!  NEEDS ': ' $ARGV $.\n";
        }

        if( $err_gap ne " " )
        {
            print "USE ONLY 1 SPACE AFTER ERR!! $ARGV $.\n";
        }
    }

    # verify expected use of sizeof() with pointers
    if( /sizeof\s*\(\s*[hp]_/ )
    {
        print "SUSPICIOUS USE OF SIZEOF(), DO YOU NEED AN '*' $ARGV $.\n";
    }


}
continue
{
    # reset the module base error index when we finished out
    # each source file.
    if( eof )
    {
        # reset the base error value, since each module can
        # repeat this range.
        %module_err_bases = ();
        # closing the file here resets the line number with each new file
        close ARGV;
    }
}

# Print a short command-line synopsis.
sub osm_check_usage
{
    print "Usage:\n";
    print "osm_check.pl [-v|V] \n";
    print "[-v|V] - enable verbose mode.\n\n";
}
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+#########################################################################
+#
+#  Abstract:
+#	Perl script for simple source code error checking and fixing
+#
+#  Environment:
+#	Linux User Mode
+#
+#  Author:
+#	Eitan Zahavi, Mellanox Technologies LTD Yokneam Israel.
+#
+#  $Revision: 1.4 $
+#
+#
+#
+# DESCRIPTION:
+#
+# This script performs some simple conformance checks on the
+# OpenSM source code.  It does NOT attempt to act like a full
+# blown 'C' language parser, so it can be fooled.  Something
+# is better than nothing.
+#
+# The script starts by running the 'osm_indent' script on the given files.
+#
+# We use an extra file for tracking error codes used by each file.
+# The name is osm_error_codes.
+#
+# The following checks are performed:
+# 1) Verify that the function name provided in a log statement
+# matches the name of the current function.
+#
+# 2) Verify that log statements are in the form that this script
+# can readily parse.  Improvements to the regular expressions
+# might make this unnecessary.
+#
+# 3) Verify that lower two digits of the error codes used in log
+# statements are unique within that file.
+#
+# 4) Verify that upper two digits of the error codes used in log
+# statements are not used by any other module.
+#
+# 5) Verify the lines do not have extra spaces.
+# +# USAGE: +# +# In the OpenSM source directory, type: +# osm_check_n_fix -f *.c +# +######################################################################### + +# Do necessary upfront initialization +$verbose = 0; +$in_c_comment = 0; +$fix_mode = 0; +$confirm_mode = 0; +$re_assign_err_prefix = 0; + +if( !scalar(@ARGV) ) +{ + print "ERROR: You must specify the files on which to operate, such as '*.c'\n"; + osm_check_usage(); + exit; +} + +# loop through all the command line options +do +{ + $doing_params = 0; + + # First, look for command line options. + if( $ARGV[0] =~ /-[v|V]/ ) + { + $verbose += 1; + shift; + print "Verbose mode on, level = $verbose.\n"; + $doing_params = 1; + } + + if( $ARGV[0] =~ /(-f|--fix)/ ) + { + $fix_mode += 1; + shift; + print "Fix mode on.\n"; + $doing_params = 1; + } + + if( $ARGV[0] =~ /(-c|--confirm)/ ) + { + $confirm_mode += 1; + shift; + print "Confirm mode on.\n"; + $doing_params = 1; + } + + if( $ARGV[0] =~ /(-r|--re-assign-mod-err-prefix)/ ) + { + $re_assign_err_prefix += 1; + shift; + print "Allow Re-Assignment of Module Err Prefixes.\n"; + $doing_params = 1; + } + + if( !scalar(@ARGV)) + { + print "ERROR: You must specify the files on which to operate, such as '*.c'\n"; + osm_check_usage(); + exit; + } +} while( $doing_params == 1 ); + +# parse the osm_error_codes file and define: +# module_by_prefix +# module_err_prefixes +# module_last_err_used +if (open(ERRS, "; + close(ERRS); + foreach $errDef (@ERR_DEFS) { + # the format should be + if ($errDef =~ m/^(\S+)\s+(\S+)\s+([0-9]+)$/) { + ($file_name,$mod_prefix,$last_err) = ($1,$2,$3); + if (defined($module_by_prefix{$mod_prefix})) { + print "ERROR: Double module prefix:$mod_prefix on:$module_by_prefix($mod_prefix) and $file_name\n"; + exit 3; + } + $module_by_prefix{$mod_prefix} = $file_name; + $module_err_prefixes{$file_name} = $mod_prefix; + $module_last_err_used{$file_name} = $last_err; + } else { + print "ERROR: Fail to parse sm_error_codes: $errDef\n"; + exit 3; + } + } 
+} + +# do a file by file read into memory so we can tweek it: +foreach $file_name (@ARGV) { + print "- $file_name ----------------------------------------------------\n"; + # first step is to run indent + $res=`osm_indent $file_name`; + + open(INFILE, "<$file_name") || die("Fail to open $file_name"); + @LINES = ; + close(INFILE); + $any_fix = 0; + $needed_fixing = 0; + $need_indentation = 0; + + LINE: for ($line_num = 0; $line_num \w]+)\s*,\s*(\w+)\s*\)/ ) { + $log_func = $2; + if( $current_func ne $log_func ) { + printf "MISMATCH!! $file_name $line_num: $current_func != $log_func\n"; + $needed_fixing++; + if ($fix_mode) { + $line =~ + s/OSM_LOG_ENTER\s*\(\s*([\-\.\>\w]+)\s*,\s*(\w+)\s*\)/OSM_LOG_ENTER( $1, $current_func )/; + if (confirm_change($line, $LINES[$line_num])) { + $LINES[$line_num] = $line; + $any_fix++; + } + } + } + } + + # Check for non-conforming log statements. + # Log statements must not start the log string on the same line + # as the osm_log function itself. + # Watch out for the #include "osm_log.h" statement as a false positive. + if (/osm_log\s*\(.*OSM_.*\"/ ) { + if (/Format Waved/) { + print "Skipping log format waiver at $file_name $line_num\n"; + } else { + print "NON-CONFORMING LOG STATEMENT!! $file_name $line_num\n"; + $needed_fixing++; + if ($fix_mode) { + print "Fatal: can not auto fix\n"; + exit 1; + } + } + } + + # Attempt to find osm_log entries. + if( /^\s*\"(\w+):/ ) + { + $log_func = $1; + if( $current_func ne $log_func ) + { + print "MISMATCHED LOG FUNCTION!! $file_name $line_num: $current_func != $log_func\n"; + $needed_fixing++; + if ($fix_mode) { + $line =~ + s/^(\s*)\"(\w+):/$1\"$current_func:/; + if (confirm_change($line, $LINES[$line_num])) { + $LINES[$line_num] = $line; + $any_fix++; + } + } + } + } + + # Error logging must look like 'ERR 1234:' + # The upper two digits are error range assigned to that module. + # The lower two digits are the error code itself. + # Error codes are in hexadecimal. 
+ if( /ERR(\s+)([0-9a-fA-F]{2})([0-9a-fA-F]{2})(..)/ ) + { + # track any error for this exp: + $exp_err = 0; + + # the parsed prefix and err code: + ($found_prefix,$found_code) = ($2,$3); + + # Check if we already established the error prefix for this module + $err_prefix = $module_err_prefixes{$file_name}; + + # err prefix is not available for this file + if ( ! $err_prefix ) { + # make sure no other file uses this prefix: + if ($module_by_prefix{$found_prefix}) { + # some other file uses that prefix: + + # two modes: either use a new one or abort + if ($re_assign_err_prefix) { + # scan the available module prefixes for an empty one: + $found = 0; + for ($new_prefix_idx = 1; $found == 0; $new_prefix_idx++) { + $prefix = sprintf("%02X", $new_prefix_idx); + if (!defined($module_by_prefix{$prefix})) { + $module_err_prefixes{$file_name} = $prefix; + $module_by_prefix{$prefix} = $file_name; + $found = 1; + } + $exp_err = 1; + } + } else { + print "Fatal: File $module_by_prefix{$2} already uses same prefix:$2 used by: $file_name (line=$line_num)\n"; + exit 1; + } + } else { + # the prefix found is unused: + + # Create a new prefix for this module. + $module_err_prefixes{$file_name} = $found_prefix; + $module_by_prefix{$found_prefix} = $file_name; + $err_prefix = $found_prefix; + } + } else { + # we already have a prefix for this file + + if( $err_prefix ne $found_prefix ) + { + $needed_fixing++; + print "BAD ERR RANGE IN LOG ENTRY!! $file_name $line_num: $current_func\n"; + print "\tExpected $err_prefix but found $found_prefix\n"; + $exp_err = 1; + } + } + + # now check for code duplicates + $err_base = $module_err_bases{$found_code}; + if( $err_base ) { + $needed_fixing++; + print "DUPLICATE ERR NUMBER IN LOG ENTRY!! 
$file_name $line_num: $current_func: $3\n"; + print "\tPrevious use on line $err_base.\n"; + + # use the last error code for this module: + $module_last_err_used{$file_name}++; + $err_code = sprintf("%02X", $module_last_err_used{$file_name}); + print "\tUsing new err code:0x$err_code ($module_last_err_used{$file_name})\n"; + $module_err_bases{$err_code} = $line_num; + $exp_err = 1; + } else { + # Add this error code to the list used by this module + # The data stored in the line number on which it is used. + $module_err_bases{$found_code} = $line_num; + # track the last code used + $err_code_num = eval("0x$found_code"); + if ($module_last_err_used{$file_name} < $err_code_num) { + $module_last_err_used{$file_name} = $err_code_num; + } + $err_code = $found_code; + + if( $verbose > 1 ) { + print "Adding new error: $err_prefix$found_code in $file_name.\n"; + } + } + + if( $4 ne ": " ) { + $needed_fixing++; + print "MALFORMED LOG STATEMENT!! NEEDS ': ' $file_name $line_num\n"; + $exp_err = 1; + } + + if( $1 ne " " ) + { + $needed_fixing++; + print "USE ONLY 1 SPACE AFTER ERR!! $file_name $line_num\n"; + $exp_err = 1; + } + + if ($exp_err && $fix_mode) { + $line =~ + s/ERR(\s+)([0-9a-fA-F]{2})([0-9a-fA-F]{2})([^\"]*\")/ERR ${err_prefix}$err_code: \" /; + if (confirm_change($line, $LINES[$line_num])) { + $LINES[$line_num] = $line; + $any_fix++; + } + } + } + + # verify expected use of sizeof() with pointers + if( /sizeof\s*\(\s*[h|p]_[^-]+\)/ ) + { + print "SUSPICIOUS USE OF SIZEOF(), DO YOU NEED AN '*' $file_name $line_num\n"; + $needed_fixing++; + if ($fix_mode) { + $line =~ + s/sizeof\s*\(\s*([h|p])_/sizeof \(*$1_/; + if (confirm_change($line, $LINES[$line_num])) { + $LINES[$line_num] = $line; + $any_fix++; + } + } + } + } + + # reset the base error value, since each module can + # repeat this range. 
+    %module_err_bases = ();
+
+    # if any fix write out the fixed file:
+    if ($any_fix) {
+      # Abort instead of silently dropping every accepted fix when the
+      # output file cannot be created.
+      open(OF,">$file_name.fix") || die("Fail to open $file_name.fix");
+      print OF @LINES;
+      close(OF);
+    } elsif ($needed_fixing) {
+      print "Found $needed_fixing Errors on file: $file_name\n";
+    }
+}
+
+# write out the error codes.
+# module_by_prefix
+# module_err_prefixes
+# module_last_err_used
+# One line per file: "<file name> <module prefix> <last error code used>".
+open(ERRS,">osm_error_codes") || die("Fail to open osm_error_codes");
+foreach $fn (sort(keys(%module_err_prefixes))) {
+  print ERRS "$fn $module_err_prefixes{$fn} $module_last_err_used{$fn}\n";
+}
+close(ERRS);
+
+# Print the command line synopsis and the meaning of every option.
+sub osm_check_usage
+{
+    print "Usage:\n";
+    print "osm_check_n_fix [-v|V] [-f|--fix] [-c|--confirm] [-r|--re-assign-mod-err-prefix] <file list>\n";
+    print "[-v|V] - enable verbose mode.\n";
+    print "[-f|--fix] - enable auto fix mode.\n";
+    print "[-c|--confirm] - enable manual confirmation mode.\n";
+    print "[-r|--re-assign-mod-err-prefix] - enables re-assign error prefixes if the file does not have one.\n";
+}
+
+# Show a proposed change and decide whether it should be applied.
+#   $line      - the rewritten line
+#   $orig_line - the line as currently stored
+# Reads the globals $confirm_mode and $line_num.
+# Returns 1 to apply the change, 0 to reject it.  Without confirm mode the
+# change is only printed and always accepted; in confirm mode an empty
+# answer or "y" accepts, anything else rejects.
+sub confirm_change {
+  my ($line, $orig_line) = @_;
+  if ($confirm_mode) {
+    # enable autoflush before prompting so the question is visible
+    # before we block on the answer
+    $| = 1;
+    print "In Line:".($line_num + 1)."\n";
+    print "From: ${orig_line}To:   ${line}Ok [y] ?";
+    $ans = <STDIN>;
+    # end of input behaves like an empty answer (accept)
+    $ans = "" unless defined($ans);
+    chomp $ans;
+
+    if ($ans && $ans ne "y") {
+      return 0;
+    }
+  } else {
+    print "From: ${orig_line}To:   ${line}";
+  }
+  return 1;
+}
diff --git a/opensm/osm_congestion_control.c b/opensm/osm_congestion_control.c
new file mode 100644
index 0000000..74a35c6
--- /dev/null
+++ b/opensm/osm_congestion_control.c
@@ -0,0 +1,807 @@
+/*
+ * Copyright (c) 2006-2009 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2009 HNR Consulting. All rights reserved.
+ * Copyright (c) 2012 Lawrence Livermore National Lab.  All rights reserved.
+ * Copyright (c) 2014 Mellanox Technologies LTD. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ */
+
+/*
+ * Abstract:
+ *    OSM Congestion Control configuration implementation
+ *
+ * Author:
+ *    Albert Chu, LLNL
+ */
+
+#if HAVE_CONFIG_H
+#  include
+#endif				/* HAVE_CONFIG_H */
+
+#include
+#include
+
+#include
+#include
+#include
+#define FILE_ID OSM_FILE_CONGESTION_CONTROL_C
+#include
+#include
+#include
+#include
+#include
+
+/* Value the transaction ID counter (p_cc->trans_id) is initialised with. */
+#define CONGESTION_CONTROL_INITIAL_TID_VALUE 0x7A93
+
+/*
+ * Finish a Congestion Control Set MAD and hand it to the poller thread.
+ *
+ * Fills in the CC MAD header (class IB_MCLASS_CC, class version 2, method
+ * Set), the CC key and the destination address of p_madw, records the
+ * request context (node/port GUID, port number, method, attribute
+ * modifier), appends the MAD to p_cc->mad_queue and signals
+ * cc_poller_wakeup.  The attribute payload must already have been written
+ * by the caller.
+ *
+ * p_cc      congestion control object owning the queue and counters
+ * p_madw    MAD wrapper to complete and queue
+ * p_node    destination node (used for the LID and the node GUID)
+ * p_physp   destination physical port (used for port number and GUID)
+ * attr_id   attribute ID, stored in the header as given
+ * attr_mod  attribute modifier, stored in the header as given
+ */
+static void cc_mad_post(osm_congestion_control_t *p_cc,
+			osm_madw_t *p_madw,
+			osm_node_t *p_node,
+			osm_physp_t *p_physp,
+			ib_net16_t attr_id,
+			ib_net32_t attr_mod)
+{
+	osm_subn_opt_t *p_opt = &p_cc->subn->opt;
+	ib_cc_mad_t *p_cc_mad;
+	uint8_t port;
+
+	OSM_LOG_ENTER(p_cc->log);
+
+	port = osm_physp_get_port_num(p_physp);
+
+	p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
+
+	p_cc_mad->header.base_ver = 1;
+	p_cc_mad->header.mgmt_class = IB_MCLASS_CC;
+	p_cc_mad->header.class_ver = 2;
+	p_cc_mad->header.method = IB_MAD_METHOD_SET;
+	p_cc_mad->header.status = 0;
+	p_cc_mad->header.class_spec = 0;
+	/* transaction ID: next counter value, limited to the low 32 bits */
+	p_cc_mad->header.trans_id =
+		cl_hton64((uint64_t) cl_atomic_inc(&p_cc->trans_id) &
+			  (uint64_t) (0xFFFFFFFF));
+	/* never send a transaction ID of 0: take the next counter value */
+	if (p_cc_mad->header.trans_id == 0)
+		p_cc_mad->header.trans_id =
+			cl_hton64((uint64_t) cl_atomic_inc(&p_cc->trans_id) &
+				  (uint64_t) (0xFFFFFFFF));
+	p_cc_mad->header.attr_id = attr_id;
+	p_cc_mad->header.resv = 0;
+	p_cc_mad->header.attr_mod = attr_mod;
+
+	p_cc_mad->cc_key = p_opt->cc_key;
+
+	memset(p_cc_mad->log_data, '\0', IB_CC_LOG_DATA_SIZE);
+
+	/* address the MAD to QP1 of the target port's base LID */
+	p_madw->mad_addr.dest_lid = osm_node_get_base_lid(p_node, port);
+	p_madw->mad_addr.addr_type.gsi.remote_qp = IB_QP1;
+	p_madw->mad_addr.addr_type.gsi.remote_qkey =
+		cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
+	p_madw->resp_expected = TRUE;
+	p_madw->fail_msg = CL_DISP_MSGID_NONE;
+
+	/* context read back by the receive and send-error callbacks */
+	p_madw->context.cc_context.node_guid = osm_node_get_node_guid(p_node);
+	p_madw->context.cc_context.port_guid = osm_physp_get_port_guid(p_physp);
+	p_madw->context.cc_context.port = port;
+	p_madw->context.cc_context.mad_method =
IB_MAD_METHOD_SET;
+	p_madw->context.cc_context.attr_mod = attr_mod;
+
+	/* count the MAD as outstanding and queue it for the poller thread */
+	cl_spinlock_acquire(&p_cc->mad_queue_lock);
+	cl_atomic_inc(&p_cc->outstanding_mads);
+	cl_qlist_insert_tail(&p_cc->mad_queue, &p_madw->list_item);
+	cl_spinlock_release(&p_cc->mad_queue_lock);
+
+	cl_event_signal(&p_cc->cc_poller_wakeup);
+
+	OSM_LOG_EXIT(p_cc->log);
+}
+
+/*
+ * Translate the congestion control options of the subnet into the MAD
+ * payloads kept in the congestion control object: sw_cong_setting,
+ * ca_cong_setting and the cc_tbl blocks (together with cc_tbl_mads, the
+ * number of table blocks to send).  Nothing is sent from here.
+ */
+static void cc_setup_mad_data(osm_sm_t * p_sm)
+{
+	osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc;
+	osm_subn_opt_t *p_opt = &p_sm->p_subn->opt;
+	uint16_t ccti_limit;
+	unsigned i;
+
+	/* Switch Congestion Setting */
+	p_cc->sw_cong_setting.control_map = p_opt->cc_sw_cong_setting_control_map;
+
+	memcpy(p_cc->sw_cong_setting.victim_mask,
+	       p_opt->cc_sw_cong_setting_victim_mask,
+	       IB_CC_PORT_MASK_DATA_SIZE);
+
+	memcpy(p_cc->sw_cong_setting.credit_mask,
+	       p_opt->cc_sw_cong_setting_credit_mask,
+	       IB_CC_PORT_MASK_DATA_SIZE);
+
+	/* threshold is 4 bits, takes up upper nibble of byte */
+	p_cc->sw_cong_setting.threshold_resv = (p_opt->cc_sw_cong_setting_threshold << 4);
+
+	p_cc->sw_cong_setting.packet_size = p_opt->cc_sw_cong_setting_packet_size;
+
+	/* cs threshold is 4 bits, takes up upper nibble of short */
+	p_cc->sw_cong_setting.cs_threshold_resv =
+		cl_hton16(p_opt->cc_sw_cong_setting_credit_starvation_threshold << 12);
+
+	/* shift goes into the top two bits, multiplier into the bits below */
+	p_cc->sw_cong_setting.cs_return_delay =
+		cl_hton16(p_opt->cc_sw_cong_setting_credit_starvation_return_delay.shift << 14
+			  | p_opt->cc_sw_cong_setting_credit_starvation_return_delay.multiplier);
+
+	p_cc->sw_cong_setting.marking_rate = p_opt->cc_sw_cong_setting_marking_rate;
+
+	/* CA Congestion Setting */
+	p_cc->ca_cong_setting.port_control = p_opt->cc_ca_cong_setting_port_control;
+	p_cc->ca_cong_setting.control_map = p_opt->cc_ca_cong_setting_control_map;
+
+	/* copy every configured entry, clearing the reserved fields */
+	for (i = 0; i < IB_CA_CONG_ENTRY_DATA_SIZE; i++) {
+		ib_ca_cong_entry_t *p_entry;
+
+		p_entry = &p_cc->ca_cong_setting.entry_list[i];
+
+		p_entry->ccti_timer = p_opt->cc_ca_cong_entries[i].ccti_timer;
+		p_entry->ccti_increase =
p_opt->cc_ca_cong_entries[i].ccti_increase;
+		p_entry->trigger_threshold = p_opt->cc_ca_cong_entries[i].trigger_threshold;
+		p_entry->ccti_min = p_opt->cc_ca_cong_entries[i].ccti_min;
+		p_entry->resv0 = 0;
+		p_entry->resv1 = 0;
+	}
+
+	/* Congestion Control Table */
+
+	/* if no entries, we will always send at least 1 mad to set ccti_limit = 0 */
+	if (!p_opt->cc_cct.entries_len)
+		p_cc->cc_tbl_mads = 1;
+	else {
+		/* entries_len divided by IB_CC_TBL_ENTRY_LIST_MAX, rounded up */
+		p_cc->cc_tbl_mads = p_opt->cc_cct.entries_len - 1;
+		p_cc->cc_tbl_mads /= IB_CC_TBL_ENTRY_LIST_MAX;
+		p_cc->cc_tbl_mads += 1;
+	}
+
+	CL_ASSERT(p_cc->cc_tbl_mads <= OSM_CCT_ENTRY_MAD_BLOCKS);
+
+	/* ccti_limit is the index of the last configured entry */
+	if (!p_opt->cc_cct.entries_len)
+		ccti_limit = 0;
+	else
+		ccti_limit = p_opt->cc_cct.entries_len - 1;
+
+	for (i = 0; i < p_cc->cc_tbl_mads; i++) {
+		int j;
+
+		p_cc->cc_tbl[i].ccti_limit = cl_hton16(ccti_limit);
+		p_cc->cc_tbl[i].resv = 0;
+
+		memset(p_cc->cc_tbl[i].entry_list,
+		       '\0',
+		       sizeof(p_cc->cc_tbl[i].entry_list));
+
+		/*
+		 * NOTE(review): ccti_limit is 0 both for an empty table and
+		 * for a table with exactly one entry, so a single configured
+		 * entry is sent as zeroes - confirm this is intended.
+		 */
+		if (!ccti_limit)
+			break;
+
+		for (j = 0; j < IB_CC_TBL_ENTRY_LIST_MAX; j++) {
+			int k;
+
+			/* k: index of the entry in the flat option table */
+			k = (i * IB_CC_TBL_ENTRY_LIST_MAX) + j;
+			p_cc->cc_tbl[i].entry_list[j].shift_multiplier =
+				cl_hton16(p_opt->cc_cct.entries[k].shift << 14
+					  | p_opt->cc_cct.entries[k].multiplier);
+		}
+	}
+}
+
+/*
+ * Queue an IB_MAD_ATTR_SW_CONG_SETTING Set for port 0 of p_node.
+ *
+ * Nothing is sent when no update is forced (neither the port nor the
+ * subnet has need_update set) and the setting cached on the port already
+ * equals p_cc->sw_cong_setting.
+ *
+ * Returns IB_SUCCESS when the MAD was queued or nothing had to be sent,
+ * IB_INSUFFICIENT_MEMORY when no MAD could be taken from the pool.
+ */
+static ib_api_status_t cc_send_sw_cong_setting(osm_sm_t * p_sm,
+					       osm_node_t *p_node)
+{
+	osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc;
+	unsigned force_update;
+	osm_physp_t *p_physp;
+	osm_madw_t *p_madw = NULL;
+	ib_cc_mad_t *p_cc_mad = NULL;
+	ib_sw_cong_setting_t *p_sw_cong_setting = NULL;
+
+	OSM_LOG_ENTER(p_sm->p_log);
+
+	p_physp = osm_node_get_physp_ptr(p_node, 0);
+
+	force_update = p_physp->need_update || p_sm->p_subn->need_update;
+
+	if (!force_update
+	    && !memcmp(&p_cc->sw_cong_setting,
+		       &p_physp->cc.sw.sw_cong_setting,
+		       sizeof(p_cc->sw_cong_setting))) {
+		/* keep the enter/exit trace balanced on the early return */
+		OSM_LOG_EXIT(p_sm->p_log);
+		return IB_SUCCESS;
+	}
+
+	p_madw = osm_mad_pool_get(p_cc->mad_pool, p_cc->bind_handle,
+				  MAD_BLOCK_SIZE, NULL);
+	if (p_madw == NULL) {
+		OSM_LOG(p_sm->p_log, OSM_LOG_ERROR,
"ERR C101: "
+			"failed to allocate mad\n");
+		/* keep the enter/exit trace balanced on the error return */
+		OSM_LOG_EXIT(p_sm->p_log);
+		return IB_INSUFFICIENT_MEMORY;
+	}
+
+	p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
+
+	p_sw_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
+
+	memcpy(p_sw_cong_setting,
+	       &p_cc->sw_cong_setting,
+	       sizeof(p_cc->sw_cong_setting));
+
+	cc_mad_post(p_cc, p_madw, p_node, p_physp,
+		    IB_MAD_ATTR_SW_CONG_SETTING, 0);
+
+	OSM_LOG_EXIT(p_sm->p_log);
+
+	return IB_SUCCESS;
+}
+
+/*
+ * Queue an IB_MAD_ATTR_CA_CONG_SETTING Set for p_physp of p_node.
+ *
+ * Nothing is sent when no update is forced (neither the port nor the
+ * subnet has need_update set) and the setting cached on the port already
+ * equals p_cc->ca_cong_setting.
+ *
+ * Returns IB_SUCCESS when the MAD was queued or nothing had to be sent,
+ * IB_INSUFFICIENT_MEMORY when no MAD could be taken from the pool.
+ */
+static ib_api_status_t cc_send_ca_cong_setting(osm_sm_t * p_sm,
+					       osm_node_t *p_node,
+					       osm_physp_t *p_physp)
+{
+	osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc;
+	ib_api_status_t status = IB_SUCCESS;
+	unsigned force_update;
+	osm_madw_t *p_madw = NULL;
+	ib_cc_mad_t *p_cc_mad = NULL;
+	ib_ca_cong_setting_t *p_ca_cong_setting = NULL;
+
+	OSM_LOG_ENTER(p_sm->p_log);
+
+	force_update = p_physp->need_update || p_sm->p_subn->need_update;
+
+	/* every path leaves through Exit so the trace stays balanced */
+	if (!force_update
+	    && !memcmp(&p_cc->ca_cong_setting,
+		       &p_physp->cc.ca.ca_cong_setting,
+		       sizeof(p_cc->ca_cong_setting)))
+		goto Exit;
+
+	p_madw = osm_mad_pool_get(p_cc->mad_pool, p_cc->bind_handle,
+				  MAD_BLOCK_SIZE, NULL);
+	if (p_madw == NULL) {
+		OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR C102: "
+			"failed to allocate mad\n");
+		status = IB_INSUFFICIENT_MEMORY;
+		goto Exit;
+	}
+
+	p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
+
+	p_ca_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
+
+	memcpy(p_ca_cong_setting,
+	       &p_cc->ca_cong_setting,
+	       sizeof(p_cc->ca_cong_setting));
+
+	cc_mad_post(p_cc, p_madw, p_node, p_physp,
+		    IB_MAD_ATTR_CA_CONG_SETTING, 0);
+
+Exit:
+	OSM_LOG_EXIT(p_sm->p_log);
+
+	return status;
+}
+
+/*
+ * Queue one IB_MAD_ATTR_CC_TBL Set per congestion control table block for
+ * p_physp of p_node; the block index is passed as attribute modifier.
+ * A block is skipped when no update is forced and the copy cached on the
+ * port already equals the block in p_cc->cc_tbl.
+ *
+ * Returns IB_SUCCESS, or IB_INSUFFICIENT_MEMORY as soon as no MAD can be
+ * taken from the pool (blocks queued before that stay queued).
+ */
+static ib_api_status_t cc_send_cct(osm_sm_t * p_sm,
+				   osm_node_t *p_node,
+				   osm_physp_t *p_physp)
+{
+	osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc;
+	unsigned force_update;
+	osm_madw_t *p_madw = NULL;
+	ib_cc_mad_t *p_cc_mad = NULL;
+	ib_cc_tbl_t *p_cc_tbl = NULL;
+	unsigned int index = 0;
+
+	OSM_LOG_ENTER(p_sm->p_log);
+
+	force_update = p_physp->need_update || p_sm->p_subn->need_update;
+
+	for (index = 0; index <
p_cc->cc_tbl_mads; index++) {
+		/* skip blocks the port already holds, unless forced */
+		if (!force_update
+		    && !memcmp(&p_cc->cc_tbl[index],
+			       &p_physp->cc.ca.cc_tbl[index],
+			       sizeof(p_cc->cc_tbl[index])))
+			continue;
+
+		p_madw = osm_mad_pool_get(p_cc->mad_pool, p_cc->bind_handle,
+					  MAD_BLOCK_SIZE, NULL);
+		if (p_madw == NULL) {
+			OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR C103: "
+				"failed to allocate mad\n");
+			/* keep the enter/exit trace balanced on the error return */
+			OSM_LOG_EXIT(p_sm->p_log);
+			return IB_INSUFFICIENT_MEMORY;
+		}
+
+		p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
+
+		p_cc_tbl = (ib_cc_tbl_t *)ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
+
+		memcpy(p_cc_tbl,
+		       &p_cc->cc_tbl[index],
+		       sizeof(p_cc->cc_tbl[index]));
+
+		/* the attribute modifier carries the table block index */
+		cc_mad_post(p_cc, p_madw, p_node, p_physp,
+			    IB_MAD_ATTR_CC_TBL, cl_hton32(index));
+	}
+
+	OSM_LOG_EXIT(p_sm->p_log);
+
+	return IB_SUCCESS;
+}
+
+/*
+ * Push the configured congestion control settings to every port in the
+ * subnet's port GUID table: switches get the switch congestion setting,
+ * CAs get the CA congestion setting and the congestion control table.
+ * Ports flagged cc_unavailable_flag are skipped.
+ *
+ * Returns 0 when congestion control is disabled, when no routing engine is
+ * in use, or when every MAD was queued; -1 when at least one send helper
+ * failed.
+ */
+int osm_congestion_control_setup(struct osm_opensm *p_osm)
+{
+	cl_qmap_t *p_tbl;
+	cl_map_item_t *p_next;
+	int ret = 0;
+
+	if (!p_osm->subn.opt.congestion_control)
+		return 0;
+
+	OSM_LOG_ENTER(&p_osm->log);
+
+	/*
+	 * Do nothing unless the most recent routing attempt was successful.
+	 */
+	if (!p_osm->routing_engine_used) {
+		/* keep the enter/exit trace balanced on the early return */
+		OSM_LOG_EXIT(&p_osm->log);
+		return 0;
+	}
+
+	cc_setup_mad_data(&p_osm->sm);
+
+	cl_plock_acquire(&p_osm->lock);
+
+	p_tbl = &p_osm->subn.port_guid_tbl;
+	p_next = cl_qmap_head(p_tbl);
+	while (p_next != cl_qmap_end(p_tbl)) {
+		osm_port_t *p_port = (osm_port_t *) p_next;
+		osm_node_t *p_node = p_port->p_node;
+		ib_api_status_t status;
+
+		/* advance first so that `continue` below is safe */
+		p_next = cl_qmap_next(p_next);
+
+		if (p_port->cc_unavailable_flag)
+			continue;
+
+		if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) {
+			status = cc_send_sw_cong_setting(&p_osm->sm, p_node);
+			if (status != IB_SUCCESS)
+				ret = -1;
+		} else if (osm_node_get_type(p_node) == IB_NODE_TYPE_CA) {
+			status = cc_send_ca_cong_setting(&p_osm->sm,
+							 p_node,
+							 p_port->p_physp);
+			if (status != IB_SUCCESS)
+				ret = -1;
+
+			status = cc_send_cct(&p_osm->sm,
+					     p_node,
+					     p_port->p_physp);
+			if (status != IB_SUCCESS)
+				ret = -1;
+		}
+	}
+
+	cl_plock_release(&p_osm->lock);
+
+	OSM_LOG_EXIT(&p_osm->log);
+
+	return ret;
+}
+
+/*
+ * Block until no congestion control MADs are outstanding or osm_exit_flag
+ * is set.  Returns immediately with 0 when congestion control is disabled;
+ * otherwise returns osm_exit_flag.
+ */
+int osm_congestion_control_wait_pending_transactions(struct osm_opensm *p_osm)
+{
+	osm_congestion_control_t *cc = &p_osm->cc;
+
+	if (!p_osm->subn.opt.congestion_control)
+		return 0;
+
+	while (1) {
+		unsigned count = cc->outstanding_mads;
+		if (!count || osm_exit_flag)
+			break;
+		/* signalled when the outstanding counter reaches zero */
+		cl_event_wait_on(&cc->outstanding_mads_done_event,
+				 EVENT_NO_TIMEOUT,
+				 TRUE);
+	}
+
+	return osm_exit_flag;
+}
+
+/*
+ * Account for one finished MAD: decrement outstanding_mads (signalling
+ * outstanding_mads_done_event when it reaches zero), decrement
+ * outstanding_mads_on_wire and signal sig_mads_on_wire_continue so that a
+ * throttled sender can proceed.
+ */
+static inline void decrement_outstanding_mads(osm_congestion_control_t *p_cc)
+{
+	uint32_t outstanding;
+
+	outstanding = cl_atomic_dec(&p_cc->outstanding_mads);
+	if (!outstanding)
+		cl_event_signal(&p_cc->outstanding_mads_done_event);
+
+	cl_atomic_dec(&p_cc->outstanding_mads_on_wire);
+	cl_event_signal(&p_cc->sig_mads_on_wire_continue);
+}
+
+/*
+ * Dispatcher callback (registered for OSM_MSG_MAD_CC) that processes a
+ * received congestion control response.
+ *
+ * context  the osm_congestion_control_t given at registration
+ * data     the received MAD wrapper; it is returned to the pool here
+ *
+ * On a response with status 0 the returned setting is cached on the port
+ * it was sent to.  The outstanding MAD counters are decremented on every
+ * path.
+ */
+static void cc_rcv_mad(void *context, void *data)
+{
+	osm_congestion_control_t *p_cc = context;
+	osm_opensm_t *p_osm = p_cc->osm;
+	osm_madw_t *p_madw = data;
+	ib_cc_mad_t *p_cc_mad;
+	osm_madw_context_t *p_mad_context = &p_madw->context;
+	ib_mad_t *p_mad =
osm_madw_get_mad_ptr(p_madw);
+	ib_net64_t node_guid = p_mad_context->cc_context.node_guid;
+	ib_net64_t port_guid = p_mad_context->cc_context.port_guid;
+	uint8_t port = p_mad_context->cc_context.port;
+	osm_port_t *p_port;
+
+	OSM_LOG_ENTER(p_cc->log);
+
+	OSM_LOG(p_cc->log, OSM_LOG_VERBOSE,
+		"Processing received MAD status 0x%x for "
+		"attr ID %u mod 0x%x node 0x%" PRIx64 " port %u\n",
+		cl_ntoh16(p_mad->status), cl_ntoh16(p_mad->attr_id),
+		cl_ntoh32(p_mad_context->cc_context.attr_mod),
+		cl_ntoh64(node_guid), port);
+
+	p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
+
+	cl_plock_acquire(&p_osm->lock);
+
+	/* the port the request was sent to may have left the subnet */
+	p_port = osm_get_port_by_guid(p_cc->subn, port_guid);
+	if (!p_port) {
+		OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C109: "
+			"Port GUID 0x%" PRIx64 " not in table\n",
+			cl_ntoh64(port_guid));
+		cl_plock_release(&p_osm->lock);
+		goto Exit;
+	}
+
+	/* a response arrived, so restart the timeout count for this port */
+	p_port->cc_timeout_count = 0;
+
+	/*
+	 * Non-zero status: nothing is cached.  The port is additionally
+	 * flagged as not supporting congestion control when one of the
+	 * "unsupported" status values tested below is present.
+	 */
+	if (p_cc_mad->header.status) {
+		if (p_cc_mad->header.status & IB_MAD_STATUS_UNSUP_CLASS_VER
+		    || p_cc_mad->header.status & IB_MAD_STATUS_UNSUP_METHOD
+		    || p_cc_mad->header.status & IB_MAD_STATUS_UNSUP_METHOD_ATTR)
+			p_port->cc_unavailable_flag = TRUE;
+		cl_plock_release(&p_osm->lock);
+		goto Exit;
+	}
+	else
+		p_port->cc_unavailable_flag = FALSE;
+
+	/* cache the returned attribute on the port, keyed by attribute ID */
+	if (p_cc_mad->header.attr_id == IB_MAD_ATTR_SW_CONG_SETTING) {
+		ib_sw_cong_setting_t *p_sw_cong_setting;
+
+		p_sw_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
+		p_port->p_physp->cc.sw.sw_cong_setting = *p_sw_cong_setting;
+	}
+	else if (p_cc_mad->header.attr_id == IB_MAD_ATTR_CA_CONG_SETTING) {
+		ib_ca_cong_setting_t *p_ca_cong_setting;
+
+		p_ca_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
+		p_port->p_physp->cc.ca.ca_cong_setting = *p_ca_cong_setting;
+	}
+	else if (p_cc_mad->header.attr_id == IB_MAD_ATTR_CC_TBL) {
+		/* the table block index is the attribute modifier saved in the MAD context */
+		ib_net32_t attr_mod = p_mad_context->cc_context.attr_mod;
+		uint32_t index = cl_ntoh32(attr_mod);
+		ib_cc_tbl_t *p_cc_tbl;
+
+		p_cc_tbl = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
+		
p_port->p_physp->cc.ca.cc_tbl[index] = *p_cc_tbl;
+	}
+	else
+		OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C10A: "
+			"Unexpected MAD attribute ID %u received\n",
+			cl_ntoh16(p_cc_mad->header.attr_id));
+
+	cl_plock_release(&p_osm->lock);
+
+Exit:
+	/* every path: fix up the counters and release the MAD */
+	decrement_outstanding_mads(p_cc);
+	osm_mad_pool_put(p_cc->mad_pool, p_madw);
+	OSM_LOG_EXIT(p_cc->log);
+}
+
+/*
+ * Send one queued MAD.  After a successful send the MAD is counted as
+ * "on the wire", and the caller is held here while more than
+ * cc_max_outstanding_mads MADs are on the wire.  A failed send is logged
+ * only.
+ */
+static void cc_poller_send(osm_congestion_control_t *p_cc,
+			   osm_madw_t *p_madw)
+{
+	osm_subn_opt_t *p_opt = &p_cc->subn->opt;
+	ib_api_status_t status;
+	cl_status_t sts;
+	/* copied before the send; the error log below reads only this copy */
+	osm_madw_context_t mad_context = p_madw->context;
+
+	status = osm_vendor_send(p_cc->bind_handle, p_madw, TRUE);
+	if (status == IB_SUCCESS) {
+		cl_atomic_inc(&p_cc->outstanding_mads_on_wire);
+		while (p_cc->outstanding_mads_on_wire >
+		       (int32_t)p_opt->cc_max_outstanding_mads) {
+			/* repeat the wait until it reports success */
+			do {
+				sts = cl_event_wait_on(&p_cc->sig_mads_on_wire_continue,
+						       EVENT_NO_TIMEOUT, TRUE);
+			} while (sts != CL_SUCCESS);
+		}
+	} else
+		OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C104: "
+			"send failed to node 0x%" PRIx64 " port %u\n",
+			cl_ntoh64(mad_context.cc_context.node_guid),
+			mad_context.cc_context.port);
+}
+
+/*
+ * Body of the poller thread: while thread_state is OSM_THREAD_STATE_RUN,
+ * take the MAD at the head of mad_queue and send it, or sleep on
+ * cc_poller_wakeup when the queue is empty.
+ */
+static void cc_poller(void *p_ptr)
+{
+	osm_congestion_control_t *p_cc = p_ptr;
+	osm_madw_t *p_madw;
+
+	OSM_LOG_ENTER(p_cc->log);
+
+	if (p_cc->thread_state == OSM_THREAD_STATE_NONE)
+		p_cc->thread_state = OSM_THREAD_STATE_RUN;
+
+	while (p_cc->thread_state == OSM_THREAD_STATE_RUN) {
+		cl_spinlock_acquire(&p_cc->mad_queue_lock);
+
+		p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_cc->mad_queue);
+
+		cl_spinlock_release(&p_cc->mad_queue_lock);
+
+		/* the list end marker is returned when the queue is empty */
+		if (p_madw != (osm_madw_t *) cl_qlist_end(&p_cc->mad_queue))
+			cc_poller_send(p_cc, p_madw);
+		else
+			cl_event_wait_on(&p_cc->cc_poller_wakeup,
+					 EVENT_NO_TIMEOUT, TRUE);
+	}
+
+	OSM_LOG_EXIT(p_cc->log);
+}
+
+/*
+ * Initialise a congestion control object: clear it, link it to the
+ * OpenSM instance, register cc_rcv_mad with the dispatcher, create the
+ * MAD queue, its lock and the events, and start the poller thread.
+ *
+ * Returns IB_SUCCESS, or the status of the first step that failed.
+ */
+ib_api_status_t osm_congestion_control_init(osm_congestion_control_t * p_cc,
+					    struct osm_opensm *p_osm,
+					    const osm_subn_opt_t * p_opt)
+{
+	ib_api_status_t status = IB_SUCCESS;
+
+	
OSM_LOG_ENTER(&p_osm->log);
+
+	memset(p_cc, 0, sizeof(*p_cc));
+
+	p_cc->osm = p_osm;
+	p_cc->subn = &p_osm->subn;
+	p_cc->sm = &p_osm->sm;
+	p_cc->log = &p_osm->log;
+	p_cc->mad_pool = &p_osm->mad_pool;
+	p_cc->trans_id = CONGESTION_CONTROL_INITIAL_TID_VALUE;
+	p_cc->vendor = p_osm->p_vendor;
+
+	p_cc->cc_disp_h = cl_disp_register(&p_osm->disp, OSM_MSG_MAD_CC,
+					   cc_rcv_mad, p_cc);
+	if (p_cc->cc_disp_h == CL_DISP_INVALID_HANDLE) {
+		/*
+		 * status still holds IB_SUCCESS at this point; without an
+		 * explicit error the caller would treat a half-initialised
+		 * object as usable.
+		 */
+		OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C10C: "
+			"Dispatcher registration failed\n");
+		status = IB_INSUFFICIENT_RESOURCES;
+		goto Exit;
+	}
+
+	cl_qlist_init(&p_cc->mad_queue);
+
+	status = cl_spinlock_init(&p_cc->mad_queue_lock);
+	if (status != IB_SUCCESS)
+		goto Exit;
+
+	/* wakes the poller when a MAD has been queued */
+	cl_event_construct(&p_cc->cc_poller_wakeup);
+	status = cl_event_init(&p_cc->cc_poller_wakeup, FALSE);
+	if (status != IB_SUCCESS)
+		goto Exit;
+
+	/* signalled when the number of outstanding MADs drops to zero */
+	cl_event_construct(&p_cc->outstanding_mads_done_event);
+	status = cl_event_init(&p_cc->outstanding_mads_done_event, FALSE);
+	if (status != IB_SUCCESS)
+		goto Exit;
+
+	/* signalled whenever a MAD on the wire has completed */
+	cl_event_construct(&p_cc->sig_mads_on_wire_continue);
+	status = cl_event_init(&p_cc->sig_mads_on_wire_continue, FALSE);
+	if (status != IB_SUCCESS)
+		goto Exit;
+
+	p_cc->thread_state = OSM_THREAD_STATE_NONE;
+
+	/* on success status is already IB_SUCCESS when Exit is reached */
+	status = cl_thread_init(&p_cc->cc_poller, cc_poller, p_cc,
+				"cc poller");
+
+Exit:
+	OSM_LOG_EXIT(p_cc->log);
+	return status;
+}
+
+/*
+ * Vendor layer receive callback: copies the request context into the
+ * response, returns the request MAD to the pool and posts the response to
+ * the dispatcher, where cc_rcv_mad processes it.
+ *
+ * p_madw        received response
+ * bind_context  the osm_congestion_control_t given to osm_vendor_bind
+ * p_req_madw    the request this response answers; must not be NULL
+ */
+static void cc_mad_recv_callback(osm_madw_t * p_madw, void *bind_context,
+				 osm_madw_t * p_req_madw)
+{
+	osm_congestion_control_t *p_cc = bind_context;
+
+	OSM_LOG_ENTER(p_cc->log);
+
+	CL_ASSERT(p_madw);
+
+	/* HACK - should be extended when supporting CC traps */
+	CL_ASSERT(p_req_madw != NULL);
+
+	osm_madw_copy_context(p_madw, p_req_madw);
+	osm_mad_pool_put(p_cc->mad_pool, p_req_madw);
+
+	/* Do not decrement outstanding mads here, do it in the dispatcher */
+
+	if (cl_disp_post(p_cc->cc_disp_h, OSM_MSG_MAD_CC,
+			 p_madw, NULL, NULL) != CL_SUCCESS) {
+		OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C105: "
+			"Congestion Control Dispatcher post failed\n");
+		
osm_mad_pool_put(p_cc->mad_pool, p_madw);
+		/*
+		 * The dispatcher will never see this MAD, so cc_rcv_mad
+		 * cannot do the accounting: do it here, otherwise the
+		 * outstanding counters never return to zero and waiters
+		 * on them block forever.
+		 */
+		decrement_outstanding_mads(p_cc);
+	}
+
+	OSM_LOG_EXIT(p_cc->log);
+}
+
+/*
+ * Vendor layer send-error callback for a congestion control MAD.
+ *
+ * Logs the failure (repeated timeouts on the same port are logged only
+ * once), counts timeouts per port and flags the port as not supporting
+ * congestion control once the count exceeds
+ * OSM_CC_TIMEOUT_COUNT_THRESHOLD.  Any other error marks the subnet
+ * initialization as failed.  The MAD is returned to the pool and the
+ * outstanding counters are decremented on every path.
+ *
+ * bind_context  the osm_congestion_control_t given to osm_vendor_bind
+ * p_madw        the MAD that could not be delivered
+ */
+static void cc_mad_send_err_callback(void *bind_context,
+				     osm_madw_t * p_madw)
+{
+	osm_congestion_control_t *p_cc = bind_context;
+	osm_madw_context_t *p_madw_context = &p_madw->context;
+	osm_opensm_t *p_osm = p_cc->osm;
+	uint64_t node_guid = p_madw_context->cc_context.node_guid;
+	uint64_t port_guid = p_madw_context->cc_context.port_guid;
+	uint8_t port = p_madw_context->cc_context.port;
+	osm_port_t *p_port;
+	int log_flag = 1;
+
+	OSM_LOG_ENTER(p_cc->log);
+
+	cl_plock_acquire(&p_osm->lock);
+
+	p_port = osm_get_port_by_guid(p_cc->subn, port_guid);
+	if (!p_port) {
+		OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C10B: "
+			"Port GUID 0x%" PRIx64 " not in table\n",
+			cl_ntoh64(port_guid));
+		cl_plock_release(&p_osm->lock);
+		goto Exit;
+	}
+
+	/* If timed out before, don't bother logging again
+	 * we assume no CC support
+	 */
+	if (p_madw->status == IB_TIMEOUT
+	    && p_port->cc_timeout_count)
+		log_flag = 0;
+
+	/* attr_id is converted to host order like the other MAD fields */
+	if (log_flag)
+		OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C106: MAD Error (%s): "
+			"attr id = %u LID %u GUID 0x%016" PRIx64 " port %u "
+			"TID 0x%" PRIx64 "\n",
+			ib_get_err_str(p_madw->status),
+			cl_ntoh16(p_madw->p_mad->attr_id),
+			cl_ntoh16(p_madw->mad_addr.dest_lid),
+			cl_ntoh64(node_guid),
+			port,
+			cl_ntoh64(p_madw->p_mad->trans_id));
+
+	if (p_madw->status == IB_TIMEOUT) {
+		p_port->cc_timeout_count++;
+		/* too many timeouts in a row: stop sending CC MADs to the port */
+		if (p_port->cc_timeout_count > OSM_CC_TIMEOUT_COUNT_THRESHOLD
+		    && !p_port->cc_unavailable_flag) {
+			p_port->cc_unavailable_flag = TRUE;
+			p_port->cc_timeout_count = 0;
+		}
+	} else
+		p_cc->subn->subnet_initialization_error = TRUE;
+
+	cl_plock_release(&p_osm->lock);
+
+Exit:
+	osm_mad_pool_put(p_cc->mad_pool, p_madw);
+
+	decrement_outstanding_mads(p_cc);
+
+	OSM_LOG_EXIT(p_cc->log);
+}
+
+/*
+ * Bind the congestion control object to the vendor layer on port_guid for
+ * the congestion control management class (class version 2), installing
+ * cc_mad_recv_callback and cc_mad_send_err_callback.
+ *
+ * Returns IB_SUCCESS, or IB_ERROR when the vendor bind fails.
+ */
+ib_api_status_t osm_congestion_control_bind(osm_congestion_control_t * p_cc,
+					    ib_net64_t port_guid)
+{
+	osm_bind_info_t bind_info;
+	ib_api_status_t status = IB_SUCCESS;
+
+	
OSM_LOG_ENTER(p_cc->log);
+
+	/* the bound port GUID is also remembered in the object */
+	bind_info.port_guid = p_cc->port_guid = port_guid;
+	bind_info.mad_class = IB_MCLASS_CC;
+	bind_info.class_version = 2;
+	bind_info.is_responder = FALSE;
+	bind_info.is_report_processor = FALSE;
+	bind_info.is_trap_processor = FALSE;
+	bind_info.recv_q_size = OSM_SM_DEFAULT_QP1_RCV_SIZE;
+	bind_info.send_q_size = OSM_SM_DEFAULT_QP1_SEND_SIZE;
+	/* timeout and retry count come from the subnet options */
+	bind_info.timeout = p_cc->subn->opt.transaction_timeout;
+	bind_info.retries = p_cc->subn->opt.transaction_retries;
+
+	OSM_LOG(p_cc->log, OSM_LOG_VERBOSE,
+		"Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid));
+
+	p_cc->bind_handle = osm_vendor_bind(p_cc->vendor, &bind_info,
+					    p_cc->mad_pool,
+					    cc_mad_recv_callback,
+					    cc_mad_send_err_callback, p_cc);
+
+	if (p_cc->bind_handle == OSM_BIND_INVALID_HANDLE) {
+		status = IB_ERROR;
+		OSM_LOG(p_cc->log, OSM_LOG_ERROR,
+			"ERR C107: Vendor specific bind failed (%s)\n",
+			ib_get_err_str(status));
+		goto Exit;
+	}
+
+Exit:
+	OSM_LOG_EXIT(p_cc->log);
+	return status;
+}
+
+/*
+ * Unregister the congestion control dispatcher handle.  When the object
+ * holds no valid bind handle, an error is logged and nothing is
+ * unregistered.
+ */
+void osm_congestion_control_shutdown(osm_congestion_control_t * p_cc)
+{
+	OSM_LOG_ENTER(p_cc->log);
+	if (p_cc->bind_handle == OSM_BIND_INVALID_HANDLE) {
+		OSM_LOG(p_cc->log, OSM_LOG_ERROR,
+			"ERR C108: No previous bind\n");
+		goto Exit;
+	}
+	cl_disp_unregister(p_cc->cc_disp_h);
+Exit:
+	OSM_LOG_EXIT(p_cc->log);
+}
+
+/*
+ * Stop the poller thread and release what the congestion control object
+ * owns: MADs still waiting in mad_queue are returned to the pool, then the
+ * queue lock and the events are destroyed.
+ */
+void osm_congestion_control_destroy(osm_congestion_control_t * p_cc)
+{
+	osm_madw_t *p_madw;
+
+	OSM_LOG_ENTER(p_cc->log);
+
+	p_cc->thread_state = OSM_THREAD_STATE_EXIT;
+
+	/* wake the poller from either wait so it can observe the new state */
+	cl_event_signal(&p_cc->sig_mads_on_wire_continue);
+	cl_event_signal(&p_cc->cc_poller_wakeup);
+
+	cl_thread_destroy(&p_cc->cc_poller);
+
+	cl_spinlock_acquire(&p_cc->mad_queue_lock);
+
+	/* return MADs that were queued but never sent */
+	while (!cl_is_qlist_empty(&p_cc->mad_queue)) {
+		p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_cc->mad_queue);
+		osm_mad_pool_put(p_cc->mad_pool, p_madw);
+	}
+
+	cl_spinlock_release(&p_cc->mad_queue_lock);
+
+	cl_spinlock_destroy(&p_cc->mad_queue_lock);
+
+	cl_event_destroy(&p_cc->cc_poller_wakeup);
+	
cl_event_destroy(&p_cc->outstanding_mads_done_event); + cl_event_destroy(&p_cc->sig_mads_on_wire_continue); + + OSM_LOG_EXIT(p_cc->log); +} diff --git a/opensm/osm_console.c b/opensm/osm_console.c new file mode 100644 index 0000000..641bdcd --- /dev/null +++ b/opensm/osm_console.c @@ -0,0 +1,1941 @@ +/* + * Copyright (c) 2005-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2009,2010 HNR Consulting. All rights reserved. + * Copyright (c) 2010,2011 Mellanox Technologies LTD. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#ifdef ENABLE_OSM_CONSOLE_LOOPBACK +#include +#endif +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_CONSOLE_C +#include +#include +#include +#include + +extern void osm_update_node_desc(IN osm_opensm_t *osm); + +struct command { + const char *name; + void (*help_function) (FILE * out, int detail); + void (*parse_function) (char **p_last, osm_opensm_t * p_osm, + FILE * out); +}; + +static struct { + int on; + int delay_s; + time_t previous; + void (*loop_function) (osm_opensm_t * p_osm, FILE * out); +} loop_command = { +.on = 0, .delay_s = 2, .loop_function = NULL}; + +static const struct command console_cmds[]; + +static char *next_token(char **p_last) +{ + return strtok_r(NULL, " \t\n\r", p_last); +} + +#ifdef ENABLE_OSM_PERF_MGR +static char *name_token(char **p_last) +{ + return strtok_r(NULL, "\t\n\r", p_last); +} +#endif + +static void help_command(FILE * out, int detail) +{ + int i; + + fprintf(out, "Supported commands and syntax:\n"); + fprintf(out, "help []\n"); + /* skip help command */ + for (i = 1; console_cmds[i].name; i++) + console_cmds[i].help_function(out, 0); +} + +static void help_quit(FILE * out, int detail) +{ + fprintf(out, "quit (not valid in local mode; use ctl-c)\n"); +} + +static void help_loglevel(FILE * out, int detail) +{ + fprintf(out, "loglevel []\n"); + if (detail) { + fprintf(out, " log-level is OR'ed from the following\n"); + fprintf(out, " OSM_LOG_NONE 0x%02X\n", + OSM_LOG_NONE); + fprintf(out, " OSM_LOG_ERROR 0x%02X\n", + OSM_LOG_ERROR); + fprintf(out, " OSM_LOG_INFO 0x%02X\n", + OSM_LOG_INFO); + fprintf(out, " OSM_LOG_VERBOSE 0x%02X\n", + OSM_LOG_VERBOSE); + fprintf(out, " OSM_LOG_DEBUG 0x%02X\n", + OSM_LOG_DEBUG); + fprintf(out, " OSM_LOG_FUNCS 0x%02X\n", + OSM_LOG_FUNCS); + fprintf(out, " OSM_LOG_FRAMES 0x%02X\n", + OSM_LOG_FRAMES); + fprintf(out, " 
OSM_LOG_ROUTING 0x%02X\n", + OSM_LOG_ROUTING); + fprintf(out, " OSM_LOG_SYS 0x%02X\n", + OSM_LOG_SYS); + fprintf(out, "\n"); + fprintf(out, " OSM_LOG_DEFAULT_LEVEL 0x%02X\n", + OSM_LOG_DEFAULT_LEVEL); + } +} + +static void help_permodlog(FILE * out, int detail) +{ + fprintf(out, "permodlog\n"); +} + +static void help_priority(FILE * out, int detail) +{ + fprintf(out, "priority []\n"); +} + +static void help_resweep(FILE * out, int detail) +{ + fprintf(out, "resweep [heavy|light]\n"); +} + +static void help_reroute(FILE * out, int detail) +{ + fprintf(out, "reroute\n"); + if (detail) { + fprintf(out, "reroute the fabric\n"); + } +} + +static void help_sweep(FILE * out, int detail) +{ + fprintf(out, "sweep [on|off]\n"); + if (detail) { + fprintf(out, "enable or disable sweeping\n"); + fprintf(out, " [on] sweep normally\n"); + fprintf(out, " [off] inhibit all sweeping\n"); + } +} + +static void help_status(FILE * out, int detail) +{ + fprintf(out, "status [loop]\n"); + if (detail) { + fprintf(out, " loop -- type \"q\" to quit\n"); + } +} + +static void help_logflush(FILE * out, int detail) +{ + fprintf(out, "logflush [on|off] -- toggle opensm.log file flushing\n"); +} + +static void help_querylid(FILE * out, int detail) +{ + fprintf(out, + "querylid lid -- print internal information about the lid specified\n"); +} + +static void help_portstatus(FILE * out, int detail) +{ + fprintf(out, "portstatus [ca|switch|router]\n"); + if (detail) { + fprintf(out, "summarize port status\n"); + fprintf(out, + " [ca|switch|router] -- limit the results to the node type specified\n"); + } + +} + +static void help_switchbalance(FILE * out, int detail) +{ + fprintf(out, "switchbalance [verbose] [guid]\n"); + if (detail) { + fprintf(out, "output switch balancing information\n"); + fprintf(out, + " [verbose] -- verbose output\n" + " [guid] -- limit results to specified guid\n"); + } +} + +static void help_lidbalance(FILE * out, int detail) +{ + fprintf(out, "lidbalance [switchguid]\n"); + 
if (detail) { + fprintf(out, "output lid balanced forwarding information\n"); + fprintf(out, + " [switchguid] -- limit results to specified switch guid\n"); + } +} + +static void help_dump_conf(FILE *out, int detail) +{ + fprintf(out, "dump_conf\n"); + if (detail) { + fprintf(out, "dump current opensm configuration\n"); + } +} + +static void help_update_desc(FILE *out, int detail) +{ + fprintf(out, "update_desc\n"); + if (detail) { + fprintf(out, "update node description for all nodes\n"); + } +} + +#ifdef ENABLE_OSM_PERF_MGR +static void help_perfmgr(FILE * out, int detail) +{ + fprintf(out, + "perfmgr(pm) [enable|disable\n" + " |clear_counters|dump_counters|print_counters(pc)|print_errors(pe)\n" + " |set_rm_nodes|clear_rm_nodes|clear_inactive\n" + " |set_query_cpi|clear_query_cpi\n" + " |dump_redir|clear_redir\n" + " |sweep|sweep_time[seconds]]\n"); + if (detail) { + fprintf(out, + "perfmgr -- print the performance manager state\n"); + fprintf(out, + " [enable|disable] -- change the perfmgr state\n"); + fprintf(out, + " [sweep] -- Initiate a sweep of the fabric\n"); + fprintf(out, + " [sweep_time] -- change the perfmgr sweep time (requires [seconds] option)\n"); + fprintf(out, + " [clear_counters] -- clear the counters stored\n"); + fprintf(out, + " [dump_counters [mach]] -- dump the counters (optionally in [mach]ine readable format)\n"); + fprintf(out, + " [print_counters [][:]] -- print the internal counters\n" + " Optionally limit output by name, guid, or port\n"); + fprintf(out, + " [pc [][:]] -- same as print_counters\n"); + fprintf(out, + " [print_errors []] -- print only ports with errors\n" + " Optionally limit output by name or guid\n"); + fprintf(out, + " [pe []] -- same as print_errors\n"); + fprintf(out, + " [dump_redir []] -- dump the redirection table\n"); + fprintf(out, + " [clear_redir []] -- clear the redirection table\n"); + fprintf(out, + " [[set|clear]_rm_nodes] -- enable/disable the removal of \"inactive\" nodes from the DB\n" + " Inactive 
nodes are those which no longer appear on the fabric\n"); + fprintf(out, + " [[set|clear]_query_cpi] -- enable/disable PerfMgrGet(ClassPortInfo)\n" + " ClassPortInfo indicates hardware support for extended attributes such as PortCountersExtended\n"); + fprintf(out, + " [clear_inactive] -- Delete inactive nodes from the DB\n"); + } +} +static void help_pm(FILE *out, int detail) +{ + if (detail) + help_perfmgr(out, detail); +} +#endif /* ENABLE_OSM_PERF_MGR */ + +/* more help routines go here */ + +static void help_parse(char **p_last, osm_opensm_t * p_osm, FILE * out) +{ + char *p_cmd; + int i, found = 0; + + p_cmd = next_token(p_last); + if (!p_cmd) + help_command(out, 0); + else { + for (i = 1; console_cmds[i].name; i++) { + if (!strcmp(p_cmd, console_cmds[i].name)) { + found = 1; + console_cmds[i].help_function(out, 1); + break; + } + } + if (!found) { + fprintf(out, "%s : Command not found\n\n", p_cmd); + help_command(out, 0); + } + } +} + +static void loglevel_parse(char **p_last, osm_opensm_t * p_osm, FILE * out) +{ + char *p_cmd; + int level; + + p_cmd = next_token(p_last); + if (!p_cmd) + fprintf(out, "Current log level is 0x%x\n", + osm_log_get_level(&p_osm->log)); + else { + /* Handle x, 0x, and decimal specification of log level */ + if (!strncmp(p_cmd, "x", 1)) { + p_cmd++; + level = strtoul(p_cmd, NULL, 16); + } else { + if (!strncmp(p_cmd, "0x", 2)) { + p_cmd += 2; + level = strtoul(p_cmd, NULL, 16); + } else + level = strtol(p_cmd, NULL, 10); + } + if ((level >= 0) && (level < 256)) { + fprintf(out, "Setting log level to 0x%x\n", level); + osm_log_set_level(&p_osm->log, level); + } else + fprintf(out, "Invalid log level 0x%x\n", level); + } +} + +static void permodlog_parse(char **p_last, osm_opensm_t * p_osm, FILE * out) +{ + FILE *fp; + char buf[1024]; + + if (p_osm->subn.opt.per_module_logging_file != NULL) { + fp = fopen(p_osm->subn.opt.per_module_logging_file, "r"); + if (!fp) { + if (errno == ENOENT) + return; + fprintf(out, "fopen(%s) failed: 
%s\n", + p_osm->subn.opt.per_module_logging_file, + strerror(errno)); + return; + } + + fprintf(out, "Per module logging file: %s\n", + p_osm->subn.opt.per_module_logging_file); + while (fgets(buf, sizeof buf, fp) != NULL) + fprintf(out, "%s", buf); + fclose(fp); + fprintf(out, "\n"); + } +} + +static void priority_parse(char **p_last, osm_opensm_t * p_osm, FILE * out) +{ + char *p_cmd; + int priority; + + p_cmd = next_token(p_last); + if (!p_cmd) + fprintf(out, "Current sm-priority is %d\n", + p_osm->subn.opt.sm_priority); + else { + priority = strtol(p_cmd, NULL, 0); + if (0 > priority || 15 < priority) + fprintf(out, + "Invalid sm-priority %d; must be between 0 and 15\n", + priority); + else { + fprintf(out, "Setting sm-priority to %d\n", priority); + osm_set_sm_priority(&p_osm->sm, (uint8_t)priority); + } + } +} + +static const char *sm_state_str(int state) +{ + switch (state) { + case IB_SMINFO_STATE_DISCOVERING: + return "Discovering"; + case IB_SMINFO_STATE_STANDBY: + return "Standby "; + case IB_SMINFO_STATE_NOTACTIVE: + return "Not Active "; + case IB_SMINFO_STATE_MASTER: + return "Master "; + } + return "UNKNOWN "; +} + +static const char *sa_state_str(osm_sa_state_t state) +{ + switch (state) { + case OSM_SA_STATE_INIT: + return "Init"; + case OSM_SA_STATE_READY: + return "Ready"; + } + return "UNKNOWN"; +} + +static void dump_sms(osm_opensm_t * p_osm, FILE * out) +{ + osm_subn_t *p_subn = &p_osm->subn; + osm_remote_sm_t *p_rsm; + + fprintf(out, "\n Known SMs\n" + " ---------\n"); + fprintf(out, " Port GUID SM State Priority\n"); + fprintf(out, " --------- -------- --------\n"); + fprintf(out, " 0x%" PRIx64 " %s %d SELF\n", + cl_ntoh64(p_subn->sm_port_guid), + sm_state_str(p_subn->sm_state), + p_subn->opt.sm_priority); + + CL_PLOCK_ACQUIRE(p_osm->sm.p_lock); + p_rsm = (osm_remote_sm_t *) cl_qmap_head(&p_subn->sm_guid_tbl); + while (p_rsm != (osm_remote_sm_t *) cl_qmap_end(&p_subn->sm_guid_tbl)) { + fprintf(out, " 0x%" PRIx64 " %s %d\n", + 
cl_ntoh64(p_rsm->smi.guid), + sm_state_str(ib_sminfo_get_state(&p_rsm->smi)), + ib_sminfo_get_priority(&p_rsm->smi)); + p_rsm = (osm_remote_sm_t *) cl_qmap_next(&p_rsm->map_item); + } + CL_PLOCK_RELEASE(p_osm->sm.p_lock); +} + +static void print_status(osm_opensm_t * p_osm, FILE * out) +{ + cl_list_item_t *item; + + if (out) { + const char *re_str; + + cl_plock_acquire(&p_osm->lock); + fprintf(out, " OpenSM Version : %s\n", p_osm->osm_version); + fprintf(out, " SM State : %s\n", + sm_state_str(p_osm->subn.sm_state)); + fprintf(out, " SM Priority : %d\n", + p_osm->subn.opt.sm_priority); + fprintf(out, " SA State : %s\n", + sa_state_str(p_osm->sa.state)); + + re_str = p_osm->routing_engine_used ? + osm_routing_engine_type_str(p_osm->routing_engine_used->type) : + osm_routing_engine_type_str(OSM_ROUTING_ENGINE_TYPE_NONE); + fprintf(out, " Routing Engine : %s\n", re_str); + + fprintf(out, " Loaded event plugins :"); + if (cl_qlist_head(&p_osm->plugin_list) == + cl_qlist_end(&p_osm->plugin_list)) { + fprintf(out, " "); + } + for (item = cl_qlist_head(&p_osm->plugin_list); + item != cl_qlist_end(&p_osm->plugin_list); + item = cl_qlist_next(item)) + fprintf(out, " %s", + ((osm_epi_plugin_t *)item)->plugin_name); + fprintf(out, "\n"); + +#ifdef ENABLE_OSM_PERF_MGR + fprintf(out, "\n PerfMgr state/sweep state : %s/%s\n", + osm_perfmgr_get_state_str(&p_osm->perfmgr), + osm_perfmgr_get_sweep_state_str(&p_osm->perfmgr)); +#endif + fprintf(out, "\n MAD stats\n" + " ---------\n" + " QP0 MADs outstanding : %u\n" + " QP0 MADs outstanding (on wire) : %u\n" + " QP0 MADs rcvd : %u\n" + " QP0 MADs sent : %u\n" + " QP0 unicasts sent : %u\n" + " QP0 unknown MADs rcvd : %u\n" + " SA MADs outstanding : %u\n" + " SA MADs rcvd : %u\n" + " SA MADs sent : %u\n" + " SA unknown MADs rcvd : %u\n" + " SA MADs ignored : %u\n", + (uint32_t)p_osm->stats.qp0_mads_outstanding, + (uint32_t)p_osm->stats.qp0_mads_outstanding_on_wire, + (uint32_t)p_osm->stats.qp0_mads_rcvd, + 
(uint32_t)p_osm->stats.qp0_mads_sent, + (uint32_t)p_osm->stats.qp0_unicasts_sent, + (uint32_t)p_osm->stats.qp0_mads_rcvd_unknown, + (uint32_t)p_osm->stats.sa_mads_outstanding, + (uint32_t)p_osm->stats.sa_mads_rcvd, + (uint32_t)p_osm->stats.sa_mads_sent, + (uint32_t)p_osm->stats.sa_mads_rcvd_unknown, + (uint32_t)p_osm->stats.sa_mads_ignored); + fprintf(out, "\n Subnet flags\n" + " ------------\n" + " Sweeping enabled : %d\n" + " Sweep interval (seconds) : %u\n" + " Ignore existing lfts : %d\n" + " Subnet Init errors : %d\n" + " In sweep hop 0 : %d\n" + " First time master sweep : %d\n" + " Coming out of standby : %d\n", + p_osm->subn.sweeping_enabled, + p_osm->subn.opt.sweep_interval, + p_osm->subn.ignore_existing_lfts, + p_osm->subn.subnet_initialization_error, + p_osm->subn.in_sweep_hop_0, + p_osm->subn.first_time_master_sweep, + p_osm->subn.coming_out_of_standby); + dump_sms(p_osm, out); + fprintf(out, "\n"); + cl_plock_release(&p_osm->lock); + } +} + +static int loop_command_check_time(void) +{ + time_t cur = time(NULL); + if ((loop_command.previous + loop_command.delay_s) < cur) { + loop_command.previous = cur; + return 1; + } + return 0; +} + +static void status_parse(char **p_last, osm_opensm_t * p_osm, FILE * out) +{ + char *p_cmd; + + p_cmd = next_token(p_last); + if (p_cmd) { + if (strcmp(p_cmd, "loop") == 0) { + fprintf(out, "Looping on status command...\n"); + fflush(out); + loop_command.on = 1; + loop_command.previous = time(NULL); + loop_command.loop_function = print_status; + } else { + help_status(out, 1); + return; + } + } + print_status(p_osm, out); +} + +static void resweep_parse(char **p_last, osm_opensm_t * p_osm, FILE * out) +{ + char *p_cmd; + + p_cmd = next_token(p_last); + if (!p_cmd || + (strcmp(p_cmd, "heavy") != 0 && strcmp(p_cmd, "light") != 0)) { + fprintf(out, "Invalid resweep command\n"); + help_resweep(out, 1); + } else { + if (strcmp(p_cmd, "heavy") == 0) + p_osm->subn.force_heavy_sweep = TRUE; + osm_opensm_sweep(p_osm); + } +} + 
+static void reroute_parse(char **p_last, osm_opensm_t * p_osm, FILE * out) +{ + p_osm->subn.force_reroute = TRUE; + osm_opensm_sweep(p_osm); +} + +static void sweep_parse(char **p_last, osm_opensm_t * p_osm, FILE * out) +{ + char *p_cmd; + + p_cmd = next_token(p_last); + if (!p_cmd || + (strcmp(p_cmd, "on") != 0 && strcmp(p_cmd, "off") != 0)) { + fprintf(out, "Invalid sweep command\n"); + help_sweep(out, 1); + } else { + if (strcmp(p_cmd, "on") == 0) + p_osm->subn.sweeping_enabled = TRUE; + else + p_osm->subn.sweeping_enabled = FALSE; + } +} + +static void logflush_parse(char **p_last, osm_opensm_t * p_osm, FILE * out) +{ + char *p_cmd; + + p_cmd = next_token(p_last); + if (!p_cmd || + (strcmp(p_cmd, "on") != 0 && strcmp(p_cmd, "off") != 0)) { + fprintf(out, "Invalid logflush command\n"); + help_logflush(out, 1); /* show the logflush usage; this used to print the sweep usage */ + } else { + if (strcmp(p_cmd, "on") == 0) { + p_osm->log.flush = TRUE; + fflush(p_osm->log.out_port); + } else + p_osm->log.flush = FALSE; + } +} + +static void querylid_parse(char **p_last, osm_opensm_t * p_osm, FILE * out) +{ + unsigned int p = 0; + uint16_t lid = 0; + osm_port_t *p_port = NULL; + char *p_cmd = next_token(p_last); + + if (!p_cmd) { + fprintf(out, "no LID specified\n"); + help_querylid(out, 1); + return; + } + + lid = (uint16_t) strtoul(p_cmd, NULL, 0); + cl_plock_acquire(&p_osm->lock); + p_port = osm_get_port_by_lid_ho(&p_osm->subn, lid); + if (!p_port) + goto invalid_lid; + + fprintf(out, "Query results for LID %u\n", lid); + fprintf(out, + " GUID : 0x%016" PRIx64 "\n" + " Node Desc : %s\n" + " Node Type : %s\n" + " Num Ports : %d\n", + cl_ntoh64(p_port->guid), + p_port->p_node->print_desc, + ib_get_node_type_str(osm_node_get_type(p_port->p_node)), + p_port->p_node->node_info.num_ports); + + if (p_port->p_node->sw) + p = 0; + else + p = 1; + for ( /* see above */ ; p < p_port->p_node->physp_tbl_size; p++) { + fprintf(out, + " Port %u health : %s\n", + p, + p_port->p_node->physp_table[p]. + healthy ? 
"OK" : "ERROR"); + } + + cl_plock_release(&p_osm->lock); + return; + +invalid_lid: + cl_plock_release(&p_osm->lock); + fprintf(out, "Invalid lid %d\n", lid); + return; +} + +/** + * Data structures for the portstatus command + */ +typedef struct _port_report { + struct _port_report *next; + uint64_t node_guid; + uint8_t port_num; + char print_desc[IB_NODE_DESCRIPTION_SIZE + 1]; +} port_report_t; + +static void +__tag_port_report(port_report_t ** head, uint64_t node_guid, + uint8_t port_num, char *print_desc) +{ + port_report_t *rep = malloc(sizeof(*rep)); + if (!rep) + return; + + rep->node_guid = node_guid; + rep->port_num = port_num; + memcpy(rep->print_desc, print_desc, IB_NODE_DESCRIPTION_SIZE + 1); + rep->next = NULL; + if (*head) { + rep->next = *head; + *head = rep; + } else + *head = rep; +} + +static void __print_port_report(FILE * out, port_report_t * head) +{ + port_report_t *item = head; + while (item != NULL) { + fprintf(out, " 0x%016" PRIx64 " %d (%s)\n", + item->node_guid, item->port_num, item->print_desc); + port_report_t *next = item->next; + free(item); + item = next; + } +} + +typedef struct { + uint8_t node_type_lim; /* limit the results; 0 == ALL */ + uint64_t total_nodes; + uint64_t total_ports; + uint64_t ports_down; + uint64_t ports_active; + uint64_t ports_disabled; + port_report_t *disabled_ports; + uint64_t ports_1X; + uint64_t ports_4X; + uint64_t ports_8X; + uint64_t ports_12X; + uint64_t ports_2X; + uint64_t ports_unknown_width; + port_report_t *unknown_width_ports; + uint64_t ports_unenabled_width; + port_report_t *unenabled_width_ports; + uint64_t ports_reduced_width; + port_report_t *reduced_width_ports; + uint64_t ports_sdr; + uint64_t ports_ddr; + uint64_t ports_qdr; + uint64_t ports_fdr10; + uint64_t ports_fdr; + uint64_t ports_edr; + uint64_t ports_hdr; + uint64_t ports_unknown_speed; + port_report_t *unknown_speed_ports; + uint64_t ports_unenabled_speed; + port_report_t *unenabled_speed_ports; + uint64_t ports_reduced_speed; + 
port_report_t *reduced_speed_ports; +} fabric_stats_t; + +/** + * iterator function to get portstatus on each node + */ +static void __get_stats(cl_map_item_t * const p_map_item, void *context) +{ + fabric_stats_t *fs = (fabric_stats_t *) context; + osm_node_t *node = (osm_node_t *) p_map_item; + osm_physp_t *physp0; + ib_port_info_t *pi0; + uint8_t num_ports = osm_node_get_num_physp(node); + uint8_t port = 0; + + /* Skip nodes we are not interested in */ + if (fs->node_type_lim != 0 + && fs->node_type_lim != node->node_info.node_type) + return; + + fs->total_nodes++; + + if (osm_node_get_type(node) == IB_NODE_TYPE_SWITCH) { + physp0 = osm_node_get_physp_ptr(node, 0); + pi0 = &physp0->port_info; + } else + pi0 = NULL; + + for (port = 1; port < num_ports; port++) { + osm_physp_t *phys = osm_node_get_physp_ptr(node, port); + ib_port_info_t *pi = NULL; + ib_mlnx_ext_port_info_t *epi = NULL; + uint8_t active_speed = 0; + uint8_t enabled_speed = 0; + uint8_t active_width = 0; + uint8_t enabled_width = 0; + uint8_t port_state = 0; + uint8_t port_phys_state = 0; + + if (!phys) + continue; + + pi = &phys->port_info; + epi = &phys->ext_port_info; + if (!pi0) + pi0 = pi; + active_speed = ib_port_info_get_link_speed_active(pi); + enabled_speed = ib_port_info_get_link_speed_enabled(pi); + active_width = pi->link_width_active; + enabled_width = pi->link_width_enabled; + port_state = ib_port_info_get_port_state(pi); + port_phys_state = ib_port_info_get_port_phys_state(pi); + + if (port_state == IB_LINK_DOWN) + fs->ports_down++; + else if (port_state == IB_LINK_ACTIVE) + fs->ports_active++; + if (port_phys_state == IB_PORT_PHYS_STATE_DISABLED) { + __tag_port_report(&(fs->disabled_ports), + cl_ntoh64(node->node_info.node_guid), + port, node->print_desc); + fs->ports_disabled++; + } + + fs->total_ports++; + + if (port_state == IB_LINK_DOWN) + continue; + + if (!(active_width & enabled_width)) { + __tag_port_report(&(fs->unenabled_width_ports), + 
cl_ntoh64(node->node_info.node_guid), + port, node->print_desc); + fs->ports_unenabled_width++; + } + else if ((enabled_width ^ active_width) > active_width) { + __tag_port_report(&(fs->reduced_width_ports), + cl_ntoh64(node->node_info.node_guid), + port, node->print_desc); + fs->ports_reduced_width++; + } + + /* unenabled speed usually due to problems with force_link_speed */ + if (!(active_speed & enabled_speed)) { + __tag_port_report(&(fs->unenabled_speed_ports), + cl_ntoh64(node->node_info.node_guid), + port, node->print_desc); + fs->ports_unenabled_speed++; + } + else if ((enabled_speed ^ active_speed) > active_speed) { + __tag_port_report(&(fs->reduced_speed_ports), + cl_ntoh64(node->node_info.node_guid), + port, node->print_desc); + fs->ports_reduced_speed++; + } + + switch (active_speed) { + case IB_LINK_SPEED_ACTIVE_2_5: + fs->ports_sdr++; + break; + case IB_LINK_SPEED_ACTIVE_5: + fs->ports_ddr++; + break; + case IB_LINK_SPEED_ACTIVE_10: + if (!(pi0->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS) || + !ib_port_info_get_link_speed_ext_active(pi)) { + if (epi->link_speed_active & FDR10) + fs->ports_fdr10++; + else { + fs->ports_qdr++; + /* check for speed reduced from FDR10 */ + if (epi->link_speed_enabled & FDR10) { + __tag_port_report(&(fs->reduced_speed_ports), + cl_ntoh64(node->node_info.node_guid), + port, node->print_desc); + fs->ports_reduced_speed++; + } + } + } + break; + case IB_LINK_SPEED_ACTIVE_EXTENDED: + break; + default: + __tag_port_report(&(fs->unknown_speed_ports), + cl_ntoh64(node->node_info.node_guid), + port, node->print_desc); + fs->ports_unknown_speed++; + break; + } + if (pi0->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS && + ib_port_info_get_link_speed_ext_sup(pi) && + (enabled_speed = ib_port_info_get_link_speed_ext_enabled(pi)) != IB_LINK_SPEED_EXT_DISABLE && + active_speed == IB_LINK_SPEED_ACTIVE_10) { + active_speed = ib_port_info_get_link_speed_ext_active(pi); + if (!(active_speed & enabled_speed)) { + 
__tag_port_report(&(fs->unenabled_speed_ports), + cl_ntoh64(node->node_info.node_guid), + port, node->print_desc); + fs->ports_unenabled_speed++; + } + else if ((enabled_speed ^ active_speed) > active_speed) { + __tag_port_report(&(fs->reduced_speed_ports), + cl_ntoh64(node->node_info.node_guid), + port, node->print_desc); + fs->ports_reduced_speed++; + } + switch (active_speed) { + case IB_LINK_SPEED_EXT_ACTIVE_14: + fs->ports_fdr++; + break; + case IB_LINK_SPEED_EXT_ACTIVE_25: + fs->ports_edr++; + break; + case IB_LINK_SPEED_EXT_ACTIVE_50: + fs->ports_hdr++; + break; + case IB_LINK_SPEED_EXT_ACTIVE_NONE: + break; + default: + __tag_port_report(&(fs->unknown_speed_ports), + cl_ntoh64(node->node_info.node_guid), + port, node->print_desc); + fs->ports_unknown_speed++; + break; + } + } + switch (active_width) { + case IB_LINK_WIDTH_ACTIVE_1X: + fs->ports_1X++; + break; + case IB_LINK_WIDTH_ACTIVE_4X: + fs->ports_4X++; + break; + case IB_LINK_WIDTH_ACTIVE_8X: + fs->ports_8X++; + break; + case IB_LINK_WIDTH_ACTIVE_12X: + fs->ports_12X++; + break; + case IB_LINK_WIDTH_ACTIVE_2X: + fs->ports_2X++; + break; + default: + __tag_port_report(&(fs->unknown_width_ports), + cl_ntoh64(node->node_info.node_guid), + port, node->print_desc); + fs->ports_unknown_width++; + break; + } + } +} + +static void portstatus_parse(char **p_last, osm_opensm_t * p_osm, FILE * out) +{ + fabric_stats_t fs; + struct timeval before, after; + char *p_cmd; + + memset(&fs, 0, sizeof(fs)); + + p_cmd = next_token(p_last); + if (p_cmd) { + if (strcmp(p_cmd, "ca") == 0) { + fs.node_type_lim = IB_NODE_TYPE_CA; + } else if (strcmp(p_cmd, "switch") == 0) { + fs.node_type_lim = IB_NODE_TYPE_SWITCH; + } else if (strcmp(p_cmd, "router") == 0) { + fs.node_type_lim = IB_NODE_TYPE_ROUTER; + } else { + fprintf(out, "Node type not understood\n"); + help_portstatus(out, 1); + return; + } + } + + gettimeofday(&before, NULL); + + /* for each node in the system gather the stats */ + cl_plock_acquire(&p_osm->lock); + 
cl_qmap_apply_func(&(p_osm->subn.node_guid_tbl), __get_stats, + (void *)&fs); + cl_plock_release(&p_osm->lock); + + gettimeofday(&after, NULL); + + /* report the stats */ + fprintf(out, "\"%s\" port status:\n", + fs.node_type_lim ? ib_get_node_type_str(fs. + node_type_lim) : "ALL"); + fprintf(out, + " %" PRIu64 " port(s) scanned on %" PRIu64 + " nodes in %lu us\n", fs.total_ports, fs.total_nodes, + (unsigned long)((after.tv_sec - before.tv_sec) * 1000000L + (after.tv_usec - before.tv_usec))); /* elapsed time needs the seconds delta too: tv_usec alone restarts at 0 every second */ + + if (fs.ports_down) + fprintf(out, " %" PRIu64 " down\n", fs.ports_down); + if (fs.ports_active) + fprintf(out, " %" PRIu64 " active\n", fs.ports_active); + if (fs.ports_1X) + fprintf(out, " %" PRIu64 " at 1X\n", fs.ports_1X); + if (fs.ports_4X) + fprintf(out, " %" PRIu64 " at 4X\n", fs.ports_4X); + if (fs.ports_8X) + fprintf(out, " %" PRIu64 " at 8X\n", fs.ports_8X); + if (fs.ports_12X) + fprintf(out, " %" PRIu64 " at 12X\n", fs.ports_12X); + + if (fs.ports_sdr) + fprintf(out, " %" PRIu64 " at 2.5 Gbps\n", fs.ports_sdr); + if (fs.ports_ddr) + fprintf(out, " %" PRIu64 " at 5.0 Gbps\n", fs.ports_ddr); + if (fs.ports_qdr) + fprintf(out, " %" PRIu64 " at 10.0 Gbps\n", fs.ports_qdr); + if (fs.ports_fdr10) + fprintf(out, " %" PRIu64 " at 10.0 Gbps (FDR10)\n", fs.ports_fdr10); + if (fs.ports_fdr) + fprintf(out, " %" PRIu64 " at 14.0625 Gbps\n", fs.ports_fdr); + if (fs.ports_edr) + fprintf(out, " %" PRIu64 " at 25.78125 Gbps\n", fs.ports_edr); + if (fs.ports_hdr) + fprintf(out, " %" PRIu64 " at 53.125 Gbps\n", fs.ports_hdr); + + if (fs.ports_disabled + fs.ports_reduced_speed + fs.ports_reduced_width + + fs.ports_unenabled_width + fs.ports_unenabled_speed + + fs.ports_unknown_width + fs.ports_unknown_speed > 0) { + fprintf(out, "\nPossible issues:\n"); + } + if (fs.ports_disabled) { + fprintf(out, " %" PRIu64 " disabled\n", fs.ports_disabled); + __print_port_report(out, fs.disabled_ports); + } + if (fs.ports_unenabled_speed) { + fprintf(out, " %" PRIu64 " with unenabled speed\n", + fs.ports_unenabled_speed); + __print_port_report(out, 
fs.unenabled_speed_ports); + } + if (fs.ports_reduced_speed) { + fprintf(out, " %" PRIu64 " with reduced speed\n", + fs.ports_reduced_speed); + __print_port_report(out, fs.reduced_speed_ports); + } + if (fs.ports_unknown_speed) { + fprintf(out, " %" PRIu64 " with unknown speed\n", + fs.ports_unknown_speed); + __print_port_report(out, fs.unknown_speed_ports); + } + if (fs.ports_unenabled_width) { + fprintf(out, " %" PRIu64 " with unenabled width\n", + fs.ports_unenabled_width); + __print_port_report(out, fs.unenabled_width_ports); + } + if (fs.ports_reduced_width) { + fprintf(out, " %" PRIu64 " with reduced width\n", + fs.ports_reduced_width); + __print_port_report(out, fs.reduced_width_ports); + } + if (fs.ports_unknown_width) { + fprintf(out, " %" PRIu64 " with unknown width\n", + fs.ports_unknown_width); + __print_port_report(out, fs.unknown_width_ports); + } + fprintf(out, "\n"); +} + +static void switchbalance_check(osm_opensm_t * p_osm, + osm_switch_t * p_sw, FILE * out, int verbose) +{ + uint8_t port_num; + uint8_t num_ports; + const cl_qmap_t *p_port_tbl; + osm_port_t *p_port; + osm_physp_t *p_physp; + osm_physp_t *p_rem_physp; + osm_node_t *p_rem_node; + uint32_t count[255]; /* max ports is a uint8_t */ + uint8_t output_ports[255]; + uint8_t output_ports_count = 0; + uint32_t min_count = 0xFFFFFFFF; + uint32_t max_count = 0; + unsigned int i; + + memset(count, '\0', sizeof(uint32_t) * 255); + + /* Count port usage */ + p_port_tbl = &p_osm->subn.port_guid_tbl; + for (p_port = (osm_port_t *) cl_qmap_head(p_port_tbl); + p_port != (osm_port_t *) cl_qmap_end(p_port_tbl); + p_port = (osm_port_t *) cl_qmap_next(&p_port->map_item)) { + uint16_t min_lid_ho; + uint16_t max_lid_ho; + uint16_t lid_ho; + + /* Don't count switches in port usage */ + if (osm_node_get_type(p_port->p_node) == IB_NODE_TYPE_SWITCH) + continue; + + osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho); + + if (min_lid_ho == 0 || max_lid_ho == 0) + continue; + + for (lid_ho = min_lid_ho; 
lid_ho <= max_lid_ho; lid_ho++) {
+			port_num = osm_switch_get_port_by_lid(p_sw, lid_ho,
+							      OSM_NEW_LFT);
+			if (port_num == OSM_NO_PATH)
+				continue;
+
+			count[port_num]++;
+		}
+	}
+
+	num_ports = p_sw->num_ports;
+	for (port_num = 1; port_num < num_ports; port_num++) {
+		p_physp = osm_node_get_physp_ptr(p_sw->p_node, port_num);
+
+		/* if port is down/unhealthy, don't consider it in
+		 * min/max calculations
+		 */
+		if (!p_physp || !osm_physp_is_healthy(p_physp)
+		    || !osm_physp_get_remote(p_physp))
+			continue;
+
+		p_rem_physp = osm_physp_get_remote(p_physp);
+		p_rem_node = osm_physp_get_node_ptr(p_rem_physp);
+
+		/* If we are directly connected to a CA/router, its not really
+		 * up for balancing consideration.
+		 */
+		if (osm_node_get_type(p_rem_node) != IB_NODE_TYPE_SWITCH)
+			continue;
+
+		output_ports[output_ports_count] = port_num;
+		output_ports_count++;
+
+		if (count[port_num] < min_count)
+			min_count = count[port_num];
+		if (count[port_num] > max_count)
+			max_count = count[port_num];
+	}
+
+	if (verbose || ((max_count - min_count) > 1)) {
+		if ((max_count - min_count) > 1)
+			fprintf(out,
+				"Unbalanced Switch: 0x%016" PRIx64 " (%s)\n",
+				cl_ntoh64(p_sw->p_node->node_info.node_guid),
+				p_sw->p_node->print_desc);
+		else
+			fprintf(out,
+				"Switch: 0x%016" PRIx64 " (%s)\n",
+				cl_ntoh64(p_sw->p_node->node_info.node_guid),
+				p_sw->p_node->print_desc);
+
+		for (i = 0; i < output_ports_count; i++) {
+			fprintf(out,
+				"Port %d: %d\n",
+				output_ports[i], count[output_ports[i]]);
+		}
+	}
+}
+
+/*
+ * Console handler for the "switchbalance" command.
+ *
+ * Accepts an optional "verbose" keyword followed by an optional switch GUID.
+ * With a GUID only that switch is checked; without one every switch in
+ * subn.sw_guid_tbl is checked.  The checks run with p_osm->lock held.
+ */
+static void switchbalance_parse(char **p_last, osm_opensm_t * p_osm, FILE * out)
+{
+	char *p_cmd;
+	uint64_t guid = 0;
+	osm_switch_t *p_sw;
+	int verbose = 0;
+
+	p_cmd = next_token(p_last);
+	if (p_cmd) {
+		char *p_end;
+
+		if (strcmp(p_cmd, "verbose") == 0) {
+			verbose++;
+			p_cmd = next_token(p_last);
+		}
+
+		if (p_cmd) {
+			guid = strtoull(p_cmd, &p_end, 0);
+			/* a zero GUID or trailing characters are rejected */
+			if (!guid || *p_end != '\0') {
+				fprintf(out, "Invalid guid specified\n");
+				help_switchbalance(out, 1);
+				return;
+			}
+		}
+	}
+
+	cl_plock_acquire(&p_osm->lock);
+	if (guid) {
+		p_sw = osm_get_switch_by_guid(&p_osm->subn, cl_hton64(guid));
+		if (!p_sw) {
+			fprintf(out, "guid not found\n");
+			goto lock_exit;
+		}
+
+		switchbalance_check(p_osm, p_sw, out, verbose);
+	} else {
+		cl_qmap_t *p_sw_guid_tbl = &p_osm->subn.sw_guid_tbl;
+		for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_guid_tbl);
+		     p_sw != (osm_switch_t *) cl_qmap_end(p_sw_guid_tbl);
+		     p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item))
+			switchbalance_check(p_osm, p_sw, out, verbose);
+	}
+lock_exit:
+	cl_plock_release(&p_osm->lock);
+	return;
+}
+
+/*
+ * Report unbalanced LID forwarding on switch p_sw.
+ *
+ * For every non-switch port in subn.port_guid_tbl, walks the port's LID
+ * range and counts, per output port of p_sw and per remote node behind that
+ * output port, how many of those LIDs are forwarded there.  A line is printed
+ * to 'out' whenever the largest and smallest count differ by more than one.
+ */
+static void lidbalance_check(osm_opensm_t * p_osm,
+			     osm_switch_t * p_sw, FILE * out)
+{
+	uint8_t port_num;
+	const cl_qmap_t *p_port_tbl;
+	osm_port_t *p_port;
+
+	p_port_tbl = &p_osm->subn.port_guid_tbl;
+	for (p_port = (osm_port_t *) cl_qmap_head(p_port_tbl);
+	     p_port != (osm_port_t *) cl_qmap_end(p_port_tbl);
+	     p_port = (osm_port_t *) cl_qmap_next(&p_port->map_item)) {
+		uint32_t port_count[255];	/* max ports is a uint8_t */
+		osm_node_t *rem_node[255];
+		uint32_t rem_node_count;
+		uint32_t rem_count[255];
+		osm_physp_t *p_physp;
+		osm_physp_t *p_rem_physp;
+		osm_node_t *p_rem_node;
+		uint32_t port_min_count = 0xFFFFFFFF;
+		uint32_t port_max_count = 0;
+		uint32_t rem_min_count = 0xFFFFFFFF;
+		uint32_t rem_max_count = 0;
+		uint16_t min_lid_ho;
+		uint16_t max_lid_ho;
+		uint16_t lid_ho;
+		uint8_t num_ports;
+		unsigned int i;
+
+		/* we only care about non-switches */
+		if (osm_node_get_type(p_port->p_node) == IB_NODE_TYPE_SWITCH)
+			continue;
+
+		osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
+
+		if (min_lid_ho == 0 || max_lid_ho == 0)
+			continue;
+
+		/* counters are per destination port, so reset them each pass */
+		memset(port_count, '\0', sizeof(uint32_t) * 255);
+		memset(rem_node, '\0', sizeof(osm_node_t *) * 255);
+		rem_node_count = 0;
+		memset(rem_count, '\0', sizeof(uint32_t) * 255);
+
+		for (lid_ho = min_lid_ho; lid_ho <= max_lid_ho; lid_ho++) {
+			boolean_t rem_node_found = FALSE;
+			unsigned int indx = 0;
+
+			port_num =
+			    osm_switch_get_port_by_lid(p_sw, lid_ho,
+						       OSM_NEW_LFT);
+			if (port_num == OSM_NO_PATH)
+				continue;
+
+			p_physp =
+			    osm_node_get_physp_ptr(p_sw->p_node, port_num);
+
+			/* if port is down/unhealthy, can't calculate */
+			if (!p_physp || !osm_physp_is_healthy(p_physp)
+			    || !osm_physp_get_remote(p_physp))
+				continue;
+
+			p_rem_physp = osm_physp_get_remote(p_physp);
+			p_rem_node = osm_physp_get_node_ptr(p_rem_physp);
+
+			/* determine if we've seen this remote node before.
+			 * If not, store it.  If yes, update the counter
+			 */
+			for (i = 0; i < rem_node_count; i++) {
+				if (rem_node[i] == p_rem_node) {
+					rem_node_found = TRUE;
+					indx = i;
+					break;
+				}
+			}
+
+			if (!rem_node_found) {
+				rem_node[rem_node_count] = p_rem_node;
+				rem_count[rem_node_count]++;
+				indx = rem_node_count;
+				rem_node_count++;
+			} else
+				rem_count[indx]++;
+
+			port_count[port_num]++;
+		}
+
+		/* none of this port's LIDs are routed through a usable link */
+		if (!rem_node_count)
+			continue;
+
+		for (i = 0; i < rem_node_count; i++) {
+			if (rem_count[i] < rem_min_count)
+				rem_min_count = rem_count[i];
+			if (rem_count[i] > rem_max_count)
+				rem_max_count = rem_count[i];
+		}
+
+		/* only output ports that carry at least one LID take part */
+		num_ports = p_sw->num_ports;
+		for (i = 0; i < num_ports; i++) {
+			if (!port_count[i])
+				continue;
+			if (port_count[i] < port_min_count)
+				port_min_count = port_count[i];
+			if (port_count[i] > port_max_count)
+				port_max_count = port_count[i];
+		}
+
+		/* Output if this CA/router is being forwarded an unbalanced number of
+		 * times to a destination.
+		 */
+		if ((rem_max_count - rem_min_count) > 1) {
+			fprintf(out,
+				"Unbalanced Remote Forwarding: Switch 0x%016"
+				PRIx64 " (%s): ",
+				cl_ntoh64(p_sw->p_node->node_info.node_guid),
+				p_sw->p_node->print_desc);
+			if (osm_node_get_type(p_port->p_node) ==
+			    IB_NODE_TYPE_CA)
+				fprintf(out, "CA");
+			else if (osm_node_get_type(p_port->p_node) ==
+				 IB_NODE_TYPE_ROUTER)
+				fprintf(out, "Router");
+			fprintf(out, " 0x%016" PRIx64 " (%s): ",
+				cl_ntoh64(p_port->p_node->node_info.node_guid),
+				p_port->p_node->print_desc);
+			for (i = 0; i < rem_node_count; i++) {
+				fprintf(out,
+					"Dest 0x%016" PRIx64 "(%s) - %u ",
+					cl_ntoh64(rem_node[i]->node_info.
+						  node_guid),
+					rem_node[i]->print_desc, rem_count[i]);
+			}
+			fprintf(out, "\n");
+		}
+
+		/* Output if this CA/router is being forwarded through a port
+		 * an unbalanced number of times.
+		 */
+		if ((port_max_count - port_min_count) > 1) {
+			fprintf(out,
+				"Unbalanced Port Forwarding: Switch 0x%016"
+				PRIx64 " (%s): ",
+				cl_ntoh64(p_sw->p_node->node_info.node_guid),
+				p_sw->p_node->print_desc);
+			if (osm_node_get_type(p_port->p_node) ==
+			    IB_NODE_TYPE_CA)
+				fprintf(out, "CA");
+			else if (osm_node_get_type(p_port->p_node) ==
+				 IB_NODE_TYPE_ROUTER)
+				fprintf(out, "Router");
+			fprintf(out, " 0x%016" PRIx64 " (%s): ",
+				cl_ntoh64(p_port->p_node->node_info.node_guid),
+				p_port->p_node->print_desc);
+			for (i = 0; i < num_ports; i++) {
+				if (!port_count[i])
+					continue;
+				fprintf(out, "Port %u - %u: ", i,
+					port_count[i]);
+			}
+			fprintf(out, "\n");
+		}
+	}
+}
+
+/*
+ * Console handler for the "lidbalance" command.
+ *
+ * Takes an optional switch GUID.  With a GUID only that switch is checked;
+ * without one lidbalance_check() is run on every switch in subn.sw_guid_tbl.
+ * The checks run with p_osm->lock held.
+ */
+static void lidbalance_parse(char **p_last, osm_opensm_t * p_osm, FILE * out)
+{
+	char *p_cmd;
+	uint64_t guid = 0;
+	osm_switch_t *p_sw;
+
+	p_cmd = next_token(p_last);
+	if (p_cmd) {
+		char *p_end;
+
+		guid = strtoull(p_cmd, &p_end, 0);
+		/* a zero GUID or trailing characters are rejected */
+		if (!guid || *p_end != '\0') {
+			fprintf(out, "Invalid switchguid specified\n");
+			help_lidbalance(out, 1);
+			return;
+		}
+	}
+
+	cl_plock_acquire(&p_osm->lock);
+	if (guid) {
+		p_sw = osm_get_switch_by_guid(&p_osm->subn, cl_hton64(guid));
+		if (!p_sw) {
+			fprintf(out, "switchguid not found\n");
+			goto lock_exit;
+		}
+		lidbalance_check(p_osm, p_sw, out);
+	} else {
+		cl_qmap_t *p_sw_guid_tbl = &p_osm->subn.sw_guid_tbl;
+		for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_guid_tbl);
+		     p_sw != (osm_switch_t *) cl_qmap_end(p_sw_guid_tbl);
+		     p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item))
+			lidbalance_check(p_osm, p_sw, out);
+	}
+
+lock_exit:
+	cl_plock_release(&p_osm->lock);
+	return;
+}
+
+/* Console handler for "dump_conf": writes subn.opt to 'out'. */
+static void dump_conf_parse(char **p_last, osm_opensm_t * p_osm, FILE * out)
+{
+	osm_subn_output_conf(out, &p_osm->subn.opt);
+}
+
+/* Console handler for "update_desc": calls osm_update_node_desc(). */
+static void update_desc_parse(char **p_last, osm_opensm_t * p_osm, FILE * out)
+{
+	osm_update_node_desc(p_osm);
+}
+
+#ifdef ENABLE_OSM_PERF_MGR
+/*
+ * Linear search of perfmgr.monitored_map for a node whose name equals
+ * 'nodename'.  Returns the node, or NULL when no entry matches.
+ */
+static monitored_node_t *find_node_by_name(osm_opensm_t * p_osm,
+					   char *nodename)
+{
+	cl_map_item_t *item;
+	monitored_node_t *node;
+
+	item = cl_qmap_head(&p_osm->perfmgr.monitored_map);
+	while (item != cl_qmap_end(&p_osm->perfmgr.monitored_map)) {
+		node = (monitored_node_t *)item;
+		if (strcmp(node->name, nodename) == 0)
+			return node;
+		item = cl_qmap_next(item);
+	}
+
+	return NULL;
+}
+
+/*
+ * Keyed lookup of 'guid' in perfmgr.monitored_map.
+ * Returns the node, or NULL when the map has no such key.
+ */
+static monitored_node_t *find_node_by_guid(osm_opensm_t * p_osm,
+					   uint64_t guid)
+{
+	cl_map_item_t *node;
+
+	node = cl_qmap_get(&p_osm->perfmgr.monitored_map, guid);
+	if (node != cl_qmap_end(&p_osm->perfmgr.monitored_map))
+		return (monitored_node_t *)node;
+
+	return NULL;
+}
+
+/*
+ * Print the redirection table of one monitored node to 'out'.
+ * The node header is printed once, before the first port that has
+ * redirection set; nodes without any redirected port print nothing.
+ */
+static void dump_redir_entry(monitored_node_t *p_mon_node, FILE * out)
+{
+	int port, redir;
+
+	/* only display monitored nodes with redirection info */
+	redir = 0;
+	for (port = (p_mon_node->esp0) ?
0 : 1;
+	     port < p_mon_node->num_ports; port++) {
+		if (p_mon_node->port[port].redirection) {
+			if (!redir) {
+				fprintf(out, " Node GUID ESP0 Name\n");
+				fprintf(out, " --------- ---- ----\n");
+				fprintf(out, " 0x%" PRIx64 " %d %s\n",
+					p_mon_node->guid, p_mon_node->esp0,
+					p_mon_node->name);
+				fprintf(out, "\n Port Valid LIDs PKey QP PKey Index\n");
+				fprintf(out, " ---- ----- ---- ---- -- ----------\n");
+				redir = 1;
+			}
+			fprintf(out, " %d %d %u->%u 0x%x 0x%x %d\n",
+				port, p_mon_node->port[port].valid,
+				cl_ntoh16(p_mon_node->port[port].orig_lid),
+				cl_ntoh16(p_mon_node->port[port].lid),
+				cl_ntoh16(p_mon_node->port[port].pkey),
+				cl_ntoh32(p_mon_node->port[port].qp),
+				p_mon_node->port[port].pkey_ix);
+		}
+	}
+	if (redir)
+		fprintf(out, "\n");
+}
+
+/*
+ * Resolve a console-supplied node identifier to a monitored node.
+ *
+ * The identifier is treated as a GUID only when the whole string parses as
+ * a number; anything else is looked up by node name.  errno alone cannot be
+ * used for this decision: it is not guaranteed to be set when strtoull()
+ * finds no digits, and it may still hold a value from an earlier call.
+ *
+ * p_guid    receives the parsed value (meaningful only when *p_is_guid)
+ * p_is_guid set to non-zero when the identifier was used as a GUID
+ *
+ * Returns the node, or NULL when nothing matches.
+ */
+static monitored_node_t *find_node_by_name_or_guid(osm_opensm_t * p_osm,
+						   char *nodename,
+						   uint64_t * p_guid,
+						   int *p_is_guid)
+{
+	char *p_end = NULL;
+
+	errno = 0;
+	*p_guid = strtoull(nodename, &p_end, 0);
+	*p_is_guid = (errno == 0 && p_end != nodename && *p_end == '\0');
+
+	if (*p_is_guid)
+		return find_node_by_guid(p_osm, *p_guid);
+	return find_node_by_name(p_osm, nodename);
+}
+
+/*
+ * Print the perfmgr redirection table.
+ *
+ * nodename  node name or GUID to restrict the dump to, or NULL for all
+ *           monitored nodes
+ *
+ * The monitored map is read with p_osm->lock held.
+ */
+static void dump_redir(osm_opensm_t * p_osm, char *nodename, FILE * out)
+{
+	monitored_node_t *p_mon_node;
+	uint64_t guid;
+	int is_guid;
+
+	/* informational only: the table is still dumped */
+	if (!p_osm->subn.opt.perfmgr_redir)
+		fprintf(out, "Perfmgr redirection not enabled\n");
+
+	fprintf(out, "\nRedirection Table\n");
+	fprintf(out, "-----------------\n");
+	cl_plock_acquire(&p_osm->lock);
+	if (nodename) {
+		p_mon_node = find_node_by_name_or_guid(p_osm, nodename,
+						       &guid, &is_guid);
+		if (p_mon_node)
+			dump_redir_entry(p_mon_node, out);
+		else if (is_guid)
+			fprintf(out, "Node 0x%" PRIx64 " not found...\n", guid);
+		else
+			fprintf(out, "Node %s not found...\n", nodename);
+	} else {
+		p_mon_node = (monitored_node_t *) cl_qmap_head(&p_osm->perfmgr.monitored_map);
+		while (p_mon_node != (monitored_node_t *) cl_qmap_end(&p_osm->perfmgr.monitored_map)) {
+			dump_redir_entry(p_mon_node, out);
+			p_mon_node = (monitored_node_t *) cl_qmap_next((const cl_map_item_t *)p_mon_node);
+		}
+	}
+	cl_plock_release(&p_osm->lock);
+}
+
+/*
+ * Drop the redirection state of every redirected port of one node.
+ * The port record is zeroed, then marked valid again with its original LID.
+ */
+static void clear_redir_entry(monitored_node_t *p_mon_node)
+{
+	int port;
+	ib_net16_t orig_lid;
+
+	for (port = (p_mon_node->esp0) ? 0 : 1;
+	     port < p_mon_node->num_ports; port++) {
+		if (p_mon_node->port[port].redirection) {
+			orig_lid = p_mon_node->port[port].orig_lid;
+			memset(&p_mon_node->port[port], 0,
+			       sizeof(monitored_port_t));
+			p_mon_node->port[port].valid = TRUE;
+			p_mon_node->port[port].orig_lid = orig_lid;
+		}
+	}
+}
+
+/*
+ * Clear perfmgr redirection state.
+ *
+ * nodename  node name or GUID to restrict the operation to, or NULL for
+ *           all monitored nodes
+ *
+ * The monitored map is modified with p_osm->lock held.
+ */
+static void clear_redir(osm_opensm_t * p_osm, char *nodename, FILE * out)
+{
+	monitored_node_t *p_mon_node;
+	uint64_t guid;
+	int is_guid;
+
+	/* informational only: the clear is still performed */
+	if (!p_osm->subn.opt.perfmgr_redir)
+		fprintf(out, "Perfmgr redirection not enabled\n");
+
+	cl_plock_acquire(&p_osm->lock);
+	if (nodename) {
+		p_mon_node = find_node_by_name_or_guid(p_osm, nodename,
+						       &guid, &is_guid);
+		if (p_mon_node)
+			clear_redir_entry(p_mon_node);
+		else if (is_guid)
+			fprintf(out, "Node 0x%" PRIx64 " not found...\n", guid);
+		else
+			fprintf(out, "Node %s not found...\n", nodename);
+	} else {
+		p_mon_node = (monitored_node_t *) cl_qmap_head(&p_osm->perfmgr.monitored_map);
+		while (p_mon_node != (monitored_node_t *) cl_qmap_end(&p_osm->perfmgr.monitored_map)) {
+			clear_redir_entry(p_mon_node);
+			p_mon_node = (monitored_node_t *) cl_qmap_next((const cl_map_item_t *)p_mon_node);
+		}
+	}
+	cl_plock_release(&p_osm->lock);
+}
+
+/*
+ * Console handler for the "perfmgr" / "pm" commands.
+ *
+ * The first token selects a sub-command; with no token the current
+ * Performance Manager status is printed.  Unknown sub-commands are reported
+ * on 'out'.
+ */
+static void perfmgr_parse(char **p_last, osm_opensm_t * p_osm, FILE * out)
+{
+	char *p_cmd;
+
+	p_cmd = next_token(p_last);
+	if (p_cmd) {
+		if (strcmp(p_cmd, "enable") == 0) {
+			osm_perfmgr_set_state(&p_osm->perfmgr,
+					      PERFMGR_STATE_ENABLED);
+		} else if (strcmp(p_cmd, "disable") == 0) {
+			osm_perfmgr_set_state(&p_osm->perfmgr,
+					      PERFMGR_STATE_DISABLE);
+		} else if (strcmp(p_cmd, "clear_counters") == 0) {
+			osm_perfmgr_clear_counters(&p_osm->perfmgr);
+		} else if (strcmp(p_cmd, "set_rm_nodes") == 0) {
+			osm_perfmgr_set_rm_nodes(&p_osm->perfmgr, 1);
+		} else if (strcmp(p_cmd, "clear_rm_nodes") == 0) {
+			osm_perfmgr_set_rm_nodes(&p_osm->perfmgr, 0);
+		} else if (strcmp(p_cmd, "set_query_cpi") == 0) {
+			osm_perfmgr_set_query_cpi(&p_osm->perfmgr, 1);
+		} else if (strcmp(p_cmd, "clear_query_cpi") == 0) {
+			osm_perfmgr_set_query_cpi(&p_osm->perfmgr, 0);
+		} else if (strcmp(p_cmd, "dump_counters") == 0) {
+			p_cmd = next_token(p_last);
+			if (p_cmd && (strcmp(p_cmd, "mach") == 0)) {
+				osm_perfmgr_dump_counters(&p_osm->perfmgr,
+							  PERFMGR_EVENT_DB_DUMP_MR);
+			} else {
+				osm_perfmgr_dump_counters(&p_osm->perfmgr,
+							  PERFMGR_EVENT_DB_DUMP_HR);
+			}
+		} else if (strcmp(p_cmd, "clear_inactive") == 0) {
+			unsigned cnt = osm_perfmgr_delete_inactive(&p_osm->perfmgr);
+			fprintf(out, "Removed %u nodes from Database\n", cnt);
+		} else if (strcmp(p_cmd, "print_counters") == 0 ||
+			   strcmp(p_cmd, "pc") == 0) {
+			char *port = NULL;
+			p_cmd = name_token(p_last);
+			if (p_cmd) {
+				/* "<node>:<port>" - split at the colon */
+				port = strchr(p_cmd, ':');
+				if (port) {
+					*port = '\0';
+					port++;
+				}
+			}
+			osm_perfmgr_print_counters(&p_osm->perfmgr, p_cmd,
+						   out, port, 0);
+		} else if (strcmp(p_cmd, "print_errors") == 0 ||
+			   strcmp(p_cmd, "pe") == 0) {
+			p_cmd = name_token(p_last);
+			osm_perfmgr_print_counters(&p_osm->perfmgr, p_cmd,
+						   out, NULL, 1);
+		} else if (strcmp(p_cmd, "dump_redir") == 0) {
+			p_cmd = name_token(p_last);
+			dump_redir(p_osm, p_cmd, out);
+		} else if (strcmp(p_cmd, "clear_redir") == 0) {
+			p_cmd = name_token(p_last);
+			clear_redir(p_osm, p_cmd, out);
+		} else if (strcmp(p_cmd, "sweep_time") == 0) {
+			p_cmd = next_token(p_last);
+			if (p_cmd) {
+				char *p_end = NULL;
+				long time_s;
+
+				/* Range-check before narrowing: assigning
+				 * atoi() straight to a uint16_t turned -1
+				 * into 65535 and 65536 into 0.
+				 */
+				errno = 0;
+				time_s = strtol(p_cmd, &p_end, 10);
+				if (errno || p_end == p_cmd ||
+				    time_s < 1 || time_s > UINT16_MAX)
+					fprintf(out,
+						"sweep_time requires a "
+						"positive time period "
+						"(in seconds) to be "
+						"specified\n");
+				else
+					osm_perfmgr_set_sweep_time_s(
+							&p_osm->perfmgr,
+							(uint16_t) time_s);
+			} else {
+				fprintf(out,
+					"sweep_time requires a time period "
+					"(in seconds) to be specified\n");
+			}
+		} else if (strcmp(p_cmd, "sweep") == 0) {
+			osm_sm_signal(&p_osm->sm, OSM_SIGNAL_PERFMGR_SWEEP);
+			fprintf(out, "sweep initiated...\n");
+		} else {
+			fprintf(out, "\"%s\" option not found\n", p_cmd);
+		}
+	} else {
+		fprintf(out, "Performance Manager status:\n"
+			"state : %s\n"
+			"sweep state : %s\n"
+			"sweep time : %us\n"
+			"outstanding queries/max : %d/%u\n"
+			"remove missing nodes from DB : %s\n"
+			"query ClassPortInfo : %s\n",
+			osm_perfmgr_get_state_str(&p_osm->perfmgr),
+			osm_perfmgr_get_sweep_state_str(&p_osm->perfmgr),
+			osm_perfmgr_get_sweep_time_s(&p_osm->perfmgr),
+			p_osm->perfmgr.outstanding_queries,
+			p_osm->perfmgr.max_outstanding_queries,
+			osm_perfmgr_get_rm_nodes(&p_osm->perfmgr)
+			? "TRUE" : "FALSE",
+			osm_perfmgr_get_query_cpi(&p_osm->perfmgr)
+			? "TRUE" : "FALSE");
+	}
+}
+#endif				/* ENABLE_OSM_PERF_MGR */
+
+/* Console handler for "quit": closes the current console connection. */
+static void quit_parse(char **p_last, osm_opensm_t * p_osm, FILE * out)
+{
+	cio_close(&p_osm->console, &p_osm->log);
+}
+
+/* Help text for the "version" command. */
+static void help_version(FILE * out, int detail)
+{
+	fprintf(out, "version -- print the OSM version\n");
+}
+
+/* Console handler for "version": prints osm_version and the build stamp. */
+static void version_parse(char **p_last, osm_opensm_t * p_osm, FILE * out)
+{
+	fprintf(out, "%s build %s %s\n", p_osm->osm_version, __DATE__, __TIME__);
+}
+
+/* more parse routines go here */
+/* Singly linked list of compiled regular expressions (dump_portguid). */
+typedef struct _regexp_list {
+	regex_t exp;
+	struct _regexp_list *next;
+} regexp_list_t;
+
+/*
+ * Console handler for "dump_portguid".
+ *
+ * Every token is compiled as an extended regular expression, except the
+ * pair "file <name>" which redirects the GUID list to that file.  Each alias
+ * port GUID whose node print_desc matches at least one expression is printed
+ * once.  Nothing is dumped while the subnet still needs an update.
+ */
+static void dump_portguid_parse(char **p_last, osm_opensm_t * p_osm, FILE * out)
+{
+	cl_qmap_t *p_alias_port_guid_tbl;
+	osm_alias_guid_t *p_alias_guid, *p_next_alias_guid;
+	regexp_list_t *p_regexp, *p_head_regexp = NULL;
+	FILE *output = out;
+
+	while (1) {
+		char *p_cmd = next_token(p_last);
+		if (!p_cmd)
+			break;
+
+		if (strcmp(p_cmd, "file") == 0) {
+			p_cmd = next_token(p_last);
+			if (p_cmd) {
+				/* a later "file" replaces an earlier one */
+				if (output != out)
+					fclose(output);
+				output = fopen(p_cmd, "w+");
+				if (output == NULL) {
+					fprintf(out,
+						"Could not open file %s: %s\n",
+						p_cmd, strerror(errno));
+					output = out;
+				}
+			} else
+				fprintf(out, "No file name passed\n");
+		} else if (!(p_regexp = malloc(sizeof(*p_regexp)))) {
+			fprintf(out, "No memory\n");
+			break;
+		} else if (regcomp(&p_regexp->exp, p_cmd,
+				   REG_NOSUB | REG_EXTENDED) != 0) {
+			fprintf(out, "Cannot parse regular expression \'%s\'."
+				" Skipping\n", p_cmd);
+			free(p_regexp);
+			continue;
+		} else {
+			p_regexp->next = p_head_regexp;
+			p_head_regexp = p_regexp;
+		}
+	}
+
+	/* Check we have at least one expression to match */
+	if (p_head_regexp == NULL) {
+		fprintf(out, "No valid expression provided. Aborting\n");
+		goto Exit;
+	}
+
+	if (p_osm->sm.p_subn->need_update != 0) {
+		fprintf(out, "Subnet is not ready yet. Try again later\n");
+		goto Free_and_exit;
+	}
+
+	/* Subnet doesn't need to be updated so we can carry on */
+
+	p_alias_port_guid_tbl = &(p_osm->sm.p_subn->alias_port_guid_tbl);
+	CL_PLOCK_ACQUIRE(p_osm->sm.p_lock);
+
+	p_next_alias_guid = (osm_alias_guid_t *) cl_qmap_head(p_alias_port_guid_tbl);
+	while (p_next_alias_guid != (osm_alias_guid_t *) cl_qmap_end(p_alias_port_guid_tbl)) {
+
+		p_alias_guid = p_next_alias_guid;
+		p_next_alias_guid =
+		    (osm_alias_guid_t *) cl_qmap_next(&p_next_alias_guid->map_item);
+
+		/* stop at the first matching expression: print each GUID once */
+		for (p_regexp = p_head_regexp; p_regexp != NULL;
+		     p_regexp = p_regexp->next)
+			if (regexec(&p_regexp->exp,
+				    p_alias_guid->p_base_port->p_node->print_desc,
+				    0, NULL, 0) == 0) {
+				fprintf(output, "0x%" PRIxLEAST64 "\n",
+					cl_ntoh64(p_alias_guid->alias_guid));
+				break;
+			}
+	}
+
+	CL_PLOCK_RELEASE(p_osm->sm.p_lock);
+
+Free_and_exit:
+	for (; p_head_regexp; p_head_regexp = p_regexp) {
+		p_regexp = p_head_regexp->next;
+		regfree(&p_head_regexp->exp);
+		free(p_head_regexp);
+	}
+Exit:
+	if (output != out)
+		fclose(output);
+}
+
+/*
+ * Help text for the "dump_portguid" command.
+ * The detailed form names the command as it is registered in console_cmds.
+ */
+static void help_dump_portguid(FILE * out, int detail)
+{
+	fprintf(out,
+		"dump_portguid [file filename] regexp1 [regexp2 [regexp3 ...]] -- Dump port GUID matching a regexp \n");
+	if (detail) {
+		fprintf(out,
+			"dump_portguid -- Dump all the port GUIDs whose node_desc matches one of the provided regexps\n");
+		fprintf(out,
+			" [file filename] -- Send the port GUID list to the specified file instead of regular output\n");
+	}
+
+}
+
+/* Command table: name, help routine, parse routine; NULL name terminates. */
+static const struct command console_cmds[] = {
+	{"help", &help_command, &help_parse},
+
{"quit", &help_quit, &quit_parse},
+	{"loglevel", &help_loglevel, &loglevel_parse},
+	{"permodlog", &help_permodlog, &permodlog_parse},
+	{"priority", &help_priority, &priority_parse},
+	{"resweep", &help_resweep, &resweep_parse},
+	{"reroute", &help_reroute, &reroute_parse},
+	{"sweep", &help_sweep, &sweep_parse},
+	{"status", &help_status, &status_parse},
+	{"logflush", &help_logflush, &logflush_parse},
+	{"querylid", &help_querylid, &querylid_parse},
+	{"portstatus", &help_portstatus, &portstatus_parse},
+	{"switchbalance", &help_switchbalance, &switchbalance_parse},
+	{"lidbalance", &help_lidbalance, &lidbalance_parse},
+	{"dump_conf", &help_dump_conf, &dump_conf_parse},
+	{"update_desc", &help_update_desc, &update_desc_parse},
+	{"version", &help_version, &version_parse},
+#ifdef ENABLE_OSM_PERF_MGR
+	{"perfmgr", &help_perfmgr, &perfmgr_parse},
+	{"pm", &help_pm, &perfmgr_parse},
+#endif				/* ENABLE_OSM_PERF_MGR */
+	{"dump_portguid", &help_dump_portguid, &dump_portguid_parse},
+	{NULL, NULL, NULL}	/* end of array */
+};
+
+/*
+ * Split one input line into a command and its arguments and dispatch it.
+ *
+ * While a loop command is active the only input acted upon is "q", which
+ * stops the loop; any other input is swallowed.  Otherwise the first token
+ * is looked up in console_cmds and its parse routine is called with the
+ * remainder of the line.  Blank lines are ignored.
+ */
+static void parse_cmd_line(char *line, osm_opensm_t * p_osm)
+{
+	char *p_cmd, *p_last;
+	int i, found = 0;
+	FILE *out = p_osm->console.out;
+
+	/* isspace() is only defined for unsigned char values and EOF */
+	while (isspace((unsigned char)*line))
+		line++;
+	if (!*line)
+		return;
+
+	/* find first token which is the command */
+	p_cmd = strtok_r(line, " \t\n\r", &p_last);
+	if (!p_cmd) {
+		fprintf(out, "Error parsing command line: `%s'\n", line);
+	} else if (loop_command.on) {
+		if (!strcmp(p_cmd, "q"))
+			loop_command.on = 0;
+	} else {
+		for (i = 0; console_cmds[i].name; i++) {
+			if (!strcmp(p_cmd, console_cmds[i].name)) {
+				found = 1;
+				console_cmds[i].parse_function(&p_last, p_osm,
+							       out);
+				break;
+			}
+		}
+		if (!found) {
+			fprintf(out, "%s : Command not found\n\n", p_cmd);
+			help_command(out, 0);
+		}
+	}
+	if (loop_command.on) {
+		fprintf(out, "use \"q\" to quit loop\n");
+		fflush(out);
+	}
+}
+
+/*
+ * Run one iteration of the console: fire a due loop command, then wait up
+ * to one second for a new connection on the listening socket and/or for
+ * input on the current console stream.
+ *
+ * Returns 0 normally and -1 when the local console input has been closed.
+ */
+int osm_console(osm_opensm_t * p_osm)
+{
+	struct pollfd pollfd[2];
+	char *p_line;
+	size_t len = 0;
+	ssize_t n;
+	struct pollfd *fds;
+	nfds_t nfds;
+	osm_console_t *p_oct = &p_osm->console;
+
+	pollfd[0].fd = p_oct->socket;
+	pollfd[0].events = POLLIN;
+	pollfd[0].revents = 0;
+
+	pollfd[1].fd = p_oct->in_fd;
+	pollfd[1].events = POLLIN;
+	pollfd[1].revents = 0;
+
+	/* without a listening socket only the input descriptor is polled */
+	fds = p_oct->socket < 0 ? &pollfd[1] : pollfd;
+	nfds = p_oct->socket < 0 || pollfd[1].fd < 0 ? 1 : 2;
+
+	if (loop_command.on && loop_command_check_time() &&
+	    loop_command.loop_function) {
+		if (p_oct->out) {
+			loop_command.loop_function(p_osm, p_oct->out);
+			fflush(p_oct->out);
+		} else {
+			/* nowhere to print to: stop looping */
+			loop_command.on = 0;
+		}
+	}
+
+	if (poll(fds, nfds, 1000) <= 0)
+		return 0;
+
+#ifdef ENABLE_OSM_CONSOLE_LOOPBACK
+	if (pollfd[0].revents & POLLIN) {
+		int new_fd = 0;
+		struct sockaddr_in sin;
+		socklen_t sin_len = sizeof(sin);
+		struct hostent *hent;
+		/* A failed accept() must not disturb a session that is
+		 * already established, so in_fd is left untouched.
+		 */
+		if ((new_fd = accept(p_oct->socket, (struct sockaddr *)&sin,
+				     &sin_len)) < 0) {
+			OSM_LOG(&p_osm->log, OSM_LOG_ERROR,
+				"ERR 4B04: Failed to accept console socket: %s\n",
+				strerror(errno));
+			return 0;
+		}
+		if (inet_ntop
+		    (AF_INET, &sin.sin_addr, p_oct->client_ip,
+		     sizeof(p_oct->client_ip)) == NULL) {
+			snprintf(p_oct->client_ip, sizeof(p_oct->client_ip),
+				 "STRING_UNKNOWN");
+		}
+		if ((hent = gethostbyaddr((const char *)&sin.sin_addr,
+					  sizeof(struct in_addr),
+					  AF_INET)) == NULL) {
+			snprintf(p_oct->client_hn, sizeof(p_oct->client_hn),
+				 "STRING_UNKNOWN");
+		} else {
+			snprintf(p_oct->client_hn, sizeof(p_oct->client_hn),
+				 "%s", hent->h_name);
+		}
+		if (is_authorized(p_oct)) {
+			cio_open(p_oct, new_fd, &p_osm->log);
+		} else {
+			OSM_LOG(&p_osm->log, OSM_LOG_ERROR,
+				"ERR 4B05: Console connection denied: %s (%s)\n",
+				p_oct->client_hn, p_oct->client_ip);
+			close(new_fd);
+		}
+		return 0;
+	}
+#endif
+
+	if (pollfd[1].revents & POLLIN) {
+		p_line = NULL;
+		/* Get input line */
+		n = getline(&p_line, &len, p_oct->in);
+		if (n > 0) {
+			/* Parse and act on input */
+			parse_cmd_line(p_line, p_osm);
+			if (!loop_command.on) {
+				osm_console_prompt(p_oct->out);
+			}
+		} else
+			cio_close(p_oct, &p_osm->log);
+		free(p_line);
+		return 0;
+	}
+	/* input fd is closed (hanged up) */
+	if (pollfd[1].revents & POLLHUP) {
+#ifdef ENABLE_OSM_CONSOLE_LOOPBACK
+		/* If we are using a socket, we close the current connection */
+		if (p_oct->socket >= 0) {
+			cio_close(p_oct, &p_osm->log);
+			return 0;
+		}
+#endif
+		/* If we use a local console, stdin is closed (most probable is pipe ended)
+		 * so we close the local console */
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/opensm/osm_console_io.c b/opensm/osm_console_io.c
new file mode 100644
index 0000000..97fe674
--- /dev/null
+++ b/opensm/osm_console_io.c
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2005-2009 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2008 HNR Consulting. All rights reserved.
+ * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * Abstract:
+ * Provide a framework for the Console which decouples the connection
+ * or I/O from the functionality, or commands.
+ *
+ * Extensible - allows a variety of connection methods independent of
+ * the console commands.
+ */
+
+#if HAVE_CONFIG_H
+# include
+#endif				/* HAVE_CONFIG_H */
+
+#ifdef ENABLE_OSM_CONSOLE_LOOPBACK
+#include
+#include
+#include
+#include
+#endif
+#include
+#include
+#include
+#include
+#define FILE_ID OSM_FILE_CONSOLE_IO_C
+#include
+#include
+
+/* Returns non-zero when 'str' names the local (stdin/stdout) console. */
+static int is_local(char *str)
+{
+	/* convenience - checks if just stdin/stdout */
+	if (str)
+		return (strcmp(str, OSM_LOCAL_CONSOLE) == 0);
+	return 0;
+}
+
+#ifdef ENABLE_OSM_CONSOLE_LOOPBACK
+/* Returns non-zero when 'str' names the loopback socket console. */
+static int is_loopback(char *str)
+{
+	/* convenience - checks if socket based connection */
+	if (str)
+		return (strcmp(str, OSM_LOOPBACK_CONSOLE) == 0);
+	return 0;
+}
+#else
+#define is_loopback is_local
+#endif
+
+#ifdef ENABLE_OSM_CONSOLE_SOCKET
+/* Returns non-zero when 'str' names a socket console (remote or loopback). */
+static int is_remote(char *str)
+{
+	/* convenience - checks if socket based connection */
+	if (str)
+		return strcmp(str, OSM_REMOTE_CONSOLE) == 0 || is_loopback(str);
+	return 0;
+}
+#else
+#define is_remote is_loopback
+#endif
+
+/* Returns non-zero when p_opt->console selects any supported console type. */
+int is_console_enabled(osm_subn_opt_t * p_opt)
+{
+	/* checks for a variety of types of consoles - default is off or 0 */
+	if (p_opt)
+		return is_local(p_opt->console) || is_loopback(p_opt->console)
+			|| is_remote(p_opt->console);
+	return 0;
+}
+
+
+#ifdef ENABLE_OSM_CONSOLE_LOOPBACK
+/*
+ * Close the current console connection, if any.
+ *
+ * Only descriptors greater than zero are closed, so a console running on
+ * stdin (descriptor 0) is left open.  Returns the fclose() result, or -1
+ * when there was nothing to close.
+ */
+int cio_close(osm_console_t * p_oct, osm_log_t * p_log)
+{
+	int rtnval = -1;
+	if (p_oct && p_oct->in_fd > 0) {
+		OSM_LOG(p_log, OSM_LOG_VERBOSE,
+			"Console connection closed: %s (%s)\n",
+			p_oct->client_hn, p_oct->client_ip);
+		rtnval = fclose(p_oct->in);
+		p_oct->in_fd = -1;
+		p_oct->out_fd = -1;
+		p_oct->in = NULL;
+		p_oct->out = NULL;
+	}
+	return rtnval;
+}
+
+/*
+ * Attach the accepted descriptor new_fd as the console connection.
+ *
+ * If a session is already active the new client is asked whether to kill
+ * it; any answer other than y/Y aborts the new connection.  A single stdio
+ * stream is created on new_fd and used for the question and for the session
+ * itself, so input already buffered while answering is not lost and no
+ * stream is leaked.
+ *
+ * Returns zero if opened fine, -1 otherwise.  On failure new_fd has been
+ * closed and p_oct is not pointed at it.
+ */
+int cio_open(osm_console_t * p_oct, int new_fd, osm_log_t * p_log)
+{
+	FILE *file;
+	char *p_line = NULL;
+	size_t len = 0;
+	ssize_t n;
+
+	file = fdopen(new_fd, "w+");
+	if (!file) {
+		OSM_LOG(p_log, OSM_LOG_ERROR,
+			"Failed to open console stream: %s\n",
+			strerror(errno));
+		close(new_fd);
+		return -1;
+	}
+
+	if (p_oct->in_fd >= 0) {
+		fprintf(file, "OpenSM Console connection already in use\n"
+			" kill other session (y/n)? ");
+		fflush(file);
+		n = getline(&p_line, &len, file);
+		if (n > 0 && (p_line[0] == 'y' || p_line[0] == 'Y'))
+			cio_close(p_oct, p_log);
+		else {
+			OSM_LOG(p_log, OSM_LOG_INFO,
+				"Console connection aborted: %s (%s) - "
+				"already in use\n",
+				p_oct->client_hn, p_oct->client_ip);
+			/* fclose() also closes new_fd */
+			fclose(file);
+			free(p_line);
+			return -1;
+		}
+		free(p_line);
+	}
+	p_oct->in_fd = new_fd;
+	p_oct->out_fd = p_oct->in_fd;
+	p_oct->in = file;
+	p_oct->out = p_oct->in;
+	osm_console_prompt(p_oct->out);
+	OSM_LOG(p_log, OSM_LOG_VERBOSE, "Console connection accepted: %s (%s)\n",
+		p_oct->client_hn, p_oct->client_ip);
+
+	return 0;
+}
+
+/**********************************************************************
+ * Do authentication & authorization check
+ **********************************************************************/
+int is_authorized(osm_console_t * p_oct)
+{
+	/* allowed to use the console? */
+	p_oct->authorized = !is_remote(p_oct->client_type) ||
+	    hosts_ctl((char *)OSM_DAEMON_NAME, p_oct->client_hn, p_oct->client_ip,
+		      (char *)STRING_UNKNOWN);
+	return p_oct->authorized;
+}
+#endif
+
+/* Print the console prompt on 'out' and flush it; a NULL stream is ignored. */
+void osm_console_prompt(FILE * out)
+{
+	if (out) {
+		fprintf(out, "OpenSM %s", OSM_COMMAND_PROMPT);
+		fflush(out);
+	}
+}
+
+/*
+ * Set up the console selected by opt->console.
+ *
+ * The local console is bound to stdin/stdout.  The loopback and remote
+ * consoles create a listening TCP socket on opt->console_port, bound to
+ * the loopback address or to any address respectively.
+ *
+ * Returns 0 on success and -1 on failure.  On failure the listening socket
+ * has been closed and p_oct->socket is -1.
+ */
+int osm_console_init(osm_subn_opt_t * opt, osm_console_t * p_oct, osm_log_t * p_log)
+{
+	p_oct->socket = -1;
+	strncpy(p_oct->client_type, opt->console, sizeof(p_oct->client_type) - 1);
+	p_oct->client_type[sizeof(p_oct->client_type) - 1] = '\0';
+
+	/* set up the file descriptors for the console */
+	if (strcmp(opt->console, OSM_LOCAL_CONSOLE) == 0) {
+		p_oct->in = stdin;
+		p_oct->out = stdout;
+		p_oct->in_fd = fileno(stdin);
+		p_oct->out_fd = fileno(stdout);
+
+		osm_console_prompt(p_oct->out);
+#ifdef ENABLE_OSM_CONSOLE_LOOPBACK
+	} else if (strcmp(opt->console, OSM_LOOPBACK_CONSOLE) == 0
+#ifdef ENABLE_OSM_CONSOLE_SOCKET
+		   || strcmp(opt->console, OSM_REMOTE_CONSOLE) == 0
+#endif
+		   ) {
+		struct sockaddr_in sin;
+		int optval = 1;
+
+		if ((p_oct->socket = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+			OSM_LOG(p_log, OSM_LOG_ERROR,
+				"ERR 4B01: Failed to open console socket: %s\n",
+				strerror(errno));
+			return -1;
+		}
+
+		if (setsockopt(p_oct->socket, SOL_SOCKET, SO_REUSEADDR,
+			       &optval, sizeof(optval))) {
+			OSM_LOG(p_log, OSM_LOG_ERROR,
+				"ERR 4B06: Failed to set socket option: %s\n",
+				strerror(errno));
+			goto err_close;
+		}
+
+		/* clear padding so bind() never sees indeterminate bytes */
+		memset(&sin, 0, sizeof(sin));
+		sin.sin_family = AF_INET;
+		sin.sin_port = htons(opt->console_port);
+#ifdef ENABLE_OSM_CONSOLE_SOCKET
+		if (strcmp(opt->console, OSM_REMOTE_CONSOLE) == 0)
+			sin.sin_addr.s_addr = htonl(INADDR_ANY);
+		else
+#endif
+			sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		if (bind(p_oct->socket, (struct sockaddr *)&sin,
+			 sizeof(sin)) < 0) {
+			OSM_LOG(p_log, OSM_LOG_ERROR,
+				"ERR 4B02: Failed to bind console socket: %s\n",
+				strerror(errno));
+			goto err_close;
+		}
+		if (listen(p_oct->socket, 1) < 0) {
+			OSM_LOG(p_log, OSM_LOG_ERROR,
+				"ERR 4B03: Failed to listen on console socket: %s\n",
+				strerror(errno));
+			goto err_close;
+		}
+
+		signal(SIGPIPE, SIG_IGN);	/* protect ourselves from closed pipes */
+		p_oct->in = NULL;
+		p_oct->out = NULL;
+		p_oct->in_fd = -1;
+		p_oct->out_fd = -1;
+		OSM_LOG(p_log, OSM_LOG_INFO,
+			"Console listening on port %d\n", opt->console_port);
+#endif
+	}
+
+	return 0;
+
+#ifdef ENABLE_OSM_CONSOLE_LOOPBACK
+err_close:
+	/* do not leak the listening socket when setup fails half-way */
+	close(p_oct->socket);
+	p_oct->socket = -1;
+	return -1;
+#endif
+}
+
+/* clean up and release resources */
+void osm_console_exit(osm_console_t * p_oct, osm_log_t * p_log)
+{
+#ifdef ENABLE_OSM_CONSOLE_LOOPBACK
+	cio_close(p_oct, p_log);
+	/* "> 0" keeps descriptor 0 open if the socket was never set up */
+	if (p_oct->socket > 0) {
+		OSM_LOG(p_log, OSM_LOG_INFO, "Closing console socket\n");
+		close(p_oct->socket);
+		p_oct->socket = -1;
+	}
+#endif
+}
diff --git a/opensm/osm_db_files.c b/opensm/osm_db_files.c
new file mode 100644
index 0000000..f93fac9
--- /dev/null
+++ b/opensm/osm_db_files.c
@@ -0,0 +1,750 @@
+/*
+ * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2007 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * Abstract:
+ * Implementation of the osm_db interface using simple text files
+ */
+
+#if HAVE_CONFIG_H
+# include
+#endif				/* HAVE_CONFIG_H */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#define FILE_ID OSM_FILE_DB_FILES_C
+#include
+#include
+#include
+
+/****d* Database/OSM_DB_MAX_LINE_LEN
+ * NAME
+ * OSM_DB_MAX_LINE_LEN
+ *
+ * DESCRIPTION
+ * The Maximal line length allowed for the file
+ *
+ * SYNOPSIS
+ */
+#define OSM_DB_MAX_LINE_LEN 1024
+/**********/
+
+/****s* OpenSM: Database/osm_db_domain_imp
+ * NAME
+ * osm_db_domain_imp
+ *
+ * DESCRIPTION
+ * An implementation for domain of the database based on text files and
+ * hash tables.
+ *
+ * SYNOPSIS
+ */
+typedef struct osm_db_domain_imp {
+	char *file_name;
+	st_table *p_hash;
+	cl_spinlock_t lock;
+	boolean_t dirty;
+} osm_db_domain_imp_t;
+/*
+ * FIELDS
+ *
+ * SEE ALSO
+ * osm_db_domain_t
+ *********/
+
+/****s* OpenSM: Database/osm_db_imp_t
+ * NAME
+ * osm_db_imp_t
+ *
+ * DESCRIPTION
+ * An implementation for file based database
+ *
+ * SYNOPSIS
+ */
+typedef struct osm_db_imp {
+	const char *db_dir_name;
+} osm_db_imp_t;
+/*
+ * FIELDS
+ *
+ * db_dir_name
+ * The directory holding the database
+ *
+ * SEE ALSO
+ * osm_db_t
+ *********/
+
+/* Zero the database object and construct its (still empty) domain list. */
+void osm_db_construct(IN osm_db_t * p_db)
+{
+	memset(p_db, 0, sizeof(osm_db_t));
+	cl_list_construct(&p_db->domains);
+}
+
+/*
+ * Release everything owned by a domain: its entries, lock, hash table,
+ * file name and implementation record.  The osm_db_domain_t itself is left
+ * for the caller to free.
+ */
+void osm_db_domain_destroy(IN osm_db_domain_t * p_db_domain)
+{
+	osm_db_domain_imp_t *p_domain_imp;
+	p_domain_imp = (osm_db_domain_imp_t *) p_db_domain->p_domain_imp;
+
+	osm_db_clear(p_db_domain);
+
+	cl_spinlock_destroy(&p_domain_imp->lock);
+
+	st_free_table(p_domain_imp->p_hash);
+	free(p_domain_imp->file_name);
+	free(p_domain_imp);
+}
+
+/* Destroy and free every domain, then the list and the implementation. */
+void osm_db_destroy(IN osm_db_t * p_db)
+{
+	osm_db_domain_t *p_domain;
+
+	while ((p_domain = cl_list_remove_head(&p_db->domains)) != NULL) {
+		osm_db_domain_destroy(p_domain);
+		free(p_domain);
+	}
+	cl_list_destroy(&p_db->domains);
+	free(p_db->p_db_imp);
+}
+
+/*
+ * Initialise the file based database.
+ *
+ * The directory comes from the OSM_CACHE_DIR environment variable, falling
+ * back to OSM_DEFAULT_CACHE_DIR when it is unset or empty, and is created
+ * when it does not exist.
+ *
+ * Returns 0 on success, -1 when memory cannot be allocated and 1 when the
+ * directory cannot be created.
+ */
+int osm_db_init(IN osm_db_t * p_db, IN osm_log_t * p_log)
+{
+	osm_db_imp_t *p_db_imp;
+	struct stat dstat;
+
+	OSM_LOG_ENTER(p_log);
+
+	p_db_imp = malloc(sizeof(osm_db_imp_t));
+	if (!p_db_imp) {
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6100: "
+			"Failed to allocate db memory\n");
+		/* keep enter/exit log records paired on every path */
+		OSM_LOG_EXIT(p_log);
+		return -1;
+	}
+
+	p_db_imp->db_dir_name = getenv("OSM_CACHE_DIR");
+	if (!p_db_imp->db_dir_name || !(*p_db_imp->db_dir_name))
+		p_db_imp->db_dir_name = OSM_DEFAULT_CACHE_DIR;
+
+	/* Create the directory if it doesn't exist */
+	/* There is a difference in creating directory between windows and linux */
+#ifdef __WIN__
+	{
+		int ret;
+
+		ret = SHCreateDirectoryEx(NULL, p_db_imp->db_dir_name, NULL);
+		if (ret != ERROR_SUCCESS && ret != ERROR_ALREADY_EXISTS &&
+		    ret != ERROR_FILE_EXISTS)
+			goto err;
+	}
+#else				/* __WIN__ */
+	/* make sure the directory exists */
+	if (lstat(p_db_imp->db_dir_name, &dstat)) {
+		if (mkdir(p_db_imp->db_dir_name, 0755))
+			goto err;
+	}
+#endif
+
+	p_db->p_log = p_log;
+	p_db->p_db_imp = (void *)p_db_imp;
+
+	cl_list_init(&p_db->domains, 5);
+
+	OSM_LOG_EXIT(p_log);
+
+	return 0;
+
+err:
+	OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6101: "
+		"Failed to create the db directory:%s\n",
+		p_db_imp->db_dir_name);
+	free(p_db_imp);
+	OSM_LOG_EXIT(p_log);
+	return 1;
+}
+
+/*
+ * Create a domain named domain_name, backed by the file
+ * "<db_dir_name>/<domain_name>", and append it to p_db->domains.
+ *
+ * The file is opened in append mode once to make sure it exists and is
+ * writable.  Returns the new domain, or NULL on any failure, in which case
+ * everything allocated so far has been released.
+ */
+osm_db_domain_t *osm_db_domain_init(IN osm_db_t * p_db, IN const char *domain_name)
+{
+	osm_db_domain_t *p_domain;
+	osm_db_domain_imp_t *p_domain_imp;
+	size_t path_len;
+	osm_log_t *p_log = p_db->p_log;
+	FILE *p_file;
+
+	OSM_LOG_ENTER(p_log);
+
+	/* allocate a new domain object */
+	p_domain = malloc(sizeof(osm_db_domain_t));
+	if (p_domain == NULL) {
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 610C: "
+			"Failed to allocate domain memory\n");
+		goto Exit;
+	}
+
+	p_domain_imp = malloc(sizeof(osm_db_domain_imp_t));
+	if (p_domain_imp == NULL) {
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 610D: "
+			"Failed to allocate domain_imp memory\n");
+		free(p_domain);
+		p_domain = NULL;
+		goto Exit;
+	}
+
+	/* directory + '/' + name + terminating NUL */
+	path_len = strlen(((osm_db_imp_t *) p_db->p_db_imp)->db_dir_name)
+	    + strlen(domain_name) + 2;
+
+	/* set the domain file name */
+	p_domain_imp->file_name = malloc(path_len);
+	if (p_domain_imp->file_name == NULL) {
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 610E: "
+			"Failed to allocate file_name memory\n");
+		free(p_domain_imp);
+		free(p_domain);
+		p_domain = NULL;
+		goto Exit;
+	}
+	snprintf(p_domain_imp->file_name, path_len, "%s/%s",
+		 ((osm_db_imp_t *) p_db->p_db_imp)->db_dir_name, domain_name);
+
+	/* make sure the file exists - or exit if not writable */
+	p_file = fopen(p_domain_imp->file_name, "a+");
+	if (!p_file) {
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6102: "
+			"Failed to open the db file:%s\n",
+			p_domain_imp->file_name);
+		/* file_name is owned by p_domain_imp; free it first */
+		free(p_domain_imp->file_name);
+		free(p_domain_imp);
+		free(p_domain);
+		p_domain = NULL;
+		goto Exit;
+	}
+	fclose(p_file);
+
+	/* initialize the hash table object */
+	p_domain_imp->p_hash = st_init_strtable();
+	CL_ASSERT(p_domain_imp->p_hash != NULL);
+	p_domain_imp->dirty = FALSE;
+
+	p_domain->p_db = p_db;
+	cl_list_insert_tail(&p_db->domains, p_domain);
+	p_domain->p_domain_imp = p_domain_imp;
+	cl_spinlock_construct(&p_domain_imp->lock);
+	cl_spinlock_init(&p_domain_imp->lock);
+
+Exit:
+	OSM_LOG_EXIT(p_log);
+	return p_domain;
+}
+
+int osm_db_restore(IN osm_db_domain_t * p_domain)
+{
+
+	osm_log_t *p_log = p_domain->p_db->p_log;
+	osm_db_domain_imp_t *p_domain_imp =
+	    (osm_db_domain_imp_t *) p_domain->p_domain_imp;
+	FILE *p_file;
+	int status;
+	char sLine[OSM_DB_MAX_LINE_LEN];
+	boolean_t before_key;
+	char *p_first_word, *p_rest_of_line, *p_last;
+	char *p_key = NULL;
+	char *p_prev_val = NULL, *p_accum_val = NULL;
+	char *endptr = NULL;
+	unsigned int line_num;
+
+	OSM_LOG_ENTER(p_log);
+
+	/* take the lock on the domain */
+	cl_spinlock_acquire(&p_domain_imp->lock);
+
+	/* open the file - read mode */
+	p_file = fopen(p_domain_imp->file_name, "r");
+
+	if (!p_file) {
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6103: "
+			"Failed to open the db file:%s\n",
+			p_domain_imp->file_name);
+		status = 1;
+		goto Exit;
+	}
+
+	/* parse the file allocating new hash tables as required */
+	/*
+	   states:
+	   before_key (0) -> in_key (1)
+
+	   before_key: if a word on the first byte - it is the key. state=in_key
+	   the rest of the line is start of the value.
+	   in_key: unless the line is empty - add it (with newlines) to the value.
+ if empty: state=before_key + */ + status = 0; + before_key = TRUE; + line_num = 0; + /* if we got to EOF in the middle of a key we add a last newline */ + while ((fgets(sLine, OSM_DB_MAX_LINE_LEN, p_file) != NULL) || + ((before_key == FALSE) && strcpy(sLine, "\n")) + ) { + line_num++; + if (before_key) { + if ((sLine[0] != ' ') && (sLine[0] != '\t') + && (sLine[0] != '\n')) { + /* we got a new key */ + before_key = FALSE; + + /* handle the key */ + p_first_word = + strtok_r(sLine, " \t\n", &p_last); + if (!p_first_word) { + OSM_LOG(p_log, OSM_LOG_ERROR, + "ERR 6104: " + "Failed to get key from line:%u : %s (file:%s)\n", + line_num, sLine, + p_domain_imp->file_name); + status = 1; + goto EndParsing; + } + + p_key = malloc(sizeof(char) * + (strlen(p_first_word) + 1)); + strcpy(p_key, p_first_word); + + p_rest_of_line = strtok_r(NULL, "\n", &p_last); + if (p_rest_of_line != NULL) { + p_accum_val = malloc(sizeof(char) * + (strlen(p_rest_of_line) + 1)); + strcpy(p_accum_val, p_rest_of_line); + } else { + p_accum_val = malloc(2); + strcpy(p_accum_val, "\0"); + } + } else if (sLine[0] != '\n') { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6105: " + "How did we get here? line:%u : %s (file:%s)\n", + line_num, sLine, + p_domain_imp->file_name); + status = 1; + goto EndParsing; + } + } /* before key */ + else { + /* we already have a key */ + + if (sLine[0] == '\n') { + /* got an end of key */ + before_key = TRUE; + + /* make sure the key was not previously used */ + if (st_lookup(p_domain_imp->p_hash, + (st_data_t) p_key, + (void *)&p_prev_val)) { + /* if previously used we ignore this guid */ + OSM_LOG(p_log, OSM_LOG_ERROR, + "ERR 6106: " + "Key:%s already exists in:%s with value:%s." 
+ " Removing it\n", p_key, + p_domain_imp->file_name, + p_prev_val); + free(p_key); + p_key = NULL; + free(p_accum_val); + p_accum_val = NULL; + continue; + } else { + p_prev_val = NULL; + } + + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Got key:%s value:%s\n", p_key, + p_accum_val); + + /* check that the key is a number */ + if (!strtouq(p_key, &endptr, 0) + && *endptr != '\0') { + OSM_LOG(p_log, OSM_LOG_ERROR, + "ERR 610B: " + "Key:%s is invalid\n", p_key); + free(p_key); + p_key = NULL; + free(p_accum_val); + p_accum_val = NULL; + } else { + /* store our key and value */ + st_insert(p_domain_imp->p_hash, + (st_data_t) p_key, + (st_data_t) p_accum_val); + } + } else { + /* accumulate into the value */ + p_prev_val = p_accum_val; + p_accum_val = malloc(strlen(p_prev_val) + + strlen(sLine) + 1); + strcpy(p_accum_val, p_prev_val); + free(p_prev_val); + p_prev_val = NULL; + strcat(p_accum_val, sLine); + } + } /* in key */ + } /* while lines or last line */ + +EndParsing: + fclose(p_file); + +Exit: + cl_spinlock_release(&p_domain_imp->lock); + OSM_LOG_EXIT(p_log); + return status; +} + +static int dump_tbl_entry(st_data_t key, st_data_t val, st_data_t arg) +{ + FILE *p_file = (FILE *) arg; + char *p_key = (char *)key; + char *p_val = (char *)val; + + fprintf(p_file, "%s %s\n\n", p_key, p_val); + return ST_CONTINUE; +} + +int osm_db_store(IN osm_db_domain_t * p_domain, + IN boolean_t fsync_high_avail_files) +{ + osm_log_t *p_log = p_domain->p_db->p_log; + osm_db_domain_imp_t *p_domain_imp; + FILE *p_file = NULL; + int fd, status = 0; + char *p_tmp_file_name = NULL; + + OSM_LOG_ENTER(p_log); + + p_domain_imp = (osm_db_domain_imp_t *) p_domain->p_domain_imp; + + p_tmp_file_name = malloc(sizeof(char) * + (strlen(p_domain_imp->file_name) + 8)); + if (!p_tmp_file_name) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6113: " + "Failed to allocate memory for temporary file name\n"); + goto Exit2; + } + strcpy(p_tmp_file_name, p_domain_imp->file_name); + strcat(p_tmp_file_name, ".tmp"); + + 
cl_spinlock_acquire(&p_domain_imp->lock); + + if (p_domain_imp->dirty == FALSE) + goto Exit; + + /* open up the output file */ + p_file = fopen(p_tmp_file_name, "w"); + if (!p_file) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6107: " + "Failed to open the db file:%s for writing: err:%s\n", + p_domain_imp->file_name, strerror(errno)); + status = 1; + goto Exit; + } + + st_foreach(p_domain_imp->p_hash, dump_tbl_entry, (st_data_t) p_file); + + if (fsync_high_avail_files) { + if (fflush(p_file) == 0) { + fd = fileno(p_file); + if (fd != -1) { + if (fsync(fd) == -1) + OSM_LOG(p_log, OSM_LOG_ERROR, + "ERR 6110: fsync() failed (%s) for %s\n", + strerror(errno), + p_domain_imp->file_name); + } else + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6111: " + "fileno() failed for %s\n", + p_domain_imp->file_name); + } else + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6112: " + "fflush() failed (%s) for %s\n", + strerror(errno), p_domain_imp->file_name); + } + + fclose(p_file); + + status = rename(p_tmp_file_name, p_domain_imp->file_name); + if (status) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6108: " + "Failed to rename the db file to:%s (err:%s)\n", + p_domain_imp->file_name, strerror(errno)); + goto Exit; + } + p_domain_imp->dirty = FALSE; +Exit: + cl_spinlock_release(&p_domain_imp->lock); + free(p_tmp_file_name); +Exit2: + OSM_LOG_EXIT(p_log); + return status; +} + +/* simply de-allocate the key and the value and return the code + that makes the st_foreach delete the entry */ +static int clear_tbl_entry(st_data_t key, st_data_t val, st_data_t arg) +{ + free((char *)key); + free((char *)val); + return ST_DELETE; +} + +int osm_db_clear(IN osm_db_domain_t * p_domain) +{ + osm_db_domain_imp_t *p_domain_imp = + (osm_db_domain_imp_t *) p_domain->p_domain_imp; + + cl_spinlock_acquire(&p_domain_imp->lock); + st_foreach(p_domain_imp->p_hash, clear_tbl_entry, (st_data_t) NULL); + cl_spinlock_release(&p_domain_imp->lock); + + return 0; +} + +static int get_key_of_tbl_entry(st_data_t key, st_data_t val, 
st_data_t arg) +{ + cl_list_t *p_list = (cl_list_t *) arg; + cl_list_insert_tail(p_list, (void *)key); + return ST_CONTINUE; +} + +int osm_db_keys(IN osm_db_domain_t * p_domain, OUT cl_list_t * p_key_list) +{ + osm_db_domain_imp_t *p_domain_imp = + (osm_db_domain_imp_t *) p_domain->p_domain_imp; + + cl_spinlock_acquire(&p_domain_imp->lock); + + st_foreach(p_domain_imp->p_hash, get_key_of_tbl_entry, + (st_data_t) p_key_list); + + cl_spinlock_release(&p_domain_imp->lock); + + return 0; +} + +char *osm_db_lookup(IN osm_db_domain_t * p_domain, IN char *p_key) +{ + osm_db_domain_imp_t *p_domain_imp = + (osm_db_domain_imp_t *) p_domain->p_domain_imp; + char *p_val = NULL; + + cl_spinlock_acquire(&p_domain_imp->lock); + + if (!st_lookup(p_domain_imp->p_hash, (st_data_t) p_key, (void *)&p_val)) + p_val = NULL; + + cl_spinlock_release(&p_domain_imp->lock); + + return p_val; +} + +int osm_db_update(IN osm_db_domain_t * p_domain, IN char *p_key, IN char *p_val) +{ + osm_log_t *p_log = p_domain->p_db->p_log; + osm_db_domain_imp_t *p_domain_imp = + (osm_db_domain_imp_t *) p_domain->p_domain_imp; + char *p_prev_val = NULL; + char *p_new_key; + char *p_new_val; + + cl_spinlock_acquire(&p_domain_imp->lock); + + if (st_lookup(p_domain_imp->p_hash, + (st_data_t) p_key, (void *)&p_prev_val)) { + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Key:%s previously exists in:%s with value:%s\n", + p_key, p_domain_imp->file_name, p_prev_val); + p_new_key = p_key; + /* same key, same value - nothing to update */ + if (p_prev_val && !strcmp(p_val, p_prev_val)) + goto Exit; + } else { + /* need to allocate the key */ + p_new_key = malloc(sizeof(char) * (strlen(p_key) + 1)); + strcpy(p_new_key, p_key); + } + + /* need to arrange a new copy of the value */ + p_new_val = malloc(sizeof(char) * (strlen(p_val) + 1)); + strcpy(p_new_val, p_val); + + st_insert(p_domain_imp->p_hash, (st_data_t) p_new_key, + (st_data_t) p_new_val); + + if (p_prev_val) + free(p_prev_val); + + p_domain_imp->dirty = TRUE; + +Exit: + 
cl_spinlock_release(&p_domain_imp->lock); + + return 0; +} + +int osm_db_delete(IN osm_db_domain_t * p_domain, IN char *p_key) +{ + osm_log_t *p_log = p_domain->p_db->p_log; + osm_db_domain_imp_t *p_domain_imp = + (osm_db_domain_imp_t *) p_domain->p_domain_imp; + char *p_prev_val = NULL; + int res; + + OSM_LOG_ENTER(p_log); + + cl_spinlock_acquire(&p_domain_imp->lock); + if (st_delete(p_domain_imp->p_hash, + (void *)&p_key, (void *)&p_prev_val)) { + if (st_lookup(p_domain_imp->p_hash, + (st_data_t) p_key, (void *)&p_prev_val)) { + OSM_LOG(p_log, OSM_LOG_ERROR, + "key:%s still exists in:%s with value:%s\n", + p_key, p_domain_imp->file_name, p_prev_val); + res = 1; + } else { + free(p_key); + free(p_prev_val); + p_domain_imp->dirty = TRUE; + res = 0; + } + } else { + OSM_LOG(p_log, OSM_LOG_DEBUG, + "fail to find key:%s. delete failed\n", p_key); + res = 1; + } + cl_spinlock_release(&p_domain_imp->lock); + + OSM_LOG_EXIT(p_log); + return res; +} + +#ifdef TEST_OSMDB +#include +#include + +int main(int argc, char **argv) +{ + osm_db_t db; + osm_log_t log; + osm_db_domain_t *p_dbd; + cl_list_t keys; + cl_list_iterator_t kI; + char *p_key; + char *p_val; + int i; + + cl_list_construct(&keys); + cl_list_init(&keys, 10); + + osm_log_init_v2(&log, TRUE, 0xff, "/var/log/osm_db_test.log", 0, FALSE); + + osm_db_construct(&db); + if (osm_db_init(&db, &log)) { + printf("db init failed\n"); + exit(1); + } + + p_dbd = osm_db_domain_init(&db, "lid_by_guid"); + if (!p_dbd) { + printf("db domain init failed\n"); + exit(1); + } + + if (osm_db_restore(p_dbd)) { + printf("failed to restore\n"); + } + + if (osm_db_keys(p_dbd, &keys)) { + printf("failed to get keys\n"); + } else { + kI = cl_list_head(&keys); + while (kI != cl_list_end(&keys)) { + p_key = cl_list_obj(kI); + kI = cl_list_next(kI); + + p_val = osm_db_lookup(p_dbd, p_key); + printf("key = %s val = %s\n", p_key, p_val); + } + } + + cl_list_remove_all(&keys); + + /* randomly add and remove numbers */ + for (i = 0; i < 10; i++) 
{ + int k; + float v; + int is_add; + char val_buf[16]; + char key_buf[16]; + + k = floor(1.0 * rand() / RAND_MAX * 100); + v = rand(); + sprintf(key_buf, "%u", k); + sprintf(val_buf, "%u", v); + + is_add = (rand() < RAND_MAX / 2); + + if (is_add) { + osm_db_update(p_dbd, key_buf, val_buf); + } else { + osm_db_delete(p_dbd, key_buf); + } + } + if (osm_db_keys(p_dbd, &keys)) { + printf("failed to get keys\n"); + } else { + kI = cl_list_head(&keys); + while (kI != cl_list_end(&keys)) { + p_key = cl_list_obj(kI); + kI = cl_list_next(kI); + + p_val = osm_db_lookup(p_dbd, p_key); + printf("key = %s val = %s\n", p_key, p_val); + } + } + if (osm_db_store(p_dbd, FALSE)) + printf("failed to store\n"); + + osm_db_destroy(&db); + cl_list_destroy(&keys); +} +#endif diff --git a/opensm/osm_db_pack.c b/opensm/osm_db_pack.c new file mode 100644 index 0000000..59ac1d5 --- /dev/null +++ b/opensm/osm_db_pack.c @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#define FILE_ID OSM_FILE_DB_PACK_C +#include + +static inline void pack_guid(uint64_t guid, char *p_guid_str) +{ + sprintf(p_guid_str, "0x%016" PRIx64, guid); +} + +static inline uint64_t unpack_guid(char *p_guid_str) +{ + return strtoull(p_guid_str, NULL, 0); +} + +static inline void pack_lids(uint16_t min_lid, uint16_t max_lid, char *lid_str) +{ + sprintf(lid_str, "0x%04x 0x%04x", min_lid, max_lid); +} + +static inline int unpack_lids(IN char *p_lid_str, OUT uint16_t * p_min_lid, + OUT uint16_t * p_max_lid) +{ + unsigned long tmp; + char *p_next; + char *p_num; + char lids_str[24]; + + strncpy(lids_str, p_lid_str, 23); + lids_str[23] = '\0'; + p_num = strtok_r(lids_str, " \t", &p_next); + if (!p_num) + return 1; + tmp = strtoul(p_num, NULL, 0); + if (tmp >= 0xC000) + return 1; + + *p_min_lid = (uint16_t) tmp; + + p_num = strtok_r(NULL, " \t", &p_next); + if (!p_num) + return 1; + tmp = strtoul(p_num, NULL, 0); + if (tmp >= 0xC000) + return 1; + + *p_max_lid = (uint16_t) tmp; + + return 0; +} + +static inline void pack_mkey(uint64_t mkey, char *p_mkey_str) +{ + sprintf(p_mkey_str, "0x%016" PRIx64, mkey); +} + +static inline uint64_t unpack_mkey(char *p_mkey_str) +{ + return strtoull(p_mkey_str, NULL, 0); +} + +static inline void pack_neighbor(uint64_t guid, uint8_t portnum, char *p_str) +{ + sprintf(p_str, "0x%016" PRIx64 ":%u", guid, portnum); +} + +static inline int 
unpack_neighbor(char *p_str, uint64_t *guid, + uint8_t *portnum) +{ + char tmp_str[24]; + char *p_num, *p_next; + unsigned long tmp_port; + + strncpy(tmp_str, p_str, 23); + tmp_str[23] = '\0'; + p_num = strtok_r(tmp_str, ":", &p_next); + if (!p_num) + return 1; + if (guid) + *guid = strtoull(p_num, NULL, 0); + + p_num = strtok_r(NULL, ":", &p_next); + if (!p_num) + return 1; + if (portnum) { + tmp_port = strtoul(p_num, NULL, 0); + CL_ASSERT(tmp_port < 0x100); + *portnum = (uint8_t) tmp_port; + } + + return 0; +} + +int osm_db_guid2lid_guids(IN osm_db_domain_t * p_g2l, + OUT cl_qlist_t * p_guid_list) +{ + char *p_key; + cl_list_t keys; + osm_db_guid_elem_t *p_guid_elem; + + cl_list_construct(&keys); + cl_list_init(&keys, 10); + + if (osm_db_keys(p_g2l, &keys)) + return 1; + + while ((p_key = cl_list_remove_head(&keys)) != NULL) { + p_guid_elem = + (osm_db_guid_elem_t *) malloc(sizeof(osm_db_guid_elem_t)); + CL_ASSERT(p_guid_elem != NULL); + + p_guid_elem->guid = unpack_guid(p_key); + cl_qlist_insert_head(p_guid_list, &p_guid_elem->item); + } + + cl_list_destroy(&keys); + return 0; +} + +int osm_db_guid2lid_get(IN osm_db_domain_t * p_g2l, IN uint64_t guid, + OUT uint16_t * p_min_lid, OUT uint16_t * p_max_lid) +{ + char guid_str[20]; + char *p_lid_str; + uint16_t min_lid, max_lid; + + pack_guid(guid, guid_str); + p_lid_str = osm_db_lookup(p_g2l, guid_str); + if (!p_lid_str) + return 1; + if (unpack_lids(p_lid_str, &min_lid, &max_lid)) + return 1; + + if (p_min_lid) + *p_min_lid = min_lid; + if (p_max_lid) + *p_max_lid = max_lid; + + return 0; +} + +int osm_db_guid2lid_set(IN osm_db_domain_t * p_g2l, IN uint64_t guid, + IN uint16_t min_lid, IN uint16_t max_lid) +{ + char guid_str[20]; + char lid_str[16]; + + pack_guid(guid, guid_str); + pack_lids(min_lid, max_lid, lid_str); + + return osm_db_update(p_g2l, guid_str, lid_str); +} + +int osm_db_guid2lid_delete(IN osm_db_domain_t * p_g2l, IN uint64_t guid) +{ + char guid_str[20]; + pack_guid(guid, guid_str); + return 
osm_db_delete(p_g2l, guid_str); +} + +int osm_db_guid2mkey_guids(IN osm_db_domain_t * p_g2m, + OUT cl_qlist_t * p_guid_list) +{ + char *p_key; + cl_list_t keys; + osm_db_guid_elem_t *p_guid_elem; + + cl_list_construct(&keys); + cl_list_init(&keys, 10); + + if (osm_db_keys(p_g2m, &keys)) + return 1; + + while ((p_key = cl_list_remove_head(&keys)) != NULL) { + p_guid_elem = + (osm_db_guid_elem_t *) malloc(sizeof(osm_db_guid_elem_t)); + CL_ASSERT(p_guid_elem != NULL); + + p_guid_elem->guid = unpack_guid(p_key); + cl_qlist_insert_head(p_guid_list, &p_guid_elem->item); + } + + cl_list_destroy(&keys); + return 0; +} + +int osm_db_guid2mkey_get(IN osm_db_domain_t * p_g2m, IN uint64_t guid, + OUT uint64_t * p_mkey) +{ + char guid_str[20]; + char *p_mkey_str; + + pack_guid(guid, guid_str); + p_mkey_str = osm_db_lookup(p_g2m, guid_str); + if (!p_mkey_str) + return 1; + + if (p_mkey) + *p_mkey = unpack_mkey(p_mkey_str); + + return 0; +} + +int osm_db_guid2mkey_set(IN osm_db_domain_t * p_g2m, IN uint64_t guid, + IN uint64_t mkey) +{ + char guid_str[20]; + char mkey_str[20]; + + pack_guid(guid, guid_str); + pack_mkey(mkey, mkey_str); + + return osm_db_update(p_g2m, guid_str, mkey_str); +} + +int osm_db_guid2mkey_delete(IN osm_db_domain_t * p_g2m, IN uint64_t guid) +{ + char guid_str[20]; + pack_guid(guid, guid_str); + return osm_db_delete(p_g2m, guid_str); +} + +int osm_db_neighbor_guids(IN osm_db_domain_t * p_neighbor, + OUT cl_qlist_t * p_neighbor_list) +{ + char *p_key; + cl_list_t keys; + osm_db_neighbor_elem_t *p_neighbor_elem; + + cl_list_construct(&keys); + cl_list_init(&keys, 10); + + if (osm_db_keys(p_neighbor, &keys)) + return 1; + + while ((p_key = cl_list_remove_head(&keys)) != NULL) { + p_neighbor_elem = + (osm_db_neighbor_elem_t *) malloc(sizeof(osm_db_neighbor_elem_t)); + CL_ASSERT(p_neighbor_elem != NULL); + + unpack_neighbor(p_key, &p_neighbor_elem->guid, + &p_neighbor_elem->portnum); + cl_qlist_insert_head(p_neighbor_list, &p_neighbor_elem->item); + } + + 
cl_list_destroy(&keys); + return 0; +} + +int osm_db_neighbor_get(IN osm_db_domain_t * p_neighbor, IN uint64_t guid1, + IN uint8_t portnum1, OUT uint64_t * p_guid2, + OUT uint8_t * p_portnum2) +{ + char neighbor_str[24]; + char *p_other_str; + uint64_t temp_guid; + uint8_t temp_portnum; + + pack_neighbor(guid1, portnum1, neighbor_str); + p_other_str = osm_db_lookup(p_neighbor, neighbor_str); + if (!p_other_str) + return 1; + if (unpack_neighbor(p_other_str, &temp_guid, &temp_portnum)) + return 1; + + if (p_guid2) + *p_guid2 = temp_guid; + if (p_portnum2) + *p_portnum2 = temp_portnum; + + return 0; +} + +int osm_db_neighbor_set(IN osm_db_domain_t * p_neighbor, IN uint64_t guid1, + IN uint8_t portnum1, IN uint64_t guid2, + IN uint8_t portnum2) +{ + char n1_str[24], n2_str[24]; + + pack_neighbor(guid1, portnum1, n1_str); + pack_neighbor(guid2, portnum2, n2_str); + + return osm_db_update(p_neighbor, n1_str, n2_str); +} + +int osm_db_neighbor_delete(IN osm_db_domain_t * p_neighbor, IN uint64_t guid, + IN uint8_t portnum) +{ + char n_str[24]; + + pack_neighbor(guid, portnum, n_str); + return osm_db_delete(p_neighbor, n_str); +} diff --git a/opensm/osm_drop_mgr.c b/opensm/osm_drop_mgr.c new file mode 100644 index 0000000..f7ff0c1 --- /dev/null +++ b/opensm/osm_drop_mgr.c @@ -0,0 +1,611 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_drop_mgr_t. + * This object represents the Drop Manager object. + * This object is part of the opensm family of objects. 
+ */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_DROP_MGR_C +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void drop_mgr_remove_router(osm_sm_t * sm, IN const ib_net64_t portguid) +{ + osm_router_t *p_rtr; + cl_qmap_t *p_rtr_guid_tbl; + + p_rtr_guid_tbl = &sm->p_subn->rtr_guid_tbl; + p_rtr = (osm_router_t *) cl_qmap_remove(p_rtr_guid_tbl, portguid); + if (p_rtr != (osm_router_t *) cl_qmap_end(p_rtr_guid_tbl)) { + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Cleaned router for port guid 0x%016" PRIx64 "\n", + cl_ntoh64(portguid)); + osm_router_delete(&p_rtr); + } +} + +static void drop_mgr_clean_physp(osm_sm_t * sm, IN osm_physp_t * p_physp) +{ + osm_physp_t *p_remote_physp; + osm_port_t *p_remote_port; + + p_remote_physp = osm_physp_get_remote(p_physp); + if (p_remote_physp) { + p_remote_port = osm_get_port_by_guid(sm->p_subn, + p_remote_physp->port_guid); + + if (p_remote_port) { + /* Let's check if this is a case of link that is lost + (both ports weren't recognized), or a "hiccup" in the + subnet - in which case the remote port was + recognized, and its state is ACTIVE. + If this is just a "hiccup" - force a heavy sweep in + the next sweep. We don't want to lose that part of + the subnet. */ + if (p_remote_port->discovery_count && + osm_physp_get_port_state(p_remote_physp) == + IB_LINK_ACTIVE) { + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Forcing new heavy sweep. Remote " + "port 0x%016" PRIx64 " port num: %u " + "was recognized in ACTIVE state\n", + cl_ntoh64(p_remote_physp->port_guid), + p_remote_physp->port_num); + sm->p_subn->force_heavy_sweep = TRUE; + } + + /* If the remote node is ca or router - need to remove + the remote port, since it is no longer reachable. + This can be done if we reset the discovery count + of the remote port. 
*/ + if (!p_remote_physp->p_node->sw && + p_remote_physp->port_guid != sm->p_subn->sm_port_guid) { + p_remote_port->discovery_count = 0; + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Resetting discovery count of node: " + "0x%016" PRIx64 " port num:%u\n", + cl_ntoh64(osm_node_get_node_guid + (p_remote_physp->p_node)), + p_remote_physp->port_num); + } + } + + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Unlinking local node 0x%016" PRIx64 ", port %u" + "\n\t\t\t\tand remote node 0x%016" PRIx64 + ", port %u\n", + cl_ntoh64(osm_node_get_node_guid(p_physp->p_node)), + p_physp->port_num, + cl_ntoh64(osm_node_get_node_guid + (p_remote_physp->p_node)), + p_remote_physp->port_num); + + if (sm->ucast_mgr.cache_valid) + osm_ucast_cache_add_link(&sm->ucast_mgr, p_physp, + p_remote_physp); + + osm_physp_unlink(p_physp, p_remote_physp); + + } + + /* Make port as undiscovered */ + p_physp->p_node->physp_discovered[p_physp->port_num] = 0; + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Clearing node 0x%016" PRIx64 " physical port number %u\n", + cl_ntoh64(osm_node_get_node_guid(p_physp->p_node)), + p_physp->port_num); + + osm_physp_destroy(p_physp); +} + +static void drop_mgr_remove_port(osm_sm_t * sm, IN osm_port_t * p_port) +{ + ib_net64_t port_guid; + osm_port_t *p_port_check; + cl_qmap_t *p_alias_guid_tbl; + cl_qmap_t *p_sm_guid_tbl; + osm_mcm_port_t *mcm_port; + cl_ptr_vector_t *p_port_lid_tbl; + uint16_t min_lid_ho; + uint16_t max_lid_ho; + uint16_t lid_ho; + osm_node_t *p_node; + osm_remote_sm_t *p_sm; + osm_alias_guid_t *p_alias_guid, *p_alias_guid_check; + osm_guidinfo_work_obj_t *wobj; + cl_list_item_t *item, *next_item; + ib_gid_t port_gid; + ib_mad_notice_attr_t notice; + ib_api_status_t status; + + OSM_LOG_ENTER(sm->p_log); + + port_guid = osm_port_get_guid(p_port); + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Unreachable port 0x%016" PRIx64 "\n", cl_ntoh64(port_guid)); + + p_port_check = + (osm_port_t *) cl_qmap_get(&sm->p_subn->port_guid_tbl, port_guid); + if (p_port_check != p_port) { + 
OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0101: " + "Port 0x%016" PRIx64 " not in guid table\n", + cl_ntoh64(port_guid)); + goto Exit; + } + + /* issue a notice - trap 65 (SM_GID_OUT_OF_SERVICE_TRAP) */ + /* details of the notice */ + notice.generic_type = 0x80 | IB_NOTICE_TYPE_SUBN_MGMT; /* is generic subn mgt type */ + ib_notice_set_prod_type_ho(¬ice, 4); /* A class manager generator */ + /* endport ceases to be reachable */ + notice.g_or_v.generic.trap_num = CL_HTON16(SM_GID_OUT_OF_SERVICE_TRAP); /* 65 */ + /* The sm_base_lid is saved in network order already. */ + notice.issuer_lid = sm->p_subn->sm_base_lid; + /* following C14-72.1.2 and table 119 p725 */ + /* we need to provide the GID */ + port_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix; + port_gid.unicast.interface_id = port_guid; + memcpy(&(notice.data_details.ntc_64_67.gid), + &(port_gid), sizeof(ib_gid_t)); + + /* According to page 653 - the issuer gid in this case of trap + is the SM gid, since the SM is the initiator of this trap. 
*/ + notice.issuer_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix; + notice.issuer_gid.unicast.interface_id = sm->p_subn->sm_port_guid; + + status = osm_report_notice(sm->p_log, sm->p_subn, ¬ice); + if (status != IB_SUCCESS) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0103: " + "Error sending trap reports (%s)\n", + ib_get_err_str(status)); + } + + next_item = cl_qlist_head(&sm->p_subn->alias_guid_list); + while (next_item != cl_qlist_end(&sm->p_subn->alias_guid_list)) { + item = next_item; + next_item = cl_qlist_next(item); + wobj = cl_item_obj(item, wobj, list_item); + if (wobj->p_port == p_port) { + cl_qlist_remove_item(&sm->p_subn->alias_guid_list, + &wobj->list_item); + osm_guid_work_obj_delete(wobj); + } + } + + while (!cl_is_qlist_empty(&p_port->mcm_list)) { + mcm_port = cl_item_obj(cl_qlist_head(&p_port->mcm_list), + mcm_port, list_item); + osm_mgrp_delete_port(sm->p_subn, sm->p_log, mcm_port->mgrp, + p_port); + } + + p_alias_guid_tbl = &sm->p_subn->alias_port_guid_tbl; + p_alias_guid_check = (osm_alias_guid_t *) cl_qmap_head(p_alias_guid_tbl); + while (p_alias_guid_check != (osm_alias_guid_t *) cl_qmap_end(p_alias_guid_tbl)) { + if (p_alias_guid_check->p_base_port == p_port) + p_alias_guid = p_alias_guid_check; + else + p_alias_guid = NULL; + p_alias_guid_check = (osm_alias_guid_t *) cl_qmap_next(&p_alias_guid_check->map_item); + if (p_alias_guid) { + cl_qmap_remove_item(p_alias_guid_tbl, + &p_alias_guid->map_item); + osm_alias_guid_delete(&p_alias_guid); + } + } + + cl_qmap_remove(&sm->p_subn->port_guid_tbl, port_guid); + + p_sm_guid_tbl = &sm->p_subn->sm_guid_tbl; + p_sm = (osm_remote_sm_t *) cl_qmap_remove(p_sm_guid_tbl, port_guid); + if (p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_guid_tbl)) { + /* need to remove this item */ + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Cleaned SM for port guid 0x%016" PRIx64 "\n", + cl_ntoh64(port_guid)); + free(p_sm); + } + + drop_mgr_remove_router(sm, port_guid); + + osm_port_get_lid_range_ho(p_port, &min_lid_ho, 
&max_lid_ho); + + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Clearing abandoned LID range [%u,%u]\n", + min_lid_ho, max_lid_ho); + + p_port_lid_tbl = &sm->p_subn->port_lid_tbl; + for (lid_ho = min_lid_ho; lid_ho <= max_lid_ho; lid_ho++) + cl_ptr_vector_set(p_port_lid_tbl, lid_ho, NULL); + + drop_mgr_clean_physp(sm, p_port->p_physp); + + /* Delete event forwarding subscriptions */ + if (sm->p_subn->opt.drop_event_subscriptions) { + if (osm_infr_remove_subscriptions(sm->p_subn, sm->p_log, port_guid) + == CL_SUCCESS) + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Removed event subscriptions for port 0x%016" PRIx64 "\n", + cl_ntoh64(port_guid)); + } + + /* initialize the p_node - may need to get node_desc later */ + p_node = p_port->p_node; + + osm_port_delete(&p_port); + + OSM_LOG(sm->p_log, OSM_LOG_INFO, + "Removed port with GUID:0x%016" PRIx64 + " LID range [%u, %u] of node:%s\n", + cl_ntoh64(port_gid.unicast.interface_id), + min_lid_ho, max_lid_ho, + p_node ? p_node->print_desc : "UNKNOWN"); + +Exit: + OSM_LOG_EXIT(sm->p_log); +} + +static void drop_mgr_remove_switch(osm_sm_t * sm, IN osm_node_t * p_node) +{ + osm_switch_t *p_sw; + cl_qmap_t *p_sw_guid_tbl; + ib_net64_t node_guid; + + OSM_LOG_ENTER(sm->p_log); + + node_guid = osm_node_get_node_guid(p_node); + p_sw_guid_tbl = &sm->p_subn->sw_guid_tbl; + + p_sw = (osm_switch_t *) cl_qmap_remove(p_sw_guid_tbl, node_guid); + if (p_sw == (osm_switch_t *) cl_qmap_end(p_sw_guid_tbl)) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0102: " + "Node 0x%016" PRIx64 " not in switch table\n", + cl_ntoh64(osm_node_get_node_guid(p_node))); + } else { + p_node->sw = NULL; + osm_switch_delete(&p_sw); + } + + OSM_LOG_EXIT(sm->p_log); +} + +static boolean_t drop_mgr_process_node(osm_sm_t * sm, IN osm_node_t * p_node) +{ + osm_physp_t *p_physp; + osm_port_t *p_port; + osm_node_t *p_node_check; + uint32_t port_num; + uint32_t max_ports; + ib_net64_t port_guid; + boolean_t return_val = FALSE; + + OSM_LOG_ENTER(sm->p_log); + + OSM_LOG(sm->p_log, 
OSM_LOG_VERBOSE, + "Unreachable node 0x%016" PRIx64 "\n", + cl_ntoh64(osm_node_get_node_guid(p_node))); + + if (sm->ucast_mgr.cache_valid) + osm_ucast_cache_add_node(&sm->ucast_mgr, p_node); + + /* + Delete all the logical and physical port objects + associated with this node. + */ + max_ports = osm_node_get_num_physp(p_node); + for (port_num = 0; port_num < max_ports; port_num++) { + p_physp = osm_node_get_physp_ptr(p_node, port_num); + if (p_physp) { + port_guid = osm_physp_get_port_guid(p_physp); + + p_port = osm_get_port_by_guid(sm->p_subn, port_guid); + + if (p_port) + drop_mgr_remove_port(sm, p_port); + else + drop_mgr_clean_physp(sm, p_physp); + } + } + + return_val = TRUE; + + if (p_node->sw) + drop_mgr_remove_switch(sm, p_node); + + p_node_check = + (osm_node_t *) cl_qmap_remove(&sm->p_subn->node_guid_tbl, + osm_node_get_node_guid(p_node)); + if (p_node_check != p_node) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0105: " + "Node 0x%016" PRIx64 " not in guid table\n", + cl_ntoh64(osm_node_get_node_guid(p_node))); + } + + /* free memory allocated to node */ + osm_node_delete(&p_node); + + OSM_LOG_EXIT(sm->p_log); + return return_val; +} + +static void drop_mgr_check_switch_node(osm_sm_t * sm, IN osm_node_t * p_node) +{ + ib_net64_t node_guid; + osm_physp_t *p_physp, *p_remote_physp; + osm_node_t *p_remote_node; + osm_port_t *p_port; + ib_net64_t port_guid; + uint8_t port_num, remote_port_num; + + OSM_LOG_ENTER(sm->p_log); + + node_guid = osm_node_get_node_guid(p_node); + + /* Make sure we have a switch object for this node */ + if (!p_node->sw) { + /* We do not have switch info for this node */ + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Node 0x%016" PRIx64 " no switch in table\n", + cl_ntoh64(node_guid)); + + drop_mgr_process_node(sm, p_node); + goto Exit; + } + + /* Make sure we have a port object for port zero */ + p_physp = osm_node_get_physp_ptr(p_node, 0); + if (!p_physp) { + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Node 0x%016" PRIx64 " no valid physical 
port 0\n", + cl_ntoh64(node_guid)); + + drop_mgr_process_node(sm, p_node); + goto Exit; + } + + port_guid = osm_physp_get_port_guid(p_physp); + + p_port = osm_get_port_by_guid(sm->p_subn, port_guid); + + if (!p_port) { + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Node 0x%016" PRIx64 " has no port object\n", + cl_ntoh64(node_guid)); + + drop_mgr_process_node(sm, p_node); + goto Exit; + } + + if (!p_node->physp_discovered[0]) { + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Node 0x%016" PRIx64 " port has discovery count zero\n", + cl_ntoh64(node_guid)); + + drop_mgr_process_node(sm, p_node); + goto Exit; + } + + /* + * Unlink all ports that havn't been discovered during the last sweep. + * Optimization: Skip the check if discovered all the ports of the switch. + */ + if (p_port->discovery_count < p_node->physp_tbl_size) { + for (port_num = 1; port_num < p_node->physp_tbl_size; port_num++) { + if (!p_node->physp_discovered[port_num]) { + p_physp = osm_node_get_physp_ptr(p_node, port_num); + if (!p_physp) + continue; + p_remote_physp = osm_physp_get_remote(p_physp); + if (!p_remote_physp) + continue; + + p_remote_node = + osm_physp_get_node_ptr(p_remote_physp); + remote_port_num = + osm_physp_get_port_num(p_remote_physp); + + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Unlinking local node 0x%" PRIx64 + ", port %u" + "\n\t\t\t\tand remote node 0x%" PRIx64 + ", port %u due to missing PortInfo\n", + cl_ntoh64(osm_node_get_node_guid + (p_node)), port_num, + cl_ntoh64(osm_node_get_node_guid + (p_remote_node)), + remote_port_num); + + if (sm->ucast_mgr.cache_valid) + osm_ucast_cache_add_link(&sm->ucast_mgr, + p_physp, + p_remote_physp); + + osm_node_unlink(p_node, (uint8_t) port_num, + p_remote_node, + (uint8_t) remote_port_num); + } + } + } +Exit: + OSM_LOG_EXIT(sm->p_log); + return; +} + +void osm_drop_mgr_process(osm_sm_t * sm) +{ + cl_qmap_t *p_node_guid_tbl, *p_port_guid_tbl; + osm_port_t *p_port, *p_next_port; + osm_node_t *p_node, *p_next_node; + int max_ports, port_num; + 
osm_physp_t *p_physp; + ib_net64_t port_guid; + + CL_ASSERT(sm); + + OSM_LOG_ENTER(sm->p_log); + + p_node_guid_tbl = &sm->p_subn->node_guid_tbl; + p_port_guid_tbl = &sm->p_subn->port_guid_tbl; + + CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); + + p_next_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl); + while (p_next_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl)) { + p_node = p_next_node; + p_next_node = + (osm_node_t *) cl_qmap_next(&p_next_node->map_item); + + CL_ASSERT(cl_qmap_key(&p_node->map_item) == + osm_node_get_node_guid(p_node)); + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Checking node 0x%016" PRIx64 "\n", + cl_ntoh64(osm_node_get_node_guid(p_node))); + + /* + Check if this node was discovered during the last sweep. + If not, it is unreachable in the current subnet, and + should therefore be removed from the subnet object. + */ + if (p_node->discovery_count == 0) + drop_mgr_process_node(sm, p_node); + else { + /* + * We want to preserve the configured pkey indexes, + * so if we don't receive GetResp P_KeyTable for some block, + * do the following: + * 1. Drop node if the node is sw and got timeout for port 0. + * 2. Drop node if node is HCA/RTR. + * 3. Drop only physp if got timeout for sw when the port isn't 0. + * We'll set error during initialization in order to + * cause an immediate heavy sweep and try to get the + * configured P_KeyTable again. 
+ */ + if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) + port_num = 0; + else + port_num = 1; + max_ports = osm_node_get_num_physp(p_node); + for (; port_num < max_ports; port_num++) { + p_physp = osm_node_get_physp_ptr(p_node, port_num); + if (!p_physp || p_physp->pkeys.rcv_blocks_cnt == 0) + continue; + p_physp->pkeys.rcv_blocks_cnt = 0; + p_physp->need_update = 2; + sm->p_subn->subnet_initialization_error = TRUE; + port_guid = osm_physp_get_port_guid(p_physp); + p_port = osm_get_port_by_guid(sm->p_subn, port_guid); + CL_ASSERT(p_port); + if (p_node->physp_discovered[port_num]) { + p_node->physp_discovered[port_num] = 0; + p_port->discovery_count--; + } + } + } + } + + /* + Go over all the nodes. If the node is a switch - make sure + there is also a switch record for it, and a portInfo record for + port zero of of the node. + If not - this means that there was some error in getting the data + of this node. Drop the node. + */ + p_next_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl); + while (p_next_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl)) { + p_node = p_next_node; + p_next_node = + (osm_node_t *) cl_qmap_next(&p_next_node->map_item); + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Checking full discovery of node 0x%016" PRIx64 "\n", + cl_ntoh64(osm_node_get_node_guid(p_node))); + + if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH) + continue; + + /* We are handling a switch node */ + drop_mgr_check_switch_node(sm, p_node); + } + + p_next_port = (osm_port_t *) cl_qmap_head(p_port_guid_tbl); + while (p_next_port != (osm_port_t *) cl_qmap_end(p_port_guid_tbl)) { + p_port = p_next_port; + p_next_port = + (osm_port_t *) cl_qmap_next(&p_next_port->map_item); + + CL_ASSERT(cl_qmap_key(&p_port->map_item) == + osm_port_get_guid(p_port)); + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Checking port 0x%016" PRIx64 "\n", + cl_ntoh64(osm_port_get_guid(p_port))); + + /* + If the port is unreachable, remove it from the guid table. 
+ */ + if (p_port->discovery_count == 0) + drop_mgr_remove_port(sm, p_port); + } + + CL_PLOCK_RELEASE(sm->p_lock); + OSM_LOG_EXIT(sm->p_log); +} diff --git a/opensm/osm_dump.c b/opensm/osm_dump.c new file mode 100644 index 0000000..b98a1b9 --- /dev/null +++ b/opensm/osm_dump.c @@ -0,0 +1,752 @@ +/* + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +/* + * Abstract: + * Various OpenSM dumpers + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_DUMP_C +#include +#include +#include +#include +#include +#include +#include + +static void dump_ucast_path_distribution(cl_map_item_t * item, FILE * file, + void *cxt) +{ + osm_node_t *p_node; + osm_node_t *p_remote_node; + uint8_t i; + uint8_t num_ports; + uint32_t num_paths; + ib_net64_t remote_guid_ho; + osm_switch_t *p_sw = (osm_switch_t *) item; + + p_node = p_sw->p_node; + num_ports = p_sw->num_ports; + + fprintf(file, "dump_ucast_path_distribution: Switch 0x%" PRIx64 "\n" + "Port : Path Count Through Port", + cl_ntoh64(osm_node_get_node_guid(p_node))); + + for (i = 0; i < num_ports; i++) { + num_paths = osm_switch_path_count_get(p_sw, i); + fprintf(file, "\n %03u : %u", i, num_paths); + if (i == 0) { + fprintf(file, " (switch management port)"); + continue; + } + + p_remote_node = osm_node_get_remote_node(p_node, i, NULL); + if (p_remote_node == NULL) + continue; + + remote_guid_ho = + cl_ntoh64(osm_node_get_node_guid(p_remote_node)); + + switch (osm_node_get_type(p_remote_node)) { + case IB_NODE_TYPE_SWITCH: + fprintf(file, " (link to switch"); + break; + case IB_NODE_TYPE_ROUTER: + fprintf(file, " (link to router"); + break; + case IB_NODE_TYPE_CA: + fprintf(file, " (link to CA"); + break; + default: + fprintf(file, " (link to unknown node type"); + break; + } + + fprintf(file, " 0x%" PRIx64 ")", remote_guid_ho); + } + + fprintf(file, "\n"); +} + +static void dump_ucast_routes(cl_map_item_t * item, FILE * file, void *cxt) +{ + const osm_node_t *p_node; + osm_port_t *p_port; + uint8_t port_num; + uint8_t num_hops; + uint8_t best_hops; + uint8_t best_port; + uint16_t max_lid_ho; + uint16_t lid_ho, base_lid; + boolean_t direct_route_exists = FALSE; + boolean_t dor; + osm_switch_t *p_sw = (osm_switch_t *) item; + osm_opensm_t *p_osm = 
cxt; + + p_node = p_sw->p_node; + + max_lid_ho = p_sw->max_lid_ho; + + fprintf(file, "dump_ucast_routes: " + "Switch 0x%016" PRIx64 "\nLID : Port : Hops : Optimal\n", + cl_ntoh64(osm_node_get_node_guid(p_node))); + + dor = (p_osm->routing_engine_used && + p_osm->routing_engine_used->type == OSM_ROUTING_ENGINE_TYPE_DOR); + + for (lid_ho = 1; lid_ho <= max_lid_ho; lid_ho++) { + fprintf(file, "0x%04X : ", lid_ho); + + p_port = osm_get_port_by_lid_ho(&p_osm->subn, lid_ho); + if (!p_port) { + fprintf(file, "UNREACHABLE\n"); + continue; + } + + port_num = osm_switch_get_port_by_lid(p_sw, lid_ho, + OSM_NEW_LFT); + if (port_num == OSM_NO_PATH) { + /* + This may occur if there are 'holes' in the existing + LID assignments. Running SM with --reassign_lids + will reassign and compress the LID range. The + subnet should work fine either way. + */ + fprintf(file, "UNREACHABLE\n"); + continue; + } + /* + Switches can lie about which port routes a given + lid due to a recent reconfiguration of the subnet. + Therefore, ensure that the hop count is better than + OSM_NO_PATH. + */ + if (p_port->p_node->sw) { + /* Target LID is switch. + Get its base lid and check hop count for this base LID only. */ + base_lid = osm_node_get_base_lid(p_port->p_node, 0); + base_lid = cl_ntoh16(base_lid); + num_hops = + osm_switch_get_hop_count(p_sw, base_lid, port_num); + } else { + /* Target LID is not switch (CA or router). + Check if we have route to this target from current switch. */ + num_hops = + osm_switch_get_hop_count(p_sw, lid_ho, port_num); + if (num_hops != OSM_NO_PATH) { + direct_route_exists = TRUE; + base_lid = lid_ho; + } else { + osm_physp_t *p_physp = p_port->p_physp; + + if (!p_physp || !p_physp->p_remote_physp || + !p_physp->p_remote_physp->p_node->sw) + num_hops = OSM_NO_PATH; + else { + base_lid = + osm_node_get_base_lid(p_physp-> + p_remote_physp-> + p_node, 0); + base_lid = cl_ntoh16(base_lid); + num_hops = + p_physp->p_remote_physp->p_node-> + sw == + p_sw ? 
0 : + osm_switch_get_hop_count(p_sw, + base_lid, + port_num); + } + } + } + + if (num_hops == OSM_NO_PATH) { + fprintf(file, "%03u : HOPS UNKNOWN\n", port_num); + continue; + } + + best_hops = osm_switch_get_least_hops(p_sw, base_lid); + if (!p_port->p_node->sw && !direct_route_exists) { + best_hops++; + num_hops++; + } + + fprintf(file, "%03u : %02u : ", port_num, num_hops); + + if (best_hops == num_hops) + fprintf(file, "yes"); + else { + /* No LMC Optimization */ + best_port = osm_switch_recommend_path(p_sw, p_port, + lid_ho, 1, TRUE, + FALSE, dor, + p_osm->subn.opt.port_shifting, + p_osm->subn.opt.scatter_ports, + OSM_NEW_LFT); + fprintf(file, "No %u hop path possible via port %u!", + best_hops, best_port); + } + + fprintf(file, "\n"); + } +} + +static void dump_mcast_routes(cl_map_item_t * item, FILE * file, void *cxt) +{ + osm_switch_t *p_sw = (osm_switch_t *) item; + osm_mcast_tbl_t *p_tbl; + int16_t mlid_ho = 0; + int16_t mlid_start_ho; + uint8_t position = 0; + int16_t block_num = 0; + boolean_t first_mlid; + boolean_t first_port; + const osm_node_t *p_node; + uint16_t i, j; + uint16_t mask_entry; + char sw_hdr[256]; + char mlid_hdr[32]; + + p_node = p_sw->p_node; + + p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw); + + sprintf(sw_hdr, "\nSwitch 0x%016" PRIx64 "\nLID : Out Port(s)\n", + cl_ntoh64(osm_node_get_node_guid(p_node))); + first_mlid = TRUE; + while (block_num <= p_tbl->max_block_in_use) { + mlid_start_ho = (uint16_t) (block_num * IB_MCAST_BLOCK_SIZE); + for (i = 0; i < IB_MCAST_BLOCK_SIZE; i++) { + mlid_ho = mlid_start_ho + i; + position = 0; + first_port = TRUE; + sprintf(mlid_hdr, "0x%04X :", + mlid_ho + IB_LID_MCAST_START_HO); + while (position <= p_tbl->max_position) { + mask_entry = + cl_ntoh16((*p_tbl-> + p_mask_tbl)[mlid_ho][position]); + if (mask_entry == 0) { + position++; + continue; + } + for (j = 0; j < 16; j++) { + if ((1 << j) & mask_entry) { + if (first_mlid) { + fprintf(file, "%s", + sw_hdr); + first_mlid = FALSE; + } + if (first_port) 
{ + fprintf(file, "%s", + mlid_hdr); + first_port = FALSE; + } + fprintf(file, " 0x%03X ", + j + (position * 16)); + } + } + position++; + } + if (first_port == FALSE) + fprintf(file, "\n"); + } + block_num++; + } +} + +static void dump_lid_matrix(cl_map_item_t * item, FILE * file, void *cxt) +{ + osm_switch_t *p_sw = (osm_switch_t *) item; + osm_opensm_t *p_osm = cxt; + osm_node_t *p_node = p_sw->p_node; + unsigned max_lid = p_sw->max_lid_ho; + unsigned max_port = p_sw->num_ports; + uint16_t lid; + uint8_t port; + + fprintf(file, "Switch: guid 0x%016" PRIx64 "\n", + cl_ntoh64(osm_node_get_node_guid(p_node))); + for (lid = 1; lid <= max_lid; lid++) { + osm_port_t *p_port; + if (osm_switch_get_least_hops(p_sw, lid) == OSM_NO_PATH) + continue; + fprintf(file, "0x%04x:", lid); + for (port = 0; port < max_port; port++) + fprintf(file, " %02x", + osm_switch_get_hop_count(p_sw, lid, port)); + p_port = osm_get_port_by_lid_ho(&p_osm->subn, lid); + if (p_port) + fprintf(file, " # portguid 0x%016" PRIx64, + cl_ntoh64(osm_port_get_guid(p_port))); + fprintf(file, "\n"); + } +} + +static void dump_ucast_lfts(cl_map_item_t * item, FILE * file, void *cxt) +{ + osm_switch_t *p_sw = (osm_switch_t *) item; + osm_opensm_t *p_osm = cxt; + osm_node_t *p_node = p_sw->p_node; + unsigned max_lid = p_sw->max_lid_ho; + unsigned max_port = p_sw->num_ports; + uint16_t lid; + uint8_t port; + + fprintf(file, "Unicast lids [0-%u] of switch Lid %u guid 0x%016" + PRIx64 " (\'%s\'):\n", + max_lid, cl_ntoh16(osm_node_get_base_lid(p_node, 0)), + cl_ntoh64(osm_node_get_node_guid(p_node)), p_node->print_desc); + for (lid = 0; lid <= max_lid; lid++) { + osm_port_t *p_port; + port = osm_switch_get_port_by_lid(p_sw, lid, OSM_NEW_LFT); + + if (port >= max_port) + continue; + + fprintf(file, "0x%04x %03u # ", lid, port); + + p_port = osm_get_port_by_lid_ho(&p_osm->subn, lid); + if (p_port) { + p_node = p_port->p_node; + fprintf(file, "%s portguid 0x%016" PRIx64 ": \'%s\'", + 
ib_get_node_type_str(osm_node_get_type(p_node)), + cl_ntoh64(osm_port_get_guid(p_port)), + p_node->print_desc); + } else + fprintf(file, "unknown node and type"); + fprintf(file, "\n"); + } + fprintf(file, "%u lids dumped\n", max_lid); +} + +static void dump_topology_node(cl_map_item_t * item, FILE * file, void *cxt) +{ + osm_node_t *p_node = (osm_node_t *) item; + uint32_t cPort; + osm_node_t *p_nbnode; + osm_physp_t *p_physp, *p_default_physp, *p_rphysp; + uint8_t link_speed_act; + const char *link_speed_act_str, *link_width_act_str; + + if (!p_node->node_info.num_ports) + return; + + for (cPort = 1; cPort < osm_node_get_num_physp(p_node); cPort++) { + uint8_t port_state; + + p_physp = osm_node_get_physp_ptr(p_node, cPort); + if (!p_physp) + continue; + + p_rphysp = p_physp->p_remote_physp; + if (!p_rphysp) + continue; + + CL_ASSERT(cPort == p_physp->port_num); + + if (p_node->node_info.node_type == IB_NODE_TYPE_SWITCH) + p_default_physp = osm_node_get_physp_ptr(p_node, 0); + else + p_default_physp = p_physp; + + fprintf(file, "{ %s%s Ports:%02X SystemGUID:%016" PRIx64 + " NodeGUID:%016" PRIx64 " PortGUID:%016" PRIx64 + " VenID:%06X DevID:%04X Rev:%08X {%s} LID:%04X PN:%02X } ", + p_node->node_info.node_type == IB_NODE_TYPE_SWITCH ? + "SW" : p_node->node_info.node_type == + IB_NODE_TYPE_CA ? "CA" : p_node->node_info.node_type == + IB_NODE_TYPE_ROUTER ? "Rt" : "**", + p_default_physp->port_info.base_lid == + p_default_physp->port_info. + master_sm_base_lid ? 
"-SM" : "", + p_node->node_info.num_ports, + cl_ntoh64(p_node->node_info.sys_guid), + cl_ntoh64(p_node->node_info.node_guid), + cl_ntoh64(p_physp->port_guid), + cl_ntoh32(ib_node_info_get_vendor_id + (&p_node->node_info)), + cl_ntoh16(p_node->node_info.device_id), + cl_ntoh32(p_node->node_info.revision), + p_node->print_desc, + cl_ntoh16(p_default_physp->port_info.base_lid), cPort); + + p_nbnode = p_rphysp->p_node; + + if (p_nbnode->node_info.node_type == IB_NODE_TYPE_SWITCH) + p_default_physp = osm_node_get_physp_ptr(p_nbnode, 0); + else + p_default_physp = p_rphysp; + + fprintf(file, "{ %s%s Ports:%02X SystemGUID:%016" PRIx64 + " NodeGUID:%016" PRIx64 " PortGUID:%016" PRIx64 + " VenID:%08X DevID:%04X Rev:%08X {%s} LID:%04X PN:%02X } ", + p_nbnode->node_info.node_type == IB_NODE_TYPE_SWITCH ? + "SW" : p_nbnode->node_info.node_type == + IB_NODE_TYPE_CA ? "CA" : + p_nbnode->node_info.node_type == IB_NODE_TYPE_ROUTER ? + "Rt" : "**", + p_default_physp->port_info.base_lid == + p_default_physp->port_info. + master_sm_base_lid ? 
"-SM" : "", + p_nbnode->node_info.num_ports, + cl_ntoh64(p_nbnode->node_info.sys_guid), + cl_ntoh64(p_nbnode->node_info.node_guid), + cl_ntoh64(p_rphysp->port_guid), + cl_ntoh32(ib_node_info_get_vendor_id + (&p_nbnode->node_info)), + cl_ntoh32(p_nbnode->node_info.device_id), + cl_ntoh32(p_nbnode->node_info.revision), + p_nbnode->print_desc, + cl_ntoh16(p_default_physp->port_info.base_lid), + p_rphysp->port_num); + + port_state = ib_port_info_get_port_state(&p_physp->port_info); + link_speed_act = + ib_port_info_get_link_speed_active(&p_physp->port_info); + if (link_speed_act == IB_LINK_SPEED_ACTIVE_2_5) + link_speed_act_str = "2.5"; + else if (link_speed_act == IB_LINK_SPEED_ACTIVE_5) + link_speed_act_str = "5"; + else if (link_speed_act == IB_LINK_SPEED_ACTIVE_10) + link_speed_act_str = "10"; + else + link_speed_act_str = "??"; + + if (p_physp->ext_port_info.link_speed_active & FDR10) + link_speed_act_str = "FDR10"; + + if (p_default_physp->port_info.capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS) { + link_speed_act = + ib_port_info_get_link_speed_ext_active(&p_physp->port_info); + if (link_speed_act == IB_LINK_SPEED_EXT_ACTIVE_14) + link_speed_act_str = "14"; + else if (link_speed_act == IB_LINK_SPEED_EXT_ACTIVE_25) + link_speed_act_str = "25"; + else if (link_speed_act == IB_LINK_SPEED_EXT_ACTIVE_50) + link_speed_act_str = "50"; + else if (link_speed_act != IB_LINK_SPEED_EXT_ACTIVE_NONE) + link_speed_act_str = "??"; + } + + if (p_physp->port_info.link_width_active == 1) + link_width_act_str = "1x"; + else if (p_physp->port_info.link_width_active == 2) + link_width_act_str = "4x"; + else if (p_physp->port_info.link_width_active == 4) + link_width_act_str = "8x"; + else if (p_physp->port_info.link_width_active == 8) + link_width_act_str = "12x"; + else link_width_act_str = "??"; + + if (p_default_physp->port_info.capability_mask2 & + IB_PORT_CAP2_IS_LINK_WIDTH_2X_SUPPORTED) { + if (p_physp->port_info.link_width_active == 16) + link_width_act_str = "2x"; + } + + 
fprintf(file, "PHY=%s LOG=%s SPD=%s\n", + link_width_act_str, + port_state == IB_LINK_ACTIVE ? "ACT" : + port_state == IB_LINK_ARMED ? "ARM" : + port_state == IB_LINK_INIT ? "INI" : "DWN", + link_speed_act_str); + } +} + +static void dump_sl2vl_tbl(cl_map_item_t * item, FILE * file, void *cxt) +{ + osm_port_t *p_port = (osm_port_t *) item; + osm_node_t *p_node = p_port->p_node; + uint32_t in_port, out_port, + num_ports = p_node->node_info.num_ports; + ib_net16_t base_lid = osm_port_get_base_lid(p_port); + osm_physp_t *p_physp; + ib_slvl_table_t *p_tbl; + int i, n; + char buf[1024]; + const char * header_line = "#in out : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15"; + const char * separator_line = "#--------------------------------------------------------"; + + if (!num_ports) + return; + + fprintf(file, "%s 0x%016" PRIx64 ", base LID %d, " + "\"%s\"\n%s\n%s\n", + ib_get_node_type_str(p_node->node_info.node_type), + cl_ntoh64(p_port->guid), cl_ntoh16(base_lid), + p_node->print_desc, header_line, separator_line); + + if (p_node->node_info.node_type == IB_NODE_TYPE_SWITCH) { + for (out_port = 0; out_port <= num_ports; out_port++){ + p_physp = osm_node_get_physp_ptr(p_node, out_port); + + /* no need to print SL2VL table for port that is down */ + if (!p_physp || !p_physp->p_remote_physp) + continue; + + for (in_port = 0; in_port <= num_ports; in_port++) { + p_tbl = osm_physp_get_slvl_tbl(p_physp, in_port); + for (i = 0, n = 0; i < 16; i++) + n += sprintf(buf + n, " %-2d", + ib_slvl_table_get(p_tbl, i)); + fprintf(file, "%-3d %-3d :%s\n", + in_port, out_port, buf); + } + } + } else { + p_physp = p_port->p_physp; + p_tbl = osm_physp_get_slvl_tbl(p_physp, 0); + for (i = 0, n = 0; i < 16; i++) + n += sprintf(buf + n, " %-2d", + ib_slvl_table_get(p_tbl, i)); + fprintf(file, "%-3d %-3d :%s\n", 0, 0, buf); + } + + fprintf(file, "%s\n\n", separator_line); +} + +static void print_node_report(cl_map_item_t * item, FILE * file, void *cxt) +{ + osm_node_t *p_node = (osm_node_t *) item; 
+ osm_opensm_t *osm = cxt; + const osm_physp_t *p_physp, *p_remote_physp; + const ib_port_info_t *p_pi; + uint8_t port_num; + uint32_t num_ports; + uint8_t node_type; + + node_type = osm_node_get_type(p_node); + + num_ports = osm_node_get_num_physp(p_node); + port_num = node_type == IB_NODE_TYPE_SWITCH ? 0 : 1; + for (; port_num < num_ports; port_num++) { + p_physp = osm_node_get_physp_ptr(p_node, port_num); + if (!p_physp) + continue; + + fprintf(file, "%-11s : %s : %02X :", + osm_get_manufacturer_str(cl_ntoh64 + (osm_node_get_node_guid + (p_node))), + osm_get_node_type_str_fixed_width(node_type), port_num); + + p_pi = &p_physp->port_info; + + /* + * Port state is not defined for base switch port 0 + */ + if (port_num == 0 && + ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info) == FALSE) + fprintf(file, " :"); + else + fprintf(file, " %s :", + osm_get_port_state_str_fixed_width + (ib_port_info_get_port_state(p_pi))); + + /* + * LID values are only meaningful in select cases. + */ + if (ib_port_info_get_port_state(p_pi) != IB_LINK_DOWN + && ((node_type == IB_NODE_TYPE_SWITCH && port_num == 0) + || node_type != IB_NODE_TYPE_SWITCH)) + fprintf(file, " %04X : %01X :", + cl_ntoh16(p_pi->base_lid), + ib_port_info_get_lmc(p_pi)); + else + fprintf(file, " : :"); + + if (port_num == 0 && + ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info) == FALSE) + fprintf(file, " : : "); + else + fprintf(file, " %s : %s : %s ", + osm_get_mtu_str + (ib_port_info_get_neighbor_mtu(p_pi)), + osm_get_lwa_str(p_pi->link_width_active), + osm_get_lsa_str + (ib_port_info_get_link_speed_active(p_pi), + ib_port_info_get_link_speed_ext_active(p_pi), + ib_port_info_get_port_state(p_pi), + p_physp->ext_port_info.link_speed_active & FDR10)); + + if (osm_physp_get_port_guid(p_physp) == osm->subn.sm_port_guid) + fprintf(file, "* %016" PRIx64 " *", + cl_ntoh64(osm_physp_get_port_guid(p_physp))); + else + fprintf(file, ": %016" PRIx64 " :", + cl_ntoh64(osm_physp_get_port_guid(p_physp))); 
+ + if (port_num + && (ib_port_info_get_port_state(p_pi) != IB_LINK_DOWN)) { + p_remote_physp = osm_physp_get_remote(p_physp); + if (p_remote_physp) + fprintf(file, " %016" PRIx64 " (%02X)", + cl_ntoh64(osm_physp_get_port_guid + (p_remote_physp)), + osm_physp_get_port_num(p_remote_physp)); + else + fprintf(file, " UNKNOWN"); + } + + fprintf(file, "\n"); + } + + fprintf(file, "------------------------------------------------------" + "------------------------------------------------\n"); +} + +struct dump_context { + osm_opensm_t *p_osm; + FILE *file; + void (*func) (cl_map_item_t *, FILE *, void *); + void *cxt; +}; + +static void dump_item(cl_map_item_t * item, void *cxt) +{ + ((struct dump_context *)cxt)->func(item, + ((struct dump_context *)cxt)->file, + ((struct dump_context *)cxt)->cxt); +} + +static void dump_qmap(FILE * file, cl_qmap_t * map, + void (*func) (cl_map_item_t *, FILE *, void *), void *cxt) +{ + struct dump_context dump_context; + + dump_context.file = file; + dump_context.func = func; + dump_context.cxt = cxt; + + cl_qmap_apply_func(map, dump_item, &dump_context); +} + +void osm_dump_qmap_to_file(osm_opensm_t * p_osm, const char *file_name, + cl_qmap_t * map, + void (*func) (cl_map_item_t *, FILE *, void *), + void *cxt) +{ + char path[1024]; + FILE *file; + + snprintf(path, sizeof(path), "%s/%s", + p_osm->subn.opt.dump_files_dir, file_name); + + file = fopen(path, "w"); + if (!file) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, + "cannot create file \'%s\': %s\n", + path, strerror(errno)); + return; + } + + dump_qmap(file, map, func, cxt); + + fclose(file); +} + + +static void print_report(osm_opensm_t * osm, FILE * file) +{ + fprintf(file, "\n===================================================" + "====================================================\n" + "Vendor : Ty : # : Sta : LID : LMC : MTU : LWA :" + " LSA : Port GUID : Neighbor Port (Port #)\n"); + dump_qmap(stdout, &osm->subn.node_guid_tbl, print_node_report, osm); +} + +void 
/*
 * Produce all OpenSM dumps.
 *
 * When routing logging is active: the LID matrix, the LFTs, (at debug
 * level) the unicast path distribution on stdout, the unicast and
 * multicast forwarding tables, and - when QoS is enabled or the routing
 * engine in use provides update_sl2vl - the SL2VL tables.
 * The subnet topology list is always written.  The node report is
 * printed on stdout when verbose logging is active.
 *
 * osm - OpenSM instance to dump
 */
void osm_dump_all(osm_opensm_t * osm)
{
	cl_qmap_t *p_switches = &osm->subn.sw_guid_tbl;

	if (OSM_LOG_IS_ACTIVE_V2(&osm->log, OSM_LOG_ROUTING)) {
		int want_sl2vl;

		/* unicast routes */
		osm_dump_qmap_to_file(osm, "opensm-lid-matrix.dump",
				      p_switches, dump_lid_matrix, osm);
		osm_dump_qmap_to_file(osm, "opensm-lfts.dump",
				      p_switches, dump_ucast_lfts, osm);

		if (OSM_LOG_IS_ACTIVE_V2(&osm->log, OSM_LOG_DEBUG))
			dump_qmap(stdout, p_switches,
				  dump_ucast_path_distribution, osm);

		/* An attempt to get osm_switch_recommend_path to report the
		   same routes that a sweep would assign. */
		if (osm->subn.opt.scatter_ports)
			srandom(osm->subn.opt.scatter_ports);

		osm_dump_qmap_to_file(osm, "opensm.fdbs",
				      p_switches, dump_ucast_routes, osm);

		/* multicast routes */
		osm_dump_qmap_to_file(osm, "opensm.mcfdbs",
				      p_switches, dump_mcast_routes, osm);

		/* SL2VL tables */
		want_sl2vl = osm->subn.opt.qos ||
		    (osm->routing_engine_used &&
		     osm->routing_engine_used->update_sl2vl);
		if (want_sl2vl)
			osm_dump_qmap_to_file(osm, "opensm-sl2vl.dump",
					      &osm->subn.port_guid_tbl,
					      dump_sl2vl_tbl, osm);
	}

	osm_dump_qmap_to_file(osm, "opensm-subnet.lst",
			      &osm->subn.node_guid_tbl, dump_topology_node,
			      osm);

	if (OSM_LOG_IS_ACTIVE_V2(&osm->log, OSM_LOG_VERBOSE))
		print_report(osm, stdout);
}
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/****h* OpenSM Event plugin interface +* DESCRIPTION +* Database interface to record subnet events +* +* Implementations of this object _MUST_ be thread safe. 
+* +* AUTHOR +* Ira Weiny, LLNL +* +*********/ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#define FILE_ID OSM_FILE_EVENT_PLUGIN_C +#include +#include + +#if defined(PATH_MAX) +#define OSM_PATH_MAX (PATH_MAX + 1) +#elif defined (_POSIX_PATH_MAX) +#define OSM_PATH_MAX (_POSIX_PATH_MAX + 1) +#else +#define OSM_PATH_MAX 256 +#endif + +/** + * functions + */ +osm_epi_plugin_t *osm_epi_construct(osm_opensm_t *osm, char *plugin_name) +{ + char lib_name[OSM_PATH_MAX]; + struct old_if { unsigned ver; } *old_impl; + osm_epi_plugin_t *rc = NULL; + + if (!plugin_name || !*plugin_name) + return NULL; + + /* find the plugin */ + snprintf(lib_name, sizeof(lib_name), "lib%s.so", plugin_name); + + rc = malloc(sizeof(*rc)); + if (!rc) + return NULL; + + rc->handle = dlopen(lib_name, RTLD_LAZY); + if (!rc->handle) { + OSM_LOG(&osm->log, OSM_LOG_ERROR, + "Failed to open event plugin \"%s\" : \"%s\"\n", + lib_name, dlerror()); + goto DLOPENFAIL; + } + + rc->impl = + (osm_event_plugin_t *) dlsym(rc->handle, + OSM_EVENT_PLUGIN_IMPL_NAME); + if (!rc->impl) { + OSM_LOG(&osm->log, OSM_LOG_ERROR, + "Failed to find \"%s\" symbol in \"%s\" : \"%s\"\n", + OSM_EVENT_PLUGIN_IMPL_NAME, lib_name, dlerror()); + goto Exit; + } + + /* check for old interface */ + old_impl = (struct old_if *) rc->impl; + if (old_impl->ver == OSM_ORIG_EVENT_PLUGIN_INTERFACE_VER) { + OSM_LOG(&osm->log, OSM_LOG_ERROR, "Error loading plugin: " + "\'%s\' contains a deprecated interface version %d\n" + " Please recompile with the new interface.\n", + plugin_name, old_impl->ver); + goto Exit; + } + + /* Check the version to make sure this module will work with us */ + if (strcmp(rc->impl->osm_version, osm->osm_version)) { + OSM_LOG(&osm->log, OSM_LOG_ERROR, "Error loading plugin" + " \'%s\': OpenSM version mismatch - plugin was built" + " against %s version of OpenSM. 
Skip loading.\n", + plugin_name, rc->impl->osm_version); + goto Exit; + } + + if (!rc->impl->create) { + OSM_LOG(&osm->log, OSM_LOG_ERROR, + "Error loading plugin \'%s\': no create() method.\n", + plugin_name); + goto Exit; + } + + rc->plugin_data = rc->impl->create(osm); + + if (!rc->plugin_data) + goto Exit; + + rc->plugin_name = strdup(plugin_name); + return rc; + +Exit: + dlclose(rc->handle); +DLOPENFAIL: + free(rc); + return NULL; +} + +void osm_epi_destroy(osm_epi_plugin_t * plugin) +{ + if (plugin) { + if (plugin->impl->delete) + plugin->impl->delete(plugin->plugin_data); + dlclose(plugin->handle); + free(plugin->plugin_name); + free(plugin); + } +} diff --git a/opensm/osm_guid_info_rcv.c b/opensm/osm_guid_info_rcv.c new file mode 100644 index 0000000..f4d0592 --- /dev/null +++ b/opensm/osm_guid_info_rcv.c @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2012 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * Abstract:
+ * Implementation of osm_gi_rcv_t.
+ * This object represents the GUIDInfo Receiver object.
+ * This object is part of the opensm family of objects.
+ */
+
+#if HAVE_CONFIG_H
+# include
+#endif /* HAVE_CONFIG_H */
+
+/* NOTE(review): the header names of the #include lines are missing in this copy. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#define FILE_ID OSM_FILE_GUID_INFO_RCV_C
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * osm_gi_rcv_process - handle a received GUIDInfo SMP.
+ *
+ * context: the osm_sm_t the MAD was received for.
+ * data:    the osm_madw_t wrapping the GUIDInfo SMP.
+ *
+ * Dumps the received block at debug level.  A MAD carrying a non-zero
+ * status is only logged.  Otherwise the port named in the MAD context
+ * is looked up under the exclusive subnet lock and ERR 4701 is logged
+ * when no such port object exists.
+ */
+void osm_gi_rcv_process(IN void *context, IN void *data)
+{
+	osm_sm_t *sm = context;
+	osm_madw_t *p_madw = data;
+	ib_smp_t *p_smp;
+	ib_guid_info_t *p_gi;
+	osm_gi_context_t *p_gi_ctx;
+	osm_port_t *p_port;
+	ib_net64_t node_guid, port_guid;
+	uint8_t block_num;
+
+	CL_ASSERT(sm);
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	CL_ASSERT(p_madw);
+
+	p_smp = osm_madw_get_smp_ptr(p_madw);
+	p_gi_ctx = osm_madw_get_gi_context_ptr(p_madw);
+	p_gi = ib_smp_get_payload_ptr(p_smp);
+
+	CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_GUID_INFO);
+
+	node_guid = p_gi_ctx->node_guid;
+	port_guid = p_gi_ctx->port_guid;
+	/* the low byte of the attribute modifier is used as block number */
+	block_num = (uint8_t) cl_ntoh32(p_smp->attr_mod);
+
+	osm_dump_guid_info_v2(sm->p_log, node_guid, port_guid, block_num, p_gi,
+			      FILE_ID, OSM_LOG_DEBUG);
+
+	if (ib_smp_get_status(p_smp)) {
+		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+			"MAD status 0x%x received\n",
+			cl_ntoh16(ib_smp_get_status(p_smp)));
+	} else {
+		/* the lock is held for the lookup only, never while logging */
+		CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
+		p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
+		CL_PLOCK_RELEASE(sm->p_lock);
+
+		if (!p_port) {
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4701: "
+				"No port object for port with GUID 0x%" PRIx64
+				"\n\t\t\t\tfor parent node GUID 0x%" PRIx64
+				", TID 0x%" PRIx64 "\n",
+				cl_ntoh64(port_guid), cl_ntoh64(node_guid),
+				cl_ntoh64(p_smp->trans_id));
+		}
+	}
+
+	OSM_LOG_EXIT(sm->p_log);
+}
diff --git a/opensm/osm_guid_mgr.c b/opensm/osm_guid_mgr.c
new file mode 100644
index 0000000..0fa8017
--- /dev/null
+++ b/opensm/osm_guid_mgr.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2006-2009 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2012 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * Abstract:
+ * Implementation of osm_guid_mgr_t.
+ * This object implements the GUID manager object.
+ * This object is part of the opensm family of objects.
+ */
+
+#if HAVE_CONFIG_H
+# include
+#endif /* HAVE_CONFIG_H */
+
+/* NOTE(review): the header names of the #include lines are missing in this copy. */
+#include
+#include
+#include
+#include
+#include
+#define FILE_ID OSM_FILE_GUID_MGR_C
+#include
+#include
+#include
+#include
+
+/*
+ * guidinfo_set - send a GUIDInfo Set for one block of a port's GUID table.
+ *
+ * The block is copied out of the physical port's p_guids table starting
+ * at entry block_num * GUID_TABLE_MAX_ENTRIES and sent with
+ * osm_req_set() along the port's directed-route path.  A failure to
+ * initiate the request is logged as ERR 5109; nothing is returned.
+ */
+static void guidinfo_set(IN osm_sa_t *sa, IN osm_port_t *p_port,
+			 IN uint8_t block_num)
+{
+	osm_madw_context_t mad_ctx;
+	uint8_t payload[IB_SMP_DATA_SIZE];
+	ib_api_status_t rc;
+
+	/* describe the request for whoever handles the response */
+	mad_ctx.gi_context.node_guid = osm_node_get_node_guid(p_port->p_node);
+	mad_ctx.gi_context.port_guid = osm_physp_get_port_guid(p_port->p_physp);
+	mad_ctx.gi_context.set_method = TRUE;
+	mad_ctx.gi_context.port_num = osm_physp_get_port_num(p_port->p_physp);
+
+	memcpy(payload,
+	       &((*p_port->p_physp->p_guids)[block_num * GUID_TABLE_MAX_ENTRIES]),
+	       sizeof(ib_guid_info_t));
+
+	rc = osm_req_set(sa->sm, osm_physp_get_dr_path_ptr(p_port->p_physp),
+			 payload, sizeof(payload), IB_MAD_ATTR_GUID_INFO,
+			 cl_hton32((uint32_t)block_num), FALSE,
+			 ib_port_info_get_m_key(&p_port->p_physp->port_info),
+			 0, CL_DISP_MSGID_NONE, &mad_ctx);
+	if (rc == IB_SUCCESS)
+		return;
+
+	OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5109: "
+		"Failure initiating GUIDInfo request (%s)\n",
+		ib_get_err_str(rc));
+}
+
+/*
+ * osm_guid_work_obj_new - allocate a work object for (p_port, block_num).
+ *
+ * Returns NULL when the allocation fails; release the object with
+ * osm_guid_work_obj_delete().
+ */
+osm_guidinfo_work_obj_t *osm_guid_work_obj_new(IN osm_port_t * p_port,
+					       IN uint8_t block_num)
+{
+	osm_guidinfo_work_obj_t *p_wobj;
+
+	/*
+	   clean allocated memory to avoid assertion when trying to insert to
+	   qlist.
+	   see cl_qlist_insert_tail(): CL_ASSERT(p_list_item->p_list != p_list)
+	 */
+	p_wobj = calloc(1, sizeof(*p_wobj));
+	if (!p_wobj)
+		return NULL;
+
+	p_wobj->block_num = block_num;
+	p_wobj->p_port = p_port;
+	return p_wobj;
+}
+
+/* osm_guid_work_obj_delete - free a work object; NULL is accepted. */
+void osm_guid_work_obj_delete(IN osm_guidinfo_work_obj_t * p_wobj)
+{
+	free(p_wobj);
+}
+
+/*
+ * osm_queue_guidinfo - queue a GUIDInfo Set for a port's GUID block.
+ *
+ * Appends a new work object to the subnet's alias_guid_list.
+ * Returns 1 when the object was queued and 0 (after logging ERR 510F)
+ * when it could not be allocated.
+ */
+int osm_queue_guidinfo(IN osm_sa_t *sa, IN osm_port_t *p_port,
+		       IN uint8_t block_num)
+{
+	osm_guidinfo_work_obj_t *p_wobj =
+	    osm_guid_work_obj_new(p_port, block_num);
+
+	if (!p_wobj) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 510F: "
+			"Memory allocation of guid work object failed\n");
+		return 0;
+	}
+
+	cl_qlist_insert_tail(&sa->p_subn->alias_guid_list,
+			     &p_wobj->list_item);
+	return 1;
+}
+
+/*
+ * osm_guid_mgr_process - drain the subnet's alias GUID work list.
+ *
+ * While holding the exclusive subnet lock, removes every queued work
+ * object from the head of alias_guid_list, issues its GUIDInfo Set via
+ * guidinfo_set() and frees the object.
+ */
+void osm_guid_mgr_process(IN osm_sm_t * sm) {
+	osm_guidinfo_work_obj_t *p_wobj;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Processing alias guid list\n");
+
+	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
+	while (cl_qlist_count(&sm->p_subn->alias_guid_list) != 0) {
+		p_wobj = (osm_guidinfo_work_obj_t *)
+		    cl_qlist_remove_head(&sm->p_subn->alias_guid_list);
+		guidinfo_set(&sm->p_subn->p_osm->sa, p_wobj->p_port,
+			     p_wobj->block_num);
+		osm_guid_work_obj_delete(p_wobj);
+	}
+	CL_PLOCK_RELEASE(sm->p_lock);
+
+	OSM_LOG_EXIT(sm->p_log);
+}
diff --git a/opensm/osm_indent b/opensm/osm_indent
new file mode 100755
index 0000000..5c4a182
--- /dev/null
+++ b/opensm/osm_indent
@@ -0,0 +1,56 @@
+#!/bin/bash
+#
+# Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
+# Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved.
+# Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+#
+# This software is available to you under a choice of one of two
+# licenses.
You may choose to be licensed under the terms of the GNU +# General Public License (GPL) Version 2, available from the file +# COPYING in the main directory of this source tree, or the +# OpenIB.org BSD license below: +# +# Redistribution and use in source and binary forms, with or +# without modification, are permitted provided that the following +# conditions are met: +# +# - Redistributions of source code must retain the above +# copyright notice, this list of conditions and the following +# disclaimer. +# +# - Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +######################################################################### +# +# Abstract: +# Indent script for source code formatting. +# +# Environment: +# Linux User Mode +# +# This is the indent format used for OpenSM (similar to one used in +# linux/scripts/Lindent). 
+
+indent -npro -kr -i8 -ts8 -sob -l80 -ss -ncs "$@"
+
+# indent doesn't have an option for label indentation, so do it with sed.
+# Every expansion is quoted so that file names containing whitespace or
+# glob characters are handled as single arguments.
+# NOTE(review): the blank after '^' in the sed pattern is reproduced as
+# found in this copy; confirm the intended leading whitespace.
+for f in "$@" ; do
+	test -f "$f" || continue
+	# stop instead of redirecting into an empty file name
+	temp=$(mktemp -t osm_indent.XXXXXXXX) || exit 1
+	sed -e 's/^ \([A-Za-z_]\+[A-Za-z_0-9]*:\)$/\1/' < "$f" > "$temp"
+	# rewrite the file only when sed changed something
+	cmp -s "$f" "$temp" || cat "$temp" > "$f"
+	rm -f "$temp"
+done
diff --git a/opensm/osm_inform.c b/opensm/osm_inform.c
new file mode 100644
index 0000000..3b293b9
--- /dev/null
+++ b/opensm/osm_inform.c
@@ -0,0 +1,821 @@
+/*
+ * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * Copyright (c) 2009 HNR Consulting. All rights reserved.
+ * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * Abstract:
+ * Implementation of inform record functions.
+ */
+
+#if HAVE_CONFIG_H
+# include
+#endif /* HAVE_CONFIG_H */
+
+/* NOTE(review): the header names of the #include lines are missing in this copy. */
+#include
+#include
+#include
+#include
+#include
+#define FILE_ID OSM_FILE_INFORM_C
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Context handed to the per-record notice matcher:
+ * p_remove_infr_list collects records that must be removed afterwards,
+ * p_ntc is the notice being reported.
+ */
+typedef struct osm_infr_match_ctxt {
+	cl_list_t *p_remove_infr_list;
+	ib_mad_notice_attr_t *p_ntc;
+} osm_infr_match_ctxt_t;
+
+/* Free an inform record obtained from osm_infr_new(). */
+void osm_infr_delete(IN osm_infr_t * p_infr)
+{
+	free(p_infr);
+}
+
+/*
+ * Allocate a new inform record holding a byte-wise copy of *p_infr_rec.
+ * Returns NULL when malloc() fails.
+ */
+osm_infr_t *osm_infr_new(IN const osm_infr_t * p_infr_rec)
+{
+	osm_infr_t *p_infr;
+
+	CL_ASSERT(p_infr_rec);
+
+	p_infr = (osm_infr_t *) malloc(sizeof(osm_infr_t));
+	if (p_infr)
+		memcpy(p_infr, p_infr_rec, sizeof(osm_infr_t));
+
+	return p_infr;
+}
+
+/*
+ * Dump the InformInfo of every record in p_subn->sa_infr_list.
+ * Does nothing unless debug logging is active.
+ */
+static void dump_all_informs(IN const osm_subn_t * p_subn, IN osm_log_t * p_log)
+{
+	cl_list_item_t *p_list_item;
+
+	if (!OSM_LOG_IS_ACTIVE_V2(p_log, OSM_LOG_DEBUG))
+		return;
+
+	p_list_item = cl_qlist_head(&p_subn->sa_infr_list);
+	while (p_list_item != cl_qlist_end(&p_subn->sa_infr_list)) {
+		osm_dump_inform_info_v2(p_log,
+					&((osm_infr_t *) p_list_item)->
+					inform_record.inform_info, FILE_ID, OSM_LOG_DEBUG);
+		p_list_item = cl_qlist_next(p_list_item);
+	}
+}
+
+/**********************************************************************
+ * Match an infr by the InformInfo and Address vector
+ *
+ * p_list_item is the stored record, context the record searched for.
+ * Returns CL_SUCCESS when report address and InformInfo fields agree,
+ * CL_NOT_FOUND otherwise; the first differing field is logged at
+ * debug level.
+ **********************************************************************/
+static cl_status_t match_inf_rec(IN const cl_list_item_t * p_list_item,
+				 IN void *context)
+{
+	osm_infr_t *p_infr_rec = (osm_infr_t *) context;
+	osm_infr_t *p_infr = (osm_infr_t *) p_list_item;
+	ib_inform_info_t *p_ii_rec = &p_infr_rec->inform_record.inform_info;
+	ib_inform_info_t *p_ii = &p_infr->inform_record.inform_info;
+	osm_log_t *p_log = p_infr_rec->sa->p_log;
+	cl_status_t status = CL_NOT_FOUND;
+
+	OSM_LOG_ENTER(p_log);
+
+	if (memcmp(&p_infr->report_addr, &p_infr_rec->report_addr,
+		   sizeof(p_infr_rec->report_addr))) {
+		OSM_LOG(p_log, OSM_LOG_DEBUG, "Differ by Address\n");
+		goto Exit;
+	}
+
+	/* if inform_info.gid is not zero, ignore lid range */
+	if (ib_gid_is_notzero(&p_ii_rec->gid)) {
+		if (memcmp(&p_ii->gid, &p_ii_rec->gid, sizeof(p_ii->gid))) {
+			OSM_LOG(p_log, OSM_LOG_DEBUG,
+				"Differ by InformInfo.gid\n");
+			goto Exit;
+		}
+	} else {
+		if ((p_ii->lid_range_begin != p_ii_rec->lid_range_begin) ||
+		    (p_ii->lid_range_end != p_ii_rec->lid_range_end)) {
+			OSM_LOG(p_log, OSM_LOG_DEBUG,
+				"Differ by InformInfo.LIDRange\n");
+			goto Exit;
+		}
+	}
+
+	if (p_ii->trap_type != p_ii_rec->trap_type) {
+		OSM_LOG(p_log, OSM_LOG_DEBUG,
+			"Differ by InformInfo.TrapType\n");
+		goto Exit;
+	}
+
+	if (p_ii->is_generic != p_ii_rec->is_generic) {
+		OSM_LOG(p_log, OSM_LOG_DEBUG,
+			"Differ by InformInfo.IsGeneric\n");
+		goto Exit;
+	}
+
+	/* status becomes CL_SUCCESS only when no branch of the chain reports a difference */
+	if (p_ii->is_generic) {
+		if (p_ii->g_or_v.generic.trap_num !=
+		    p_ii_rec->g_or_v.generic.trap_num)
+			OSM_LOG(p_log, OSM_LOG_DEBUG,
+				"Differ by InformInfo.Generic.TrapNumber\n");
+		else if (p_ii->g_or_v.generic.qpn_resp_time_val !=
+			 p_ii_rec->g_or_v.generic.qpn_resp_time_val)
+			OSM_LOG(p_log, OSM_LOG_DEBUG,
+				"Differ by InformInfo.Generic.QPNRespTimeVal\n");
+		else if (p_ii->g_or_v.generic.node_type_msb !=
+			 p_ii_rec->g_or_v.generic.node_type_msb)
+			OSM_LOG(p_log, OSM_LOG_DEBUG,
+				"Differ by InformInfo.Generic.NodeTypeMSB\n");
+		else if (p_ii->g_or_v.generic.node_type_lsb !=
+			 p_ii_rec->g_or_v.generic.node_type_lsb)
+			OSM_LOG(p_log, OSM_LOG_DEBUG,
+				"Differ by InformInfo.Generic.NodeTypeLSB\n");
+		else
+			status = CL_SUCCESS;
+	} else {
+		if (p_ii->g_or_v.vend.dev_id != p_ii_rec->g_or_v.vend.dev_id)
+			OSM_LOG(p_log, OSM_LOG_DEBUG,
+				"Differ by InformInfo.Vendor.DeviceID\n");
+		else if (p_ii->g_or_v.vend.qpn_resp_time_val !=
+			 p_ii_rec->g_or_v.vend.qpn_resp_time_val)
+			OSM_LOG(p_log, OSM_LOG_DEBUG,
+				"Differ by InformInfo.Vendor.QPNRespTimeVal\n");
+		else if (p_ii->g_or_v.vend.vendor_id_msb !=
+			 p_ii_rec->g_or_v.vend.vendor_id_msb)
+			OSM_LOG(p_log, OSM_LOG_DEBUG,
+				"Differ by InformInfo.Vendor.VendorIdMSB\n");
+		else if (p_ii->g_or_v.vend.vendor_id_lsb !=
+			 p_ii_rec->g_or_v.vend.vendor_id_lsb)
+			OSM_LOG(p_log, OSM_LOG_DEBUG,
+				"Differ by InformInfo.Vendor.VendorIdLSB\n");
+		else
+			status = CL_SUCCESS;
+	}
+
+Exit:
+	OSM_LOG_EXIT(p_log);
+	return status;
+}
+
+/*
+ * Find the record in p_subn->sa_infr_list that match_inf_rec() accepts
+ * for p_infr_rec.  Returns the stored record or NULL when none matches.
+ */
+osm_infr_t *osm_infr_get_by_rec(IN osm_subn_t const *p_subn,
+				IN osm_log_t * p_log,
+				IN osm_infr_t * p_infr_rec)
+{
+	cl_list_item_t *p_list_item;
+
+	OSM_LOG_ENTER(p_log);
+
+	dump_all_informs(p_subn, p_log);
+
+	OSM_LOG(p_log, OSM_LOG_DEBUG, "Looking for Inform Record\n");
+	osm_dump_inform_info_v2(p_log, &(p_infr_rec->inform_record.inform_info),
+				FILE_ID, OSM_LOG_DEBUG);
+	OSM_LOG(p_log, OSM_LOG_DEBUG, "InformInfo list size %d\n",
+		cl_qlist_count(&p_subn->sa_infr_list));
+
+	p_list_item = cl_qlist_find_from_head(&p_subn->sa_infr_list,
+					      match_inf_rec, p_infr_rec);
+
+	/* the list end marker means "not found" */
+	if (p_list_item == cl_qlist_end(&p_subn->sa_infr_list))
+		p_list_item = NULL;
+
+	OSM_LOG_EXIT(p_log);
+	return (osm_infr_t *) p_list_item;
+}
+
+/*
+ * Insert p_infr at the head of p_subn->sa_infr_list and mark the SA
+ * database dirty.  The list is dumped before and after at debug level.
+ */
+void osm_infr_insert_to_db(IN osm_subn_t * p_subn, IN osm_log_t * p_log,
+			   IN osm_infr_t * p_infr)
+{
+	OSM_LOG_ENTER(p_log);
+
+	OSM_LOG(p_log, OSM_LOG_DEBUG,
+		"Inserting new InformInfo Record into Database\n");
+	OSM_LOG(p_log, OSM_LOG_DEBUG, "Dump before insertion (size %d)\n",
+		cl_qlist_count(&p_subn->sa_infr_list));
+	dump_all_informs(p_subn, p_log);
+
+#if 0
+	osm_dump_inform_info_v2(p_log,
+				&(p_infr->inform_record.inform_info),
+				FILE_ID, OSM_LOG_DEBUG);
+#endif
+
+	cl_qlist_insert_head(&p_subn->sa_infr_list, &p_infr->list_item);
+	p_subn->p_osm->sa.dirty = TRUE;
+
+	OSM_LOG(p_log, OSM_LOG_DEBUG, "Dump after insertion (size %d)\n",
+		cl_qlist_count(&p_subn->sa_infr_list));
+	dump_all_informs(p_subn, p_log);
+	OSM_LOG_EXIT(p_log);
+}
+
+/*
+ * Unlink p_infr from p_subn->sa_infr_list, mark the SA database dirty
+ * and free the record; p_infr must not be used afterwards.
+ */
+void osm_infr_remove_from_db(IN osm_subn_t * p_subn, IN osm_log_t * p_log,
+			     IN osm_infr_t * p_infr)
+{
+	char gid_str[INET6_ADDRSTRLEN];
+	OSM_LOG_ENTER(p_log);
+
+	OSM_LOG(p_log, OSM_LOG_DEBUG, "Removing InformInfo Subscribing GID:%s"
+		" Enum:0x%X from Database\n",
+		inet_ntop(AF_INET6, p_infr->inform_record.subscriber_gid.raw,
+			  gid_str, sizeof gid_str),
+		p_infr->inform_record.subscriber_enum);
+
+	osm_dump_inform_info_v2(p_log, &(p_infr->inform_record.inform_info),
+				FILE_ID, OSM_LOG_DEBUG);
+
+	cl_qlist_remove_item(&p_subn->sa_infr_list, &p_infr->list_item);
+	p_subn->p_osm->sa.dirty = TRUE;
+
+	osm_infr_delete(p_infr);
+
+	OSM_LOG_EXIT(p_log);
+}
+
+/*
+ * Remove every subscription whose subscriber GID carries port_guid as
+ * interface id.  Returns CL_SUCCESS when at least one record was
+ * removed and CL_NOT_FOUND otherwise.
+ * NOTE(review): cl_status_t constants are returned through an
+ * ib_api_status_t here - confirm that callers only test for success.
+ */
+ib_api_status_t osm_infr_remove_subscriptions(IN osm_subn_t * p_subn,
+					      IN osm_log_t * p_log,
+					      IN ib_net64_t port_guid)
+{
+	cl_list_item_t *p_list_item;
+	osm_infr_t *p_infr;
+	ib_api_status_t status = CL_NOT_FOUND;
+
+	OSM_LOG_ENTER(p_log);
+
+	/* go over all inform info available at the subnet */
+	/* match to the given GID and delete subscriptions if match */
+	p_list_item = cl_qlist_head(&p_subn->sa_infr_list);
+	while (p_list_item != cl_qlist_end(&p_subn->sa_infr_list)) {
+
+		p_infr = (osm_infr_t *)p_list_item;
+		/* step to the next item first: p_infr may be freed below */
+		p_list_item = cl_qlist_next(p_list_item);
+
+		if (port_guid != p_infr->inform_record.subscriber_gid.unicast.interface_id)
+			continue;
+
+		/* Remove this event subscription */
+		osm_infr_remove_from_db(p_subn, p_log, p_infr);
+
+		status = CL_SUCCESS;
+	}
+
+	OSM_LOG_EXIT(p_log);
+	return (status);
+}
+
+/**********************************************************************
+ * Send a report:
+ * Given a target address to send to and the notice.
+ * We need to send SubnAdmReport
+ *
+ * Builds a SubnAdm Report(Notice) MAD carrying a copy of *p_ntc and
+ * sends it to p_infr_rec->report_addr, expecting a response.
+ * Returns IB_ERROR (ERR 0203) when no MAD could be taken from the pool
+ * and IB_SUCCESS otherwise.
+ * NOTE(review): the result of osm_sa_send() is not examined here.
+ **********************************************************************/
+static ib_api_status_t send_report(IN osm_infr_t * p_infr_rec,	/* the informinfo */
+				   IN ib_mad_notice_attr_t * p_ntc	/* notice to send */
+    )
+{
+	osm_madw_t *p_report_madw;
+	ib_mad_notice_attr_t *p_report_ntc;
+	ib_mad_t *p_mad;
+	ib_sa_mad_t *p_sa_mad;
+	static atomic32_t trap_fwd_trans_id = 0x02DAB000;
+	atomic32_t next_trans_id;
+	ib_api_status_t status = IB_SUCCESS;
+	osm_log_t *p_log = p_infr_rec->sa->p_log;
+	ib_net64_t tid;
+
+	OSM_LOG_ENTER(p_log);
+
+	/* HACK: who switches or uses the src and dest GIDs in the grh_info ?? */
+
+	/* it is better to use LIDs since the GIDs might not be there for SMI traps */
+	/* NOTE(review): the TID logged here is the counter value before it is advanced below */
+	OSM_LOG(p_log, OSM_LOG_DEBUG, "Forwarding Notice Event from LID %u"
+		" to InformInfo LID %u GUID 0x%" PRIx64 ", TID 0x%X\n",
+		cl_ntoh16(p_ntc->issuer_lid),
+		cl_ntoh16(p_infr_rec->report_addr.dest_lid),
+		cl_ntoh64(p_infr_rec->inform_record.subscriber_gid.unicast.interface_id),
+		trap_fwd_trans_id);
+
+	/* get the MAD to send */
+	p_report_madw = osm_mad_pool_get(p_infr_rec->sa->p_mad_pool,
+					 p_infr_rec->h_bind, MAD_BLOCK_SIZE,
+					 &(p_infr_rec->report_addr));
+
+	if (!p_report_madw) {
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0203: "
+			"Cannot send report to LID %u, osm_mad_pool_get failed\n",
+			cl_ntoh16(p_infr_rec->report_addr.dest_lid));
+		status = IB_ERROR;
+		goto Exit;
+	}
+
+	p_report_madw->resp_expected = TRUE;
+
+	/* advance trap trans id (cant simply ++ on some systems inside ntoh) */
+	/*
+	 * A TID of zero is skipped.  The wrap is detected on the value this
+	 * increment returned, not by re-reading the shared counter, so a
+	 * concurrent increment cannot hide or fake the wrap.
+	 */
+	next_trans_id = cl_atomic_inc(&trap_fwd_trans_id);
+	if (next_trans_id == 0)
+		next_trans_id = cl_atomic_inc(&trap_fwd_trans_id);
+	tid = cl_hton64((uint64_t) next_trans_id & (uint64_t) (0xFFFFFFFF));
+	p_mad = osm_madw_get_mad_ptr(p_report_madw);
+	ib_mad_init_new(p_mad, IB_MCLASS_SUBN_ADM, 2, IB_MAD_METHOD_REPORT,
+			tid, IB_MAD_ATTR_NOTICE, 0);
+
+	p_sa_mad = osm_madw_get_sa_mad_ptr(p_report_madw);
+
+	p_report_ntc = (ib_mad_notice_attr_t *) & (p_sa_mad->data);
+
+	/* copy the notice */
+	*p_report_ntc = *p_ntc;
+
+	/* The TRUE is for: response is expected */
+	osm_sa_send(p_infr_rec->sa, p_report_madw, TRUE);
+
+Exit:
+	OSM_LOG_EXIT(p_log);
+	return status;
+}
+
+/*
+ * is_access_permitted - partition (P_Key) check before forwarding a notice.
+ *
+ * p_infr_rec:   the subscription the notice would be reported to.
+ * p_infr_match: carries the notice and the list that collects
+ *               subscriptions to be removed by the caller.
+ *
+ * Returns 1 when the report may be sent and 0 otherwise.  For a P_Key
+ * mismatch between trap source and subscriber, a subscription whose
+ * lid_range_begin is not 0xFFFF is appended to the removal list.
+ */
+static int is_access_permitted(osm_infr_t *p_infr_rec,
+			       osm_infr_match_ctxt_t *p_infr_match )
+{
+	cl_list_t *p_infr_to_remove_list = p_infr_match->p_remove_infr_list;
+	ib_inform_info_t *p_ii = &(p_infr_rec->inform_record.inform_info);
+	ib_mad_notice_attr_t *p_ntc = p_infr_match->p_ntc;
+	uint16_t trap_num = cl_ntoh16(p_ntc->g_or_v.generic.trap_num);
+	osm_subn_t *p_subn = p_infr_rec->sa->p_subn;
+	osm_log_t *p_log = p_infr_rec->sa->p_log;
+	osm_mgrp_t *p_mgrp;
+	ib_gid_t source_gid;
+	osm_port_t *p_src_port;
+	osm_port_t *p_dest_port;
+
+	/* In case of SM_GID_IN_SERVICE_TRAP(64) or SM_GID_OUT_OF_SERVICE_TRAP(65) traps
+	   the source gid comparison should be done on the trap source (saved
+	   as the gid in the data details field).
+	   For traps SM_MGID_CREATED_TRAP(66) or SM_MGID_DESTROYED_TRAP(67)
+	   the data details gid is the MGID.
+	   We need to check whether the subscriber has a compatible
+	   pkey with MC group.
+	   In all other cases the issuer gid is the trap source.
+	 */
+	if (trap_num >= SM_GID_IN_SERVICE_TRAP &&
+	    trap_num <= SM_MGID_DESTROYED_TRAP)
+		/* The issuer of these traps is the SM so source_gid
+		   is the gid saved on the data details */
+		source_gid = p_ntc->data_details.ntc_64_67.gid;
+	else
+		source_gid = p_ntc->issuer_gid;
+
+	p_dest_port = osm_get_port_by_lid(p_subn,
+					  p_infr_rec->report_addr.dest_lid);
+	if (!p_dest_port) {
+		OSM_LOG(p_log, OSM_LOG_INFO,
+			"Cannot find destination port with LID:%u\n",
+			cl_ntoh16(p_infr_rec->report_addr.dest_lid));
+		goto Exit;
+	}
+
+	/* Check if there is a pkey match. o13-17.1.1 */
+	switch (trap_num) {
+	case SM_MGID_CREATED_TRAP:
+	case SM_MGID_DESTROYED_TRAP:
+		p_mgrp = osm_get_mgrp_by_mgid(p_subn, &source_gid);
+		if (!p_mgrp) {
+			char gid_str[INET6_ADDRSTRLEN];
+			OSM_LOG(p_log, OSM_LOG_INFO,
+				"Cannot find MGID %s\n",
+				inet_ntop(AF_INET6, source_gid.raw, gid_str, sizeof gid_str));
+			goto Exit;
+		}
+
+		if (!osm_physp_has_pkey(p_log,
+					p_mgrp->mcmember_rec.pkey,
+					p_dest_port->p_physp)) {
+			char gid_str[INET6_ADDRSTRLEN];
+			OSM_LOG(p_log, OSM_LOG_INFO,
+				"MGID %s and port GUID:0x%016" PRIx64 " do not share same pkey\n",
+				inet_ntop(AF_INET6, source_gid.raw, gid_str, sizeof gid_str),
+				cl_ntoh64(p_dest_port->guid));
+			goto Exit;
+		}
+		break;
+
+	default:
+		p_src_port =
+		    osm_get_port_by_guid(p_subn, source_gid.unicast.interface_id);
+		if (!p_src_port) {
+			OSM_LOG(p_log, OSM_LOG_INFO,
+				"Cannot find source port with GUID:0x%016" PRIx64 "\n",
+				cl_ntoh64(source_gid.unicast.interface_id));
+			goto Exit;
+		}
+
+
+		if (osm_port_share_pkey(p_log, p_src_port, p_dest_port,
+					p_subn->opt.allow_both_pkeys) == FALSE) {
+			OSM_LOG(p_log, OSM_LOG_DEBUG, "Mismatch by Pkey\n");
+			/* According to o13-17.1.2 - If this informInfo
+			   does not have lid_range_begin of 0xFFFF,
+			   then this informInfo request should be
+			   removed from database */
+			if (p_ii->lid_range_begin != 0xFFFF) {
+				OSM_LOG(p_log, OSM_LOG_VERBOSE,
+					"Pkey mismatch on lid_range_begin != 0xFFFF. "
+					"Need to remove this informInfo from db\n");
+				/* add the informInfo record to the remove_infr list */
+				cl_list_insert_tail(p_infr_to_remove_list, p_infr_rec);
+			}
+			goto Exit;
+		}
+		break;
+	}
+
+	return 1;
+Exit:
+	return 0;
+}
+
+
+/**********************************************************************
+ * This routine compares a given Notice and a ListItem of InformInfo type.
+ * PREREQUISITE:
+ * The Notice.GID should be pre-filled with the trap generator GID
+ *
+ * p_list_item is one subscription of the InformInfo list; context is
+ * the osm_infr_match_ctxt_t holding the notice.  When every rule below
+ * and the access check pass, the notice is forwarded with send_report().
+ **********************************************************************/
+static void match_notice_to_inf_rec(IN cl_list_item_t * p_list_item,
+				    IN void *context)
+{
+	osm_infr_match_ctxt_t *p_infr_match = (osm_infr_match_ctxt_t *) context;
+	ib_mad_notice_attr_t *p_ntc = p_infr_match->p_ntc;
+	osm_infr_t *p_infr_rec = (osm_infr_t *) p_list_item;
+	ib_inform_info_t *p_ii = &(p_infr_rec->inform_record.inform_info);
+	osm_log_t *p_log = p_infr_rec->sa->p_log;
+
+	OSM_LOG_ENTER(p_log);
+
+	/* matching rules
+	 * InformInfo   Notice
+	 * GID          IssuerGID    if non zero must match the trap
+	 * LIDRange     IssuerLID    apply only if GID=0
+	 * IsGeneric    IsGeneric    is compulsory and must match the trap
+	 * Type         Type         if not 0xFFFF must match
+	 * TrapNumber   TrapNumber   if not 0xFFFF must match
+	 * DeviceId     DeviceID     if not 0xFFFF must match
+	 * QPN dont care
+	 * ProducerType ProducerType match or 0xFFFFFF // EZ: actually my interpretation
+	 * VendorID     VendorID     match or 0xFFFFFF
+	 */
+
+	/* GID IssuerGID if non zero must match the trap */
+	if (p_ii->gid.unicast.prefix != 0
+	    || p_ii->gid.unicast.interface_id != 0) {
+		/* match by GID */
+		if (memcmp(&(p_ii->gid), &(p_ntc->issuer_gid),
+			   sizeof(ib_gid_t))) {
+			OSM_LOG(p_log, OSM_LOG_DEBUG, "Mismatch by GID\n");
+			goto Exit;
+		}
+	} else {
+		/* LIDRange IssuerLID apply only if GID=0 */
+		/* If lid_range_begin of the informInfo is 0xFFFF - then it should be ignored. */
+		if (p_ii->lid_range_begin != 0xFFFF) {
+			/* a real lid range is given - check it */
+			/* NOTE(review): cl_hton16 is applied to network-order fields here; cl_ntoh16 looks intended - confirm */
+			if ((cl_hton16(p_ii->lid_range_begin) >
+			     cl_hton16(p_ntc->issuer_lid))
+			    || (cl_hton16(p_ntc->issuer_lid) >
+				cl_hton16(p_ii->lid_range_end))) {
+				OSM_LOG(p_log, OSM_LOG_DEBUG,
+					"Mismatch by LID Range. Needed: %u <= %u <= %u\n",
+					cl_hton16(p_ii->lid_range_begin),
+					cl_hton16(p_ntc->issuer_lid),
+					cl_hton16(p_ii->lid_range_end));
+				goto Exit;
+			}
+		}
+	}
+
+	/* IsGeneric IsGeneric is compulsory and must match the trap */
+	if ((p_ii->is_generic && !ib_notice_is_generic(p_ntc)) ||
+	    (!p_ii->is_generic && ib_notice_is_generic(p_ntc))) {
+		OSM_LOG(p_log, OSM_LOG_DEBUG, "Mismatch by Generic/Vendor\n");
+		goto Exit;
+	}
+
+	/* Type Type if not 0xFFFF must match */
+	if ((p_ii->trap_type != 0xFFFF) &&
+	    (cl_ntoh16(p_ii->trap_type) != ib_notice_get_type(p_ntc))) {
+		OSM_LOG(p_log, OSM_LOG_DEBUG, "Mismatch by Type\n");
+		goto Exit;
+	}
+
+	/* based on generic type */
+	if (p_ii->is_generic) {
+		/* TrapNumber TrapNumber if not 0xFFFF must match */
+		if ((p_ii->g_or_v.generic.trap_num != 0xFFFF) &&
+		    (p_ii->g_or_v.generic.trap_num !=
+		     p_ntc->g_or_v.generic.trap_num)) {
+			OSM_LOG(p_log, OSM_LOG_DEBUG, "Mismatch by Trap Num\n");
+			goto Exit;
+		}
+
+		/* ProducerType ProducerType match or 0xFFFFFF */
+		if ((cl_ntoh32(ib_inform_info_get_prod_type(p_ii)) != 0xFFFFFF)
+		    && (ib_inform_info_get_prod_type(p_ii) !=
+			ib_notice_get_prod_type(p_ntc))) {
+			OSM_LOG(p_log, OSM_LOG_DEBUG,
+				"Mismatch by Node Type: II=0x%06X (%s) Trap=0x%06X (%s)\n",
+				cl_ntoh32(ib_inform_info_get_prod_type(p_ii)),
+				ib_get_producer_type_str
+				(ib_inform_info_get_prod_type(p_ii)),
+				cl_ntoh32(ib_notice_get_prod_type(p_ntc)),
+				ib_get_producer_type_str(ib_notice_get_prod_type
+							 (p_ntc)));
+			goto Exit;
+		}
+	} else {
+		/* DeviceId DeviceID if not 0xFFFF must match */
+		if ((p_ii->g_or_v.vend.dev_id != 0xFFFF) &&
+		    (p_ii->g_or_v.vend.dev_id != p_ntc->g_or_v.vend.dev_id)) {
+			OSM_LOG(p_log, OSM_LOG_DEBUG, "Mismatch by Dev Id\n");
+			goto Exit;
+		}
+
+		/* VendorID VendorID match or 0xFFFFFF */
+		if ((ib_inform_info_get_vend_id(p_ii) != CL_HTON32(0xFFFFFF)) &&
+		    (ib_inform_info_get_vend_id(p_ii) !=
+		     ib_notice_get_vend_id(p_ntc))) {
+			OSM_LOG(p_log, OSM_LOG_DEBUG,
+				"Mismatch by Vendor ID\n");
+			goto Exit;
+		}
+	}
+
+	if (!is_access_permitted(p_infr_rec, p_infr_match))
+		goto Exit;
+
+	/* send the report to the address provided in the inform record */
+	OSM_LOG(p_log, OSM_LOG_DEBUG, "MATCH! Sending Report...\n");
+	send_report(p_infr_rec, p_ntc);
+
+Exit:
+	OSM_LOG_EXIT(p_log);
+}
+
+/**********************************************************************
+ * Once a Trap was received by osm_trap_rcv, or a Trap sourced by
+ * the SM was sent (Traps 64-67), this routine is called with a copy of
+ * the notice data.
+ * Given a notice attribute - compare and see if it matches the InformInfo
+ * element and if it does - call the Report(Notice) for the
+ * target QP registered by the address stored in the InformInfo element
+ **********************************************************************/
+/*
+ * log_notice - write a one-line description of a notice to the log.
+ *
+ * log:   log object to write to.
+ * level: log level used for the message.
+ * ntc:   the notice; generic notices are formatted per trap number,
+ *        vendor notices with type, vendor id and device id.
+ *
+ * For traps 64-67 the GID printed is taken from the notice data
+ * details, for all other generic traps from the issuer GID.
+ */
+static void log_notice(osm_log_t * log, osm_log_level_t level,
+		       ib_mad_notice_attr_t * ntc)
+{
+	char gid_str[INET6_ADDRSTRLEN], gid_str2[INET6_ADDRSTRLEN];
+	ib_gid_t *gid;
+	ib_gid_t *gid1, *gid2;
+
+	/* an official Event information log */
+	if (ib_notice_is_generic(ntc)) {
+		if ((ntc->g_or_v.generic.trap_num == CL_HTON16(SM_GID_IN_SERVICE_TRAP)) ||
+		    (ntc->g_or_v.generic.trap_num == CL_HTON16(SM_GID_OUT_OF_SERVICE_TRAP)) ||
+		    (ntc->g_or_v.generic.trap_num == CL_HTON16(SM_MGID_CREATED_TRAP)) ||
+		    (ntc->g_or_v.generic.trap_num == CL_HTON16(SM_MGID_DESTROYED_TRAP)))
+			gid = &ntc->data_details.ntc_64_67.gid;
+		else
+			gid = &ntc->issuer_gid;
+
+		switch (cl_ntoh16(ntc->g_or_v.generic.trap_num)) {
+		case SM_GID_IN_SERVICE_TRAP:
+		case SM_GID_OUT_OF_SERVICE_TRAP:
+			OSM_LOG(log, level,
+				"Reporting Informational Notice \"%s\", GID:%s\n",
+				ib_get_trap_str(ntc->g_or_v.generic.trap_num),
+				inet_ntop(AF_INET6, gid->raw, gid_str, sizeof gid_str));
+			break;
+		case SM_MGID_CREATED_TRAP:
+		case SM_MGID_DESTROYED_TRAP:
+			OSM_LOG(log, level,
+				"Reporting Informational Notice \"%s\", MGID:%s\n",
+				ib_get_trap_str(ntc->g_or_v.generic.trap_num),
+				inet_ntop(AF_INET6, gid->raw, gid_str, sizeof gid_str));
+			break;
+		case SM_UNPATH_TRAP:
+		case SM_REPATH_TRAP:
+			/* TODO: Fill in details once SM starts to use these traps */
+			/* the message ends in a real newline (was a literal 'n') */
+			OSM_LOG(log, level,
+				"Reporting Informational Notice \"%s\"\n",
+				ib_get_trap_str(ntc->g_or_v.generic.trap_num));
+			break;
+		case SM_LINK_STATE_CHANGED_TRAP:
+			OSM_LOG(log, level,
+				"Reporting Urgent Notice \"%s\" from switch LID %u, "
+				"GUID 0x%016" PRIx64 "\n",
+				ib_get_trap_str(ntc->g_or_v.generic.trap_num),
+				cl_ntoh16(ntc->issuer_lid),
+				cl_ntoh64(gid->unicast.interface_id));
+			break;
+		case SM_LINK_INTEGRITY_THRESHOLD_TRAP:
+		case SM_BUFFER_OVERRUN_THRESHOLD_TRAP:
+		case SM_WATCHDOG_TIMER_EXPIRED_TRAP:
+			OSM_LOG(log, level,
+				"Reporting Urgent Notice \"%s\" from LID %u, "
+				"GUID 0x%016" PRIx64 ", port %u\n",
+				ib_get_trap_str(ntc->g_or_v.generic.trap_num),
+				cl_ntoh16(ntc->issuer_lid),
+				cl_ntoh64(gid->unicast.interface_id),
+				ntc->data_details.ntc_129_131.port_num);
+			break;
+		case SM_LOCAL_CHANGES_TRAP:
+			if (ntc->data_details.ntc_144.local_changes == 1)
+				OSM_LOG(log, level,
+					"Reporting Informational Notice \"%s\" from LID %u, "
+					"GUID 0x%016" PRIx64 ", ChangeFlags 0x%04x, "
+					"CapabilityMask2 0x%04x\n",
+					ib_get_trap_str(ntc->g_or_v.generic.trap_num),
+					cl_ntoh16(ntc->issuer_lid),
+					cl_ntoh64(gid->unicast.interface_id),
+					cl_ntoh16(ntc->data_details.ntc_144.change_flgs),
+					cl_ntoh16(ntc->data_details.ntc_144.cap_mask2));
+			else
+				OSM_LOG(log, level,
+					"Reporting Informational Notice \"%s\" from LID %u, "
+					"GUID 0x%016" PRIx64 ", new CapabilityMask 0x%08x\n",
+					ib_get_trap_str(ntc->g_or_v.generic.trap_num),
+					cl_ntoh16(ntc->issuer_lid),
+					cl_ntoh64(gid->unicast.interface_id),
+					cl_ntoh32(ntc->data_details.ntc_144.new_cap_mask));
+			break;
+		case SM_SYS_IMG_GUID_CHANGED_TRAP:
+			OSM_LOG(log, level,
+				"Reporting Informational Notice \"%s\" from LID %u, "
+				"GUID 0x%016" PRIx64 ", new SysImageGUID 0x%016" PRIx64 "\n",
+				ib_get_trap_str(ntc->g_or_v.generic.trap_num),
+				cl_ntoh16(ntc->issuer_lid),
+				cl_ntoh64(gid->unicast.interface_id),
+				cl_ntoh64(ntc->data_details.ntc_145.new_sys_guid));
+			break;
+		case SM_BAD_MKEY_TRAP:
+			OSM_LOG(log, level,
+				"Reporting Security Notice \"%s\" from LID %u, "
+				"GUID 0x%016" PRIx64 ", Method 0x%x, Attribute 0x%x, "
+				"AttrMod 0x%x, M_Key 0x%016" PRIx64 "\n",
+				ib_get_trap_str(ntc->g_or_v.generic.trap_num),
+				cl_ntoh16(ntc->issuer_lid),
+				cl_ntoh64(gid->unicast.interface_id),
+				ntc->data_details.ntc_256.method,
+				cl_ntoh16(ntc->data_details.ntc_256.attr_id),
+				cl_ntoh32(ntc->data_details.ntc_256.attr_mod),
+				cl_ntoh64(ntc->data_details.ntc_256.mkey));
+			break;
+		case SM_BAD_PKEY_TRAP:
+		case SM_BAD_QKEY_TRAP:
+			gid1 = &ntc->data_details.ntc_257_258.gid1;
+			gid2 = &ntc->data_details.ntc_257_258.gid2;
+			OSM_LOG(log, level,
+				"Reporting Security Notice \"%s\" from LID %u, "
+				"GUID 0x%016" PRIx64 " : LID1 %u, LID2 %u, %s 0x%x, "
+				"SL %d, QP1 0x%x, QP2 0x%x, GID1 %s, GID2 %s\n",
+				ib_get_trap_str(ntc->g_or_v.generic.trap_num),
+				cl_ntoh16(ntc->issuer_lid),
+				cl_ntoh64(gid->unicast.interface_id),
+				cl_ntoh16(ntc->data_details.ntc_257_258.lid1),
+				cl_ntoh16(ntc->data_details.ntc_257_258.lid2),
+				cl_ntoh16(ntc->g_or_v.generic.trap_num) == SM_BAD_QKEY_TRAP ?
+				"Q_Key" : "P_Key",
+				cl_ntoh32(ntc->data_details.ntc_257_258.key),
+				cl_ntoh32(ntc->data_details.ntc_257_258.qp1) >> 28,
+				cl_ntoh32(ntc->data_details.ntc_257_258.qp1) & 0xffffff,
+				cl_ntoh32(ntc->data_details.ntc_257_258.qp2) & 0xffffff,
+				inet_ntop(AF_INET6, gid1->raw, gid_str, sizeof gid_str),
+				inet_ntop(AF_INET6, gid2->raw, gid_str2, sizeof gid_str2));
+			break;
+		case SM_BAD_SWITCH_PKEY_TRAP:
+			gid1 = &ntc->data_details.ntc_259.gid1;
+			gid2 = &ntc->data_details.ntc_259.gid2;
+			/*
+			 * NOTE(review): the PKey argument below is read through
+			 * the ntc_257_258 member although this is trap 259, and
+			 * QP2 is printed without the 0xffffff mask used for the
+			 * other queue pair values - confirm against the
+			 * ntc_259 layout.
+			 */
+			OSM_LOG(log, level,
+				"Reporting Security Notice \"%s\" from switch LID %u, "
+				"GUID 0x%016" PRIx64 " port %d : data_valid 0x%04x, "
+				"LID1 %u, LID2 %u, PKey 0x%04x, "
+				"SL %d, QP1 0x%x, QP2 0x%x, GID1 %s, GID2 %s\n",
+				ib_get_trap_str(ntc->g_or_v.generic.trap_num),
+				cl_ntoh16(ntc->issuer_lid),
+				cl_ntoh64(gid->unicast.interface_id),
+				ntc->data_details.ntc_259.port_no,
+				cl_ntoh16(ntc->data_details.ntc_259.data_valid),
+				cl_ntoh16(ntc->data_details.ntc_259.lid1),
+				cl_ntoh16(ntc->data_details.ntc_259.lid2),
+				cl_ntoh16(ntc->data_details.ntc_257_258.key),
+				cl_ntoh32(ntc->data_details.ntc_259.sl_qp1) >> 28,
+				cl_ntoh32(ntc->data_details.ntc_259.sl_qp1) & 0xffffff,
+				cl_ntoh32(ntc->data_details.ntc_259.qp2),
+				inet_ntop(AF_INET6, gid1->raw, gid_str, sizeof gid_str),
+				inet_ntop(AF_INET6, gid2->raw, gid_str2, sizeof gid_str2));
+			break;
+		default:
+			OSM_LOG(log, level,
+				"Reporting Generic Notice type:%u num:%u (%s)"
+				" from LID:%u GID:%s\n",
+				ib_notice_get_type(ntc),
+				cl_ntoh16(ntc->g_or_v.generic.trap_num),
+				ib_get_trap_str(ntc->g_or_v.generic.trap_num),
+				cl_ntoh16(ntc->issuer_lid),
+				inet_ntop(AF_INET6, gid->raw, gid_str, sizeof gid_str));
+			break;
+		}
+	} else
+		OSM_LOG(log, level,
+			"Reporting Vendor Notice type:%u vend:%u dev:%u"
+			" from LID:%u GID:%s\n",
+			ib_notice_get_type(ntc),
+			cl_ntoh32(ib_notice_get_vend_id(ntc)),
+			cl_ntoh16(ntc->g_or_v.vend.dev_id),
+			cl_ntoh16(ntc->issuer_lid),
+			inet_ntop(AF_INET6, ntc->issuer_gid.raw, gid_str,
+				  sizeof gid_str));
+}
+
+ib_api_status_t osm_report_notice(IN osm_log_t * p_log, IN osm_subn_t * p_subn, + IN ib_mad_notice_attr_t * p_ntc) +{ + osm_infr_match_ctxt_t context; + cl_list_t infr_to_remove_list; + osm_infr_t *p_infr_rec; + osm_infr_t *p_next_infr_rec; + + OSM_LOG_ENTER(p_log); + + /* + * we must make sure we are ready for this... + * note that the trap receivers might be initialized before + * the osm_infr_init call is performed. + */ + if (p_subn->sa_infr_list.state != CL_INITIALIZED) { + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Ignoring Notice Reports since Inform List is not initialized yet!\n"); + return IB_ERROR; + } + + if (OSM_LOG_IS_ACTIVE_V2(p_log, OSM_LOG_INFO)) + log_notice(p_log, OSM_LOG_INFO, p_ntc); + + /* Create a list that will hold all the infr records that should + be removed due to violation. o13-17.1.2 */ + cl_list_construct(&infr_to_remove_list); + cl_list_init(&infr_to_remove_list, 5); + context.p_remove_infr_list = &infr_to_remove_list; + context.p_ntc = p_ntc; + + /* go over all inform info available at the subnet */ + /* try match to the given notice and send if match */ + cl_qlist_apply_func(&p_subn->sa_infr_list, match_notice_to_inf_rec, + &context); + + /* If we inserted items into the infr_to_remove_list - we need to + remove them */ + p_infr_rec = (osm_infr_t *) cl_list_remove_head(&infr_to_remove_list); + while (p_infr_rec != NULL) { + p_next_infr_rec = + (osm_infr_t *) cl_list_remove_head(&infr_to_remove_list); + osm_infr_remove_from_db(p_subn, p_log, p_infr_rec); + p_infr_rec = p_next_infr_rec; + } + cl_list_destroy(&infr_to_remove_list); + + /* report IB traps to plugin */ + osm_opensm_report_event(p_subn->p_osm, OSM_EVENT_ID_TRAP, p_ntc); + + OSM_LOG_EXIT(p_log); + + return IB_SUCCESS; +} diff --git a/opensm/osm_lid_mgr.c b/opensm/osm_lid_mgr.c new file mode 100644 index 0000000..e8be268 --- /dev/null +++ b/opensm/osm_lid_mgr.c @@ -0,0 +1,1250 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. 
+ * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_lid_mgr_t. + * This file implements the LID Manager object which is responsible for + * assigning LIDs to all ports on the subnet. + * + * DATA STRUCTURES: + * p_subn->port_lid_tbl : a vector pointing from lid to its port. + * osm db guid2lid domain : a hash from guid to lid (min lid). + * p_subn->port_guid_tbl : a map from guid to discovered port obj. + * + * ALGORITHM: + * + * 0. 
we define a function to obtain the correct port lid: + * lid_mgr_get_port_lid( p_mgr, port, &min_lid, &max_lid ): + * 0.1 if the port info lid matches the guid2lid return 0 + * 0.2 if the port info has a lid and that range is empty in + * port_lid_tbl, return 0 and update the port_lid_tbl and + * guid2lid + * 0.3 else find an empty space in port_lid_tbl, update the + * port_lid_tbl and guid2lid, return 1 to flag a change required. + * + * 1. During initialization: + * 1.1 initialize the guid2lid database domain. + * 1.2 if reassign_lid is not set: + * 1.2.1 read the persistent data for the domain. + * 1.2.2 validate no duplicate use of lids and lids are 2^(lmc-1) + * + * 2. During SM port lid assignment: + * 2.1 if reassign_lids is set, make it 2^lmc + * 2.2 cleanup all port_lid_tbl and re-fill it according to guid2lid + * 2.3 call lid_mgr_get_port_lid for the SM port + * 2.4 set the port info + * + * 3. During all other ports lid assignment: + * 3.1 go through all ports in the subnet + * 3.1.1 call lid_mgr_get_port_lid + * 3.1.2 if a change required send the port info + * 3.2 if any change send the signal PENDING... + * + * 4. 
Store the guid2lid + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_LID_MGR_C +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/********************************************************************** + lid range item of qlist + **********************************************************************/ +typedef struct osm_lid_mgr_range { + cl_list_item_t item; + uint16_t min_lid; + uint16_t max_lid; +} osm_lid_mgr_range_t; + +void osm_lid_mgr_construct(IN osm_lid_mgr_t * p_mgr) +{ + memset(p_mgr, 0, sizeof(*p_mgr)); +} + +void osm_lid_mgr_destroy(IN osm_lid_mgr_t * p_mgr) +{ + cl_list_item_t *p_item; + + OSM_LOG_ENTER(p_mgr->p_log); + + while ((p_item = cl_qlist_remove_head(&p_mgr->free_ranges)) != + cl_qlist_end(&p_mgr->free_ranges)) + free((osm_lid_mgr_range_t *) p_item); + OSM_LOG_EXIT(p_mgr->p_log); +} + +/********************************************************************** +Validate the guid to lid data by making sure that under the current +LMC we did not get duplicates. If we do flag them as errors and remove +the entry. 
+**********************************************************************/ +static void lid_mgr_validate_db(IN osm_lid_mgr_t * p_mgr) +{ + cl_qlist_t guids; + osm_db_guid_elem_t *p_item; + uint16_t lid; + uint16_t min_lid; + uint16_t max_lid; + uint16_t lmc_mask; + boolean_t lids_ok; + uint8_t lmc_num_lids = (uint8_t) (1 << p_mgr->p_subn->opt.lmc); + + OSM_LOG_ENTER(p_mgr->p_log); + + lmc_mask = ~(lmc_num_lids - 1); + + cl_qlist_init(&guids); + + if (osm_db_guid2lid_guids(p_mgr->p_g2l, &guids)) { + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 0310: " + "could not get guid list\n"); + goto Exit; + } + + while ((p_item = (osm_db_guid_elem_t *) cl_qlist_remove_head(&guids)) + != (osm_db_guid_elem_t *) cl_qlist_end(&guids)) { + if (osm_db_guid2lid_get(p_mgr->p_g2l, p_item->guid, + &min_lid, &max_lid)) + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 0311: " + "could not get lid for guid:0x%016" PRIx64 "\n", + p_item->guid); + else { + lids_ok = TRUE; + + if (min_lid > max_lid || min_lid == 0 + || p_item->guid == 0 + || max_lid > p_mgr->p_subn->max_ucast_lid_ho) { + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, + "ERR 0312: " + "Illegal LID range [%u:%u] for " + "guid:0x%016" PRIx64 "\n", min_lid, + max_lid, p_item->guid); + lids_ok = FALSE; + } else if (min_lid != max_lid + && (min_lid & lmc_mask) != min_lid) { + /* check that if the lids define a range that is + valid for the current LMC mask */ + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, + "ERR 0313: " + "LID range [%u:%u] for guid:0x%016" + PRIx64 + " is not aligned according to mask:0x%04x\n", + min_lid, max_lid, p_item->guid, + lmc_mask); + lids_ok = FALSE; + } else { + /* check if the lids were not previously assigned */ + for (lid = min_lid; lid <= max_lid; lid++) { + if (p_mgr->used_lids[lid]) { + OSM_LOG(p_mgr->p_log, + OSM_LOG_ERROR, + "ERR 0314: " + "0x%04x for guid:0x%016" + PRIx64 + " was previously used\n", + lid, p_item->guid); + lids_ok = FALSE; + } + } + } + + if (lids_ok) + /* mark that it was visited */ + for (lid = 
min_lid; lid <= max_lid; lid++) { + if (lid < min_lid + lmc_num_lids) + p_mgr->used_lids[lid] = 1; + } + else if (osm_db_guid2lid_delete(p_mgr->p_g2l, + p_item->guid)) + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, + "ERR 0315: failed to delete entry for " + "guid:0x%016" PRIx64 "\n", + p_item->guid); + } /* got a lid */ + free(p_item); + } /* all guids */ +Exit: + OSM_LOG_EXIT(p_mgr->p_log); +} + +ib_api_status_t osm_lid_mgr_init(IN osm_lid_mgr_t * p_mgr, IN osm_sm_t * sm) +{ + ib_api_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(sm->p_log); + + osm_lid_mgr_construct(p_mgr); + + p_mgr->sm = sm; + p_mgr->p_log = sm->p_log; + p_mgr->p_subn = sm->p_subn; + p_mgr->p_db = sm->p_db; + p_mgr->p_lock = sm->p_lock; + + /* we initialize and restore the db domain of guid to lid map */ + p_mgr->p_g2l = osm_db_domain_init(p_mgr->p_db, "guid2lid"); + if (!p_mgr->p_g2l) { + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 0316: " + "Error initializing Guid-to-Lid persistent database\n"); + status = IB_ERROR; + goto Exit; + } + + cl_qlist_init(&p_mgr->free_ranges); + + /* we use the stored guid to lid table if not forced to reassign */ + if (!p_mgr->p_subn->opt.reassign_lids) { + if (osm_db_restore(p_mgr->p_g2l)) { +#ifndef __WIN__ + /* + * When Windows is BSODing, it might corrupt files that + * were previously opened for writing, even if the files + * are closed, so we might see corrupted guid2lid file. 
+ */ + if (p_mgr->p_subn->opt.exit_on_fatal) { + osm_log_v2(p_mgr->p_log, OSM_LOG_SYS, FILE_ID, + "FATAL: Error restoring Guid-to-Lid " + "persistent database\n"); + status = IB_ERROR; + goto Exit; + } else +#endif + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, + "ERR 0317: Error restoring Guid-to-Lid " + "persistent database\n"); + } + + /* we need to make sure we did not get duplicates with + current lmc */ + lid_mgr_validate_db(p_mgr); + } + +Exit: + OSM_LOG_EXIT(p_mgr->p_log); + return status; +} + +static uint16_t trim_lid(IN uint16_t lid) +{ + if (lid > IB_LID_UCAST_END_HO || lid < IB_LID_UCAST_START_HO) + return 0; + return lid; +} + +/********************************************************************** + initialize the manager for a new sweep: + scans the known persistent assignment and port_lid_tbl + re-calculate all empty ranges. + cleanup invalid port_lid_tbl entries +**********************************************************************/ +static int lid_mgr_init_sweep(IN osm_lid_mgr_t * p_mgr) +{ + cl_ptr_vector_t *p_discovered_vec = &p_mgr->p_subn->port_lid_tbl; + uint16_t max_defined_lid, max_persistent_lid, max_discovered_lid; + uint16_t disc_min_lid, disc_max_lid, db_min_lid, db_max_lid; + int status = 0; + cl_list_item_t *p_item; + boolean_t is_free; + osm_lid_mgr_range_t *p_range = NULL; + osm_port_t *p_port; + cl_qmap_t *p_port_guid_tbl; + uint8_t lmc_num_lids = (uint8_t) (1 << p_mgr->p_subn->opt.lmc); + uint16_t lmc_mask, req_lid, num_lids, lid; + + OSM_LOG_ENTER(p_mgr->p_log); + + lmc_mask = ~((1 << p_mgr->p_subn->opt.lmc) - 1); + + /* We must discard previous guid2lid db if this is the first master + * sweep and reassign_lids option is TRUE. + * If we came out of standby and honor_guid2lid_file option is TRUE, we + * must restore guid2lid db. Otherwise if honor_guid2lid_file option is + * FALSE we must discard previous guid2lid db. 
+ */ + if (p_mgr->p_subn->first_time_master_sweep == TRUE && + p_mgr->p_subn->opt.reassign_lids == TRUE) { + osm_db_clear(p_mgr->p_g2l); + memset(p_mgr->used_lids, 0, sizeof(p_mgr->used_lids)); + } else if (p_mgr->p_subn->coming_out_of_standby == TRUE) { + osm_db_clear(p_mgr->p_g2l); + memset(p_mgr->used_lids, 0, sizeof(p_mgr->used_lids)); + if (p_mgr->p_subn->opt.honor_guid2lid_file == FALSE) + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Ignore guid2lid file when coming out of standby\n"); + else { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Honor current guid2lid file when coming out " + "of standby\n"); + if (osm_db_restore(p_mgr->p_g2l)) + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, + "ERR 0306: " + "Error restoring Guid-to-Lid " + "persistent database. Ignoring it\n"); + lid_mgr_validate_db(p_mgr); + } + } + + /* we need to cleanup the empty ranges list */ + while ((p_item = cl_qlist_remove_head(&p_mgr->free_ranges)) != + cl_qlist_end(&p_mgr->free_ranges)) + free((osm_lid_mgr_range_t *) p_item); + + /* first clean up the port_by_lid_tbl */ + for (lid = 0; lid < cl_ptr_vector_get_size(p_discovered_vec); lid++) + cl_ptr_vector_set(p_discovered_vec, lid, NULL); + + /* we if are in the first sweep and in reassign lids mode + we should ignore all the available info and simply define one + huge empty range */ + if (p_mgr->p_subn->first_time_master_sweep == TRUE && + p_mgr->p_subn->opt.reassign_lids == TRUE) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Skipping all lids as we are reassigning them\n"); + p_range = malloc(sizeof(osm_lid_mgr_range_t)); + if (p_range) + p_range->min_lid = 1; + goto AfterScanningLids; + } + + /* go over all discovered ports and mark their entries */ + p_port_guid_tbl = &p_mgr->p_subn->port_guid_tbl; + + for (p_port = (osm_port_t *) cl_qmap_head(p_port_guid_tbl); + p_port != (osm_port_t *) cl_qmap_end(p_port_guid_tbl); + p_port = (osm_port_t *) cl_qmap_next(&p_port->map_item)) { + osm_port_get_lid_range_ho(p_port, &disc_min_lid, &disc_max_lid); + 
disc_min_lid = trim_lid(disc_min_lid); + disc_max_lid = trim_lid(disc_max_lid); + for (lid = disc_min_lid; lid <= disc_max_lid; lid++) { + if (lid < disc_min_lid + lmc_num_lids) + cl_ptr_vector_set(p_discovered_vec, lid, p_port); + else + cl_ptr_vector_set(p_discovered_vec, lid, NULL); + } + /* make sure the guid2lid entry is valid. If not, clean it. */ + if (osm_db_guid2lid_get(p_mgr->p_g2l, + cl_ntoh64(osm_port_get_guid(p_port)), + &db_min_lid, &db_max_lid)) + continue; + + if (!p_port->p_node->sw || + osm_switch_sp0_is_lmc_capable(p_port->p_node->sw, + p_mgr->p_subn)) + num_lids = lmc_num_lids; + else + num_lids = 1; + + if (num_lids != 1 && + ((db_min_lid & lmc_mask) != db_min_lid || + db_max_lid - db_min_lid + 1 < num_lids)) { + /* Not aligned, or not wide enough, then remove the entry */ + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Cleaning persistent entry for guid:" + "0x%016" PRIx64 " illegal range:[0x%x:0x%x]\n", + cl_ntoh64(osm_port_get_guid(p_port)), + db_min_lid, db_max_lid); + osm_db_guid2lid_delete(p_mgr->p_g2l, + cl_ntoh64 + (osm_port_get_guid(p_port))); + for (lid = db_min_lid; lid <= db_max_lid; lid++) + p_mgr->used_lids[lid] = 0; + } + } + + /* + Our task is to find free lid ranges. + A lid can be used if + 1. a persistent assignment exists + 2. the lid is used by a discovered port that does not have a + persistent assignment. + + scan through all lid values of both the persistent table and + discovered table. + If the lid has an assigned port in the discovered table: + * make sure the lid matches the persistent table, or + * there is no other persistent assignment for that lid. + * else cleanup the port_by_lid_tbl, mark this as empty range. + Else if the lid does not have an entry in the persistent table + mark it as free. 
+ */ + + /* find the range of lids to scan */ + max_discovered_lid = + (uint16_t) cl_ptr_vector_get_size(p_discovered_vec); + max_persistent_lid = sizeof(p_mgr->used_lids) - 1; + + /* but the vectors have one extra entry for lid=0 */ + if (max_discovered_lid) + max_discovered_lid--; + + if (max_persistent_lid > max_discovered_lid) + max_defined_lid = max_persistent_lid; + else + max_defined_lid = max_discovered_lid; + + for (lid = 1; lid <= max_defined_lid; lid++) { + is_free = TRUE; + /* first check to see if the lid is used by a persistent assignment */ + if (lid <= max_persistent_lid && p_mgr->used_lids[lid]) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "0x%04x is not free as its mapped by the " + "persistent db\n", lid); + is_free = FALSE; + /* check this is a discovered port */ + } else if (lid <= max_discovered_lid && + (p_port = cl_ptr_vector_get(p_discovered_vec, + lid))) { + /* we have a port. Now lets see if we can preserve its lid range. */ + /* For that, we need to make sure: + 1. The port has a (legal) persistency entry. Then the + local lid is free (we will use the persistency value). + 2. Can the port keep its local assignment? + a. Make sure the lid is aligned. + b. Make sure all needed lids (for the lmc) are free + according to persistency table. + */ + /* qualify the guid of the port is not persistently + mapped to another range */ + if (!osm_db_guid2lid_get(p_mgr->p_g2l, + cl_ntoh64 + (osm_port_get_guid(p_port)), + &db_min_lid, &db_max_lid)) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "0x%04x is free as it was " + "discovered but mapped by the " + "persistent db to [0x%04x:0x%04x]\n", + lid, db_min_lid, db_max_lid); + } else { + /* can the port keep its assignment ? 
*/ + /* get the lid range of that port, and the + required number of lids we are about to + assign to it */ + osm_port_get_lid_range_ho(p_port, + &disc_min_lid, + &disc_max_lid); + if (!p_port->p_node->sw || + osm_switch_sp0_is_lmc_capable + (p_port->p_node->sw, p_mgr->p_subn)) { + disc_max_lid = + disc_min_lid + lmc_num_lids - 1; + num_lids = lmc_num_lids; + } else + num_lids = 1; + + /* Make sure the lid is aligned */ + if (num_lids != 1 + && (disc_min_lid & lmc_mask) != + disc_min_lid) { + /* The lid cannot be used */ + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "0x%04x is free as it was " + "discovered but not aligned\n", + lid); + } else { + /* check that all needed lids are not persistently mapped */ + is_free = FALSE; + for (req_lid = disc_min_lid + 1; + req_lid <= disc_max_lid; + req_lid++) { + if (req_lid <= + max_persistent_lid && + p_mgr->used_lids[req_lid]) { + OSM_LOG(p_mgr->p_log, + OSM_LOG_DEBUG, + "0x%04x is free as it was discovered " + "but mapped\n", + lid); + is_free = TRUE; + break; + } + } + + if (is_free == FALSE) { + /* This port will use its local lid, and consume the entire required lid range. + Thus we can skip that range. */ + /* If the disc_max_lid is greater then lid, we can skip right to it, + since we've done all neccessary checks on the lids in between. 
*/ + if (disc_max_lid > lid) + lid = disc_max_lid; + } + } + } + } + + if (is_free) { + if (p_range) + p_range->max_lid = lid; + else { + p_range = malloc(sizeof(osm_lid_mgr_range_t)); + if (p_range) { + p_range->min_lid = lid; + p_range->max_lid = lid; + } + } + /* this lid is used so we need to finalize the previous free range */ + } else if (p_range) { + cl_qlist_insert_tail(&p_mgr->free_ranges, + &p_range->item); + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "new free lid range [%u:%u]\n", + p_range->min_lid, p_range->max_lid); + p_range = NULL; + } + } + +AfterScanningLids: + /* after scanning all known lids we need to extend the last range + to the max allowed lid */ + if (!p_range) { + p_range = malloc(sizeof(osm_lid_mgr_range_t)); + /* + The p_range can be NULL in one of 2 cases: + 1. If max_defined_lid == 0. In this case, we want the + entire range. + 2. If all lids discovered in the loop where mapped. In this + case, no free range exists and we want to define it after the + last mapped lid. 
+ */ + if (p_range) + p_range->min_lid = lid; + } + if (p_range) { + p_range->max_lid = p_mgr->p_subn->max_ucast_lid_ho; + cl_qlist_insert_tail(&p_mgr->free_ranges, &p_range->item); + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "final free lid range [%u:%u]\n", + p_range->min_lid, p_range->max_lid); + } + + OSM_LOG_EXIT(p_mgr->p_log); + return status; +} + +/********************************************************************** + check if the given range of lids is free +**********************************************************************/ +static boolean_t lid_mgr_is_range_not_persistent(IN osm_lid_mgr_t * p_mgr, + IN uint16_t lid, + IN uint16_t num_lids) +{ + uint16_t i; + + for (i = lid; i < lid + num_lids; i++) + if (p_mgr->used_lids[i]) + return FALSE; + + return TRUE; +} + +/********************************************************************** +find a free lid range +**********************************************************************/ +static void lid_mgr_find_free_lid_range(IN osm_lid_mgr_t * p_mgr, + IN uint8_t num_lids, + OUT uint16_t * p_min_lid, + OUT uint16_t * p_max_lid) +{ + uint16_t lid; + cl_list_item_t *p_item; + cl_list_item_t *p_next_item; + osm_lid_mgr_range_t *p_range = NULL; + uint8_t lmc_num_lids; + uint16_t lmc_mask; + + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "LMC = %u, number LIDs = %u\n", + p_mgr->p_subn->opt.lmc, num_lids); + + lmc_num_lids = (1 << p_mgr->p_subn->opt.lmc); + lmc_mask = ~((1 << p_mgr->p_subn->opt.lmc) - 1); + + /* + Search the list of free lid ranges for a range which is big enough + */ + p_item = cl_qlist_head(&p_mgr->free_ranges); + while (p_item != cl_qlist_end(&p_mgr->free_ranges)) { + p_next_item = cl_qlist_next(p_item); + p_range = (osm_lid_mgr_range_t *) p_item; + + lid = p_range->min_lid; + + /* if we require more then one lid we must align to LMC */ + if (num_lids > 1) { + if ((lid & lmc_mask) != lid) + lid = (lid + lmc_num_lids) & lmc_mask; + } + + /* but we can be out of the range */ + if (lid + num_lids - 1 <= 
p_range->max_lid) { + /* ok let us use that range */ + if (lid + num_lids - 1 == p_range->max_lid) { + /* we consumed the entire range */ + cl_qlist_remove_item(&p_mgr->free_ranges, + p_item); + free(p_item); + } else + /* only update the available range */ + p_range->min_lid = lid + num_lids; + + *p_min_lid = lid; + *p_max_lid = (uint16_t) (lid + num_lids - 1); + return; + } + p_item = p_next_item; + } + + /* + Couldn't find a free range of lids. + */ + *p_min_lid = *p_max_lid = 0; + /* if we run out of lids, give an error and abort! */ + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 0307: " + "OPENSM RAN OUT OF LIDS!!!\n"); + CL_ASSERT(0); +} + +static void lid_mgr_cleanup_discovered_port_lid_range(IN osm_lid_mgr_t * p_mgr, + IN osm_port_t * p_port) +{ + cl_ptr_vector_t *p_discovered_vec = &p_mgr->p_subn->port_lid_tbl; + uint16_t lid, min_lid, max_lid; + uint16_t max_tbl_lid = + (uint16_t) (cl_ptr_vector_get_size(p_discovered_vec)); + + osm_port_get_lid_range_ho(p_port, &min_lid, &max_lid); + min_lid = trim_lid(min_lid); + max_lid = trim_lid(max_lid); + for (lid = min_lid; lid <= max_lid; lid++) + if (lid < max_tbl_lid && + p_port == cl_ptr_vector_get(p_discovered_vec, lid)) + cl_ptr_vector_set(p_discovered_vec, lid, NULL); +} + +/********************************************************************** + 0.1 if the port info lid matches the guid2lid return 0 + 0.2 if the port info has a lid and that range is empty in + port_lid_tbl, return 0 and update the port_lid_tbl and + guid2lid + 0.3 else find an empty space in port_lid_tbl, update the + port_lid_tbl and guid2lid, return 1 to flag a change required. 
+**********************************************************************/
+/*
+ * Decide which lid range a port gets and record it.
+ *
+ * p_min_lid / p_max_lid receive the chosen range.  The range is stored
+ * in the guid2lid db, marked in used_lids and entered in port_lid_tbl.
+ *
+ * Order of preference (skipped entirely on the first master sweep when
+ * reassign_lids is set, in which case a new range is always assigned):
+ *   - the range stored for the port guid in the guid2lid db;
+ *   - the base lid the port currently reports, provided it is a valid
+ *     unicast lid (see trim_lid), lmc aligned and not persistently
+ *     mapped;
+ *   - a new range from lid_mgr_find_free_lid_range().
+ *
+ * Returns 1 when the port has to be sent a new lid, 0 otherwise.
+ */
+static int lid_mgr_get_port_lid(IN osm_lid_mgr_t * p_mgr,
+				IN osm_port_t * p_port,
+				OUT uint16_t * p_min_lid,
+				OUT uint16_t * p_max_lid)
+{
+	uint16_t lid, min_lid, max_lid;
+	uint64_t guid;
+	uint8_t num_lids = (1 << p_mgr->p_subn->opt.lmc);
+	int lid_changed = 0;
+	uint16_t lmc_mask;
+
+	OSM_LOG_ENTER(p_mgr->p_log);
+
+	/* get the lid from the guid2lid */
+	guid = cl_ntoh64(osm_port_get_guid(p_port));
+
+	/* if the port is a base switch port 0 then we only need one lid */
+	if (p_port->p_node->sw &&
+	    !osm_switch_sp0_is_lmc_capable(p_port->p_node->sw, p_mgr->p_subn))
+		num_lids = 1;
+
+	if (p_mgr->p_subn->first_time_master_sweep == TRUE &&
+	    p_mgr->p_subn->opt.reassign_lids == TRUE)
+		goto AssignLid;
+
+	lmc_mask = ~(num_lids - 1);
+
+	/* if the port matches the guid2lid */
+	if (!osm_db_guid2lid_get(p_mgr->p_g2l, guid, &min_lid, &max_lid)) {
+		*p_min_lid = min_lid;
+		*p_max_lid = min_lid + num_lids - 1;
+		if (min_lid == cl_ntoh16(osm_port_get_base_lid(p_port)))
+			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "0x%016" PRIx64
+				" matches its known lid:%u\n", guid, min_lid);
+		else {
+			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+				"0x%016" PRIx64 " with lid:%u "
+				"does not match its known lid:%u\n",
+				guid, cl_ntoh16(osm_port_get_base_lid(p_port)),
+				min_lid);
+			lid_mgr_cleanup_discovered_port_lid_range(p_mgr,
+								  p_port);
+			/* we still need to send the setting to the target port */
+			lid_changed = 1;
+		}
+		goto NewLidSet;
+	} else
+		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+			"0x%016" PRIx64 " has no persistent lid assigned\n",
+			guid);
+
+	/* if the port info carries a lid it must be lmc aligned and not mapped
+	   by the persistent storage.
+	   The reported value is not trusted: trim_lid() turns anything
+	   outside the unicast range into 0, so such a lid is ignored
+	   instead of being used as an index into used_lids and
+	   port_lid_tbl below. */
+	min_lid = trim_lid(cl_ntoh16(osm_port_get_base_lid(p_port)));
+
+	/* we want to ignore the discovered lid if we are also on first sweep of
+	   reassign lids flow */
+	if (min_lid) {
+		/* make sure lid is valid */
+		if ((min_lid & lmc_mask) == min_lid) {
+			/* is it free */
+			if (lid_mgr_is_range_not_persistent
+			    (p_mgr, min_lid, num_lids)) {
+				*p_min_lid = min_lid;
+				*p_max_lid = min_lid + num_lids - 1;
+				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+					"0x%016" PRIx64
+					" lid range:[%u-%u] is free\n",
+					guid, *p_min_lid, *p_max_lid);
+				goto NewLidSet;
+			} else
+				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+					"0x%016" PRIx64 " existing lid "
+					"range:[%u:%u] is not free\n",
+					guid, min_lid, min_lid + num_lids - 1);
+		} else
+			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+				"0x%016" PRIx64 " existing lid range:"
+				"[%u:%u] is not lmc aligned\n",
+				guid, min_lid, min_lid + num_lids - 1);
+	}
+
+AssignLid:
+	/* first cleanup the existing discovered lid range */
+	lid_mgr_cleanup_discovered_port_lid_range(p_mgr, p_port);
+
+	/* find an empty space */
+	lid_mgr_find_free_lid_range(p_mgr, num_lids, p_min_lid, p_max_lid);
+	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+		"0x%016" PRIx64 " assigned a new lid range:[%u-%u]\n",
+		guid, *p_min_lid, *p_max_lid);
+	lid_changed = 1;
+
+NewLidSet:
+	/* update the guid2lid db and used_lids */
+	osm_db_guid2lid_set(p_mgr->p_g2l, guid, *p_min_lid, *p_max_lid);
+	for (lid = *p_min_lid; lid <= *p_max_lid; lid++)
+		p_mgr->used_lids[lid] = 1;
+
+	/* make sure the assigned lids are marked in port_lid_tbl */
+	for (lid = *p_min_lid; lid <= *p_max_lid; lid++)
+		cl_ptr_vector_set(&p_mgr->p_subn->port_lid_tbl, lid, p_port);
+
+	OSM_LOG_EXIT(p_mgr->p_log);
+	return lid_changed;
+}
+
+/**********************************************************************
+ Set to INIT the remote port of the given physical port
+
+ Only the locally stored port_info of the remote physical port is
+ updated; nothing is done when the port has no remote port.
+ **********************************************************************/
+static void lid_mgr_set_remote_pi_state_to_init(IN osm_lid_mgr_t * p_mgr,
+						IN osm_physp_t * p_physp)
+{
+	osm_physp_t *p_rem_physp = osm_physp_get_remote(p_physp);
+
+	if (p_rem_physp == NULL)
+		return;
+
+	/* but in some rare cases the remote side might be non responsive */
+	ib_port_info_set_port_state(&p_rem_physp->port_info, IB_LINK_INIT);
+}
+
+static int
lid_mgr_set_physp_pi(IN osm_lid_mgr_t * p_mgr, + IN osm_port_t * p_port, + IN osm_physp_t * p_physp, IN ib_net16_t lid) +{ + uint8_t payload[IB_SMP_DATA_SIZE]; + ib_port_info_t *p_pi = (ib_port_info_t *) payload; + const ib_port_info_t *p_old_pi; + osm_madw_context_t context; + osm_node_t *p_node; + ib_api_status_t status; + uint8_t mtu; + uint8_t op_vls; + uint8_t port_num; + boolean_t send_set = FALSE; + boolean_t send_client_rereg = FALSE; + boolean_t update_mkey = FALSE; + int ret = 0; + + OSM_LOG_ENTER(p_mgr->p_log); + + /* + Don't bother doing anything if this Physical Port is not valid. + This allows simplified code in the caller. + */ + if (!p_physp) + goto Exit; + + port_num = osm_physp_get_port_num(p_physp); + p_node = osm_physp_get_node_ptr(p_physp); + + if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && port_num != 0) { + /* + Switch ports that are not numbered 0 should not be set + with the following attributes as they are set later + (during NO_CHANGE state in link mgr). + */ + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Skipping switch port %u, GUID 0x%016" PRIx64 "\n", + port_num, cl_ntoh64(osm_physp_get_port_guid(p_physp))); + goto Exit; + } + + p_old_pi = &p_physp->port_info; + + /* + First, copy existing parameters from the PortInfo attribute we + already have for this node. + + Second, update with default values that we know must be set for + every Physical Port and the LID and set the neighbor MTU field + appropriately. + + Third, send the SMP to this physical port. + */ + + memcpy(payload, p_old_pi, sizeof(ib_port_info_t)); + + /* + Should never write back a value that is bigger then 3 in + the PortPhysicalState field, so cannot simply copy! 
+ + Actually we want to write there: + port physical state - no change + link down default state = polling + port state - no change + */ + p_pi->state_info2 = 0x02; + ib_port_info_set_port_state(p_pi, IB_LINK_NO_CHANGE); + + if (ib_port_info_get_link_down_def_state(p_pi) != + ib_port_info_get_link_down_def_state(p_old_pi)) + send_set = TRUE; + + /* didn't get PortInfo before */ + if (!ib_port_info_get_port_state(p_old_pi)) + send_set = TRUE; + + p_pi->m_key = p_mgr->p_subn->opt.m_key; + if (memcmp(&p_pi->m_key, &p_old_pi->m_key, sizeof(p_pi->m_key))) { + update_mkey = TRUE; + send_set = TRUE; + } + + p_pi->subnet_prefix = p_mgr->p_subn->opt.subnet_prefix; + if (memcmp(&p_pi->subnet_prefix, &p_old_pi->subnet_prefix, + sizeof(p_pi->subnet_prefix))) + send_set = TRUE; + + p_port->lid = lid; + p_pi->base_lid = lid; + if (memcmp(&p_pi->base_lid, &p_old_pi->base_lid, + sizeof(p_pi->base_lid))) { + /* + * Reset stored base_lid. + * On successful send, we'll update it when we'll get a reply. + */ + osm_physp_set_base_lid(p_physp, 0); + send_set = TRUE; + p_mgr->dirty = TRUE; + } + + /* + We are updating the ports with our local sm_base_lid + if for some reason currently received SM LID is different from our SM LID, + need to send client reregister to this port + */ + p_pi->master_sm_base_lid = p_mgr->p_subn->sm_base_lid; + if (memcmp(&p_pi->master_sm_base_lid, &p_old_pi->master_sm_base_lid, + sizeof(p_pi->master_sm_base_lid))) { + send_client_rereg = TRUE; + send_set = TRUE; + } + + p_pi->m_key_lease_period = p_mgr->p_subn->opt.m_key_lease_period; + if (memcmp(&p_pi->m_key_lease_period, &p_old_pi->m_key_lease_period, + sizeof(p_pi->m_key_lease_period))) + send_set = TRUE; + + p_pi->mkey_lmc = 0; + ib_port_info_set_mpb(p_pi, p_mgr->p_subn->opt.m_key_protect_bits); + if (ib_port_info_get_mpb(p_pi) != ib_port_info_get_mpb(p_old_pi)) + send_set = TRUE; + + /* + we want to set the timeout for both the switch port 0 + and the CA ports + */ + ib_port_info_set_timeout(p_pi, 
p_mgr->p_subn->opt.subnet_timeout); + if (ib_port_info_get_timeout(p_pi) != + ib_port_info_get_timeout(p_old_pi)) + send_set = TRUE; + + if (port_num != 0) { + /* + CAs don't have a port 0, and for switch port 0, + the state bits are ignored. + This is not the switch management port + */ + + /* p_pi->mkey_lmc is initialized earlier */ + ib_port_info_set_lmc(p_pi, p_mgr->p_subn->opt.lmc); + if (ib_port_info_get_lmc(p_pi) != + ib_port_info_get_lmc(p_old_pi)) + send_set = TRUE; + + /* calc new op_vls and mtu */ + op_vls = osm_physp_calc_link_op_vls(p_mgr->p_log, p_mgr->p_subn, + p_physp, + ib_port_info_get_op_vls(p_old_pi)); + mtu = osm_physp_calc_link_mtu(p_mgr->p_log, p_physp, + ib_port_info_get_neighbor_mtu(p_old_pi)); + + ib_port_info_set_neighbor_mtu(p_pi, mtu); + + if (ib_port_info_get_neighbor_mtu(p_pi) != + ib_port_info_get_neighbor_mtu(p_old_pi)) + send_set = TRUE; + + ib_port_info_set_op_vls(p_pi, op_vls); + if (ib_port_info_get_op_vls(p_pi) != + ib_port_info_get_op_vls(p_old_pi)) + send_set = TRUE; + + /* + Several timeout mechanisms: + */ + ib_port_info_set_phy_and_overrun_err_thd(p_pi, + p_mgr->p_subn->opt. + local_phy_errors_threshold, + p_mgr->p_subn->opt. + overrun_errors_threshold); + + if (p_pi->error_threshold != p_old_pi->error_threshold) + send_set = TRUE; + + /* + To reset the port state machine we can send + PortInfo.State = DOWN. (see: 7.2.7 p171 lines:10-19) + */ + if (mtu != ib_port_info_get_neighbor_mtu(p_old_pi) || + op_vls != ib_port_info_get_op_vls(p_old_pi)) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Sending Link Down to GUID 0x%016" + PRIx64 " port %d due to op_vls or " + "mtu change. MTU:%u,%u VL_CAP:%u,%u\n", + cl_ntoh64(osm_physp_get_port_guid(p_physp)), + port_num, mtu, + ib_port_info_get_neighbor_mtu(p_old_pi), + op_vls, ib_port_info_get_op_vls(p_old_pi)); + + /* + we need to make sure the internal DB will follow the + fact that the remote port is also going through + "down" state into "init"... 
+ */ + lid_mgr_set_remote_pi_state_to_init(p_mgr, p_physp); + + ib_port_info_set_port_state(p_pi, IB_LINK_DOWN); + if (ib_port_info_get_port_state(p_pi) != + ib_port_info_get_port_state(p_old_pi)) + send_set = TRUE; + } + } else if (ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info)) { + /* + * Configure Enh. SP0: + * Set MTU according to the mtu_cap. + * Set LMC if lmc_esp0 is defined. + */ + ib_port_info_set_neighbor_mtu(p_pi, + ib_port_info_get_mtu_cap + (p_old_pi)); + if (ib_port_info_get_neighbor_mtu(p_pi) != + ib_port_info_get_neighbor_mtu(p_old_pi)) + send_set = TRUE; + + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Updating neighbor_mtu on switch GUID 0x%016" PRIx64 + " port 0 to:%u\n", + cl_ntoh64(osm_physp_get_port_guid(p_physp)), + ib_port_info_get_neighbor_mtu(p_pi)); + + /* Configure LMC on enhanced SP0 */ + if (p_mgr->p_subn->opt.lmc_esp0) { + /* p_pi->mkey_lmc is initialized earlier */ + ib_port_info_set_lmc(p_pi, p_mgr->p_subn->opt.lmc); + if (ib_port_info_get_lmc(p_pi) != + ib_port_info_get_lmc(p_old_pi)) + send_set = TRUE; + } + } + + context.pi_context.node_guid = osm_node_get_node_guid(p_node); + context.pi_context.port_guid = osm_physp_get_port_guid(p_physp); + context.pi_context.set_method = TRUE; + context.pi_context.light_sweep = FALSE; + context.pi_context.active_transition = FALSE; + + /* + For ports supporting the ClientReregistration Vol1 (v1.2) p811 14.4.11: + need to set the cli_rereg bit when current SM LID at the Host + is different from our SM LID, + also if we are in first_time_master_sweep, + also if this port was just now discovered, then we should also set + the cli_rereg bit (we know that the port was just discovered + if its is_new field is set). 
+ */ + if ((send_client_rereg || + p_mgr->p_subn->first_time_master_sweep == TRUE || p_port->is_new) + && !p_mgr->p_subn->opt.no_clients_rereg + && (p_old_pi->capability_mask & IB_PORT_CAP_HAS_CLIENT_REREG)) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Setting client rereg on %s, port %d\n", + p_port->p_node->print_desc, p_port->p_physp->port_num); + ib_port_info_set_client_rereg(p_pi, 1); + context.pi_context.client_rereg = TRUE; + send_set = TRUE; + } else { + ib_port_info_set_client_rereg(p_pi, 0); + context.pi_context.client_rereg = FALSE; + } + + /* We need to send the PortInfo Set request with the new sm_lid + in the following cases: + 1. There is a change in the values (send_set == TRUE) + 2. first_time_master_sweep flag on the subnet is TRUE. This means the + SM just became master, and it then needs to send a PortInfo Set to + every port. + */ + if (p_mgr->p_subn->first_time_master_sweep == TRUE) + send_set = TRUE; + + if (!send_set) + goto Exit; + + status = osm_req_set(p_mgr->sm, osm_physp_get_dr_path_ptr(p_physp), + payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO, + cl_hton32(osm_physp_get_port_num(p_physp)), + FALSE, ib_port_info_get_m_key(&p_physp->port_info), + 0, CL_DISP_MSGID_NONE, &context); + if (status != IB_SUCCESS) + ret = -1; + /* If we sent a new mkey above, update our guid2mkey map + now, on the assumption that the SubnSet succeeds + */ + if (update_mkey) + osm_db_guid2mkey_set(p_mgr->p_subn->p_g2m, + cl_ntoh64(p_physp->port_guid), + cl_ntoh64(p_pi->m_key)); + +Exit: + OSM_LOG_EXIT(p_mgr->p_log); + return ret; +} + +/********************************************************************** + Processes our own node + Lock must already be held. +**********************************************************************/ +static int lid_mgr_process_our_sm_node(IN osm_lid_mgr_t * p_mgr) +{ + osm_port_t *p_port; + uint16_t min_lid_ho; + uint16_t max_lid_ho; + int ret; + + OSM_LOG_ENTER(p_mgr->p_log); + + /* + Acquire our own port object. 
+ */ + p_port = osm_get_port_by_guid(p_mgr->p_subn, + p_mgr->p_subn->sm_port_guid); + if (!p_port) { + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 0308: " + "Can't acquire SM's port object, GUID 0x%016" PRIx64 + "\n", cl_ntoh64(p_mgr->p_subn->sm_port_guid)); + ret = -1; + goto Exit; + } + + /* + Determine the LID this SM will use for its own port. + Be careful. With an LMC > 0, the bottom of the LID range becomes + unusable, since port hardware will mask off least significant bits, + leaving a LID of 0 (invalid). Therefore, make sure that we always + configure the SM with a LID that has non-zero bits, even after + LMC masking by hardware. + */ + lid_mgr_get_port_lid(p_mgr, p_port, &min_lid_ho, &max_lid_ho); + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Current base LID is %u\n", min_lid_ho); + /* + Update subnet object. + */ + p_mgr->p_subn->master_sm_base_lid = cl_hton16(min_lid_ho); + p_mgr->p_subn->sm_base_lid = cl_hton16(min_lid_ho); + + OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE, + "Assigning SM's port 0x%016" PRIx64 + "\n\t\t\t\tto LID range [%u,%u]\n", + cl_ntoh64(osm_port_get_guid(p_port)), min_lid_ho, max_lid_ho); + + /* + Set the PortInfo the Physical Port associated with this Port. + */ + ret = lid_mgr_set_physp_pi(p_mgr, p_port, p_port->p_physp, + cl_hton16(min_lid_ho)); + +Exit: + OSM_LOG_EXIT(p_mgr->p_log); + return ret; +} + +int osm_lid_mgr_process_sm(IN osm_lid_mgr_t * p_mgr) +{ + int ret; + + OSM_LOG_ENTER(p_mgr->p_log); + + CL_ASSERT(p_mgr->p_subn->sm_port_guid); + + CL_PLOCK_EXCL_ACQUIRE(p_mgr->p_lock); + + /* initialize the port_lid_tbl and empty ranges list following the + persistent db */ + lid_mgr_init_sweep(p_mgr); + + ret = lid_mgr_process_our_sm_node(p_mgr); + + CL_PLOCK_RELEASE(p_mgr->p_lock); + + OSM_LOG_EXIT(p_mgr->p_log); + return ret; +} + +/********************************************************************** + 1 go through all ports in the subnet. 
+ 1.1 call lid_mgr_get_port_lid + 1.2 if a change is required send the port info + 2 if any change send the signal PENDING... +**********************************************************************/ +int osm_lid_mgr_process_subnet(IN osm_lid_mgr_t * p_mgr) +{ + cl_qmap_t *p_port_guid_tbl; + osm_port_t *p_port; + ib_net64_t port_guid; + int lid_changed, ret = 0; + uint16_t min_lid_ho, max_lid_ho; + + CL_ASSERT(p_mgr); + + OSM_LOG_ENTER(p_mgr->p_log); + + CL_PLOCK_EXCL_ACQUIRE(p_mgr->p_lock); + + CL_ASSERT(p_mgr->p_subn->sm_port_guid); + + p_port_guid_tbl = &p_mgr->p_subn->port_guid_tbl; + + for (p_port = (osm_port_t *) cl_qmap_head(p_port_guid_tbl); + p_port != (osm_port_t *) cl_qmap_end(p_port_guid_tbl); + p_port = (osm_port_t *) cl_qmap_next(&p_port->map_item)) { + port_guid = osm_port_get_guid(p_port); + + /* + Our own port is a special case in that we want to + assign a LID to ourselves first, since we have to + advertise that LID value to the other ports. + + For that reason, our node is treated separately and + we will not add it to any of these lists. + */ + if (port_guid == p_mgr->p_subn->sm_port_guid) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Skipping our own port 0x%016" PRIx64 "\n", + cl_ntoh64(port_guid)); + continue; + } + + /* + get the port lid range - we need to send it on first active + sweep or if there was a change (the result of + lid_mgr_get_port_lid) + */ + lid_changed = lid_mgr_get_port_lid(p_mgr, p_port, + &min_lid_ho, &max_lid_ho); + + /* we can call the function to update the port info as it known + to look for any field change and will only send an updated + if required */ + OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE, + "Assigned port 0x%016" PRIx64 ", %s LID [%u,%u]\n", + cl_ntoh64(port_guid), lid_changed ? 
"new" : "", + min_lid_ho, max_lid_ho); + + /* the proc returns the fact it sent a set port info */ + if (lid_mgr_set_physp_pi(p_mgr, p_port, p_port->p_physp, + cl_hton16(min_lid_ho))) + ret = -1; + } /* all ports */ + + /* store the guid to lid table in persistent db */ + osm_db_store(p_mgr->p_g2l, p_mgr->p_subn->opt.fsync_high_avail_files); + + CL_PLOCK_RELEASE(p_mgr->p_lock); + + OSM_LOG_EXIT(p_mgr->p_log); + return ret; +} diff --git a/opensm/osm_lin_fwd_rcv.c b/opensm/osm_lin_fwd_rcv.c new file mode 100644 index 0000000..b798ce5 --- /dev/null +++ b/opensm/osm_lin_fwd_rcv.c @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005,2008 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_lft_rcv_t. + * This object represents the Linear Forwarding Table Receiver object. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#define FILE_ID OSM_FILE_LIN_FWD_RCV_C +#include +#include +#include +#include + +void osm_lft_rcv_process(IN void *context, IN void *data) +{ + osm_sm_t *sm = context; + osm_madw_t *p_madw = data; + ib_smp_t *p_smp; + uint32_t block_num; + osm_switch_t *p_sw; + osm_lft_context_t *p_lft_context; + uint8_t *p_block; + ib_net64_t node_guid; + osm_epi_lft_change_event_t lft_change; + ib_api_status_t status; + + CL_ASSERT(sm); + + OSM_LOG_ENTER(sm->p_log); + + CL_ASSERT(p_madw); + + p_smp = osm_madw_get_smp_ptr(p_madw); + p_block = ib_smp_get_payload_ptr(p_smp); + block_num = cl_ntoh32(p_smp->attr_mod); + + /* + Acquire the switch object for this switch. 
+ */ + p_lft_context = osm_madw_get_lft_context_ptr(p_madw); + node_guid = p_lft_context->node_guid; + + if (ib_smp_get_status(p_smp)) { + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "MAD status 0x%x received\n", + cl_ntoh16(ib_smp_get_status(p_smp))); + goto Exit; + } + + CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); + p_sw = osm_get_switch_by_guid(sm->p_subn, node_guid); + + if (!p_sw) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0401: " + "LFT received for nonexistent node " + "0x%" PRIx64 "\n", cl_ntoh64(node_guid)); + } else { + status = osm_switch_set_lft_block(p_sw, p_block, block_num); + if (status == IB_SUCCESS) { + if (sm->p_subn->first_time_master_sweep == FALSE) { + lft_change.p_sw = p_sw; + lft_change.flags = LFT_CHANGED_BLOCK; + lft_change.lft_top = 0; + lft_change.block_num = block_num; + osm_opensm_report_event(sm->p_subn->p_osm, + OSM_EVENT_ID_LFT_CHANGE, + &lft_change); + } + } else { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0402: " + "Setting forwarding table block %u failed (%s)" + ", Switch 0x%" PRIx64 " %s\n", block_num, + ib_get_err_str(status), cl_ntoh64(node_guid), + p_sw->p_node->print_desc); + } + } + + CL_PLOCK_RELEASE(sm->p_lock); +Exit: + OSM_LOG_EXIT(sm->p_log); +} diff --git a/opensm/osm_link_mgr.c b/opensm/osm_link_mgr.c new file mode 100644 index 0000000..ca69375 --- /dev/null +++ b/opensm/osm_link_mgr.c @@ -0,0 +1,599 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2009-2011 ZIH, TU Dresden, Federal Republic of Germany. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_link_mgr_t. + * This file implements the Link Manager object. 
+ */
+
+#if HAVE_CONFIG_H
+# include
+#endif /* HAVE_CONFIG_H */
+
+#include
+#include
+#include
+#include
+#define FILE_ID OSM_FILE_LINK_MGR_C
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+/*
+ * Return the SL to use as the master SM SL for p_physp.
+ *
+ * Falls back to the configured opt.sm_sl when no routing engine is in
+ * use, when the engine has no path_sl callback, or when the port's base
+ * LID is 0. Otherwise the engine's path_sl callback is asked for the SL
+ * between the port's base LID and the SM's base LID.
+ */
+static uint8_t link_mgr_get_smsl(IN osm_sm_t * sm, IN osm_physp_t * p_physp)
+{
+	osm_opensm_t *p_osm = sm->p_subn->p_osm;
+	struct osm_routing_engine *re = p_osm->routing_engine_used;
+	ib_net16_t slid;
+	ib_net16_t smlid;
+	uint8_t sl;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	if (!(re && re->path_sl &&
+	      (slid = osm_physp_get_base_lid(p_physp)))) {
+		/*
+		 * Use default SL if there is no routing engine, it does
+		 * not provide a path SL lookup callback, or the port has
+		 * no base LID yet (slid == 0).
+		 */
+		OSM_LOG_EXIT(sm->p_log);
+		return sm->p_subn->opt.sm_sl;
+	}
+
+	smlid = sm->p_subn->sm_base_lid;
+
+	/* Call into routing engine to find proper SL */
+	sl = re->path_sl(re->context, sm->p_subn->opt.sm_sl,
+			 slid, smlid);
+
+	OSM_LOG_EXIT(sm->p_log);
+	return sl;
+}
+/*
+ * Build a PortInfo Set for p_physp from its cached PortInfo, requesting
+ * port_state, and send it only when some field differs from the cached
+ * copy or a forced send applies (see the comment above the
+ * first_time_master_sweep test). A Mellanox ExtendedPortInfo Set is
+ * sent in addition when the FDR10 enable state has to change.
+ *
+ * Returns 0, or -1 if an osm_req_set() call reported a failure.
+ */
+static int link_mgr_set_physp_pi(osm_sm_t * sm, IN osm_physp_t * p_physp,
+				 IN uint8_t port_state)
+{
+	uint8_t payload[IB_SMP_DATA_SIZE], payload2[IB_SMP_DATA_SIZE];
+	ib_port_info_t *p_pi = (ib_port_info_t *) payload;
+	ib_mlnx_ext_port_info_t *p_epi = (ib_mlnx_ext_port_info_t *) payload2;
+	const ib_port_info_t *p_old_pi;
+	const ib_mlnx_ext_port_info_t *p_old_epi;
+	osm_madw_context_t context;
+	osm_node_t *p_node;
+	ib_api_status_t status;
+	uint8_t port_num, mtu, op_vls, smsl = OSM_DEFAULT_SL;
+	boolean_t esp0 = FALSE, send_set = FALSE, send_set2 = FALSE;
+	osm_physp_t *p_remote_physp, *physp0 = NULL;
+	int issue_ext = 0, fdr10_change = 0;
+	int ret = 0;
+	ib_net32_t attr_mod, cap_mask;
+	boolean_t update_mkey = FALSE;
+	ib_net64_t m_key = 0;
+	osm_port_t *p_port;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	p_node = osm_physp_get_node_ptr(p_physp);
+	CL_ASSERT(p_node);
+
+	p_old_pi = &p_physp->port_info;
+
+	port_num = osm_physp_get_port_num(p_physp);
+
+	memcpy(payload, p_old_pi, sizeof(ib_port_info_t));	/* start from the cached PortInfo */
+
+	if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH ||
+	    port_num == 0) {
+		/* Need to make sure LID and SMLID fields in PortInfo are not 0 */
+		if (!p_pi->base_lid) {
+			p_port = osm_get_port_by_guid(sm->p_subn,
+						      osm_physp_get_port_guid(p_physp));
+			CL_ASSERT(p_port);
+			p_pi->base_lid = p_port->lid;
+			sm->lid_mgr.dirty = TRUE;
+			send_set = TRUE;
+		}
+
+		/* we are initializing the ports with our local sm_base_lid */
+		p_pi->master_sm_base_lid = sm->p_subn->sm_base_lid;
+		if (p_pi->master_sm_base_lid != p_old_pi->master_sm_base_lid)
+			send_set = TRUE;
+	}
+
+	if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH)
+		physp0 = osm_node_get_physp_ptr(p_node, 0);
+
+	if (port_num == 0) {
+		/*
+		   CAs don't have a port 0, and for switch port 0,
+		   we need to check if this is enhanced or base port 0.
+		   For base port 0 the following parameters are not valid
+		   (IBA 1.2.1 p.830 table 146).
+
+		   NOTE(review): the ERR 4201 path below jumps to Exit with
+		   ret still 0, so the caller sees success - confirm that
+		   this is intended.
+		 */
+		if (!p_node->sw) {
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4201: "
+				"Cannot find switch by guid: 0x%" PRIx64 "\n",
+				cl_ntoh64(p_node->node_info.node_guid));
+			goto Exit;
+		}
+
+		if (ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info)
+		    == FALSE) {
+
+			/* Even for base port 0 we might have to set smsl
+			   (if we are using lash routing) */
+			smsl = link_mgr_get_smsl(sm, p_physp);
+			if (smsl != ib_port_info_get_master_smsl(p_old_pi)) {
+				send_set = TRUE;
+				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+					"Setting SMSL to %d on port 0 GUID 0x%016"
+					PRIx64 "\n", smsl,
+					cl_ntoh64(osm_physp_get_port_guid
+						  (p_physp)));
+				/* Enter if base lid and master_sm_lid didn't change */
+			} else if (send_set == FALSE) {
+				/* This means the switch doesn't support
+				   enhanced port 0 and we don't need to
+				   change SMSL. Can skip it. */
+				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+					"Skipping port 0, GUID 0x%016" PRIx64
+					"\n",
+					cl_ntoh64(osm_physp_get_port_guid
+						  (p_physp)));
+				goto Exit;
+			}
+		} else
+			esp0 = TRUE;
+	}
+
+	/*
+	   Should never write back a value that is bigger than 3 in
+	   the PortPhysicalState field - so can not simply copy!
+
+	   Actually we want to write there:
+	   port physical state - no change,
+	   link down default state = polling
+	   port state - as requested.
+	 */
+	p_pi->state_info2 = 0x02;
+	ib_port_info_set_port_state(p_pi, port_state);
+
+	/* Determine the port's M_Key: switch external ports use the M_Key
+	   cached for the switch's port 0, every other port uses its own.
+	   NOTE(review): physp0 is dereferenced without a NULL check -
+	   confirm osm_node_get_physp_ptr(p_node, 0) cannot return NULL
+	   for a switch node. */
+	if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH &&
+	    osm_physp_get_port_num(p_physp) != 0)
+		m_key = ib_port_info_get_m_key(&physp0->port_info);
+	else
+		m_key = ib_port_info_get_m_key(p_pi);
+
+	/* Check whether this is base port0 smsl handling only */
+	if (port_num == 0 && esp0 == FALSE) {
+		ib_port_info_set_master_smsl(p_pi, smsl);
+		goto Send;
+	}
+
+	/*
+	   PAST THIS POINT WE ARE HANDLING EITHER A NON PORT 0 OR ENHANCED PORT 0
+	 */
+
+	if (ib_port_info_get_link_down_def_state(p_pi) !=
+	    ib_port_info_get_link_down_def_state(p_old_pi))
+		send_set = TRUE;
+
+	/* didn't get PortInfo before */
+	if (!ib_port_info_get_port_state(p_old_pi))
+		send_set = TRUE;
+
+	/* we only change port fields if we do not change state */
+	if (port_state == IB_LINK_NO_CHANGE) {
+		/* The following fields are relevant only for CA port, router, or Enh. SP0 */
+		if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH ||
+		    port_num == 0) {
+			p_pi->m_key = sm->p_subn->opt.m_key;
+			if (memcmp(&p_pi->m_key, &p_old_pi->m_key,
+				   sizeof(p_pi->m_key))) {
+				update_mkey = TRUE;
+				send_set = TRUE;
+			}
+
+			p_pi->subnet_prefix = sm->p_subn->opt.subnet_prefix;
+			if (memcmp(&p_pi->subnet_prefix,
+				   &p_old_pi->subnet_prefix,
+				   sizeof(p_pi->subnet_prefix)))
+				send_set = TRUE;
+
+			smsl = link_mgr_get_smsl(sm, p_physp);
+			if (smsl != ib_port_info_get_master_smsl(p_old_pi)) {
+
+				ib_port_info_set_master_smsl(p_pi, smsl);
+
+				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+					"Setting SMSL to %d on GUID 0x%016"
+					PRIx64 ", port %d\n", smsl,
+					cl_ntoh64(osm_physp_get_port_guid
+						  (p_physp)), port_num);
+
+				send_set = TRUE;
+			}
+
+			p_pi->m_key_lease_period =
+			    sm->p_subn->opt.m_key_lease_period;
+			if (memcmp(&p_pi->m_key_lease_period,
+				   &p_old_pi->m_key_lease_period,
+				   sizeof(p_pi->m_key_lease_period)))
+				send_set = TRUE;
+
+			p_pi->mkey_lmc = 0;	/* cleared first; mpb and lmc are set again just below */
+			ib_port_info_set_mpb(p_pi, sm->p_subn->opt.m_key_protect_bits);
+			if (esp0 == FALSE || sm->p_subn->opt.lmc_esp0)
+				ib_port_info_set_lmc(p_pi, sm->p_subn->opt.lmc);
+			if (ib_port_info_get_lmc(p_old_pi) !=
+			    ib_port_info_get_lmc(p_pi) ||
+			    ib_port_info_get_mpb(p_old_pi) !=
+			    ib_port_info_get_mpb(p_pi))
+				send_set = TRUE;
+
+			ib_port_info_set_timeout(p_pi,
+						 sm->p_subn->opt.
+						 subnet_timeout);
+			if (ib_port_info_get_timeout(p_pi) !=
+			    ib_port_info_get_timeout(p_old_pi))
+				send_set = TRUE;
+		}
+
+		/*
+		   Several timeout mechanisms:
+		   head-of-queue lifetime and VL stall count. Router ports
+		   and switch ports facing a non-switch peer get the leaf_*
+		   option values; switch ports facing another switch get the
+		   regular ones. Only ports other than 0 that have a remote
+		   port are touched.
+		 */
+		p_remote_physp = osm_physp_get_remote(p_physp);
+		if (port_num != 0 && p_remote_physp) {
+			if (osm_node_get_type(osm_physp_get_node_ptr(p_physp))
+			    == IB_NODE_TYPE_ROUTER) {
+				ib_port_info_set_hoq_lifetime(p_pi,
+							      sm->p_subn->
+							      opt.
+							      leaf_head_of_queue_lifetime);
+			} else
+			    if (osm_node_get_type
+				(osm_physp_get_node_ptr(p_physp)) ==
+				IB_NODE_TYPE_SWITCH) {
+				/* Is remote end CA or router (a leaf port) ? */
+				if (osm_node_get_type
+				    (osm_physp_get_node_ptr(p_remote_physp)) !=
+				    IB_NODE_TYPE_SWITCH) {
+					ib_port_info_set_hoq_lifetime(p_pi,
+								      sm->
+								      p_subn->
+								      opt.
+								      leaf_head_of_queue_lifetime);
+					ib_port_info_set_vl_stall_count(p_pi,
+									sm->
+									p_subn->
+									opt.
+									leaf_vl_stall_count);
+				} else {
+					ib_port_info_set_hoq_lifetime(p_pi,
+								      sm->
+								      p_subn->
+								      opt.
+								      head_of_queue_lifetime);
+					ib_port_info_set_vl_stall_count(p_pi,
+									sm->
+									p_subn->
+									opt.
+									vl_stall_count);
+				}
+			}
+			if (ib_port_info_get_hoq_lifetime(p_pi) !=
+			    ib_port_info_get_hoq_lifetime(p_old_pi) ||
+			    ib_port_info_get_vl_stall_count(p_pi) !=
+			    ib_port_info_get_vl_stall_count(p_old_pi))
+				send_set = TRUE;
+		}
+
+		ib_port_info_set_phy_and_overrun_err_thd(p_pi,
+							 sm->p_subn->opt.
+							 local_phy_errors_threshold,
+							 sm->p_subn->opt.
+							 overrun_errors_threshold);
+		if (p_pi->error_threshold != p_old_pi->error_threshold)
+			send_set = TRUE;
+
+		/*
+		   Set the easy common parameters for all port types,
+		   then determine the neighbor MTU.
+		 */
+		if (sm->p_subn->opt.force_link_width &&
+		    (sm->p_subn->opt.force_link_width < IB_LINK_WIDTH_ACTIVE_2X ||
+		     (p_pi->capability_mask2 &
+		      IB_PORT_CAP2_IS_LINK_WIDTH_2X_SUPPORTED)) &&
+		    (sm->p_subn->opt.force_link_width != IB_LINK_WIDTH_SET_LWS ||
+		     p_pi->link_width_enabled != p_pi->link_width_supported)) {
+			p_pi->link_width_enabled = sm->p_subn->opt.force_link_width;
+			if (p_pi->link_width_enabled != p_old_pi->link_width_enabled)
+				send_set = TRUE;
+		}
+
+		if (sm->p_subn->opt.force_link_speed &&
+		    (sm->p_subn->opt.force_link_speed != IB_LINK_SPEED_SET_LSS ||
+		     ib_port_info_get_link_speed_enabled(p_pi) !=
+		     ib_port_info_get_link_speed_sup(p_pi))) {
+			ib_port_info_set_link_speed_enabled(p_pi,
+							    sm->p_subn->opt.
+							    force_link_speed);
+			if (p_pi->link_speed != p_old_pi->link_speed)
+				send_set = TRUE;
+		}
+
+		if (sm->p_subn->opt.fdr10 &&
+		    p_physp->ext_port_info.link_speed_supported & FDR10) {
+			if (sm->p_subn->opt.fdr10 == 1) {	/* enable */
+				if (!(p_physp->ext_port_info.link_speed_enabled & FDR10))
+					fdr10_change = 1;
+			} else {	/* disable */
+				if (p_physp->ext_port_info.link_speed_enabled & FDR10)
+					fdr10_change = 1;
+			}
+			if (fdr10_change) {
+				p_old_epi = &p_physp->ext_port_info;
+				memcpy(payload2, p_old_epi,
+				       sizeof(ib_mlnx_ext_port_info_t));
+				p_epi->state_change_enable = 0x01;
+				if (sm->p_subn->opt.fdr10 == 1)
+					p_epi->link_speed_enabled = FDR10;
+				else
+					p_epi->link_speed_enabled = 0;
+				send_set2 = TRUE;	/* sent at SEND_EPI below */
+			}
+		}
+
+		if (osm_node_get_type(p_physp->p_node) == IB_NODE_TYPE_SWITCH &&
+		    osm_physp_get_port_num(p_physp) != 0) {
+			cap_mask = physp0->port_info.capability_mask;
+		} else
+			cap_mask = p_pi->capability_mask;
+
+		if (cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS)
+			issue_ext = 1;
+
+		/* Do peer ports support extended link speeds ?
+		   NOTE(review): rphysp0 is dereferenced without a NULL
+		   check - confirm port 0 always exists on a remote switch. */
+		if (port_num != 0 && p_remote_physp) {
+			osm_physp_t *rphysp0;
+			ib_net32_t rem_cap_mask;
+
+			if (osm_node_get_type(p_remote_physp->p_node) ==
+			    IB_NODE_TYPE_SWITCH) {
+				rphysp0 = osm_node_get_physp_ptr(p_remote_physp->p_node, 0);
+				rem_cap_mask = rphysp0->port_info.capability_mask;
+			} else
+				rem_cap_mask = p_remote_physp->port_info.capability_mask;
+
+			if (cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS &&
+			    rem_cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS) {
+				if (sm->p_subn->opt.force_link_speed_ext &&
+				    (sm->p_subn->opt.force_link_speed_ext != IB_LINK_SPEED_EXT_SET_LSES ||
+				     p_pi->link_speed_ext_enabled !=
+				     ib_port_info_get_link_speed_ext_sup(p_pi))) {
+					p_pi->link_speed_ext_enabled = sm->p_subn->opt.force_link_speed_ext;
+					if (p_pi->link_speed_ext_enabled !=
+					    p_old_pi->link_speed_ext_enabled)
+						send_set = TRUE;
+				}
+			}
+		}
+
+		/* calc new op_vls and mtu */
+		op_vls =
+		    osm_physp_calc_link_op_vls(sm->p_log, sm->p_subn, p_physp,
+					       ib_port_info_get_op_vls(p_old_pi));
+		mtu = osm_physp_calc_link_mtu(sm->p_log, p_physp,
+					      ib_port_info_get_neighbor_mtu(p_old_pi));
+
+		ib_port_info_set_neighbor_mtu(p_pi, mtu);
+		if (ib_port_info_get_neighbor_mtu(p_pi) !=
+		    ib_port_info_get_neighbor_mtu(p_old_pi))
+			send_set = TRUE;
+
+		ib_port_info_set_op_vls(p_pi, op_vls);
+		if (ib_port_info_get_op_vls(p_pi) !=
+		    ib_port_info_get_op_vls(p_old_pi))
+			send_set = TRUE;
+
+		/* provide the vl_high_limit from the qos mgr */
+		if (sm->p_subn->opt.qos &&
+		    p_physp->vl_high_limit != p_old_pi->vl_high_limit) {
+			send_set = TRUE;
+			p_pi->vl_high_limit = p_physp->vl_high_limit;
+		}
+	}
+
+Send:
+	context.pi_context.active_transition = FALSE;
+	if (port_state != IB_LINK_NO_CHANGE &&
+	    port_state != ib_port_info_get_port_state(p_old_pi)) {
+		send_set = TRUE;
+		if (port_state == IB_LINK_ACTIVE)
+			context.pi_context.active_transition = TRUE;
+	}
+
+	context.pi_context.node_guid = osm_node_get_node_guid(p_node);
+	context.pi_context.port_guid = osm_physp_get_port_guid(p_physp);
+	context.pi_context.set_method = TRUE;
+	context.pi_context.light_sweep = FALSE;
+	context.pi_context.client_rereg = FALSE;
+
+	/* We need to send the PortInfoSet request with the new sm_lid
+	   in the following cases:
+	   1. There is a change in the values (send_set == TRUE)
+	   2. This is a switch external port (so it wasn't handled yet by
+	   osm_lid_mgr) and first_time_master_sweep flag on the subnet is TRUE,
+	   which means the SM just became master, and it then needs to send a
+	   PortInfoSet to every port.
+	 */
+	if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && port_num
+	    && sm->p_subn->first_time_master_sweep == TRUE)
+		send_set = TRUE;
+
+	if (!send_set)
+		goto SEND_EPI;
+
+	attr_mod = cl_hton32(port_num);
+	if (issue_ext)
+		attr_mod |= cl_hton32(1 << 31);	/* AM SMSupportExtendedSpeeds */
+	status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_physp),
+			     payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO,
+			     attr_mod, FALSE, m_key,
+			     0, CL_DISP_MSGID_NONE, &context);
+	if (status)
+		ret = -1;
+
+	/* If we sent a new mkey above, update our guid2mkey map
+	   now, on the assumption that the SubnSet succeeds.
+	   Note that the map is updated even when osm_req_set() just
+	   returned an error (status is not checked here).
+	*/
+	if (update_mkey)
+		osm_db_guid2mkey_set(sm->p_subn->p_g2m,
+				     cl_ntoh64(p_physp->port_guid),
+				     cl_ntoh64(p_pi->m_key));
+
+SEND_EPI:
+	if (send_set2) {
+		status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_physp),
+				     payload2, sizeof(payload2),
+				     IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO,
+				     cl_hton32(port_num), FALSE, m_key,
+				     0, CL_DISP_MSGID_NONE, &context);
+		if (status)
+			ret = -1;
+	}
+
+Exit:
+	OSM_LOG_EXIT(sm->p_log);
+	return ret;
+}
+/*
+ * Request link_state on the physical ports of p_node through
+ * link_mgr_set_physp_pi().
+ *
+ * Returns 0, or -1 if link_mgr_set_physp_pi() failed while requesting
+ * link_state on some port. The result of the IB_LINK_DOWN request made
+ * for ports whose remote port is missing or invalid is not checked.
+ */
+static int link_mgr_process_node(osm_sm_t * sm, IN osm_node_t * p_node,
+				 IN const uint8_t link_state)
+{
+	osm_physp_t *p_physp, *p_physp_remote;
+	uint32_t i, num_physp;
+	int ret = 0;
+	uint8_t current_state;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+		"Node 0x%" PRIx64 " going to %s\n",
+		cl_ntoh64(osm_node_get_node_guid(p_node)),
+		ib_get_port_state_str(link_state));
+
+	/*
+	   Set the PortInfo for every Physical Port associated
+	   with this node. The loop starts at port 0; only the
+	   remote-port check below treats port 0 specially.
+	   NOTE(review): this comment used to say iteration starts with
+	   port 1 because the linkstate is not applicable to the switch
+	   management port - confirm that processing port 0 is intended.
+	 */
+	num_physp = osm_node_get_num_physp(p_node);
+	for (i = 0; i < num_physp; i++) {
+		/*
+		   Skip port numbers that have no Physical Port object and
+		   ports whose current state is DOWN.
+		 */
+		p_physp = osm_node_get_physp_ptr(p_node, (uint8_t) i);
+		if (!p_physp)
+			continue;
+
+		current_state = osm_physp_get_port_state(p_physp);
+		if (current_state == IB_LINK_DOWN)
+			continue;
+
+		/*
+		   Set PortState to DOWN in case Remote Physical Port is
+		   unreachable. We have to check this for all ports, except
+		   port zero. A port that is already in INIT is left alone.
+		 */
+		p_physp_remote = osm_physp_get_remote(p_physp);
+		if ((i != 0) && (!p_physp_remote ||
+				 !osm_physp_is_valid(p_physp_remote))) {
+			if (current_state != IB_LINK_INIT)
+				link_mgr_set_physp_pi(sm, p_physp, IB_LINK_DOWN);
+			continue;
+		}
+
+		/*
+		   Normally we only send state update if state is lower
+		   than required state. However, when link_state is
+		   IB_LINK_NO_CHANGE the update is always attempted.
+		 */
+		if (link_state != IB_LINK_NO_CHANGE &&
+		    link_state <= current_state)
+			OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+				"Physical port %u already %s. Skipping\n",
+				p_physp->port_num,
+				ib_get_port_state_str(current_state));
+		else if (link_mgr_set_physp_pi(sm, p_physp, link_state))
+			ret = -1;
+	}
+
+	OSM_LOG_EXIT(sm->p_log);
+	return ret;
+}
+/*
+ * Run link_mgr_process_node() with link_state on every node in the
+ * subnet's node_guid_tbl while holding sm->p_lock exclusively.
+ *
+ * Returns 0, or -1 if processing failed for any node.
+ */
+int osm_link_mgr_process(osm_sm_t * sm, IN const uint8_t link_state)
+{
+	cl_qmap_t *p_node_guid_tbl;
+	osm_node_t *p_node;
+	int ret = 0;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	p_node_guid_tbl = &sm->p_subn->node_guid_tbl;
+
+	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
+
+	for (p_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl);
+	     p_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl);
+	     p_node = (osm_node_t *) cl_qmap_next(&p_node->map_item))
+		if (link_mgr_process_node(sm, p_node, link_state))
+			ret = -1;
+
+	CL_PLOCK_RELEASE(sm->p_lock);
+
+	OSM_LOG_EXIT(sm->p_log);
+	return ret;
+}
diff --git a/opensm/osm_mcast_fwd_rcv.c b/opensm/osm_mcast_fwd_rcv.c
new file mode 100644
index 0000000..6404f8c
--- /dev/null
+++ b/opensm/osm_mcast_fwd_rcv.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_mft_rcv_t. + * This object represents the Multicast Forwarding Table Receiver object. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_MCAST_FWD_RCV_C +#include +#include +#include +#include +#include + +void osm_mft_rcv_process(IN void *context, IN void *data) +{ + osm_sm_t *sm = context; + osm_madw_t *p_madw = data; + ib_smp_t *p_smp; + uint32_t block_num; + uint8_t position; + osm_switch_t *p_sw; + osm_mft_context_t *p_mft_context; + uint16_t *p_block; + ib_net64_t node_guid; + ib_api_status_t status; + + CL_ASSERT(sm); + + OSM_LOG_ENTER(sm->p_log); + + CL_ASSERT(p_madw); + + p_smp = osm_madw_get_smp_ptr(p_madw); + p_block = ib_smp_get_payload_ptr(p_smp); + block_num = cl_ntoh32(p_smp->attr_mod) & IB_MCAST_BLOCK_ID_MASK_HO; + position = (uint8_t) ((cl_ntoh32(p_smp->attr_mod) & + IB_MCAST_POSITION_MASK_HO) >> + IB_MCAST_POSITION_SHIFT); + + /* + Acquire the switch object for this switch. 
+ */ + p_mft_context = osm_madw_get_mft_context_ptr(p_madw); + node_guid = p_mft_context->node_guid; + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Setting MFT block %u, position %u, " + "Switch 0x%016" PRIx64 ", TID 0x%" PRIx64 "\n", + block_num, position, cl_ntoh64(node_guid), + cl_ntoh64(p_smp->trans_id)); + + if (ib_smp_get_status(p_smp)) { + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "MAD status 0x%x received\n", + cl_ntoh16(ib_smp_get_status(p_smp))); + goto Exit; + } + + CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); + p_sw = osm_get_switch_by_guid(sm->p_subn, node_guid); + + if (!p_sw) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0801: " + "MFT received for nonexistent node " + "0x%016" PRIx64 "\n", cl_ntoh64(node_guid)); + } else { + status = osm_switch_set_mft_block(p_sw, p_block, + (uint16_t) block_num, + position); + if (status != IB_SUCCESS) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0802: " + "Setting MFT block failed (%s)" + ", Switch 0x%016" PRIx64 + " (%s), block %u, position %u\n", + ib_get_err_str(status), cl_ntoh64(node_guid), + p_sw->p_node->print_desc, block_num, position); + } + } + + CL_PLOCK_RELEASE(sm->p_lock); +Exit: + OSM_LOG_EXIT(sm->p_log); +} diff --git a/opensm/osm_mcast_mgr.c b/opensm/osm_mcast_mgr.c new file mode 100644 index 0000000..6a7a138 --- /dev/null +++ b/opensm/osm_mcast_mgr.c @@ -0,0 +1,1225 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2010 HNR Consulting. All rights reserved. + * Copyright (C) 2012-2013 Tokyo Institute of Technology. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_mcast_mgr_t. + * This file implements the Multicast Manager object. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_MCAST_MGR_C +#include +#include +#include +#include +#include +#include +#include +#include + +static osm_mcast_work_obj_t *mcast_work_obj_new(IN osm_port_t * p_port) +{ + osm_mcast_work_obj_t *p_obj; + + /* + clean allocated memory to avoid assertion when trying to insert to + qlist. 
+ see cl_qlist_insert_tail(): CL_ASSERT(p_list_item->p_list != p_list) + */ + p_obj = malloc(sizeof(*p_obj)); + if (p_obj) { + memset(p_obj, 0, sizeof(*p_obj)); + p_obj->p_port = p_port; + } + + return p_obj; +} + +static void mcast_work_obj_delete(IN osm_mcast_work_obj_t * p_wobj) +{ + free(p_wobj); +} + +int osm_mcast_make_port_list_and_map(cl_qlist_t * list, cl_qmap_t * map, + osm_mgrp_box_t * mbox) +{ + cl_map_item_t *map_item; + cl_list_item_t *list_item; + osm_mgrp_t *mgrp; + osm_mcm_port_t *mcm_port; + osm_mcast_work_obj_t *wobj; + + cl_qmap_init(map); + cl_qlist_init(list); + + for (list_item = cl_qlist_head(&mbox->mgrp_list); + list_item != cl_qlist_end(&mbox->mgrp_list); + list_item = cl_qlist_next(list_item)) { + mgrp = cl_item_obj(list_item, mgrp, list_item); + for (map_item = cl_qmap_head(&mgrp->mcm_port_tbl); + map_item != cl_qmap_end(&mgrp->mcm_port_tbl); + map_item = cl_qmap_next(map_item)) { + /* Acquire the port object for this port guid, then + create the new worker object to build the list. 
*/ + mcm_port = cl_item_obj(map_item, mcm_port, map_item); + if (cl_qmap_get(map, mcm_port->port->guid) != + cl_qmap_end(map)) + continue; + wobj = mcast_work_obj_new(mcm_port->port); + if (!wobj) + return -1; + cl_qlist_insert_tail(list, &wobj->list_item); + cl_qmap_insert(map, mcm_port->port->guid, + &wobj->map_item); + } + } + return 0; +} + +void osm_mcast_drop_port_list(cl_qlist_t * list) +{ + while (cl_qlist_count(list)) + mcast_work_obj_delete((osm_mcast_work_obj_t *) + cl_qlist_remove_head(list)); +} + +void osm_purge_mtree(osm_sm_t * sm, IN osm_mgrp_box_t * mbox) +{ + OSM_LOG_ENTER(sm->p_log); + + if (mbox->root) + osm_mtree_destroy(mbox->root); + mbox->root = NULL; + + OSM_LOG_EXIT(sm->p_log); +} + +static void create_mgrp_switch_map(cl_qmap_t * m, cl_qlist_t * port_list) +{ + osm_mcast_work_obj_t *wobj; + osm_port_t *port; + osm_switch_t *sw; + ib_net64_t guid; + cl_list_item_t *i; + + cl_qmap_init(m); + for (i = cl_qlist_head(port_list); i != cl_qlist_end(port_list); + i = cl_qlist_next(i)) { + wobj = cl_item_obj(i, wobj, list_item); + port = wobj->p_port; + if (port->p_node->sw) { + sw = port->p_node->sw; + sw->is_mc_member = 1; + } else if (port->p_physp->p_remote_physp) { + sw = port->p_physp->p_remote_physp->p_node->sw; + sw->num_of_mcm++; + } else + continue; + guid = osm_node_get_node_guid(sw->p_node); + if (cl_qmap_get(m, guid) == cl_qmap_end(m)) + cl_qmap_insert(m, guid, &sw->mgrp_item); + } +} + +static void destroy_mgrp_switch_map(cl_qmap_t * m) +{ + osm_switch_t *sw; + cl_map_item_t *i; + + for (i = cl_qmap_head(m); i != cl_qmap_end(m); i = cl_qmap_next(i)) { + sw = cl_item_obj(i, sw, mgrp_item); + sw->num_of_mcm = 0; + sw->is_mc_member = 0; + } + cl_qmap_remove_all(m); +} + +/********************************************************************** + Calculate the maximal "min hops" from the given switch to any + of the group HCAs + **********************************************************************/ +#ifdef OSM_VENDOR_INTF_ANAFA +static 
float mcast_mgr_compute_avg_hops(osm_sm_t * sm, cl_qmap_t * m, + const osm_switch_t * this_sw) +{ + float avg_hops = 0; + uint32_t hops = 0; + uint32_t num_ports = 0; + uint16_t lid; + uint32_t least_hops; + cl_map_item_t *i; + osm_switch_t *sw; + + OSM_LOG_ENTER(sm->p_log); + + for (i = cl_qmap_head(m); i != cl_qmap_end(m); i = cl_qmap_next(i)) { + sw = cl_item_obj(i, sw, mcast_item); + lid = cl_ntoh16(osm_node_get_base_lid(sw->p_node, 0)); + least_hops = osm_switch_get_least_hops(this_sw, lid); + /* for all host that are MC members and attached to the switch, + we should add the (least_hops + 1) * number_of_such_hosts. + If switch itself is in the MC, we should add the least_hops only */ + hops += (least_hops + 1) * sw->num_of_mcm + + least_hops * sw->is_mc_member; + num_ports += sw->num_of_mcm + sw->is_mc_member; + } + + /* We shouldn't be here if there aren't any ports in the group. */ + CL_ASSERT(num_ports); + + avg_hops = (float)(hops / num_ports); + + OSM_LOG_EXIT(sm->p_log); + return avg_hops; +} +#else +static float mcast_mgr_compute_max_hops(osm_sm_t * sm, cl_qmap_t * m, + const osm_switch_t * this_sw) +{ + uint32_t max_hops = 0, hops; + uint16_t lid; + cl_map_item_t *i; + osm_switch_t *sw; + + OSM_LOG_ENTER(sm->p_log); + + /* + For each member of the multicast group, compute the + number of hops to its base LID. 
+ */ + for (i = cl_qmap_head(m); i != cl_qmap_end(m); i = cl_qmap_next(i)) { + sw = cl_item_obj(i, sw, mgrp_item); + lid = cl_ntoh16(osm_node_get_base_lid(sw->p_node, 0)); + hops = osm_switch_get_least_hops(this_sw, lid); + if (!sw->is_mc_member) + hops += 1; + if (hops > max_hops) + max_hops = hops; + } + + /* Note that at this point we might get (max_hops == 0), + which means that there's only one member in the mcast + group, and it's the current switch */ + + OSM_LOG_EXIT(sm->p_log); + return (float)max_hops; +} +#endif + +/********************************************************************** + This function attempts to locate the optimal switch for the + center of the spanning tree. The current algorithm chooses + a switch with the lowest average hop count to the members + of the multicast group. +**********************************************************************/ +static osm_switch_t *mcast_mgr_find_optimal_switch(osm_sm_t * sm, + cl_qlist_t * list) +{ + cl_qmap_t mgrp_sw_map; + cl_qmap_t *p_sw_tbl; + osm_switch_t *p_sw, *p_best_sw = NULL; + float hops = 0; + float best_hops = 10000; /* any big # will do */ + + OSM_LOG_ENTER(sm->p_log); + + p_sw_tbl = &sm->p_subn->sw_guid_tbl; + + create_mgrp_switch_map(&mgrp_sw_map, list); + for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl); + p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl); + p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) { + if (!osm_switch_supports_mcast(p_sw)) + continue; + +#ifdef OSM_VENDOR_INTF_ANAFA + hops = mcast_mgr_compute_avg_hops(sm, &mgrp_sw_map, p_sw); +#else + hops = mcast_mgr_compute_max_hops(sm, &mgrp_sw_map, p_sw); +#endif + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Switch 0x%016" PRIx64 ", hops = %f\n", + cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)), hops); + + if (hops < best_hops) { + p_best_sw = p_sw; + best_hops = hops; + } + } + + if (p_best_sw) + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Best switch is 0x%" PRIx64 " (%s), hops = %f\n", + 
cl_ntoh64(osm_node_get_node_guid(p_best_sw->p_node)), + p_best_sw->p_node->print_desc, best_hops); + else + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "No multicast capable switches detected\n"); + + destroy_mgrp_switch_map(&mgrp_sw_map); + OSM_LOG_EXIT(sm->p_log); + return p_best_sw; +} + +/********************************************************************** + This function returns the existing or optimal root switch for the tree. +**********************************************************************/ +osm_switch_t *osm_mcast_mgr_find_root_switch(osm_sm_t * sm, cl_qlist_t *list) +{ + osm_switch_t *p_sw = NULL; + + OSM_LOG_ENTER(sm->p_log); + + /* + We always look for the best multicast tree root switch. + Otherwise since we always start with a a single join + the root will be always on the first switch attached to it. + - Very bad ... + */ + p_sw = mcast_mgr_find_optimal_switch(sm, list); + + OSM_LOG_EXIT(sm->p_log); + return p_sw; +} + +static int mcast_mgr_set_mft_block(osm_sm_t * sm, IN osm_switch_t * p_sw, + uint32_t block_num, uint32_t position) +{ + osm_node_t *p_node; + osm_physp_t *p_physp; + osm_dr_path_t *p_path; + osm_madw_context_t context; + ib_api_status_t status; + uint32_t block_id_ho; + osm_mcast_tbl_t *p_tbl; + ib_net16_t block[IB_MCAST_BLOCK_SIZE]; + int ret = 0; + + CL_ASSERT(sm); + + OSM_LOG_ENTER(sm->p_log); + + CL_ASSERT(p_sw); + + p_node = p_sw->p_node; + + CL_ASSERT(p_node); + + p_physp = osm_node_get_physp_ptr(p_node, 0); + p_path = osm_physp_get_dr_path_ptr(p_physp); + + /* + Send multicast forwarding table blocks to the switch + as long as the switch indicates it has blocks needing + configuration. 
+ */ + + context.mft_context.node_guid = osm_node_get_node_guid(p_node); + context.mft_context.set_method = TRUE; + + p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw); + + if (osm_mcast_tbl_get_block(p_tbl, (uint16_t) block_num, + (uint8_t) position, block)) { + block_id_ho = block_num + (position << 28); + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Writing MFT block %u position %u to switch 0x%" PRIx64 + "\n", block_num, position, + cl_ntoh64(context.mft_context.node_guid)); + + status = osm_req_set(sm, p_path, (void *)block, sizeof(block), + IB_MAD_ATTR_MCAST_FWD_TBL, + cl_hton32(block_id_ho), FALSE, + ib_port_info_get_m_key(&p_physp->port_info), + 0, CL_DISP_MSGID_NONE, &context); + if (status != IB_SUCCESS) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A02: " + "Sending multicast fwd. tbl. block 0x%X to %s " + "failed (%s)\n", block_id_ho, + p_node->print_desc, ib_get_err_str(status)); + ret = -1; + } + } + + OSM_LOG_EXIT(sm->p_log); + return ret; +} + +/********************************************************************** + This is part of the recursive function to compute the paths in the + spanning tree that emanate from this switch. On input, the p_list + contains the group members that must be routed from this switch. +**********************************************************************/ +static void mcast_mgr_subdivide(osm_sm_t * sm, uint16_t mlid_ho, + osm_switch_t * p_sw, cl_qlist_t * p_list, + cl_qlist_t * list_array, uint8_t array_size) +{ + uint8_t port_num; + boolean_t ignore_existing; + osm_mcast_work_obj_t *p_wobj; + + OSM_LOG_ENTER(sm->p_log); + + /* + For Multicast Groups, we don't want to count on previous + configurations - since we can easily generate a storm + by loops. + */ + ignore_existing = TRUE; + + /* + Subdivide the set of ports into non-overlapping subsets + that will be routed to other switches. 
+ */ + while ((p_wobj = + (osm_mcast_work_obj_t *) cl_qlist_remove_head(p_list)) != + (osm_mcast_work_obj_t *) cl_qlist_end(p_list)) { + port_num = + osm_switch_recommend_mcast_path(p_sw, p_wobj->p_port, + mlid_ho, ignore_existing); + if (port_num == OSM_NO_PATH) { + /* + This typically occurs if the switch does not support + multicast and the multicast tree must branch at this + switch. + */ + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A03: " + "Error routing MLID 0x%X through switch 0x%" + PRIx64 " %s\n" + "\t\t\t\tNo multicast paths from this switch " + "for port with LID %u\n", mlid_ho, + cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)), + p_sw->p_node->print_desc, + cl_ntoh16(osm_port_get_base_lid + (p_wobj->p_port))); + mcast_work_obj_delete(p_wobj); + continue; + } + + if (port_num >= array_size) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A04: " + "Error routing MLID 0x%X through switch 0x%" + PRIx64 " %s\n" + "\t\t\t\tNo multicast paths from this switch " + "to port with LID %u\n", mlid_ho, + cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)), + p_sw->p_node->print_desc, + cl_ntoh16(osm_port_get_base_lid + (p_wobj->p_port))); + mcast_work_obj_delete(p_wobj); + continue; + } + + cl_qlist_insert_tail(&list_array[port_num], &p_wobj->list_item); + } + + OSM_LOG_EXIT(sm->p_log); +} + +static void mcast_mgr_purge_list(osm_sm_t * sm, uint16_t mlid, cl_qlist_t * list) +{ + if (OSM_LOG_IS_ACTIVE_V2(sm->p_log, OSM_LOG_ERROR)) { + osm_mcast_work_obj_t *wobj; + cl_list_item_t *i; + for (i = cl_qlist_head(list); i != cl_qlist_end(list); + i = cl_qlist_next(i)) { + wobj = cl_item_obj(i, wobj, list_item); + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A06: " + "Unable to route MLID 0x%X for port 0x%" PRIx64 "\n", + mlid, cl_ntoh64(osm_port_get_guid(wobj->p_port))); + } + } + osm_mcast_drop_port_list(list); +} + +/********************************************************************** + This is the recursive function to compute the paths in the spanning + tree that emanate from this 
switch. On input, the p_list contains
+ the group members that must be routed from this switch.
+
+ The function returns the newly created mtree node element.
+**********************************************************************/
+static osm_mtree_node_t *mcast_mgr_branch(osm_sm_t * sm, uint16_t mlid_ho,
+					  osm_switch_t * p_sw,
+					  cl_qlist_t * p_list, uint8_t depth,
+					  uint8_t upstream_port,
+					  uint8_t * p_max_depth)
+{
+	uint8_t max_children;
+	osm_mtree_node_t *p_mtn = NULL;
+	cl_qlist_t *list_array = NULL;
+	uint8_t i;
+	ib_net64_t node_guid;
+	osm_mcast_work_obj_t *p_wobj;
+	cl_qlist_t *p_port_list;
+	size_t count;
+	osm_mcast_tbl_t *p_tbl;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	CL_ASSERT(p_sw);
+	CL_ASSERT(p_list);
+	CL_ASSERT(p_max_depth);
+
+	node_guid = osm_node_get_node_guid(p_sw->p_node);
+
+	/*
+	   The list count is a size_t (see "count" below, printed with %zu),
+	   so it must not be passed for a %u conversion.
+	 */
+	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+		"Routing MLID 0x%X through switch 0x%" PRIx64
+		" %s, %zu nodes at depth %u\n",
+		mlid_ho, cl_ntoh64(node_guid), p_sw->p_node->print_desc,
+		cl_qlist_count(p_list), depth);
+
+	CL_ASSERT(cl_qlist_count(p_list) > 0);
+
+	depth++;
+
+	/* Refuse to recurse past 64 levels; drop the remaining members. */
+	if (depth >= 64) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A21: "
+			"Maximal hops number is reached for MLID 0x%x."
+			" Break processing\n", mlid_ho);
+		mcast_mgr_purge_list(sm, mlid_ho, p_list);
+		goto Exit;
+	}
+
+	if (depth > *p_max_depth) {
+		CL_ASSERT(depth == *p_max_depth + 1);
+		*p_max_depth = depth;
+	}
+
+	if (osm_switch_supports_mcast(p_sw) == FALSE) {
+		/*
+		   This switch doesn't do multicast. Clean-up.
+		 */
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A14: "
+			"Switch 0x%" PRIx64 " %s does not support multicast\n",
+			cl_ntoh64(node_guid), p_sw->p_node->print_desc);
+
+		/*
+		   Deallocate all the work objects on this branch of the tree.
+		 */
+		mcast_mgr_purge_list(sm, mlid_ho, p_list);
+		goto Exit;
+	}
+
+	p_mtn = osm_mtree_node_new(p_sw);
+	if (p_mtn == NULL) {
+		/*
+		   We are unable to continue routing down this
+		   leg of the tree. Clean-up.
+ */ + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A15: " + "Insufficient memory to build multicast tree\n"); + + /* + Deallocate all the work objects on this branch of the tree. + */ + mcast_mgr_purge_list(sm, mlid_ho, p_list); + goto Exit; + } + + max_children = osm_mtree_node_get_max_children(p_mtn); + + CL_ASSERT(max_children > 1); + + /* + Prepare an empty list for each port in the switch. + TO DO - this list array could probably be moved + inside the switch element to save on malloc thrashing. + */ + list_array = malloc(sizeof(cl_qlist_t) * max_children); + if (list_array == NULL) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A16: " + "Unable to allocate list array\n"); + mcast_mgr_purge_list(sm, mlid_ho, p_list); + osm_mtree_destroy(p_mtn); + p_mtn = NULL; + goto Exit; + } + + memset(list_array, 0, sizeof(cl_qlist_t) * max_children); + + for (i = 0; i < max_children; i++) + cl_qlist_init(&list_array[i]); + + mcast_mgr_subdivide(sm, mlid_ho, p_sw, p_list, list_array, max_children); + + p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw); + + /* + Add the upstream port to the forwarding table unless + we're at the root of the spanning tree. + */ + if (depth > 1) { + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Adding upstream port %u\n", upstream_port); + + CL_ASSERT(upstream_port); + osm_mcast_tbl_set(p_tbl, mlid_ho, upstream_port); + } + + /* + For each port that was allocated some routes, + recurse into this function to continue building the tree + if the node on the other end of that port is another switch. + Otherwise, the node is an endpoint, and we've found a leaf + of the tree. Mark leaves with our special pointer value. + */ + + for (i = 0; i < max_children; i++) { + const osm_physp_t *p_physp; + const osm_physp_t *p_remote_physp; + osm_node_t *p_node; + const osm_node_t *p_remote_node; + + p_port_list = &list_array[i]; + + count = cl_qlist_count(p_port_list); + + /* + There should be no children routed through the upstream port! 
+ */ + CL_ASSERT(upstream_port == 0 || i != upstream_port || + (i == upstream_port && count == 0)); + + if (count == 0) + continue; /* No routes down this port. */ + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Routing %zu destinations via switch port %u\n", + count, i); + + if (i == 0) { + /* This means we are adding the switch to the MC group. + We do not need to continue looking at the remote + port, just needed to add the port to the table */ + CL_ASSERT(count == 1); + + osm_mcast_tbl_set(p_tbl, mlid_ho, i); + + p_wobj = (osm_mcast_work_obj_t *) + cl_qlist_remove_head(p_port_list); + mcast_work_obj_delete(p_wobj); + continue; + } + + p_node = p_sw->p_node; + p_remote_node = osm_node_get_remote_node(p_node, i, NULL); + if (!p_remote_node) { + /* + * If we reached here, it means the minhop table has + * invalid entries that leads to disconnected ports. + * + * A possible reason for the code to reach here is + * that ucast cache is enabled, and a leaf switch that + * is used as a non-leaf switch in a multicast has been + * removed from the fabric. + * + * When it happens, we should invalidate the cache + * and force rerouting of the fabric. + */ + + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 0A1E: Tried to route MLID 0x%X through " + "disconnected switch 0x%" PRIx64 " port %d\n", + mlid_ho, cl_ntoh64(node_guid), i); + + /* Free memory */ + mcast_mgr_purge_list(sm, mlid_ho, p_port_list); + + /* Invalidate ucast cache */ + if (sm->ucast_mgr.p_subn->opt.use_ucast_cache && + sm->ucast_mgr.cache_valid) { + OSM_LOG(sm->p_log, OSM_LOG_INFO, + "Unicast Cache will be invalidated due " + "to multicast routing errors\n"); + osm_ucast_cache_invalidate(&sm->ucast_mgr); + sm->p_subn->force_heavy_sweep = TRUE; + } + + continue; + } + + /* + This port routes frames for this mcast group. Therefore, + set the appropriate bit in the multicast forwarding + table for this switch. 
+ */ + osm_mcast_tbl_set(p_tbl, mlid_ho, i); + + if (osm_node_get_type(p_remote_node) == IB_NODE_TYPE_SWITCH) { + /* + Acquire a pointer to the remote switch then recurse. + */ + CL_ASSERT(p_remote_node->sw); + + p_physp = osm_node_get_physp_ptr(p_node, i); + CL_ASSERT(p_physp); + + p_remote_physp = osm_physp_get_remote(p_physp); + CL_ASSERT(p_remote_physp); + + p_mtn->child_array[i] = + mcast_mgr_branch(sm, mlid_ho, p_remote_node->sw, + p_port_list, depth, + osm_physp_get_port_num + (p_remote_physp), p_max_depth); + } else { + /* + The neighbor node is not a switch, so this + must be a leaf. + */ + CL_ASSERT(count == 1); + + p_mtn->child_array[i] = OSM_MTREE_LEAF; + p_wobj = (osm_mcast_work_obj_t *) + cl_qlist_remove_head(p_port_list); + + CL_ASSERT(cl_is_qlist_empty(p_port_list)); + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Found leaf for port 0x%016" PRIx64 + " on switch port %u\n", + cl_ntoh64(osm_port_get_guid(p_wobj->p_port)), + i); + mcast_work_obj_delete(p_wobj); + } + } + + free(list_array); +Exit: + OSM_LOG_EXIT(sm->p_log); + return p_mtn; +} + +static ib_api_status_t mcast_mgr_build_spanning_tree(osm_sm_t * sm, + osm_mgrp_box_t * mbox) +{ + cl_qlist_t port_list; + cl_qmap_t port_map; + uint32_t num_ports; + osm_switch_t *p_sw; + ib_api_status_t status = IB_SUCCESS; + uint8_t max_depth = 0; + + OSM_LOG_ENTER(sm->p_log); + + /* + TO DO - for now, just blow away the old tree. + In the future we'll need to construct the tree based + on multicast forwarding table information if the user wants to + preserve existing multicast routes. 
+ */ + osm_purge_mtree(sm, mbox); + + /* build the first "subset" containing all member ports */ + if (osm_mcast_make_port_list_and_map(&port_list, &port_map, mbox)) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A10: " + "Insufficient memory to make port list\n"); + status = IB_ERROR; + goto Exit; + } + + num_ports = cl_qlist_count(&port_list); + if (num_ports < 2) { + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "MLID 0x%X has %u members - nothing to do\n", + mbox->mlid, num_ports); + osm_mcast_drop_port_list(&port_list); + goto Exit; + } + + /* + This function builds the single spanning tree recursively. + At each stage, the ports to be reached are divided into + non-overlapping subsets of member ports that can be reached through + a given switch port. Construction then moves down each + branch, and the process starts again with each branch computing + for its own subset of the member ports. + + The maximum recursion depth is at worst the maximum hop count in the + subnet, which is spec limited to 64. + */ + + /* + Locate the switch around which to create the spanning + tree for this multicast group. 
+ */ + p_sw = osm_mcast_mgr_find_root_switch(sm, &port_list); + if (p_sw == NULL) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A08: " + "Unable to locate a suitable switch for group 0x%X\n", + mbox->mlid); + osm_mcast_drop_port_list(&port_list); + status = IB_ERROR; + goto Exit; + } + + mbox->root = mcast_mgr_branch(sm, mbox->mlid, p_sw, &port_list, 0, 0, + &max_depth); + + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Configured MLID 0x%X for %u ports, max tree depth = %u\n", + mbox->mlid, num_ports, max_depth); +Exit: + OSM_LOG_EXIT(sm->p_log); + return status; +} + +#if 0 +/* unused */ +void osm_mcast_mgr_set_table(osm_sm_t * sm, IN const osm_mgrp_t * p_mgrp, + IN const osm_mtree_node_t * p_mtn) +{ + uint8_t i; + uint8_t max_children; + osm_mtree_node_t *p_child_mtn; + uint16_t mlid_ho; + osm_mcast_tbl_t *p_tbl; + osm_switch_t *p_sw; + + OSM_LOG_ENTER(sm->p_log); + + mlid_ho = cl_ntoh16(osm_mgrp_get_mlid(p_mgrp)); + p_sw = osm_mtree_node_get_switch_ptr(p_mtn); + + CL_ASSERT(p_sw); + + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Configuring MLID 0x%X on switch 0x%" PRIx64 "\n", + mlid_ho, osm_node_get_node_guid(p_sw->p_node)); + + /* + For every child of this tree node, set the corresponding + bit in the switch's mcast table. + */ + p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw); + max_children = osm_mtree_node_get_max_children(p_mtn); + + CL_ASSERT(max_children <= osm_switch_get_num_ports(p_sw)); + + osm_mcast_tbl_clear_mlid(p_tbl, mlid_ho); + + for (i = 0; i < max_children; i++) { + p_child_mtn = osm_mtree_node_get_child(p_mtn, i); + if (p_child_mtn == NULL) + continue; + + osm_mcast_tbl_set(p_tbl, mlid_ho, i); + } + + OSM_LOG_EXIT(sm->p_log); +} +#endif + +static void mcast_mgr_clear(osm_sm_t * sm, uint16_t mlid) +{ + osm_switch_t *p_sw; + cl_qmap_t *p_sw_tbl; + osm_mcast_tbl_t *p_mcast_tbl; + + OSM_LOG_ENTER(sm->p_log); + + /* Walk the switches and clear the routing entries for this MLID. 
*/ + p_sw_tbl = &sm->p_subn->sw_guid_tbl; + p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl); + while (p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl)) { + p_mcast_tbl = osm_switch_get_mcast_tbl_ptr(p_sw); + osm_mcast_tbl_clear_mlid(p_mcast_tbl, mlid); + p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item); + } + + OSM_LOG_EXIT(sm->p_log); +} + +#if 0 +/* TO DO - make this real -- at least update spanning tree */ +/********************************************************************** + Lock must be held on entry. +**********************************************************************/ +ib_api_status_t osm_mcast_mgr_process_single(osm_sm_t * sm, + IN ib_net16_t const mlid, + IN ib_net64_t const port_guid, + IN uint8_t const join_state) +{ + uint8_t port_num; + uint16_t mlid_ho; + ib_net64_t sw_guid; + osm_port_t *p_port; + osm_physp_t *p_physp; + osm_physp_t *p_remote_physp; + osm_node_t *p_remote_node; + osm_mcast_tbl_t *p_mcast_tbl; + ib_api_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(sm->p_log); + + CL_ASSERT(mlid); + CL_ASSERT(port_guid); + + mlid_ho = cl_ntoh16(mlid); + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Attempting to add port 0x%" PRIx64 " to MLID 0x%X, " + "\n\t\t\t\tjoin state = 0x%X\n", + cl_ntoh64(port_guid), mlid_ho, join_state); + + /* + Acquire the Port object. 
+ */ + p_port = osm_get_port_by_guid(sm->p_subn, port_guid); + if (!p_port) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A01: " + "Unable to acquire port object for 0x%" PRIx64 "\n", + cl_ntoh64(port_guid)); + status = IB_ERROR; + goto Exit; + } + + p_physp = p_port->p_physp; + if (p_physp == NULL) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A05: " + "Unable to acquire phsyical port object for 0x%" PRIx64 + "\n", cl_ntoh64(port_guid)); + status = IB_ERROR; + goto Exit; + } + + p_remote_physp = osm_physp_get_remote(p_physp); + if (p_remote_physp == NULL) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A11: " + "Unable to acquire remote phsyical port object " + "for 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); + status = IB_ERROR; + goto Exit; + } + + p_remote_node = osm_physp_get_node_ptr(p_remote_physp); + + CL_ASSERT(p_remote_node); + + sw_guid = osm_node_get_node_guid(p_remote_node); + + if (osm_node_get_type(p_remote_node) != IB_NODE_TYPE_SWITCH) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A22: " + "Remote node not a switch node 0x%" PRIx64 "\n", + cl_ntoh64(sw_guid)); + status = IB_ERROR; + goto Exit; + } + + if (!p_remote_node->sw) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A12: " + "No switch object 0x%" PRIx64 "\n", cl_ntoh64(sw_guid)); + status = IB_ERROR; + goto Exit; + } + + if (osm_switch_is_in_mcast_tree(p_remote_node->sw, mlid_ho)) { + /* + We're in luck. The switch attached to this port + is already in the multicast group, so we can just + add the specified port as a new leaf of the tree. + */ + if (join_state & (IB_JOIN_STATE_FULL | IB_JOIN_STATE_NON)) { + /* + This node wants to receive multicast frames. + Get the switch port number to which the new member port + is attached, then configure this single mcast table. 
+ */ + port_num = osm_physp_get_port_num(p_remote_physp); + CL_ASSERT(port_num); + + p_mcast_tbl = + osm_switch_get_mcast_tbl_ptr(p_remote_node->sw); + osm_mcast_tbl_set(p_mcast_tbl, mlid_ho, port_num); + } else { + if (join_state & IB_JOIN_STATE_SEND_ONLY) + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Success. Nothing to do for send" + "only member\n"); + else { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A13: " + "Unknown join state 0x%X\n", + join_state); + status = IB_ERROR; + goto Exit; + } + } + } else + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Unable to add port\n"); + +Exit: + OSM_LOG_EXIT(sm->p_log); + return status; +} +#endif + +/********************************************************************** + Process the entire group. + NOTE : The lock should be held externally! + **********************************************************************/ +static ib_api_status_t mcast_mgr_process_mlid(osm_sm_t * sm, uint16_t mlid) +{ + ib_api_status_t status = IB_SUCCESS; + struct osm_routing_engine *re = sm->p_subn->p_osm->routing_engine_used; + osm_mgrp_box_t *mbox; + + OSM_LOG_ENTER(sm->p_log); + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Processing multicast group with mlid 0x%X\n", mlid); + + /* Clear the multicast tables to start clean, then build + the spanning tree which sets the mcast table bits for each + port in the group. 
*/ + mcast_mgr_clear(sm, mlid); + + mbox = osm_get_mbox_by_mlid(sm->p_subn, cl_hton16(mlid)); + if (mbox) { + if (re && re->mcast_build_stree) + status = re->mcast_build_stree(re->context, mbox); + else + status = mcast_mgr_build_spanning_tree(sm, mbox); + + if (status != IB_SUCCESS) + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A17: " + "Unable to create spanning tree (%s) for mlid " + "0x%x\n", ib_get_err_str(status), mlid); + } + + OSM_LOG_EXIT(sm->p_log); + return status; +} + +static void mcast_mgr_set_mfttop(IN osm_sm_t * sm, IN osm_switch_t * p_sw) +{ + osm_node_t *p_node; + osm_dr_path_t *p_path; + osm_physp_t *p_physp; + osm_mcast_tbl_t *p_tbl; + osm_madw_context_t context; + ib_api_status_t status; + ib_switch_info_t si; + ib_net16_t mcast_top; + + OSM_LOG_ENTER(sm->p_log); + + CL_ASSERT(p_sw); + + p_node = p_sw->p_node; + + CL_ASSERT(p_node); + + p_physp = osm_node_get_physp_ptr(p_node, 0); + p_path = osm_physp_get_dr_path_ptr(p_physp); + p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw); + + if (sm->p_subn->opt.use_mfttop && + p_physp->port_info.capability_mask & IB_PORT_CAP_HAS_MCAST_FDB_TOP) { + /* + Set the top of the multicast forwarding table. 
+ */ + si = p_sw->switch_info; + if (sm->p_subn->first_time_master_sweep == TRUE) + mcast_top = cl_hton16(sm->mlids_init_max); + else { + if (p_tbl->max_block_in_use == -1) + mcast_top = cl_hton16(IB_LID_MCAST_START_HO - 1); + else + mcast_top = cl_hton16(IB_LID_MCAST_START_HO + + (p_tbl->max_block_in_use + 1) * IB_MCAST_BLOCK_SIZE - 1); + } + if (mcast_top == si.mcast_top) + return; + + si.mcast_top = mcast_top; + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Setting switch MFT top to MLID 0x%x\n", + cl_ntoh16(si.mcast_top)); + + context.si_context.light_sweep = FALSE; + context.si_context.node_guid = osm_node_get_node_guid(p_node); + context.si_context.set_method = TRUE; + context.si_context.lft_top_change = FALSE; + + status = osm_req_set(sm, p_path, (uint8_t *) & si, + sizeof(si), IB_MAD_ATTR_SWITCH_INFO, + 0, FALSE, + ib_port_info_get_m_key(&p_physp->port_info), + 0, CL_DISP_MSGID_NONE, &context); + + if (status != IB_SUCCESS) + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A1B: " + "Sending SwitchInfo attribute failed (%s)\n", + ib_get_err_str(status)); + } +} + +static int mcast_mgr_set_mftables(osm_sm_t * sm) +{ + cl_qmap_t *p_sw_tbl = &sm->p_subn->sw_guid_tbl; + osm_switch_t *p_sw; + osm_mcast_tbl_t *p_tbl; + int block_notdone, ret = 0; + int16_t block_num, max_block = -1; + + p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl); + while (p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl)) { + p_sw->mft_block_num = 0; + p_sw->mft_position = 0; + p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw); + if (osm_mcast_tbl_get_max_block_in_use(p_tbl) > max_block) + max_block = osm_mcast_tbl_get_max_block_in_use(p_tbl); + mcast_mgr_set_mfttop(sm, p_sw); + p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item); + } + + /* Stripe the MFT blocks across the switches */ + for (block_num = 0; block_num <= max_block; block_num++) { + block_notdone = 1; + while (block_notdone) { + block_notdone = 0; + p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl); + while (p_sw != (osm_switch_t *) 
cl_qmap_end(p_sw_tbl)) { + if (p_sw->mft_block_num == block_num) { + block_notdone = 1; + if (mcast_mgr_set_mft_block(sm, p_sw, + p_sw->mft_block_num, + p_sw->mft_position)) + ret = -1; + p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw); + if (++p_sw->mft_position > p_tbl->max_position) { + p_sw->mft_position = 0; + p_sw->mft_block_num++; + } + } + p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item); + } + } + } + + return ret; +} + +static int alloc_mfts(osm_sm_t * sm) +{ + int i; + cl_map_item_t *item; + osm_switch_t *p_sw; + + for (i = sm->p_subn->max_mcast_lid_ho - IB_LID_MCAST_START_HO; i >= 0; + i--) + if (sm->p_subn->mboxes[i]) + break; + if (i < 0) + return 0; + + /* Now, walk switches and (re)allocate multicast tables */ + for (item = cl_qmap_head(&sm->p_subn->sw_guid_tbl); + item != cl_qmap_end(&sm->p_subn->sw_guid_tbl); + item = cl_qmap_next(item)) { + p_sw = (osm_switch_t *) item; + if (osm_mcast_tbl_realloc(&p_sw->mcast_tbl, i)) + return -1; + } + return 0; +} + +/********************************************************************** + This is the function that is invoked during idle time and sweep to + handle the process request for mcast groups where join/leave/delete + was required. + **********************************************************************/ +int osm_mcast_mgr_process(osm_sm_t * sm, boolean_t config_all) +{ + int ret = 0; + unsigned i; + unsigned max_mlid; + + OSM_LOG_ENTER(sm->p_log); + + CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); + + /* If there are no switches in the subnet we have nothing to do. */ + if (cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) { + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "No switches in subnet. Nothing to do\n"); + goto exit; + } + + if (alloc_mfts(sm)) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 0A09: alloc_mfts failed\n"); + ret = -1; + goto exit; + } + + max_mlid = config_all ? 
sm->p_subn->max_mcast_lid_ho + - IB_LID_MCAST_START_HO : sm->mlids_req_max; + for (i = 0; i <= max_mlid; i++) { + if (sm->mlids_req[i] || + (config_all && sm->p_subn->mboxes[i])) { + sm->mlids_req[i] = 0; + mcast_mgr_process_mlid(sm, i + IB_LID_MCAST_START_HO); + } + } + + sm->mlids_req_max = 0; + + ret = mcast_mgr_set_mftables(sm); + + osm_dump_mcast_routes(sm->p_subn->p_osm); + +exit: + CL_PLOCK_RELEASE(sm->p_lock); + OSM_LOG_EXIT(sm->p_log); + return ret; +} diff --git a/opensm/osm_mcast_tbl.c b/opensm/osm_mcast_tbl.c new file mode 100644 index 0000000..617fd50 --- /dev/null +++ b/opensm/osm_mcast_tbl.c @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2009 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
/*
 * Reset a multicast table object.
 *
 * The whole structure is zeroed and max_block_in_use is set to -1.
 * When capacity is non-zero the entry/port counts are recorded and the
 * highest mask position and highest block index are derived from them.
 * No mask storage is allocated here.
 */
void osm_mcast_tbl_init(IN osm_mcast_tbl_t * p_tbl, IN uint8_t num_ports,
			IN uint16_t capacity)
{
	CL_ASSERT(p_tbl);
	CL_ASSERT(num_ports);

	memset(p_tbl, 0, sizeof(*p_tbl));
	p_tbl->max_block_in_use = -1;

	/*
	   A capacity of zero means the switch apparently doesn't support
	   multicast; the remaining fields stay zero in that case.
	 */
	if (capacity != 0) {
		p_tbl->num_entries = capacity;
		p_tbl->num_ports = num_ports;

		p_tbl->max_position =
		    (uint8_t) ((ROUNDUP(num_ports, IB_MCAST_MASK_SIZE) /
				IB_MCAST_MASK_SIZE) - 1);

		p_tbl->max_block =
		    (uint16_t) ((ROUNDUP(p_tbl->num_entries,
					 IB_MCAST_BLOCK_SIZE) /
				 IB_MCAST_BLOCK_SIZE) - 1);
	}
}

/*
 * Release the mask storage owned by the table (the table object itself
 * is not freed).
 */
void osm_mcast_tbl_destroy(IN osm_mcast_tbl_t * p_tbl)
{
	free(p_tbl->p_mask_tbl);
}
p_tbl->max_block_in_use) + p_tbl->max_block_in_use = (uint16_t) block_num; +} + +int osm_mcast_tbl_realloc(IN osm_mcast_tbl_t * p_tbl, IN unsigned mlid_offset) +{ + size_t mft_depth, size; + uint16_t (*p_mask_tbl)[][IB_MCAST_POSITION_MAX + 1]; + + if (mlid_offset < p_tbl->mft_depth) + goto done; + + /* + The number of bytes needed in the mask table is: + The (maximum bit mask 'position' + 1) times the + number of bytes in each bit mask times the + number of MLIDs supported by the table. + + We must always allocate the array with the maximum position + since it is (and must be) defined that way the table structure + in order to create a pointer to a two dimensional array. + */ + mft_depth = (mlid_offset / IB_MCAST_BLOCK_SIZE + 1) * IB_MCAST_BLOCK_SIZE; + size = mft_depth * (IB_MCAST_POSITION_MAX + 1) * IB_MCAST_MASK_SIZE / 8; + p_mask_tbl = realloc(p_tbl->p_mask_tbl, size); + if (!p_mask_tbl) + return -1; + memset((uint8_t *)p_mask_tbl + p_tbl->mft_depth * (IB_MCAST_POSITION_MAX + 1) * IB_MCAST_MASK_SIZE / 8, + 0, + size - p_tbl->mft_depth * (IB_MCAST_POSITION_MAX + 1) * IB_MCAST_MASK_SIZE / 8); + p_tbl->p_mask_tbl = p_mask_tbl; + p_tbl->mft_depth = mft_depth; +done: + p_tbl->max_mlid_ho = mlid_offset + IB_LID_MCAST_START_HO; + return 0; +} + +boolean_t osm_mcast_tbl_is_port(IN const osm_mcast_tbl_t * p_tbl, + IN uint16_t mlid_ho, IN uint8_t port_num) +{ + unsigned mlid_offset, mask_offset, bit_mask; + + CL_ASSERT(p_tbl); + + if (p_tbl->p_mask_tbl) { + CL_ASSERT(port_num <= + (p_tbl->max_position + 1) * IB_MCAST_MASK_SIZE); + CL_ASSERT(mlid_ho >= IB_LID_MCAST_START_HO); + CL_ASSERT(mlid_ho <= p_tbl->max_mlid_ho); + + mlid_offset = mlid_ho - IB_LID_MCAST_START_HO; + mask_offset = port_num / IB_MCAST_MASK_SIZE; + bit_mask = cl_ntoh16((uint16_t) + (1 << (port_num % IB_MCAST_MASK_SIZE))); + return (((*p_tbl-> + p_mask_tbl)[mlid_offset][mask_offset] & bit_mask) == + bit_mask); + } + + return FALSE; +} + +boolean_t osm_mcast_tbl_is_any_port(IN const osm_mcast_tbl_t * 
p_tbl, + IN uint16_t mlid_ho) +{ + unsigned mlid_offset; + uint8_t position; + uint16_t result = 0; + + CL_ASSERT(p_tbl); + + if (p_tbl->p_mask_tbl) { + CL_ASSERT(mlid_ho >= IB_LID_MCAST_START_HO); + CL_ASSERT(mlid_ho <= p_tbl->max_mlid_ho); + + mlid_offset = mlid_ho - IB_LID_MCAST_START_HO; + + for (position = 0; position <= p_tbl->max_position; position++) + result |= (*p_tbl->p_mask_tbl)[mlid_offset][position]; + } + + return (result != 0); +} + +ib_api_status_t osm_mcast_tbl_set_block(IN osm_mcast_tbl_t * p_tbl, + IN const ib_net16_t * p_block, + IN int16_t block_num, + IN uint8_t position) +{ + uint32_t i; + uint16_t mlid_start_ho; + + CL_ASSERT(p_tbl); + CL_ASSERT(p_block); + + if (block_num > p_tbl->max_block) + return IB_INVALID_PARAMETER; + + if (position > p_tbl->max_position) + return IB_INVALID_PARAMETER; + + mlid_start_ho = (uint16_t) (block_num * IB_MCAST_BLOCK_SIZE); + + if (mlid_start_ho + IB_MCAST_BLOCK_SIZE - 1 > p_tbl->mft_depth) + return IB_INVALID_PARAMETER; + + for (i = 0; i < IB_MCAST_BLOCK_SIZE; i++) + (*p_tbl->p_mask_tbl)[mlid_start_ho + i][position] = p_block[i]; + + if (block_num > p_tbl->max_block_in_use) + p_tbl->max_block_in_use = (uint16_t) block_num; + + return IB_SUCCESS; +} + +void osm_mcast_tbl_clear_mlid(IN osm_mcast_tbl_t * p_tbl, IN uint16_t mlid_ho) +{ + unsigned mlid_offset; + + CL_ASSERT(p_tbl); + CL_ASSERT(mlid_ho >= IB_LID_MCAST_START_HO); + + mlid_offset = mlid_ho - IB_LID_MCAST_START_HO; + if (p_tbl->p_mask_tbl && mlid_offset < p_tbl->mft_depth) + memset((uint8_t *)p_tbl->p_mask_tbl + mlid_offset * (IB_MCAST_POSITION_MAX + 1) * IB_MCAST_MASK_SIZE / 8, + 0, + (IB_MCAST_POSITION_MAX + 1) * IB_MCAST_MASK_SIZE / 8); +} + +boolean_t osm_mcast_tbl_get_block(IN osm_mcast_tbl_t * p_tbl, + IN int16_t block_num, IN uint8_t position, + OUT ib_net16_t * p_block) +{ + uint32_t i; + uint16_t mlid_start_ho; + + CL_ASSERT(p_tbl); + CL_ASSERT(p_block); + + if (block_num > p_tbl->max_block_in_use) + return FALSE; + + if (position > 
p_tbl->max_position) { + /* + Caller shouldn't do this for efficiency's sake... + */ + memset(p_block, 0, IB_SMP_DATA_SIZE); + return TRUE; + } + + CL_ASSERT(block_num * IB_MCAST_BLOCK_SIZE <= p_tbl->mft_depth); + + mlid_start_ho = (uint16_t) (block_num * IB_MCAST_BLOCK_SIZE); + + for (i = 0; i < IB_MCAST_BLOCK_SIZE; i++) + p_block[i] = (*p_tbl->p_mask_tbl)[mlid_start_ho + i][position]; + + return TRUE; +} diff --git a/opensm/osm_mcm_port.c b/opensm/osm_mcm_port.c new file mode 100644 index 0000000..65cef0b --- /dev/null +++ b/opensm/osm_mcm_port.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2012 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_mcm_port_t. + * This object represents the membership of a port in a multicast group. + * This object is part of the OpenSM family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#define FILE_ID OSM_FILE_MCM_PORT_C +#include +#include + +osm_mcm_port_t *osm_mcm_port_new(IN osm_port_t *port, IN osm_mgrp_t *mgrp) +{ + osm_mcm_port_t *p_mcm; + + p_mcm = malloc(sizeof(*p_mcm)); + if (p_mcm) { + memset(p_mcm, 0, sizeof(*p_mcm)); + p_mcm->port = port; + p_mcm->mgrp = mgrp; + } + + return p_mcm; +} + +void osm_mcm_port_delete(IN osm_mcm_port_t * p_mcm) +{ + CL_ASSERT(p_mcm); + + free(p_mcm); +} + +osm_mcm_alias_guid_t *osm_mcm_alias_guid_new(IN osm_mcm_port_t *p_base_mcm_port, + IN ib_member_rec_t *mcmr, + IN boolean_t proxy) +{ + osm_mcm_alias_guid_t *p_mcm_alias_guid; + + p_mcm_alias_guid = calloc(1, sizeof(*p_mcm_alias_guid)); + if (p_mcm_alias_guid) { + p_mcm_alias_guid->alias_guid = mcmr->port_gid.unicast.interface_id; + p_mcm_alias_guid->p_base_mcm_port = p_base_mcm_port; + p_mcm_alias_guid->port_gid.unicast.prefix = mcmr->port_gid.unicast.prefix; + p_mcm_alias_guid->port_gid.unicast.interface_id = mcmr->port_gid.unicast.interface_id; + p_mcm_alias_guid->scope_state = mcmr->scope_state; + p_mcm_alias_guid->proxy_join = proxy; + } + + return p_mcm_alias_guid; +} + +void osm_mcm_alias_guid_delete(IN OUT osm_mcm_alias_guid_t ** pp_mcm_alias_guid) +{ + free(*pp_mcm_alias_guid); + *pp_mcm_alias_guid = NULL; +} diff --git a/opensm/osm_mesh.c b/opensm/osm_mesh.c new file mode 100644 index 0000000..073a21e --- /dev/null +++ b/opensm/osm_mesh.c @@ -0,0 +1,1736 @@ +/* + * Copyright (c) 
2008-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2008,2009 System Fabric Works, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +/* + * Abstract: + * routines to analyze certain meshes + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#define FILE_ID OSM_FILE_MESH_C +#include +#include +#include +#include +#include + +#define MAX_DEGREE (8) +#define MAX_DIMENSION (8) +#define LARGE (0x7fffffff) + +/* + * characteristic polynomials for selected 1d through 8d tori + */ +static const struct mesh_info { + int dimension; /* dimension of the torus */ + int size[MAX_DIMENSION]; /* size of the torus */ + unsigned int degree; /* degree of polynomial */ + int poly[MAX_DEGREE+1]; /* polynomial */ +} mesh_info[] = { + {0, {0}, 0, {0}, }, + + {1, {2}, 1, {0, -1}, }, + {1, {3}, 2, {-1, 0, 1}, }, + {1, {5}, 2, {-9, 0, 1}, }, + {1, {6}, 2, {-36, 0, 1}, }, + + {2, {2, 2}, 2, {-4, 0, 1}, }, + {2, {3, 2}, 3, {8, 9, 0, -1}, }, + {2, {5, 2}, 3, {24, 17, 0, -1}, }, + {2, {6, 2}, 3, {32, 24, 0, -1}, }, + {2, {3, 3}, 4, {-15, -32, -18, 0, 1}, }, + {2, {5, 3}, 4, {-39, -64, -26, 0, 1}, }, + {2, {6, 3}, 4, {-48, -80, -33, 0, 1}, }, + {2, {5, 5}, 4, {-63, -96, -34, 0, 1}, }, + {2, {6, 5}, 4, {-48, -112, -41, 0, 1}, }, + {2, {6, 6}, 4, {0, -128, -48, 0, 1}, }, + + {3, {2, 2, 2}, 3, {16, 12, 0, -1}, }, + {3, {3, 2, 2}, 4, {-28, -48, -21, 0, 1}, }, + {3, {5, 2, 2}, 4, {-60, -80, -29, 0, 1}, }, + {3, {6, 2, 2}, 4, {-64, -96, -36, 0, 1}, }, + {3, {3, 3, 2}, 5, {48, 127, 112, 34, 0, -1}, }, + {3, {5, 3, 2}, 5, {96, 215, 160, 42, 0, -1}, }, + {3, {6, 3, 2}, 5, {96, 232, 184, 49, 0, -1}, }, + {3, {5, 5, 2}, 5, {144, 303, 208, 50, 0, -1}, }, + {3, {6, 5, 2}, 5, {96, 296, 232, 57, 0, -1}, }, + {3, {6, 6, 2}, 5, {0, 256, 256, 64, 0, -1}, }, + {3, {3, 3, 3}, 6, {-81, -288, -381, -224, -51, 0, 1}, }, + {3, {5, 3, 3}, 6, {-153, -480, -557, -288, -59, 0, 1}, }, + {3, {6, 3, 3}, 6, {-144, -480, -591, -320, -66, 0, 1}, }, + {3, {5, 5, 3}, 6, {-225, -672, -733, -352, -67, 0, 1}, }, + {3, {6, 5, 3}, 6, {-144, -576, -743, -384, -74, 0, 1}, }, + {3, {6, 6, 3}, 6, {0, -384, -720, 
-416, -81, 0, 1}, }, + {3, {5, 5, 5}, 6, {-297, -864, -909, -416, -75, 0, 1}, }, + {3, {6, 5, 5}, 6, {-144, -672, -895, -448, -82, 0, 1}, }, + {3, {6, 6, 5}, 6, {0, -384, -848, -480, -89, 0, 1}, }, + {3, {6, 6, 6}, 6, {0, 0, -768, -512, -96, 0, 1}, }, + + {4, {2, 2, 2, 2}, 4, {-48, -64, -24, 0, 1}, }, + {4, {3, 2, 2, 2}, 5, {80, 180, 136, 37, 0, -1}, }, + {4, {5, 2, 2, 2}, 5, {144, 276, 184, 45, 0, -1}, }, + {4, {6, 2, 2, 2}, 5, {128, 288, 208, 52, 0, -1}, }, + {4, {3, 3, 2, 2}, 6, {-132, -416, -487, -256, -54, 0, 1}, }, + {4, {5, 3, 2, 2}, 6, {-228, -640, -671, -320, -62, 0, 1}, }, + {4, {6, 3, 2, 2}, 6, {-192, -608, -700, -352, -69, 0, 1}, }, + {4, {5, 5, 2, 2}, 6, {-324, -864, -855, -384, -70, 0, 1}, }, + {4, {6, 5, 2, 2}, 6, {-192, -736, -860, -416, -77, 0, 1}, }, + {4, {6, 6, 2, 2}, 6, {0, -512, -832, -448, -84, 0, 1}, }, + {4, {3, 3, 3, 2}, 7, {216, 873, 1392, 1101, 440, 75, 0, -1}, }, + {4, {5, 3, 3, 2}, 7, {360, 1329, 1936, 1405, 520, 83, 0, -1}, }, + {4, {6, 3, 3, 2}, 7, {288, 1176, 1872, 1455, 560, 90, 0, -1}, }, + {4, {5, 5, 3, 2}, 7, {504, 1785, 2480, 1709, 600, 91, 0, -1}, }, + {4, {6, 5, 3, 2}, 7, {288, 1368, 2272, 1735, 640, 98, 0, -1}, }, + {4, {6, 6, 3, 2}, 7, {0, 768, 1920, 1728, 680, 105, 0, -1}, }, + {4, {5, 5, 5, 2}, 7, {648, 2241, 3024, 2013, 680, 99, 0, -1}, }, + {4, {6, 5, 5, 2}, 7, {288, 1560, 2672, 2015, 720, 106, 0, -1}, }, + {4, {6, 6, 5, 2}, 7, {0, 768, 2176, 1984, 760, 113, 0, -1}, }, + {4, {6, 6, 6, 2}, 7, {0, 0, 1536, 1920, 800, 120, 0, -1}, }, + {4, {3, 3, 3, 3}, 8, {-351, -1728, -3492, -3712, -2202, -704, -100, 0, 1}, }, + {4, {5, 3, 3, 3}, 8, {-567, -2592, -4860, -4800, -2658, -800, -108, 0, 1}, }, + {4, {6, 3, 3, 3}, 8, {-432, -2160, -4401, -4672, -2733, -848, -115, 0, 1}, }, + {4, {5, 5, 3, 3}, 8, {-783, -3456, -6228, -5888, -3114, -896, -116, 0, 1}, }, + {4, {6, 5, 3, 3}, 8, {-432, -2448, -5241, -5568, -3165, -944, -123, 0, 1}, }, + {4, {6, 6, 3, 3}, 8, {0, -1152, -3888, -5056, -3183, -992, -130, 0, 1}, }, + {4, {5, 5, 5, 3}, 
8, {-999, -4320, -7596, -6976, -3570, -992, -124, 0, 1}, }, + {4, {6, 5, 5, 3}, 8, {-432, -2736, -6081, -6464, -3597, -1040, -131, 0, 1}, }, + {4, {6, 6, 5, 3}, 8, {0, -1152, -4272, -5760, -3591, -1088, -138, 0, 1}, }, + {4, {6, 6, 6, 3}, 8, {0, 0, -2304, -4864, -3552, -1136, -145, 0, 1}, }, + + {5, {2, 2, 2, 2, 2}, 5, {128, 240, 160, 40, 0, -1}, }, + {5, {3, 2, 2, 2, 2}, 6, {-208, -576, -600, -288, -57, 0, 1}, }, + {5, {5, 2, 2, 2, 2}, 6, {-336, -832, -792, -352, -65, 0, 1}, }, + {5, {6, 2, 2, 2, 2}, 6, {-256, -768, -816, -384, -72, 0, 1}, }, + {5, {3, 3, 2, 2, 2}, 7, {336, 1228, 1776, 1287, 480, 78, 0, -1}, }, + {5, {5, 3, 2, 2, 2}, 7, {528, 1772, 2368, 1599, 560, 86, 0, -1}, }, + {5, {6, 3, 2, 2, 2}, 7, {384, 1504, 2256, 1644, 600, 93, 0, -1}, }, + {5, {5, 5, 2, 2, 2}, 7, {720, 2316, 2960, 1911, 640, 94, 0, -1}, }, + {5, {6, 5, 2, 2, 2}, 7, {384, 1760, 2704, 1932, 680, 101, 0, -1}, }, + {5, {6, 6, 2, 2, 2}, 7, {0, 1024, 2304, 1920, 720, 108, 0, -1}, }, + {5, {3, 3, 3, 2, 2}, 8, {-540, -2448, -4557, -4480, -2481, -752, -103, 0, 1}, }, + {5, {5, 3, 3, 2, 2}, 8, {-828, -3504, -6101, -5632, -2945, -848, -111, 0, 1}, }, + {5, {6, 3, 3, 2, 2}, 8, {-576, -2784, -5412, -5440, -3015, -896, -118, 0, 1}, }, + {5, {5, 5, 3, 2, 2}, 8, {-1116, -4560, -7645, -6784, -3409, -944, -119, 0, 1}, }, + {5, {6, 5, 3, 2, 2}, 8, {-576, -3168, -6404, -6400, -3455, -992, -126, 0, 1}, }, + {5, {6, 6, 3, 2, 2}, 8, {0, -1536, -4800, -5824, -3468, -1040, -133, 0, 1}, }, + {5, {5, 5, 5, 2, 2}, 8, {-1404, -5616, -9189, -7936, -3873, -1040, -127, 0, 1}, }, + {5, {6, 5, 5, 2, 2}, 8, {-576, -3552, -7396, -7360, -3895, -1088, -134, 0, 1}, }, + {5, {6, 6, 5, 2, 2}, 8, {0, -1536, -5312, -6592, -3884, -1136, -141, 0, 1}, }, + {5, {6, 6, 6, 2, 2}, 8, {0, 0, -3072, -5632, -3840, -1184, -148, 0, 1}, }, + + {6, {2, 2, 2, 2, 2, 2}, 6, {-320, -768, -720, -320, -60, 0, 1}, }, + {6, {3, 2, 2, 2, 2, 2}, 7, {512, 1680, 2208, 1480, 520, 81, 0, -1}, }, + {6, {5, 2, 2, 2, 2, 2}, 7, {768, 2320, 2848, 1800, 600, 89, 
0, -1}, }, + {6, {6, 2, 2, 2, 2, 2}, 7, {512, 1920, 2688, 1840, 640, 96, 0, -1}, }, + {6, {3, 3, 2, 2, 2, 2}, 8, {-816, -3392, -5816, -5312, -2767, -800, -106, 0, 1}, }, + {6, {5, 3, 2, 2, 2, 2}, 8, {-1200, -4672, -7544, -6528, -3239, -896, -114, 0, 1}, }, + {6, {6, 3, 2, 2, 2, 2}, 8, {-768, -3584, -6608, -6272, -3304, -944, -121, 0, 1}, }, + {6, {5, 5, 2, 2, 2, 2}, 8, {-1584, -5952, -9272, -7744, -3711, -992, -122, 0, 1}, }, + {6, {6, 5, 2, 2, 2, 2}, 8, {-768, -4096, -7760, -7296, -3752, -1040, -129, 0, 1}, }, + {6, {6, 6, 2, 2, 2, 2}, 8, {0, -2048, -5888, -6656, -3760, -1088, -136, 0, 1}, }, + + {7, {2, 2, 2, 2, 2, 2, 2}, 7, {768, 2240, 2688, 1680, 560, 84, 0, -1}, }, + {7, {3, 2, 2, 2, 2, 2, 2}, 8, {-1216, -4608, -7280, -6208, -3060, -848, -109, 0, 1}, }, + {7, {5, 2, 2, 2, 2, 2, 2}, 8, {-1728, -6144, -9200, -7488, -3540, -944, -117, 0, 1}, }, + {7, {6, 2, 2, 2, 2, 2, 2}, 8, {-1024, -4608, -8000, -7168, -3600, -992, -124, 0, 1}, }, + + {8, {2, 2, 2, 2, 2, 2, 2, 2}, 8, {-1792, -6144, -8960, -7168, -3360, -896, -112, 0, 1}, }, + + /* + * mesh errors + */ + {2, {6, 6}, 4, {-192, -256, -80, 0, 1}, }, + + {-1, {0,}, 0, {0, }, }, +}; + +/* + * per fabric mesh info + */ +typedef struct _mesh { + int num_class; /* number of switch classes */ + int *class_type; /* index of first switch found for each class */ + int *class_count; /* population of each class */ + int dimension; /* mesh dimension */ + int *size; /* an array to hold size of mesh */ + int dim_order[MAX_DIMENSION]; +} mesh_t; + +typedef struct sort_ctx { + lash_t *p_lash; + mesh_t *mesh; +} sort_ctx_t; + +typedef struct comp { + int index; + sort_ctx_t ctx; +} comp_t; + +/* + * poly_alloc + * + * allocate a polynomial of degree n + */ +static int *poly_alloc(lash_t *p_lash, int n) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + int *p; + + if (!(p = calloc(n+1, sizeof(int)))) + OSM_LOG(p_log, OSM_LOG_ERROR, + "Failed allocating poly - out of memory\n"); + + return p; +} + +/* + * print a polynomial + */ +static 
char *poly_print(int n, int *coeff) +{ + static char str[(MAX_DEGREE+1)*20]; + char *p = str; + int i; + int first = 1; + int t; + int sign; + + str[0] = 0; + + for (i = 0; i <= n; i++) { + if (!coeff[i]) + continue; + + if (coeff[i] < 0) { + sign = 1; + t = -coeff[i]; + } else { + sign = 0; + t = coeff[i]; + } + + p += sprintf(p, "%s", sign? "-" : (first? "" : "+")); + first = 0; + + if (t != 1 || i == 0) + p += sprintf(p, "%d", t); + + if (i) + p += sprintf(p, "x"); + if (i > 1) + p += sprintf(p, "^%d", i); + } + + return str; +} + +/* + * poly_diff + * + * return a nonzero value if polynomials differ else 0 + */ +static int poly_diff(unsigned int n, const int *p, switch_t *s) +{ + if (s->node->num_links != n) + return 1; + + return memcmp(p, s->node->poly, n*sizeof(int)); +} + +/* + * m_free + * + * free a square matrix of rank l + */ +static void m_free(int **m, int l) +{ + int i; + + if (m) { + for (i = 0; i < l; i++) { + if (m[i]) + free(m[i]); + } + free(m); + } +} + +/* + * m_alloc + * + * allocate a square matrix of rank l + */ +static int **m_alloc(lash_t *p_lash, int l) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + int i; + int **m = NULL; + + do { + if (!(m = calloc(l, sizeof(int *)))) + break; + + for (i = 0; i < l; i++) { + if (!(m[i] = calloc(l, sizeof(int)))) + break; + } + if (i != l) + break; + + return m; + } while (0); + + OSM_LOG(p_log, OSM_LOG_ERROR, + "Failed allocating matrix - out of memory\n"); + + m_free(m, l); + return NULL; +} + +/* + * pm_free + * + * free a square matrix of rank l of polynomials + */ +static void pm_free(int ***m, int l) +{ + int i, j; + + if (m) { + for (i = 0; i < l; i++) { + if (m[i]) { + for (j = 0; j < l; j++) { + if (m[i][j]) + free(m[i][j]); + } + free(m[i]); + } + } + free(m); + } +} + +/* + * pm_alloc + * + * allocate a square matrix of rank l of polynomials of degree n + */ +static int ***pm_alloc(lash_t *p_lash, int l, int n) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + int i, j; + int ***m = NULL; + + 
do { + if (!(m = calloc(l, sizeof(int **)))) + break; + + for (i = 0; i < l; i++) { + if (!(m[i] = calloc(l, sizeof(int *)))) + break; + + for (j = 0; j < l; j++) { + if (!(m[i][j] = calloc(n+1, sizeof(int)))) + break; + } + if (j != l) + break; + } + if (i != l) + break; + + return m; + } while (0); + + OSM_LOG(p_log, OSM_LOG_ERROR, + "Failed allocating matrix - out of memory\n"); + + pm_free(m, l); + return NULL; +} + +static int determinant(lash_t *p_lash, int n, int rank, int ***m, int *p); + +/* + * sub_determinant + * + * compute the determinant of a submatrix of matrix of rank l of polynomials of degree n + * with row and col removed in poly. caller must free poly + */ +static int sub_determinant(lash_t *p_lash, int n, int l, int row, int col, + int ***matrix, int **poly) +{ + int ret = -1; + int ***m = NULL; + int *p = NULL; + int i, j, k, x, y; + int rank = l - 1; + + do { + if (!(p = poly_alloc(p_lash, n))) { + break; + } + + if (rank <= 0) { + p[0] = 1; + ret = 0; + break; + } + + if (!(m = pm_alloc(p_lash, rank, n))) { + free(p); + p = NULL; + break; + } + + x = 0; + for (i = 0; i < l; i++) { + if (i == row) + continue; + + y = 0; + for (j = 0; j < l; j++) { + if (j == col) + continue; + + for (k = 0; k <= n; k++) + m[x][y][k] = matrix[i][j][k]; + + y++; + } + x++; + } + + if (determinant(p_lash, n, rank, m, p)) { + free(p); + p = NULL; + break; + } + + ret = 0; + } while (0); + + pm_free(m, rank); + *poly = p; + return ret; +} + +/* + * determinant + * + * compute the determinant of matrix m of rank of polynomials of degree deg + * and add the result to polynomial p allocated by caller + */ +static int determinant(lash_t *p_lash, int deg, int rank, int ***m, int *p) +{ + int i, j, k; + int *q; + int sign = 1; + + /* + * handle simple case of 1x1 matrix + */ + if (rank == 1) { + for (i = 0; i <= deg; i++) + p[i] += m[0][0][i]; + } + + /* + * handle simple case of 2x2 matrix + */ + else if (rank == 2) { + for (i = 0; i <= deg; i++) { + if (m[0][0][i] == 
0) + continue; + + for (j = 0; j <= deg; j++) { + if (m[1][1][j] == 0) + continue; + + p[i+j] += m[0][0][i]*m[1][1][j]; + } + } + + for (i = 0; i <= deg; i++) { + if (m[0][1][i] == 0) + continue; + + for (j = 0; j <= deg; j++) { + if (m[1][0][j] == 0) + continue; + + p[i+j] -= m[0][1][i]*m[1][0][j]; + } + } + } + + /* + * handle the general case + */ + else { + for (i = 0; i < rank; i++) { + if (sub_determinant(p_lash, deg, rank, 0, i, m, &q)) + return -1; + + for (j = 0; j <= deg; j++) { + if (m[0][i][j] == 0) + continue; + + for (k = 0; k <= deg; k++) { + if (q[k] == 0) + continue; + + p[j+k] += sign*m[0][i][j]*q[k]; + } + } + + free(q); + sign = -sign; + } + } + + return 0; +} + +/* + * char_poly + * + * compute the characteristic polynomial of matrix of rank + * by computing the determinant of m-x*I and return in poly + * as an array. caller must free poly + */ +static int char_poly(lash_t *p_lash, int rank, int **matrix, int **poly) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + int ret = -1; + int i, j; + int ***m = NULL; + int *p = NULL; + int deg = rank; + + OSM_LOG_ENTER(p_log); + + do { + if (!matrix) + break; + + if (!(p = poly_alloc(p_lash, deg))) + break; + + if (!(m = pm_alloc(p_lash, rank, deg))) { + free(p); + p = NULL; + break; + } + + for (i = 0; i < rank; i++) { + for (j = 0; j < rank; j++) { + m[i][j][0] = matrix[i][j]; + } + m[i][i][1] = -1; + } + + if (determinant(p_lash, deg, rank, m, p)) { + free(p); + p = NULL; + break; + } + + ret = 0; + } while (0); + + pm_free(m, rank); + *poly = p; + + OSM_LOG_EXIT(p_log); + return ret; +} + +/* + * get_switch_metric + * + * compute the matrix of minimum distances between each of + * the adjacent switch nodes to sw along paths + * that do not go through sw. 
/*
 * get_switch_metric
 *
 * compute the matrix of minimum distances between each pair of
 * switches adjacent to sw, along paths that do not go through sw.
 * the distances are found by relaxation, using node->temp of every
 * switch as scratch space.
 *
 * the matrix is stored in the switch's node->matrix and its
 * characteristic polynomial in node->poly.
 *
 * returns 0 on success. on failure returns -1 and node->matrix is
 * set to NULL.
 */
static int get_switch_metric(lash_t *p_lash, int sw)
{
	osm_log_t *p_log = &p_lash->p_osm->log;
	int ret = -1;
	unsigned int i, j, change;
	int sw1, sw2, sw3;
	switch_t *s = p_lash->switches[sw];
	switch_t *s1, *s2, *s3;
	int **m;
	mesh_node_t *node = s->node;
	unsigned int num_links = node->num_links;

	OSM_LOG_ENTER(p_log);

	do {
		/* one row and one column per link of sw */
		if (!(m = m_alloc(p_lash, num_links)))
			break;

		/* row i: distances measured from the i-th neighbour of sw */
		for (i = 0; i < num_links; i++) {
			sw1 = node->links[i]->switch_id;
			s1 = p_lash->switches[sw1];

			/* make all distances big except s1 to itself */
			for (sw2 = 0; sw2 < p_lash->num_switches; sw2++)
				p_lash->switches[sw2]->node->temp = LARGE;

			s1->node->temp = 0;

			/* relax until a full pass changes no distance */
			do {
				change = 0;

				for (sw2 = 0; sw2 < p_lash->num_switches; sw2++) {
					s2 = p_lash->switches[sw2];
					/* not reached yet, nothing to propagate */
					if (s2->node->temp == LARGE)
						continue;
					for (j = 0; j < s2->node->num_links; j++) {
						sw3 = s2->node->links[j]->switch_id;
						s3 = p_lash->switches[sw3];

						/*
						 * never relax into sw itself, so its
						 * distance stays LARGE and no path
						 * is routed through it
						 */
						if (sw3 == sw)
							continue;

						if ((s2->node->temp + 1) < s3->node->temp) {
							s3->node->temp = s2->node->temp + 1;
							change++;
						}
					}
				}
			} while (change);

			/* read back the distance to every neighbour of sw */
			for (j = 0; j < num_links; j++) {
				sw2 = node->links[j]->switch_id;
				s2 = p_lash->switches[sw2];
				m[i][j] = s2->node->temp;
			}
		}

		/* without a polynomial the matrix is discarded as well */
		if (char_poly(p_lash, num_links, m, &node->poly)) {
			m_free(m, num_links);
			m = NULL;
			break;
		}

		ret = 0;
	} while (0);

	/* NULL here whenever allocation or char_poly failed */
	node->matrix = m;

	OSM_LOG_EXIT(p_log);
	return ret;
}
p_lash->switches[mesh->class_type[i]]; + + if (poly_diff(s->node->num_links, s->node->poly, s1)) + continue; + + mesh->class_count[i]++; + goto done; + } + + mesh->class_type[mesh->num_class] = sw; + mesh->class_count[mesh->num_class] = 1; + mesh->num_class++; + +done: + OSM_LOG_EXIT(p_log); +} + +/* + * classify_mesh_type + * + * try to look up node polynomial in table + */ +static void classify_mesh_type(lash_t *p_lash, int sw) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + int i; + switch_t *s = p_lash->switches[sw]; + const struct mesh_info *t; + + OSM_LOG_ENTER(p_log); + + if (!s->node->poly) + goto done; + + for (i = 1; (t = &mesh_info[i])->dimension != -1; i++) { + if (poly_diff(t->degree, t->poly, s)) + continue; + + s->node->type = i; + s->node->dimension = t->dimension; + OSM_LOG_EXIT(p_log); + return; + } + +done: + s->node->type = 0; + OSM_LOG_EXIT(p_log); + return; +} + +/* + * remove_edges + * + * remove type from nodes that have fewer links + * than adjacent nodes + */ +static void remove_edges(lash_t *p_lash) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + int sw; + mesh_node_t *n, *nn; + unsigned i; + + OSM_LOG_ENTER(p_log); + + for (sw = 0; sw < p_lash->num_switches; sw++) { + n = p_lash->switches[sw]->node; + if (!n->type) + continue; + + for (i = 0; i < n->num_links; i++) { + nn = p_lash->switches[n->links[i]->switch_id]->node; + + if (nn->num_links > n->num_links) { + OSM_LOG(p_log, OSM_LOG_DEBUG, + "removed edge switch %s\n", + p_lash->switches[sw]->p_sw->p_node->print_desc); + n->type = -1; + break; + } + } + } + + OSM_LOG_EXIT(p_log); +} + +/* + * get_local_geometry + * + * analyze the local geometry around each switch + */ +static int get_local_geometry(lash_t *p_lash, mesh_t *mesh) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + int sw; + int status = 0; + + OSM_LOG_ENTER(p_log); + + for (sw = 0; sw < p_lash->num_switches; sw++) { + /* + * skip switches with more links than MAX_DEGREE + * since they will never match a known case + */ + if 
(p_lash->switches[sw]->node->num_links > MAX_DEGREE) + continue; + + if (get_switch_metric(p_lash, sw)) { + status = -1; + goto Exit; + } + classify_mesh_type(p_lash, sw); + } + + remove_edges(p_lash); + + for (sw = 0; sw < p_lash->num_switches; sw++) { + if (p_lash->switches[sw]->node->type < 0) + continue; + classify_switch(p_lash, mesh, sw); + } + +Exit: + OSM_LOG_EXIT(p_log); + return status; +} + +static void print_axis(lash_t *p_lash, char *p, int sw, int port) +{ + mesh_node_t *node = p_lash->switches[sw]->node; + char *name = p_lash->switches[sw]->p_sw->p_node->print_desc; + int c = node->axes[port]; + + p += sprintf(p, "%s[%d] = ", name, port); + if (c) + p += sprintf(p, "%s%c -> ", ((c - 1) & 1) ? "-" : "+", 'X' + (c - 1)/2); + else + p += sprintf(p, "N/A -> "); + p += sprintf(p, "%s\n", + p_lash->switches[node->links[port]->switch_id]->p_sw->p_node->print_desc); +} + +/* + * seed_axes + * + * assign axes to the links of the seed switch + * assumes switch is of type cartesian mesh + * axes are numbered 1 to n i.e. +x => 1 -x => 2 etc. 
+ * this assumes that if all distances are 2 that + * an axis has only 2 nodes so +A and -A collapse to +A + */ +static void seed_axes(lash_t *p_lash, int sw) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + mesh_node_t *node = p_lash->switches[sw]->node; + int n = node->num_links; + int i, j, c; + + OSM_LOG_ENTER(p_log); + + if (!node->matrix || !node->dimension) + goto done; + + for (c = 1; c <= 2*node->dimension; c++) { + /* + * find the next unassigned axis + */ + for (i = 0; i < n; i++) { + if (!node->axes[i]) + break; + } + + node->axes[i] = c++; + + /* + * find the matching opposite direction + */ + for (j = 0; j < n; j++) { + if (node->axes[j] || j == i) + continue; + + if (node->matrix[i][j] != 2) + break; + } + + if (j != n) { + node->axes[j] = c; + } + } + + if (OSM_LOG_IS_ACTIVE_V2(p_log, OSM_LOG_DEBUG)) { + char buf[256], *p; + + for (i = 0; i < n; i++) { + p = buf; + print_axis(p_lash, p, sw, i); + OSM_LOG(p_log, OSM_LOG_DEBUG, "%s", buf); + } + } + +done: + OSM_LOG_EXIT(p_log); +} + +/* + * opposite + * + * compute the opposite of axis for switch + */ +static inline int opposite(switch_t *s, int axis) +{ + unsigned i, j; + int negaxis = 1 + (1 ^ (axis - 1)); + + if (!s->node->matrix) + return 0; + + for (i = 0; i < s->node->num_links; i++) { + if (s->node->axes[i] == axis) { + for (j = 0; j < s->node->num_links; j++) { + if (j == i) + continue; + if (s->node->matrix[i][j] != 2) + return negaxis; + } + + return axis; + } + } + + return 0; +} + +/* + * make_geometry + * + * induce a geometry on the switches + */ +static void make_geometry(lash_t *p_lash, int sw) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + int num_switches = p_lash->num_switches; + int sw1, sw2; + switch_t *s, *s1, *s2, *seed; + unsigned int i, j, k, l, n, m; + unsigned int change; + + OSM_LOG_ENTER(p_log); + + s = p_lash->switches[sw]; + + if (!s->node->matrix) + goto done; + + /* + * assign axes to seed switch + */ + seed_axes(p_lash, sw); + seed = p_lash->switches[sw]; + + /* + * 
induce axes in other switches until + * there is no more change + */ + do { + change = 0; + + /* phase 1 opposites */ + for (sw1 = 0; sw1 < num_switches; sw1++) { + s1 = p_lash->switches[sw1]; + n = s1->node->num_links; + + /* + * ignore chain fragments + */ + if (n < seed->node->num_links && n <= 2) + continue; + + /* + * only process 'mesh' switches + */ + if (!s1->node->matrix) + continue; + + for (i = 0; i < n; i++) { + if (!s1->node->axes[i]) + continue; + + /* + * can't tell across if more than one + * likely looking link + */ + m = 0; + for (j = 0; j < n; j++) { + if (j == i) + continue; + + if (s1->node->matrix[i][j] != 2) + m++; + } + + if (m != 1) { + continue; + } + + for (j = 0; j < n; j++) { + if (j == i) + continue; + + /* Rule out opposite nodes when distance greater than 4 */ + if (s1->node->matrix[i][j] != 2 && + s1->node->matrix[i][j] <= 4) { + if (s1->node->axes[j]) { + if (s1->node->axes[j] != opposite(seed, s1->node->axes[i])) { + OSM_LOG(p_log, OSM_LOG_DEBUG, + "phase 1 mismatch\n"); + } + } else { + s1->node->axes[j] = opposite(seed, s1->node->axes[i]); + change++; + } + } + } + } + } + + /* phase 2 switch to switch */ + for (sw1 = 0; sw1 < num_switches; sw1++) { + s1 = p_lash->switches[sw1]; + n = s1->node->num_links; + + if (!s1->node->matrix) + continue; + + for (i = 0; i < n; i++) { + int l2 = s1->node->links[i]->link_id; + + if (!s1->node->axes[i]) + continue; + + if (l2 == -1) { + OSM_LOG(p_log, OSM_LOG_DEBUG, + "no reverse link\n"); + continue; + } + + sw2 = s1->node->links[i]->switch_id; + s2 = p_lash->switches[sw2]; + + if (!s2->node->matrix) + continue; + + if (!s2->node->axes[l2]) { + /* + * set axis to opposite of s1->node->axes[i] + */ + s2->node->axes[l2] = opposite(seed, s1->node->axes[i]); + change++; + } else { + if (s2->node->axes[l2] != opposite(seed, s1->node->axes[i])) { + OSM_LOG(p_log, OSM_LOG_DEBUG, + "phase 2 mismatch\n"); + } + } + } + } + + /* Phase 3 corners */ + for (sw1 = 0; sw1 < num_switches; sw1++) { + s = 
/*
 * ltmag
 *
 * Order two values by magnitude: returns non-zero when |a| < |b|.
 * When the magnitudes are equal the positive value wins, i.e. the
 * result is non-zero only if a > b.
 */
static inline int ltmag(int a, int b)
{
	int mag_a = a;
	int mag_b = b;

	if (mag_a < 0)
		mag_a = -mag_a;
	if (mag_b < 0)
		mag_b = -mag_b;

	if (mag_a != mag_b)
		return mag_a < mag_b;

	/* same magnitude: prefer the non-negative representative */
	return a > b;
}
calloc(dimension, sizeof(int)); + if (!s->node->coord) { + OSM_LOG(p_log, OSM_LOG_ERROR, + "Failed allocating coord - out of memory\n"); + OSM_LOG_EXIT(p_log); + return -1; + } + + for (i = 0; i < dimension; i++) + s->node->coord[i] = (sw == seed) ? 0 : LARGE; + + for (i = 0; i < s->node->num_links; i++) + if (s->node->axes[i] == 0) + unassigned_axes++; + else + assigned_axes++; + } + + OSM_LOG(p_log, OSM_LOG_DEBUG, "%d/%d unassigned/assigned axes\n", + unassigned_axes, assigned_axes); + + do { + change = 0; + + for (sw = 0; sw < num_switches; sw++) { + s = p_lash->switches[sw]; + + if (s->node->coord[0] == LARGE) + continue; + + for (j = 0; j < s->node->num_links; j++) { + if (!s->node->axes[j]) + continue; + + s1 = p_lash->switches[s->node->links[j]->switch_id]; + + for (k = 0; k < dimension; k++) { + int coord = s->node->coord[k]; + unsigned axis = s->node->axes[j] - 1; + + if (k == axis/2) + coord += (axis & 1)? -1 : +1; + + if (ltmag(coord, s1->node->coord[k])) { + s1->node->coord[k] = coord; + change++; + } + } + } + } + } while (change); + + OSM_LOG_EXIT(p_log); + return 0; +} + +/* + * measure geometry + */ +static int measure_geometry(lash_t *p_lash, mesh_t *mesh) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + int i, j; + int sw; + switch_t *s; + int dimension = mesh->dimension; + int num_switches = p_lash->num_switches; + int max[MAX_DIMENSION]; + int min[MAX_DIMENSION]; + int size[MAX_DIMENSION]; + int max_size; + int max_index; + + OSM_LOG_ENTER(p_log); + + mesh->size = calloc(dimension, sizeof(int)); + if (!mesh->size) { + OSM_LOG(p_log, OSM_LOG_ERROR, + "Failed allocating size - out of memory\n"); + OSM_LOG_EXIT(p_log); + return -1; + } + + for (i = 0; i < dimension; i++) { + max[i] = -LARGE; + min[i] = LARGE; + } + + for (sw = 0; sw < num_switches; sw++) { + s = p_lash->switches[sw]; + + for (i = 0; i < dimension; i++) { + if (s->node->coord[i] == LARGE) + continue; + if (s->node->coord[i] > max[i]) + max[i] = s->node->coord[i]; + if 
(s->node->coord[i] < min[i]) + min[i] = s->node->coord[i]; + } + } + + for (i = 0; i < dimension; i++) + mesh->size[i] = size[i] = max[i] - min[i] + 1; + + /* + * find an order of dimensions that places largest + * sizes first since this seems to work best with LASH + */ + for (j = 0; j < dimension; j++) { + max_size = -1; + max_index = -1; + + for (i = 0; i < dimension; i++) { + if (size[i] > max_size) { + max_size = size[i]; + max_index = i; + } + } + + mesh->dim_order[j] = max_index; + size[max_index] = -1; + } + + OSM_LOG_EXIT(p_log); + return 0; +} + +/* + * reorder links + */ +static int reorder_links(lash_t *p_lash, mesh_t *mesh) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + int sw; + int num_switches = p_lash->num_switches; + + OSM_LOG_ENTER(p_log); + + for (sw = 0; sw < num_switches; sw++) { + if (reorder_node_links(p_lash, mesh, sw)) { + OSM_LOG_EXIT(p_log); + return -1; + } + } + + OSM_LOG_EXIT(p_log); + return 0; +} + +/* + * compare two switches in a sort + */ +static int compare_switches(const void *p1, const void *p2) +{ + const comp_t *cp1 = p1, *cp2 = p2; + const sort_ctx_t *ctx = &cp1->ctx; + switch_t *s1 = ctx->p_lash->switches[cp1->index]; + switch_t *s2 = ctx->p_lash->switches[cp2->index]; + int i, j; + int ret; + + for (i = 0; i < ctx->mesh->dimension; i++) { + j = ctx->mesh->dim_order[i]; + ret = s1->node->coord[j] - s2->node->coord[j]; + if (ret) + return ret; + } + + return 0; +} + +/* + * sort_switches - reorder switch array + */ +static void sort_switches(lash_t *p_lash, mesh_t *mesh) +{ + unsigned int i, j; + unsigned int num_switches = p_lash->num_switches; + comp_t *comp; + int *reverse; + switch_t *s; + switch_t **switches; + + comp = malloc(num_switches * sizeof(comp_t)); + reverse = malloc(num_switches * sizeof(int)); + switches = malloc(num_switches * sizeof(switch_t *)); + if (!comp || !reverse || !switches) { + OSM_LOG(&p_lash->p_osm->log, OSM_LOG_ERROR, + "Failed memory allocation - switches not sorted!\n"); + goto Exit; + } + 
+ for (i = 0; i < num_switches; i++) { + comp[i].index = i; + comp[i].ctx.mesh = mesh; + comp[i].ctx.p_lash = p_lash; + } + + qsort(comp, num_switches, sizeof(comp_t), compare_switches); + + for (i = 0; i < num_switches; i++) + reverse[comp[i].index] = i; + + for (i = 0; i < num_switches; i++) { + s = p_lash->switches[comp[i].index]; + switches[i] = s; + s->id = i; + for (j = 0; j < s->node->num_links; j++) + s->node->links[j]->switch_id = + reverse[s->node->links[j]->switch_id]; + } + + for (i = 0; i < num_switches; i++) + p_lash->switches[i] = switches[i]; + +Exit: + if (switches) + free(switches); + if (comp) + free(comp); + if (reverse) + free(reverse); +} + +/* + * osm_mesh_delete - free per mesh resources + */ +static void mesh_delete(mesh_t *mesh) +{ + if (mesh) { + if (mesh->class_type) + free(mesh->class_type); + + if (mesh->class_count) + free(mesh->class_count); + + if (mesh->size) + free(mesh->size); + + free(mesh); + } +} + +/* + * osm_mesh_create - allocate per mesh resources + */ +static mesh_t *mesh_create(lash_t *p_lash) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + mesh_t *mesh; + + if(!(mesh = calloc(1, sizeof(mesh_t)))) + goto err; + + if (!(mesh->class_type = calloc(p_lash->num_switches, sizeof(int)))) + goto err; + + if (!(mesh->class_count = calloc(p_lash->num_switches, sizeof(int)))) + goto err; + + return mesh; + +err: + mesh_delete(mesh); + OSM_LOG(p_log, OSM_LOG_ERROR, + "Failed allocating mesh - out of memory\n"); + return NULL; +} + +/* + * osm_mesh_node_delete - cleanup per switch resources + */ +void osm_mesh_node_delete(lash_t *p_lash, switch_t *sw) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + unsigned i; + mesh_node_t *node = sw->node; + unsigned num_ports = sw->p_sw->num_ports; + + OSM_LOG_ENTER(p_log); + + if (node) { + for (i = 0; i < num_ports; i++) + if (node->links[i]) + free(node->links[i]); + + if (node->poly) + free(node->poly); + + if (node->matrix) { + for (i = 0; i < node->num_links; i++) { + if (node->matrix[i]) + 
free(node->matrix[i]); + } + free(node->matrix); + } + + if (node->axes) + free(node->axes); + + if (node->coord) + free(node->coord); + + free(node); + + sw->node = NULL; + } + + OSM_LOG_EXIT(p_log); +} + +/* + * osm_mesh_node_create - allocate per switch resources + */ +int osm_mesh_node_create(lash_t *p_lash, switch_t *sw) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + unsigned i; + mesh_node_t *node; + unsigned num_ports = sw->p_sw->num_ports; + + OSM_LOG_ENTER(p_log); + + if (!(node = sw->node = calloc(1, sizeof(mesh_node_t) + num_ports * sizeof(link_t *)))) + goto err; + + for (i = 0; i < num_ports; i++) + if (!(node->links[i] = calloc(1, sizeof(link_t) + num_ports * sizeof(int)))) + goto err; + + if (!(node->axes = calloc(num_ports, sizeof(int)))) + goto err; + + for (i = 0; i < num_ports; i++) { + node->links[i]->switch_id = NONE; + } + + OSM_LOG_EXIT(p_log); + return 0; + +err: + osm_mesh_node_delete(p_lash, sw); + OSM_LOG(p_log, OSM_LOG_ERROR, + "Failed allocating mesh node - out of memory\n"); + OSM_LOG_EXIT(p_log); + return -1; +} + +static void dump_mesh(lash_t *p_lash) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + int sw; + int num_switches = p_lash->num_switches; + int dimension; + int i, j, k, n; + switch_t *s, *s2; + char buf[256]; + + OSM_LOG_ENTER(p_log); + + for (sw = 0; sw < num_switches; sw++) { + s = p_lash->switches[sw]; + dimension = s->node->dimension; + n = sprintf(buf, "["); + for (i = 0; i < dimension; i++) { + n += snprintf(buf + n, sizeof(buf) - n, + "%2d", s->node->coord[i]); + if (n > sizeof(buf)) + n = sizeof(buf); + if (i != dimension - 1) { + n += snprintf(buf + n, sizeof(buf) - n, "%s", ","); + if (n > sizeof(buf)) + n = sizeof(buf); + } + } + n += snprintf(buf + n, sizeof(buf) - n, "]"); + if (n > sizeof(buf)) + n = sizeof(buf); + for (j = 0; j < s->node->num_links; j++) { + s2 = p_lash->switches[s->node->links[j]->switch_id]; + n += snprintf(buf + n, sizeof(buf) - n, " [%d]->[", j); + if (n > sizeof(buf)) + n = sizeof(buf); + 
for (k = 0; k < dimension; k++) { + n += snprintf(buf + n, sizeof(buf) - n, "%2d", + s2->node->coord[k]); + if (n > sizeof(buf)) + n = sizeof(buf); + if (k != dimension - 1) { + n += snprintf(buf + n, sizeof(buf) - n, + ","); + if (n > sizeof(buf)) + n = sizeof(buf); + } + } + n += snprintf(buf + n, sizeof(buf) - n, "]"); + if (n > sizeof(buf)) + n = sizeof(buf); + } + OSM_LOG(p_log, OSM_LOG_DEBUG, "%s\n", buf); + } + + OSM_LOG_EXIT(p_log); +} + +/* + * osm_do_mesh_analysis + */ +int osm_do_mesh_analysis(lash_t *p_lash) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + mesh_t *mesh; + int max_class_num = 0; + int max_class_type = -1; + int i; + switch_t *s; + char buf[256], *p; + + OSM_LOG_ENTER(p_log); + + mesh = mesh_create(p_lash); + if (!mesh) + goto err; + + if (get_local_geometry(p_lash, mesh)) + goto err; + + if (mesh->num_class == 0) { + OSM_LOG(p_log, OSM_LOG_INFO, + "found no likely mesh nodes - done\n"); + goto done; + } + + /* + * find dominant switch class + */ + OSM_LOG(p_log, OSM_LOG_INFO, "found %d node class%s\n", + mesh->num_class, (mesh->num_class == 1) ? "" : "es"); + for (i = 0; i < mesh->num_class; i++) { + OSM_LOG(p_log, OSM_LOG_INFO, + "class[%d] has %d members with type = %d\n", + i, mesh->class_count[i], + p_lash->switches[mesh->class_type[i]]->node->type); + if (mesh->class_count[i] > max_class_num) { + max_class_num = mesh->class_count[i]; + max_class_type = mesh->class_type[i]; + } + } + + s = p_lash->switches[max_class_type]; + + p = buf; + p += sprintf(p, "%snode shape is ", + (mesh->num_class == 1) ? "" : "most common "); + + if (s->node->type) { + const struct mesh_info *t = &mesh_info[s->node->type]; + + for (i = 0; i < t->dimension; i++) { + p += sprintf(p, "%s%d%s", i? " x " : "", t->size[i], + (t->size[i] == 6)? 
"+" : ""); + } + p += sprintf(p, " mesh\n"); + + mesh->dimension = t->dimension; + } else { + p += sprintf(p, "unknown geometry\n"); + } + + OSM_LOG(p_log, OSM_LOG_INFO, "%s", buf); + + OSM_LOG(p_log, OSM_LOG_INFO, "poly = %s\n", + poly_print(s->node->num_links, s->node->poly)); + + if (s->node->type) { + make_geometry(p_lash, max_class_type); + + if (make_coord(p_lash, mesh, max_class_type)) + goto err; + + if (measure_geometry(p_lash, mesh)) + goto err; + + if (reorder_links(p_lash, mesh)) + goto err; + + sort_switches(p_lash, mesh); + + p = buf; + p += sprintf(p, "found "); + for (i = 0; i < mesh->dimension; i++) + p += sprintf(p, "%s%d", i? " x " : "", mesh->size[i]); + p += sprintf(p, " mesh\n"); + + OSM_LOG(p_log, OSM_LOG_INFO, "%s", buf); + } + + if (OSM_LOG_IS_ACTIVE_V2(p_log, OSM_LOG_DEBUG)) + dump_mesh(p_lash); + +done: + mesh_delete(mesh); + OSM_LOG_EXIT(p_log); + return 0; + +err: + mesh_delete(mesh); + OSM_LOG_EXIT(p_log); + return -1; +} diff --git a/opensm/osm_mlnx_ext_port_info_rcv.c b/opensm/osm_mlnx_ext_port_info_rcv.c new file mode 100644 index 0000000..e4f8521 --- /dev/null +++ b/opensm/osm_mlnx_ext_port_info_rcv.c @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2011 Mellanox Technologies LTD. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_mlnx_epi_rcv_t. + * This object represents the MLNX ExtendedPortInfo Receiver object. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_MLNX_EXT_PORT_INFO_RCV_C +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void osm_mlnx_epi_rcv_process(IN void *context, IN void *data) +{ + osm_sm_t *sm = context; + osm_madw_t *p_madw = data; + ib_mlnx_ext_port_info_t *p_pi; + ib_smp_t *p_smp; + osm_port_t *p_port; + osm_physp_t *p_physp; + osm_node_t *p_node; + osm_pi_context_t *p_context; + ib_net64_t port_guid, node_guid; + uint8_t port_num, portnum, start_port = 1; + + CL_ASSERT(sm); + + OSM_LOG_ENTER(sm->p_log); + + CL_ASSERT(p_madw); + + p_smp = osm_madw_get_smp_ptr(p_madw); + p_context = osm_madw_get_pi_context_ptr(p_madw); + p_pi = ib_smp_get_payload_ptr(p_smp); + + CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO); + + port_num = (uint8_t) cl_ntoh32(p_smp->attr_mod); + + port_guid = p_context->port_guid; + node_guid = p_context->node_guid; + + 
osm_dump_mlnx_ext_port_info_v2(sm->p_log, node_guid, port_guid, port_num, + p_pi, FILE_ID, OSM_LOG_DEBUG); + + CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); + p_port = osm_get_port_by_guid(sm->p_subn, port_guid); + if (!p_port) { + CL_PLOCK_RELEASE(sm->p_lock); + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F06: " + "No port object for port with GUID 0x%" PRIx64 + "\n\t\t\t\tfor parent node GUID 0x%" PRIx64 + ", TID 0x%" PRIx64 "\n", + cl_ntoh64(port_guid), + cl_ntoh64(node_guid), cl_ntoh64(p_smp->trans_id)); + goto Exit; + } + + p_node = p_port->p_node; + CL_ASSERT(p_node); + + if (!(cl_ntoh16(p_smp->status) & 0x7fff)) { + if (port_num != 255) { + p_physp = osm_node_get_physp_ptr(p_node, port_num); + CL_ASSERT(p_physp); + p_physp->ext_port_info = *p_pi; + } else { + /* Handle all ports on set/set resp */ + if (p_node->sw && + ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info)) + start_port = 0; + + for (portnum = start_port; + portnum < osm_node_get_num_physp(p_node); + portnum++) { + p_physp = osm_node_get_physp_ptr(p_node, portnum); + CL_ASSERT(p_physp); + p_physp->ext_port_info = *p_pi; + } + } + } else + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "MAD status 0x%x received\n", + cl_ntoh16(p_smp->status) & 0x7fff); + + CL_PLOCK_RELEASE(sm->p_lock); + +Exit: + /* + Release the lock before jumping here!! + */ + OSM_LOG_EXIT(sm->p_log); +} diff --git a/opensm/osm_mtree.c b/opensm/osm_mtree.c new file mode 100644 index 0000000..7cc0422 --- /dev/null +++ b/opensm/osm_mtree.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_mtree_node_t. + * This file implements the Multicast Tree object. 
+ */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#define FILE_ID OSM_FILE_MTREE_C +#include +#include + +osm_mtree_node_t *osm_mtree_node_new(IN const osm_switch_t * p_sw) +{ + osm_mtree_node_t *p_mtn; + uint32_t i; + + p_mtn = malloc(sizeof(osm_mtree_node_t) + + sizeof(void *) * (p_sw->num_ports - 1)); + if (!p_mtn) + return NULL; + + memset(p_mtn, 0, sizeof(*p_mtn)); + p_mtn->p_sw = p_sw; + p_mtn->max_children = p_sw->num_ports; + for (i = 0; i < p_mtn->max_children; i++) + p_mtn->child_array[i] = NULL; + + return p_mtn; +} + +void osm_mtree_destroy(IN osm_mtree_node_t * p_mtn) +{ + uint8_t i; + + if (p_mtn == NULL) + return; + + for (i = 0; i < p_mtn->max_children; i++) + if ((p_mtn->child_array[i] != NULL) && + (p_mtn->child_array[i] != OSM_MTREE_LEAF)) + osm_mtree_destroy(p_mtn->child_array[i]); + + free(p_mtn); +} + +#if 0 +static void mtree_dump(IN osm_mtree_node_t * p_mtn) +{ + uint32_t i; + + if (p_mtn == NULL) + return; + + printf("GUID:0x%016" PRIx64 " max_children:%u\n", + cl_ntoh64(p_mtn->p_sw->p_node->node_info.node_guid), + p_mtn->max_children); + if (p_mtn->child_array != NULL) { + for (i = 0; i < p_mtn->max_children; i++) { + printf("i=%d\n", i); + if ((p_mtn->child_array[i] != NULL) + && (p_mtn->child_array[i] != OSM_MTREE_LEAF)) + mtree_dump(p_mtn->child_array[i]); + } + } +} +#endif diff --git a/opensm/osm_multicast.c b/opensm/osm_multicast.c new file mode 100644 index 0000000..38f4a68 --- /dev/null +++ b/opensm/osm_multicast.c @@ -0,0 +1,460 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of multicast functions. 
+ */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#define FILE_ID OSM_FILE_MULTICAST_C +#include +#include +#include +#include +#include + +static osm_mgrp_box_t *mgrp_box_new(uint16_t mlid) +{ + osm_mgrp_box_t *mbox = malloc(sizeof(*mbox)); + if (!mbox) + return NULL; + + memset(mbox, 0, sizeof(*mbox)); + mbox->mlid = mlid; + cl_qlist_init(&mbox->mgrp_list); + + return mbox; +} + +void mgrp_box_delete(osm_mgrp_box_t *mbox) +{ + osm_mtree_destroy(mbox->root); + free(mbox); +} + +void mgrp_delete(IN osm_mgrp_t * p_mgrp) +{ + osm_mcm_alias_guid_t *p_mcm_alias_guid, *p_next_mcm_alias_guid; + osm_mcm_port_t *p_mcm_port, *p_next_mcm_port; + + CL_ASSERT(p_mgrp); + + p_next_mcm_alias_guid = + (osm_mcm_alias_guid_t *) cl_qmap_head(&p_mgrp->mcm_alias_port_tbl); + while (p_next_mcm_alias_guid != + (osm_mcm_alias_guid_t *) cl_qmap_end(&p_mgrp->mcm_alias_port_tbl)) { + p_mcm_alias_guid = p_next_mcm_alias_guid; + p_next_mcm_alias_guid = + (osm_mcm_alias_guid_t *) cl_qmap_next(&p_mcm_alias_guid->map_item); + osm_mcm_alias_guid_delete(&p_mcm_alias_guid); + } + + p_next_mcm_port = + (osm_mcm_port_t *) cl_qmap_head(&p_mgrp->mcm_port_tbl); + while (p_next_mcm_port != + (osm_mcm_port_t *) cl_qmap_end(&p_mgrp->mcm_port_tbl)) { + p_mcm_port = p_next_mcm_port; + p_next_mcm_port = + (osm_mcm_port_t *) cl_qmap_next(&p_mcm_port->map_item); + osm_mcm_port_delete(p_mcm_port); + } + + free(p_mgrp); +} + +void osm_mgrp_box_delete(osm_mgrp_box_t *mbox) +{ + osm_mgrp_t *mgrp; + + while (cl_qlist_count(&mbox->mgrp_list)) { + mgrp = cl_item_obj(cl_qlist_remove_head(&mbox->mgrp_list), + mgrp, list_item); + mgrp_delete(mgrp); + } + mgrp_box_delete(mbox); +} + +osm_mgrp_t *osm_mgrp_new(IN osm_subn_t * subn, IN ib_net16_t mlid, + IN ib_member_rec_t * mcmr) +{ + osm_mgrp_t *p_mgrp; + osm_mgrp_box_t *mbox; + + p_mgrp = (osm_mgrp_t *) malloc(sizeof(*p_mgrp)); + if (!p_mgrp) + return NULL; + + memset(p_mgrp, 0, sizeof(*p_mgrp)); + 
cl_qmap_init(&p_mgrp->mcm_port_tbl); + cl_qmap_init(&p_mgrp->mcm_alias_port_tbl); + p_mgrp->mlid = mlid; + p_mgrp->mcmember_rec = *mcmr; + + mbox = osm_get_mbox_by_mlid(subn, p_mgrp->mlid); + if (!mbox && !(mbox = mgrp_box_new(cl_ntoh16(p_mgrp->mlid)))) { + free(p_mgrp); + return NULL; + } + + cl_qlist_insert_tail(&mbox->mgrp_list, &p_mgrp->list_item); + subn->mboxes[mbox->mlid - IB_LID_MCAST_START_HO] = mbox; + + cl_fmap_insert(&subn->mgrp_mgid_tbl, &p_mgrp->mcmember_rec.mgid, + &p_mgrp->map_item); + + subn->p_osm->sa.dirty = TRUE; + return p_mgrp; +} + +void osm_mgrp_cleanup(osm_subn_t * subn, osm_mgrp_t * mgrp) +{ + osm_mgrp_box_t *mbox; + osm_mcm_alias_guid_t *mcm_alias_guid; + osm_mcm_port_t *mcm_port; + + if (mgrp->full_members) + return; + + while (cl_qmap_count(&mgrp->mcm_alias_port_tbl)) { + mcm_alias_guid = (osm_mcm_alias_guid_t *) cl_qmap_head(&mgrp->mcm_alias_port_tbl); + cl_qmap_remove_item(&mgrp->mcm_alias_port_tbl, &mcm_alias_guid->map_item); + osm_mcm_alias_guid_delete(&mcm_alias_guid); + } + + while (cl_qmap_count(&mgrp->mcm_port_tbl)) { + mcm_port = (osm_mcm_port_t *) cl_qmap_head(&mgrp->mcm_port_tbl); + cl_qmap_remove_item(&mgrp->mcm_port_tbl, &mcm_port->map_item); + cl_qlist_remove_item(&mcm_port->port->mcm_list, + &mcm_port->list_item); + osm_mcm_port_delete(mcm_port); + } + + if (mgrp->well_known) + return; + + cl_fmap_remove_item(&subn->mgrp_mgid_tbl, &mgrp->map_item); + + mbox = osm_get_mbox_by_mlid(subn, mgrp->mlid); + cl_qlist_remove_item(&mbox->mgrp_list, &mgrp->list_item); + if (cl_is_qlist_empty(&mbox->mgrp_list)) { + subn->mboxes[cl_ntoh16(mgrp->mlid) - IB_LID_MCAST_START_HO] = NULL; + mgrp_box_delete(mbox); + } + free(mgrp); + + subn->p_osm->sa.dirty = TRUE; +} + +static void mgrp_send_notice(osm_subn_t * subn, osm_log_t * log, + osm_mgrp_t * mgrp, unsigned num) +{ + ib_mad_notice_attr_t notice; + ib_api_status_t status; + + notice.generic_type = 0x80 | IB_NOTICE_TYPE_SUBN_MGMT; /* is generic subn mgt type */ + 
ib_notice_set_prod_type_ho(¬ice, 4); /* A Class Manager generator */ + notice.g_or_v.generic.trap_num = CL_HTON16(num); + /* The sm_base_lid is saved in network order already. */ + notice.issuer_lid = subn->sm_base_lid; + /* following o14-12.1.11 and table 120 p726 */ + /* we need to provide the MGID */ + memcpy(¬ice.data_details.ntc_64_67.gid, + &mgrp->mcmember_rec.mgid, sizeof(ib_gid_t)); + + /* According to page 653 - the issuer gid in this case of trap + is the SM gid, since the SM is the initiator of this trap. */ + notice.issuer_gid.unicast.prefix = subn->opt.subnet_prefix; + notice.issuer_gid.unicast.interface_id = subn->sm_port_guid; + + if ((status = osm_report_notice(log, subn, ¬ice))) + OSM_LOG(log, OSM_LOG_ERROR, "ERR 7601: " + "Error sending trap reports (%s)\n", + ib_get_err_str(status)); +} + +static boolean_t is_qmap_empty_for_port(IN const cl_qmap_t * const p_map, + IN const osm_port_t *port) +{ + size_t count = 0; + cl_map_item_t *item; + osm_mcm_alias_guid_t *mcm_alias_guid; + + for (item = cl_qmap_head(p_map); item != cl_qmap_end(p_map); + item = cl_qmap_next(item)) { + mcm_alias_guid = (osm_mcm_alias_guid_t *) item; + if (mcm_alias_guid->p_base_mcm_port->port == port) { + count++; + break; + } + } + + return (count == 0); +} + +static boolean_t is_qmap_empty_for_mcm_port(IN const cl_qmap_t * const p_map, + IN const osm_mcm_port_t *mcm_port) +{ + size_t count = 0; + cl_map_item_t *item; + osm_mcm_alias_guid_t *mcm_alias_guid; + + for (item = cl_qmap_head(p_map); item != cl_qmap_end(p_map); + item = cl_qmap_next(item)) { + mcm_alias_guid = (osm_mcm_alias_guid_t *) item; + if (mcm_alias_guid->p_base_mcm_port == mcm_port) { + count++; + break; + } + } + + return (count == 0); +} +static osm_mcm_alias_guid_t *insert_alias_guid(IN osm_mgrp_t * mgrp, + IN osm_mcm_alias_guid_t * p_mcm_alias_guid) +{ + osm_mcm_alias_guid_t *p_mcm_alias_guid_check; + + /* insert into mcm alias guid table */ + p_mcm_alias_guid_check = + (osm_mcm_alias_guid_t *) 
cl_qmap_insert(&mgrp->mcm_alias_port_tbl, + p_mcm_alias_guid->alias_guid, + &p_mcm_alias_guid->map_item); + if (p_mcm_alias_guid_check != (osm_mcm_alias_guid_t *) &p_mcm_alias_guid->map_item) { + /* alias GUID is a duplicate */ + osm_mcm_alias_guid_delete(&p_mcm_alias_guid); + return p_mcm_alias_guid_check; + } + return NULL; +} + +osm_mcm_port_t *osm_mgrp_add_port(IN osm_subn_t * subn, osm_log_t * log, + IN osm_mgrp_t * mgrp, osm_port_t *port, + IN ib_member_rec_t *mcmr, IN boolean_t proxy) +{ + osm_mcm_port_t *mcm_port; + osm_mcm_alias_guid_t *p_mcm_alias_guid, *p_mcm_alias_guid_check; + cl_map_item_t *prev_item; + uint8_t prev_join_state = 0, join_state = mcmr->scope_state; + uint8_t prev_scope, full_join_state; + + if (OSM_LOG_IS_ACTIVE_V2(log, OSM_LOG_VERBOSE)) { + char gid_str[INET6_ADDRSTRLEN]; + OSM_LOG(log, OSM_LOG_VERBOSE, "GUID 0x%016" PRIx64 + " Port 0x%016" PRIx64 " joining " + "MC group %s (mlid 0x%x)\n", + cl_ntoh64(mcmr->port_gid.unicast.interface_id), + cl_ntoh64(port->guid), + inet_ntop(AF_INET6, mgrp->mcmember_rec.mgid.raw, + gid_str, sizeof(gid_str)), + cl_ntoh16(mgrp->mlid)); + } + + mcm_port = osm_mcm_port_new(port, mgrp); + if (!mcm_port) + return NULL; + + p_mcm_alias_guid = osm_mcm_alias_guid_new(mcm_port, mcmr, proxy); + if (!p_mcm_alias_guid) { + osm_mcm_port_delete(mcm_port); + return NULL; + } + + /* + prev_item = cl_qmap_insert(...) + Pointer to the item in the map with the specified key. If insertion + was successful, this is the pointer to the item. If an item with the + specified key already exists in the map, the pointer to that item is + returned. 
+ */ + prev_item = cl_qmap_insert(&mgrp->mcm_port_tbl, port->guid, + &mcm_port->map_item); + + if (prev_item != &mcm_port->map_item) { /* mcm port already exists */ + osm_mcm_port_delete(mcm_port); + mcm_port = (osm_mcm_port_t *) prev_item; + + p_mcm_alias_guid->p_base_mcm_port = (osm_mcm_port_t *) prev_item; + p_mcm_alias_guid_check = insert_alias_guid(mgrp, p_mcm_alias_guid); + if (p_mcm_alias_guid_check) { /* alias GUID already exists */ + p_mcm_alias_guid = p_mcm_alias_guid_check; + ib_member_get_scope_state(p_mcm_alias_guid->scope_state, + &prev_scope, &prev_join_state); + p_mcm_alias_guid->scope_state = + ib_member_set_scope_state(prev_scope, + prev_join_state | join_state); + } + } else { + insert_alias_guid(mgrp, p_mcm_alias_guid); + cl_qlist_insert_tail(&port->mcm_list, &mcm_port->list_item); + osm_sm_reroute_mlid(&subn->p_osm->sm, mgrp->mlid); + } + + /* o15.0.1.11: copy the join state */ + mcmr->scope_state = p_mcm_alias_guid->scope_state; + + full_join_state = IB_JOIN_STATE_FULL | IB_JOIN_STATE_SEND_ONLY_FULL; + if ((join_state & full_join_state) && !(prev_join_state & full_join_state) && + ++mgrp->full_members == 1) + mgrp_send_notice(subn, log, mgrp, SM_MGID_CREATED_TRAP); /* 66 */ + + subn->p_osm->sa.dirty = TRUE; + return mcm_port; +} + +boolean_t osm_mgrp_remove_port(osm_subn_t * subn, osm_log_t * log, osm_mgrp_t * mgrp, + osm_mcm_alias_guid_t * mcm_alias_guid, + ib_member_rec_t *mcmr) +{ + uint8_t join_state = mcmr->scope_state & 0xf; + uint8_t port_join_state, new_join_state, full_join_state; + boolean_t mgrp_deleted = FALSE; + + /* + * according to the same o15-0.1.14 we get the stored + * JoinState and the request JoinState and they must be + * opposite to leave - otherwise just update it + */ + port_join_state = mcm_alias_guid->scope_state & 0x0F; + new_join_state = port_join_state & ~join_state; + + if (OSM_LOG_IS_ACTIVE_V2(log, OSM_LOG_VERBOSE)) { + char gid_str[INET6_ADDRSTRLEN]; + OSM_LOG(log, OSM_LOG_VERBOSE, + "GUID 0x%" PRIx64 " Port 
0x%" PRIx64 + " leaving MC group %s (mlid 0x%x)\n", + cl_ntoh64(mcm_alias_guid->alias_guid), + cl_ntoh64(mcm_alias_guid->p_base_mcm_port->port->guid), + inet_ntop(AF_INET6, mgrp->mcmember_rec.mgid.raw, + gid_str, sizeof(gid_str)), + cl_ntoh16(mgrp->mlid)); + } + + if (new_join_state & IB_JOIN_STATE_FULL || + (new_join_state && + (mgrp->full_members > (port_join_state & IB_JOIN_STATE_FULL) ? 1 : 0))) { + mcm_alias_guid->scope_state = + new_join_state | (mcm_alias_guid->scope_state & 0xf0); + OSM_LOG(log, OSM_LOG_DEBUG, + "updating GUID 0x%" PRIx64 " port 0x%" PRIx64 + " JoinState 0x%x -> 0x%x\n", + cl_ntoh64(mcm_alias_guid->alias_guid), + cl_ntoh64(mcm_alias_guid->p_base_mcm_port->port->guid), + port_join_state, new_join_state); + mcmr->scope_state = mcm_alias_guid->scope_state; + } else { + mcmr->scope_state = mcm_alias_guid->scope_state & 0xf0; + OSM_LOG(log, OSM_LOG_DEBUG, "removing alias GUID 0x%" PRIx64 "\n", + cl_ntoh64(mcm_alias_guid->alias_guid)); + cl_qmap_remove_item(&mgrp->mcm_alias_port_tbl, + &mcm_alias_guid->map_item); + if (is_qmap_empty_for_port(&mgrp->mcm_alias_port_tbl, + mcm_alias_guid->p_base_mcm_port->port)) { /* last alias in mcast group for this port */ + OSM_LOG(log, OSM_LOG_DEBUG, "removing port 0x%" PRIx64 "\n", + cl_ntoh64(mcm_alias_guid->p_base_mcm_port->port->guid)); + cl_qmap_remove_item(&mgrp->mcm_port_tbl, + &mcm_alias_guid->p_base_mcm_port->map_item); + cl_qlist_remove_item(&mcm_alias_guid->p_base_mcm_port->port->mcm_list, + &mcm_alias_guid->p_base_mcm_port->list_item); + if (is_qmap_empty_for_mcm_port(&mgrp->mcm_alias_port_tbl, + mcm_alias_guid->p_base_mcm_port)) /* last alias in mcast group for this mcm port */ + osm_mcm_port_delete(mcm_alias_guid->p_base_mcm_port); + osm_sm_reroute_mlid(&subn->p_osm->sm, mgrp->mlid); + } + osm_mcm_alias_guid_delete(&mcm_alias_guid); + } + + full_join_state = IB_JOIN_STATE_FULL | IB_JOIN_STATE_SEND_ONLY_FULL; + + /* no more full members so the group will be deleted after re-route + but only if it 
is not a well known group */ + if ((port_join_state & full_join_state) && !(new_join_state & full_join_state) && + --mgrp->full_members == 0) { + mgrp_send_notice(subn, log, mgrp, SM_MGID_DESTROYED_TRAP); /* 67 */ + osm_mgrp_cleanup(subn, mgrp); + mgrp_deleted = TRUE; + } + + subn->p_osm->sa.dirty = TRUE; + + return (mgrp_deleted); +} + +void osm_mgrp_delete_port(osm_subn_t * subn, osm_log_t * log, osm_mgrp_t * mgrp, + osm_port_t * port) +{ + osm_mcm_alias_guid_t *mcm_alias_guid, *next_mcm_alias_guid; + ib_member_rec_t mcmrec; + boolean_t mgrp_deleted = FALSE; + + next_mcm_alias_guid = (osm_mcm_alias_guid_t *) cl_qmap_head(&mgrp->mcm_alias_port_tbl); + while (!mgrp_deleted && + next_mcm_alias_guid != (osm_mcm_alias_guid_t *) cl_qmap_end(&mgrp->mcm_alias_port_tbl)) { + mcm_alias_guid = next_mcm_alias_guid; + next_mcm_alias_guid = (osm_mcm_alias_guid_t *) cl_qmap_next(&next_mcm_alias_guid->map_item); + if (mcm_alias_guid->p_base_mcm_port->port == port) { + mcmrec.scope_state = 0xf; + mgrp_deleted = osm_mgrp_remove_port(subn, log, mgrp, mcm_alias_guid, + &mcmrec); + } + } +} + +osm_mcm_port_t *osm_mgrp_get_mcm_port(IN const osm_mgrp_t * p_mgrp, + IN ib_net64_t port_guid) +{ + cl_map_item_t *item = cl_qmap_get(&p_mgrp->mcm_port_tbl, port_guid); + if (item != cl_qmap_end(&p_mgrp->mcm_port_tbl)) + return (osm_mcm_port_t *) item; + return NULL; +} + +osm_mcm_alias_guid_t *osm_mgrp_get_mcm_alias_guid(IN const osm_mgrp_t * p_mgrp, + IN ib_net64_t port_guid) +{ + cl_map_item_t *item = cl_qmap_get(&p_mgrp->mcm_alias_port_tbl, + port_guid); + if (item != cl_qmap_end(&p_mgrp->mcm_alias_port_tbl)) + return (osm_mcm_alias_guid_t *) item; + return NULL; +} diff --git a/opensm/osm_node.c b/opensm/osm_node.c new file mode 100644 index 0000000..fff5936 --- /dev/null +++ b/opensm/osm_node.c @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved. 
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_node_t. + * This object represents an Infiniband Node. + * This object is part of the opensm family of objects. 
+ */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#define FILE_ID OSM_FILE_NODE_C +#include +#include + +void osm_node_init_physp(IN osm_node_t * p_node, uint8_t port_num, + IN const osm_madw_t * p_madw) +{ + ib_net64_t port_guid; + ib_smp_t *p_smp; + ib_node_info_t *p_ni; + + p_smp = osm_madw_get_smp_ptr(p_madw); + + p_ni = ib_smp_get_payload_ptr(p_smp); + port_guid = p_ni->port_guid; + + CL_ASSERT(port_num < p_node->physp_tbl_size); + + osm_physp_init(&p_node->physp_table[port_num], + port_guid, port_num, p_node, + osm_madw_get_bind_handle(p_madw), + p_smp->hop_count, p_smp->initial_path); +} + +osm_node_t *osm_node_new(IN const osm_madw_t * p_madw) +{ + osm_node_t *p_node; + ib_smp_t *p_smp; + ib_node_info_t *p_ni; + uint8_t i; + uint32_t size; + + p_smp = osm_madw_get_smp_ptr(p_madw); + p_ni = ib_smp_get_payload_ptr(p_smp); + + /* + The node object already contains one physical port object. + Therefore, subtract 1 from the number of physical ports + used by the switch. This is not done for CA's since they + need to occupy 1 more physp than they physically have since + we still reserve room for a "port 0". + */ + size = p_ni->num_ports; + + p_node = malloc(sizeof(*p_node) + sizeof(osm_physp_t) * size); + if (!p_node) + return NULL; + + memset(p_node, 0, sizeof(*p_node) + sizeof(osm_physp_t) * size); + p_node->node_info = *p_ni; + p_node->physp_tbl_size = size + 1; + + p_node->physp_discovered = malloc(sizeof(uint8_t) * p_node->physp_tbl_size); + if (!p_node->physp_discovered) { + free(p_node); + return NULL; + } + memset(p_node->physp_discovered, 0, sizeof(uint8_t) * p_node->physp_tbl_size); + /* + Construct Physical Port objects owned by this Node. + Then, initialize the Physical Port through with we + discovered this port. + For switches, all ports have the same GUID. + For CAs and routers, each port has a different GUID, so we only + know the GUID for the port that responded to our + Get(NodeInfo). 
+ */ + for (i = 0; i < p_node->physp_tbl_size; i++) + osm_physp_construct(&p_node->physp_table[i]); + + if (p_ni->node_type == IB_NODE_TYPE_SWITCH) + for (i = 0; i <= p_ni->num_ports; i++) + osm_node_init_physp(p_node, i, p_madw); + else + osm_node_init_physp(p_node, + ib_node_info_get_local_port_num(p_ni), + p_madw); + p_node->print_desc = strdup(OSM_NODE_DESC_UNKNOWN); + + return p_node; +} + +static void node_destroy(IN osm_node_t * p_node) +{ + uint16_t i; + + /* + Cleanup all physports + */ + for (i = 0; i < p_node->physp_tbl_size; i++) + osm_physp_destroy(&p_node->physp_table[i]); + + /* cleanup printable node_desc field */ + if (p_node->print_desc) + free(p_node->print_desc); + + /* cleanup physp_discovered array */ + free(p_node->physp_discovered); +} + +void osm_node_delete(IN OUT osm_node_t ** p_node) +{ + CL_ASSERT(p_node && *p_node); + node_destroy(*p_node); + free(*p_node); + *p_node = NULL; +} + +void osm_node_link(IN osm_node_t * p_node, IN uint8_t port_num, + IN osm_node_t * p_remote_node, IN uint8_t remote_port_num) +{ + osm_physp_t *p_physp; + osm_physp_t *p_remote_physp; + + p_physp = osm_node_get_physp_ptr(p_node, port_num); + p_remote_physp = osm_node_get_physp_ptr(p_remote_node, remote_port_num); + + if (p_physp->p_remote_physp) + p_physp->p_remote_physp->p_remote_physp = NULL; + if (p_remote_physp->p_remote_physp) + p_remote_physp->p_remote_physp->p_remote_physp = NULL; + + osm_physp_link(p_physp, p_remote_physp); +} + +void osm_node_unlink(IN osm_node_t * p_node, IN uint8_t port_num, + IN osm_node_t * p_remote_node, IN uint8_t remote_port_num) +{ + osm_physp_t *p_physp; + osm_physp_t *p_remote_physp; + + CL_ASSERT(port_num < p_node->physp_tbl_size); + CL_ASSERT(remote_port_num < p_remote_node->physp_tbl_size); + + if (osm_node_link_exists(p_node, port_num, + p_remote_node, remote_port_num)) { + + p_physp = osm_node_get_physp_ptr(p_node, port_num); + p_remote_physp = + osm_node_get_physp_ptr(p_remote_node, remote_port_num); + + 
osm_physp_unlink(p_physp, p_remote_physp); + } +} + +boolean_t osm_node_link_exists(IN osm_node_t * p_node, IN uint8_t port_num, + IN osm_node_t * p_remote_node, + IN uint8_t remote_port_num) +{ + osm_physp_t *p_physp; + osm_physp_t *p_remote_physp; + + CL_ASSERT(port_num < p_node->physp_tbl_size); + CL_ASSERT(remote_port_num < p_remote_node->physp_tbl_size); + + p_physp = osm_node_get_physp_ptr(p_node, port_num); + p_remote_physp = osm_node_get_physp_ptr(p_remote_node, remote_port_num); + + return osm_physp_link_exists(p_physp, p_remote_physp); +} + +boolean_t osm_node_link_has_valid_ports(IN osm_node_t * p_node, + IN uint8_t port_num, + IN osm_node_t * p_remote_node, + IN uint8_t remote_port_num) +{ + osm_physp_t *p_physp; + osm_physp_t *p_remote_physp; + + CL_ASSERT(port_num < p_node->physp_tbl_size); + CL_ASSERT(remote_port_num < p_remote_node->physp_tbl_size); + + p_physp = osm_node_get_physp_ptr(p_node, port_num); + p_remote_physp = osm_node_get_physp_ptr(p_remote_node, remote_port_num); + + return (p_physp && p_remote_physp); +} + +boolean_t osm_node_has_any_link(IN osm_node_t * p_node, IN uint8_t port_num) +{ + osm_physp_t *p_physp; + CL_ASSERT(port_num < p_node->physp_tbl_size); + p_physp = osm_node_get_physp_ptr(p_node, port_num); + return osm_physp_has_any_link(p_physp); +} + +osm_node_t *osm_node_get_remote_node(IN osm_node_t * p_node, + IN uint8_t port_num, + OUT uint8_t * p_remote_port_num) +{ + osm_physp_t *p_physp; + osm_physp_t *p_remote_physp; + + p_physp = osm_node_get_physp_ptr(p_node, port_num); + + if (!p_physp || !osm_physp_has_any_link(p_physp)) + return NULL; + + p_remote_physp = osm_physp_get_remote(p_physp); + if (p_remote_port_num) + *p_remote_port_num = osm_physp_get_port_num(p_remote_physp); + + return osm_physp_get_node_ptr(p_remote_physp); +} + +/********************************************************************** + The lock must be held before calling this function. 
+**********************************************************************/ +ib_net16_t osm_node_get_remote_base_lid(IN osm_node_t * p_node, + IN uint32_t port_num) +{ + osm_physp_t *p_physp; + osm_physp_t *p_remote_physp; + CL_ASSERT(port_num < p_node->physp_tbl_size); + + p_physp = osm_node_get_physp_ptr(p_node, port_num); + if (p_physp) { + p_remote_physp = osm_physp_get_remote(p_physp); + return osm_physp_get_base_lid(p_remote_physp); + } + + return 0; +} diff --git a/opensm/osm_node_desc_rcv.c b/opensm/osm_node_desc_rcv.c new file mode 100644 index 0000000..858ac45 --- /dev/null +++ b/opensm/osm_node_desc_rcv.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2007 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_nd_rcv_t. + * This object represents the NodeDescription Receiver object. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_NODE_DESC_RCV_C +#include +#include +#include +#include +#include + +static void nd_rcv_process_nd(IN osm_sm_t * sm, IN osm_node_t * p_node, + IN const ib_node_desc_t * p_nd) +{ + char *tmp_desc; + char print_desc[IB_NODE_DESCRIPTION_SIZE + 1]; + + OSM_LOG_ENTER(sm->p_log); + + memcpy(&p_node->node_desc.description, p_nd, sizeof(*p_nd)); + + /* also set up a printable version */ + memcpy(print_desc, p_nd, sizeof(*p_nd)); + print_desc[IB_NODE_DESCRIPTION_SIZE] = '\0'; + tmp_desc = remap_node_name(sm->p_subn->p_osm->node_name_map, + cl_ntoh64(osm_node_get_node_guid(p_node)), + print_desc); + + /* make a copy for this node to "own" */ + if (p_node->print_desc) + free(p_node->print_desc); + p_node->print_desc = tmp_desc; + +#ifdef ENABLE_OSM_PERF_MGR + /* update the perfmgr entry if available */ + osm_perfmgr_update_nodename(&sm->p_subn->p_osm->perfmgr, + cl_ntoh64(osm_node_get_node_guid(p_node)), + p_node->print_desc); +#endif /* ENABLE_OSM_PERF_MGR */ + + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Node 0x%" PRIx64 ", Description = %s\n", + cl_ntoh64(osm_node_get_node_guid(p_node)), p_node->print_desc); + + OSM_LOG_EXIT(sm->p_log); +} + +void osm_nd_rcv_process(IN void *context, IN void *data) +{ + osm_sm_t *sm = context; + osm_madw_t *p_madw = data; + ib_node_desc_t *p_nd; + ib_smp_t *p_smp; + osm_node_t *p_node; + ib_net64_t node_guid; + + CL_ASSERT(sm); + + 
OSM_LOG_ENTER(sm->p_log); + + CL_ASSERT(p_madw); + + p_smp = osm_madw_get_smp_ptr(p_madw); + if (ib_smp_get_status(p_smp)) { + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "MAD status 0x%x received\n", + cl_ntoh16(ib_smp_get_status(p_smp))); + goto Exit; + } + + p_nd = ib_smp_get_payload_ptr(p_smp); + + /* Acquire the node object and add the node description. */ + node_guid = osm_madw_get_nd_context_ptr(p_madw)->node_guid; + CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); + p_node = osm_get_node_by_guid(sm->p_subn, node_guid); + if (!p_node) + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0B01: " + "NodeDescription received for nonexistent node " + "0x%" PRIx64 "\n", cl_ntoh64(node_guid)); + else + nd_rcv_process_nd(sm, p_node, p_nd); + + CL_PLOCK_RELEASE(sm->p_lock); +Exit: + OSM_LOG_EXIT(sm->p_log); +} diff --git a/opensm/osm_node_info_rcv.c b/opensm/osm_node_info_rcv.c new file mode 100644 index 0000000..342f602 --- /dev/null +++ b/opensm/osm_node_info_rcv.c @@ -0,0 +1,972 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_ni_rcv_t. + * This object represents the NodeInfo Receiver object. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_NODE_INFO_RCV_C +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void report_duplicated_guid(IN osm_sm_t * sm, osm_physp_t * p_physp, + osm_node_t * p_neighbor_node, + const uint8_t port_num) +{ + osm_physp_t *p_old, *p_new; + osm_dr_path_t path; + + p_old = p_physp->p_remote_physp; + p_new = osm_node_get_physp_ptr(p_neighbor_node, port_num); + + OSM_LOG(sm->p_log, OSM_LOG_SYS | OSM_LOG_ERROR, "ERR 0D01: " + "Found duplicated node GUID.\n" + "Node 0x%" PRIx64 " port %u is reachable from remote node " + "0x%" PRIx64 " port %u and remote node 0x%" PRIx64 " port %u.\n" + "Paths are:\n", + cl_ntoh64(p_physp->p_node->node_info.node_guid), + p_physp->port_num, + p_old ? cl_ntoh64(p_old->p_node->node_info.node_guid) : 0, + p_old ? p_old->port_num : 0, + p_new ? cl_ntoh64(p_new->p_node->node_info.node_guid) : 0, + p_new ? 
p_new->port_num : 0); + + osm_dump_dr_path_v2(sm->p_log, osm_physp_get_dr_path_ptr(p_physp), + FILE_ID, OSM_LOG_ERROR); + + path = *osm_physp_get_dr_path_ptr(p_new); + if (osm_dr_path_extend(&path, port_num)) + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D05: " + "DR path with hop count %d couldn't be extended\n", + path.hop_count); + osm_dump_dr_path_v2(sm->p_log, &path, FILE_ID, OSM_LOG_ERROR); +} + +static void requery_dup_node_info(IN osm_sm_t * sm, osm_physp_t * p_physp, + unsigned count) +{ + osm_madw_context_t context; + osm_dr_path_t path; + cl_status_t status; + + if (!p_physp->p_remote_physp) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0D: " + "DR path couldn't be extended due to NULL remote physp\n"); + return; + } + + path = *osm_physp_get_dr_path_ptr(p_physp->p_remote_physp); + if (osm_dr_path_extend(&path, p_physp->p_remote_physp->port_num)) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D08: " + "DR path with hop count %d couldn't be extended\n", + path.hop_count); + return; + } + + context.ni_context.node_guid = + p_physp->p_remote_physp->p_node->node_info.port_guid; + context.ni_context.port_num = p_physp->p_remote_physp->port_num; + context.ni_context.dup_node_guid = p_physp->p_node->node_info.node_guid; + context.ni_context.dup_port_num = p_physp->port_num; + context.ni_context.dup_count = count; + + status = osm_req_get(sm, &path, IB_MAD_ATTR_NODE_INFO, 0, + TRUE, 0, 0, CL_DISP_MSGID_NONE, &context); + + if (status != IB_SUCCESS) + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D02: " + "Failure initiating NodeInfo request (%s)\n", + ib_get_err_str(status)); +} + +/********************************************************************** + The plock must be held before calling this function. 
+**********************************************************************/
+/*
+ * Record the link between port 'port_num' of 'p_node' and the neighbor
+ * port described by 'p_ni_context' (the node and port through which
+ * p_node was discovered).  Nothing is linked for the SM's own node
+ * (context GUID 0), when the neighbor node is gone, when the link
+ * already exists, or when the port is reported down after having been
+ * discovered.  A port that is already linked elsewhere triggers the
+ * duplicate-GUID handling before the new link is set.
+ */
+static void ni_rcv_set_links(IN osm_sm_t * sm, osm_node_t * p_node,
+			     const uint8_t port_num,
+			     const osm_ni_context_t * p_ni_context)
+{
+	osm_node_t *p_neighbor_node;
+	osm_physp_t *p_physp, *p_remote_physp;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	/*
+	   A special case exists in which the node we're trying to
+	   link is our own node.  In this case, the guid value in
+	   the ni_context will be zero.
+	 */
+	if (p_ni_context->node_guid == 0) {
+		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+			"Nothing to link for our own node 0x%" PRIx64 "\n",
+			cl_ntoh64(osm_node_get_node_guid(p_node)));
+		goto _exit;
+	}
+
+	p_neighbor_node = osm_get_node_by_guid(sm->p_subn,
+					       p_ni_context->node_guid);
+	if (PF(!p_neighbor_node)) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D10: "
+			"Unexpected removal of neighbor node 0x%" PRIx64 "\n",
+			cl_ntoh64(p_ni_context->node_guid));
+		goto _exit;
+	}
+
+	/* When setting the link, ports on both
+	   sides of the link should be initialized */
+	CL_ASSERT(osm_node_link_has_valid_ports(p_node, port_num,
+						p_neighbor_node,
+						p_ni_context->port_num));
+
+	if (osm_node_link_exists(p_node, port_num,
+				 p_neighbor_node, p_ni_context->port_num)) {
+		OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Link already exists\n");
+		goto _exit;
+	}
+
+	p_physp = osm_node_get_physp_ptr(p_node, port_num);
+	if (!p_physp) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD0E: "
+			"Failed to find physp for port %d of Node GUID 0x%"
+			PRIx64 "\n", port_num,
+			cl_ntoh64(osm_node_get_node_guid(p_node)));
+		goto _exit;
+	}
+
+	/*
+	 * If the port reports LINK_DOWN although it was already marked as
+	 * discovered, do not set the link between the ports: clear the
+	 * discovered flag and force a heavy sweep instead.
+	 */
+	if (osm_physp_get_port_state(p_physp) == IB_LINK_DOWN &&
+	    p_node->physp_discovered[port_num]) {
+		/* Link down on another side. Don't create a link */
+		p_node->physp_discovered[port_num] = 0;
+		sm->p_subn->force_heavy_sweep = TRUE;
+		goto _exit;
+	}
+
+	/*
+	 * The port already has a link to some other port.  This is handled
+	 * only when no heavy sweep is pending, and either on the first
+	 * occurrence (dup_count == 0) or when this response answers a
+	 * requery that was issued for exactly this node and port.
+	 */
+	if (osm_node_has_any_link(p_node, port_num) &&
+	    sm->p_subn->force_heavy_sweep == FALSE &&
+	    (!p_ni_context->dup_count ||
+	     (p_ni_context->dup_node_guid == osm_node_get_node_guid(p_node) &&
+	      p_ni_context->dup_port_num == port_num))) {
+		/*
+		   Uh oh...
+		   This could be reconnected ports, but also duplicated GUID
+		   (2 nodes have the same guid) or a 12x link with lane reversal
+		   that is not configured correctly.
+		   We will try to recover by querying NodeInfo again.
+		   In order to catch even fast port moving to new location(s)
+		   and back we will count up to 5.
+		   Some crazy reconnections (newly created switch loop right
+		   before targeted CA) will not be caught this way. So in worst
+		   case - report GUID duplication and request new discovery.
+		   When switch node is targeted NodeInfo querying will be done
+		   in opposite order, this is much stronger check, unfortunately
+		   it is impossible with CAs.
+		 */
+		p_physp = osm_node_get_physp_ptr(p_node, port_num);
+		if (!p_physp) {
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD0F: "
+				"Failed to find physp for port %d of Node GUID 0x%"
+				PRIx64 "\n", port_num,
+				cl_ntoh64(osm_node_get_node_guid(p_node)));
+			goto _exit;
+		}
+
+		/* give up after more than 5 requeries, otherwise ask again */
+		if (p_ni_context->dup_count > 5) {
+			report_duplicated_guid(sm, p_physp, p_neighbor_node,
+					       p_ni_context->port_num);
+			sm->p_subn->force_heavy_sweep = TRUE;
+		} else if (p_node->sw)
+			requery_dup_node_info(sm, p_physp->p_remote_physp,
+					      p_ni_context->dup_count + 1);
+		else
+			requery_dup_node_info(sm, p_physp,
+					      p_ni_context->dup_count + 1);
+	}
+
+	/*
+	   When there are only two nodes with exact same guids (connected back
+	   to back) - the previous check for duplicated guid will not catch
+	   them. But the link will be from the port to itself...
+	   Enhanced Port 0 is an exception to this
+	 */
+	if (osm_node_get_node_guid(p_node) == p_ni_context->node_guid &&
+	    port_num == p_ni_context->port_num &&
+	    port_num != 0 && cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) {
+		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+			"Duplicate GUID found by link from a port to itself:"
+			"node 0x%" PRIx64 ", port number %u\n",
+			cl_ntoh64(osm_node_get_node_guid(p_node)), port_num);
+		p_physp = osm_node_get_physp_ptr(p_node, port_num);
+		if (!p_physp) {
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD1D: "
+				"Failed to find physp for port %d of Node GUID 0x%"
+				PRIx64 "\n", port_num,
+				cl_ntoh64(osm_node_get_node_guid(p_node)));
+			goto _exit;
+		}
+
+		osm_dump_dr_path_v2(sm->p_log, osm_physp_get_dr_path_ptr(p_physp),
+				    FILE_ID, OSM_LOG_VERBOSE);
+
+		/* terminates the whole process when exit_on_fatal is set */
+		if (sm->p_subn->opt.exit_on_fatal == TRUE) {
+			osm_log_v2(sm->p_log, OSM_LOG_SYS, FILE_ID,
+				   "Errors on subnet. Duplicate GUID found "
+				   "by link from a port to itself. "
+				   "See verbose opensm.log for more details\n");
+			exit(1);
+		}
+	}
+
+	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+		"Creating new link between:\n\t\t\t\tnode 0x%" PRIx64
+		", port number %u and\n\t\t\t\tnode 0x%" PRIx64
+		", port number %u\n",
+		cl_ntoh64(osm_node_get_node_guid(p_node)), port_num,
+		cl_ntoh64(p_ni_context->node_guid), p_ni_context->port_num);
+
+	/* let the unicast cache see the new link while it is still valid */
+	if (sm->ucast_mgr.cache_valid)
+		osm_ucast_cache_check_new_link(&sm->ucast_mgr, p_node, port_num,
+					       p_neighbor_node,
+					       p_ni_context->port_num);
+
+	p_physp = osm_node_get_physp_ptr(p_node, port_num);
+	p_remote_physp = osm_node_get_physp_ptr(p_neighbor_node,
+						p_ni_context->port_num);
+	if (!p_physp || !p_remote_physp)
+		goto _exit;
+
+	osm_node_link(p_node, port_num, p_neighbor_node, p_ni_context->port_num);
+
+	/* record the link in the neighbor database, once per direction */
+	osm_db_neighbor_set(sm->p_subn->p_neighbor,
+			    cl_ntoh64(osm_physp_get_port_guid(p_physp)),
+			    port_num,
+			    cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)),
+			    p_ni_context->port_num);
+	osm_db_neighbor_set(sm->p_subn->p_neighbor,
+			    cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)),
+			    p_ni_context->port_num,
+			    cl_ntoh64(osm_physp_get_port_guid(p_physp)),
+			    port_num);
+
+_exit:
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/*
+ * Request PortInfo for the port that answered the NodeInfo query in
+ * 'madw', along that port's directed route.  When the fdr10 option is
+ * set and is_mlnx_ext_port_info_supported() accepts the device's vendor
+ * and device id, the Mellanox ExtendedPortInfo attribute is requested as
+ * well.  Request failures are only logged.
+ */
+static void ni_rcv_get_port_info(IN osm_sm_t * sm, IN osm_node_t * node,
+				 IN const osm_madw_t * madw)
+{
+	osm_madw_context_t context;
+	osm_physp_t *physp;
+	ib_node_info_t *ni;
+	unsigned port;
+	ib_api_status_t status;
+	int mlnx_epi_supported = 0;
+
+	ni = ib_smp_get_payload_ptr(osm_madw_get_smp_ptr(madw));
+
+	port = ib_node_info_get_local_port_num(ni);
+
+	if (sm->p_subn->opt.fdr10)
+		mlnx_epi_supported = is_mlnx_ext_port_info_supported(
+						ib_node_info_get_vendor_id(ni),
+						ni->device_id);
+
+	physp = osm_node_get_physp_ptr(node, port);
+	if (!physp) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD1E: "
+			"Failed to find physp for port %d of Node GUID 0x%"
+			PRIx64 "\n", port,
+			cl_ntoh64(osm_node_get_node_guid(node)));
+		return;
+	}
+
+	/* plain Get() context: all optional flags are cleared */
+	context.pi_context.node_guid = osm_node_get_node_guid(node);
+	context.pi_context.port_guid = osm_physp_get_port_guid(physp);
+	context.pi_context.set_method = FALSE;
+	context.pi_context.light_sweep = FALSE;
+	context.pi_context.active_transition = FALSE;
+	context.pi_context.client_rereg = FALSE;
+
+	status = osm_req_get(sm, osm_physp_get_dr_path_ptr(physp),
+			     IB_MAD_ATTR_PORT_INFO, cl_hton32(port),
+			     TRUE, 0, 0, CL_DISP_MSGID_NONE, &context);
+	if (status != IB_SUCCESS)
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD02: "
+			"Failure initiating PortInfo request (%s)\n",
+			ib_get_err_str(status));
+	/* the extended request reuses the same context */
+	if (mlnx_epi_supported) {
+		status = osm_req_get(sm,
+				     osm_physp_get_dr_path_ptr(physp),
+				     IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO,
+				     cl_hton32(port),
+				     TRUE, 0, 0, CL_DISP_MSGID_NONE, &context);
+		if (status != IB_SUCCESS)
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0B: "
+				"Failure initiating MLNX ExtPortInfo request (%s)\n",
+				ib_get_err_str(status));
+	}
+}
+
+/**********************************************************************
+ The plock must be held
before calling this function.
+**********************************************************************/
+/*
+ * Send a NodeDescription Get over the directed route of `p_physp`.
+ * The request context carries the GUID of the node owning `p_physp`.
+ * A failure to initiate the request is logged and otherwise ignored.
+ */
+void osm_req_get_node_desc(IN osm_sm_t * sm, osm_physp_t * p_physp)
+{
+	ib_api_status_t status = IB_SUCCESS;
+	osm_madw_context_t context;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	context.nd_context.node_guid =
+	    osm_node_get_node_guid(osm_physp_get_node_ptr(p_physp));
+
+	status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
+			     IB_MAD_ATTR_NODE_DESC, 0, TRUE, 0,
+			     0, CL_DISP_MSGID_NONE, &context);
+	if (status != IB_SUCCESS)
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D03: "
+			"Failure initiating NodeDescription request (%s)\n",
+			ib_get_err_str(status));
+
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/**********************************************************************
+ The plock must be held before calling this function.
+**********************************************************************/
+/*
+ * Request NodeDescription through the port that answered the NodeInfo
+ * query in `p_madw`.  If that port has no physp the error is logged and
+ * nothing is requested.
+ */
+static void ni_rcv_get_node_desc(IN osm_sm_t * sm, IN osm_node_t * p_node,
+				 IN const osm_madw_t * p_madw)
+{
+	ib_node_info_t *p_ni;
+	ib_smp_t *p_smp;
+	uint8_t port_num;
+	osm_physp_t *p_physp = NULL;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	p_smp = osm_madw_get_smp_ptr(p_madw);
+	p_ni = ib_smp_get_payload_ptr(p_smp);
+	port_num = ib_node_info_get_local_port_num(p_ni);
+
+	/*
+	   Request the NodeDescription attribute through the port
+	   that responded to the NodeInfo attribute, so the request
+	   travels the directed route already known to work.
+	 */
+	p_physp = osm_node_get_physp_ptr(p_node, port_num);
+	if (!p_physp) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1F: "
+			"Failed to find physp for port %d of Node GUID 0x%"
+			PRIx64 "\n", port_num,
+			cl_ntoh64(osm_node_get_node_guid(p_node)));
+		/* leave through Exit so OSM_LOG_ENTER stays paired with OSM_LOG_EXIT */
+		goto Exit;
+	}
+
+	osm_req_get_node_desc(sm, p_physp);
+
+Exit:
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/**********************************************************************
+ The plock must be held before calling this function.
+**********************************************************************/
+/*
+ * First-time processing of a CA or router: request PortInfo for the
+ * responding port and, when the NodeInfo context node GUID is zero
+ * (our own node), remember the SM's port GUID in the subnet object.
+ */
+static void ni_rcv_process_new_ca_or_router(IN osm_sm_t * sm,
+					    IN osm_node_t * p_node,
+					    IN const osm_madw_t * p_madw)
+{
+	OSM_LOG_ENTER(sm->p_log);
+
+	ni_rcv_get_port_info(sm, p_node, p_madw);
+
+	/*
+	   A node guid of 0 is the corner case that indicates
+	   we discovered our own node.  Initialize the subnet
+	   object with the SM's own port guid.
+	 */
+	if (osm_madw_get_ni_context_ptr(p_madw)->node_guid == 0)
+		sm->p_subn->sm_port_guid = p_node->node_info.port_guid;
+
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/**********************************************************************
+ The plock must be held before calling this function.
+**********************************************************************/
+/*
+ * Processing of a CA or router whose node object already exists.
+ *
+ * If the port GUID in the NodeInfo is unknown, the physp is initialized
+ * and a port object plus its alias GUID entry are created and inserted
+ * into the subnet tables.  Otherwise only the directed route of the
+ * existing physp is refreshed from the received SMP.  In both cases
+ * PortInfo is then requested, unless an error aborted the processing.
+ */
+static void ni_rcv_process_existing_ca_or_router(IN osm_sm_t * sm,
+						 IN osm_node_t * p_node,
+						 IN const osm_madw_t * p_madw)
+{
+	ib_node_info_t *p_ni;
+	ib_smp_t *p_smp;
+	osm_port_t *p_port;
+	osm_port_t *p_port_check;
+	uint8_t port_num;
+	osm_dr_path_t *p_dr_path;
+	osm_alias_guid_t *p_alias_guid, *p_alias_guid_check;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	p_smp = osm_madw_get_smp_ptr(p_madw);
+	p_ni = ib_smp_get_payload_ptr(p_smp);
+	port_num = ib_node_info_get_local_port_num(p_ni);
+
+	/*
+	   Determine if we have encountered this node through a
+	   previously undiscovered port.  If so, build the new
+	   port object.
+	 */
+	p_port = osm_get_port_by_guid(sm->p_subn, p_ni->port_guid);
+	if (!p_port) {
+		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+			"Creating new port object with GUID 0x%" PRIx64 "\n",
+			cl_ntoh64(p_ni->port_guid));
+
+		osm_node_init_physp(p_node, port_num, p_madw);
+
+		p_port = osm_port_new(p_ni, p_node);
+		if (PF(p_port == NULL)) {
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D04: "
+				"Unable to create new port object\n");
+			goto Exit;
+		}
+
+		/*
+		   Add the new port object to the database.
+		 */
+		p_port_check =
+		    (osm_port_t *) cl_qmap_insert(&sm->p_subn->port_guid_tbl,
+						  p_ni->port_guid,
+						  &p_port->map_item);
+		if (PF(p_port_check != p_port)) {
+			/*
+			   We should never be here!
+			   Somehow, this port GUID already exists in the table.
+			 */
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D12: "
+				"Port 0x%" PRIx64 " already in the database!\n",
+				cl_ntoh64(p_ni->port_guid));
+
+			/* not inserted, so it can simply be freed */
+			osm_port_delete(&p_port);
+			goto Exit;
+		}
+
+		p_alias_guid = osm_alias_guid_new(p_ni->port_guid,
+						  p_port);
+		if (PF(!p_alias_guid)) {
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D11: "
+				"alias guid memory allocation failed"
+				" for port GUID 0x%" PRIx64 "\n",
+				cl_ntoh64(p_ni->port_guid));
+			/* keep the port even without an alias entry */
+			goto alias_done;
+		}
+
+		/* insert into alias guid table */
+		p_alias_guid_check =
+		    (osm_alias_guid_t *) cl_qmap_insert(&sm->p_subn->alias_port_guid_tbl,
+							p_alias_guid->alias_guid,
+							&p_alias_guid->map_item);
+		if (p_alias_guid_check != p_alias_guid) {
+			/* alias GUID is a duplicate */
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D13: "
+				"Duplicate alias port GUID 0x%" PRIx64 "\n",
+				cl_ntoh64(p_ni->port_guid));
+			osm_alias_guid_delete(&p_alias_guid);
+			/*
+			   The port was inserted into port_guid_tbl above;
+			   unlink it before freeing so the table does not
+			   keep a pointer to freed memory.
+			 */
+			cl_qmap_remove_item(&sm->p_subn->port_guid_tbl,
+					    &p_port->map_item);
+			osm_port_delete(&p_port);
+			goto Exit;
+		}
+
+alias_done:
+		/* If we are a master, then this means the port is new on the subnet.
+		   Mark it as new - need to send trap 64 for these ports.
+		   The condition that we are master is true, since if we are in discovering
+		   state (meaning we woke up from standby or we are just initializing),
+		   then these ports may be new to us, but are not new on the subnet.
+		   If we are master, then the subnet as we know it is the updated one,
+		   and any new ports we encounter should cause trap 64. C14-72.1.1 */
+		if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER)
+			p_port->is_new = 1;
+
+	} else {
+		osm_physp_t *p_physp = osm_node_get_physp_ptr(p_node, port_num);
+
+		if (PF(p_physp == NULL)) {
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1C: "
+				"No physical port found for node GUID 0x%"
+				PRIx64 " port %u. Might be duplicate port GUID\n",
+				cl_ntoh64(p_node->node_info.node_guid),
+				port_num);
+			goto Exit;
+		}
+
+		/*
+		   Update the DR Path to the port,
+		   in case the old one is no longer available.
+		 */
+		p_dr_path = osm_physp_get_dr_path_ptr(p_physp);
+
+		osm_dr_path_init(p_dr_path, p_smp->hop_count,
+				 p_smp->initial_path);
+	}
+
+	ni_rcv_get_port_info(sm, p_node, p_madw);
+
+Exit:
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/*
+ * Common switch processing: refresh the directed route of switch port 0
+ * from the received SMP and request SwitchInfo over that route.  A
+ * failure to initiate the request is logged and otherwise ignored.
+ */
+static void ni_rcv_process_switch(IN osm_sm_t * sm, IN osm_node_t * p_node,
+				  IN const osm_madw_t * p_madw)
+{
+	ib_api_status_t status = IB_SUCCESS;
+	osm_physp_t *p_physp;
+	osm_madw_context_t context;
+	osm_dr_path_t *path;
+	ib_smp_t *p_smp;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	p_smp = osm_madw_get_smp_ptr(p_madw);
+
+	/* NOTE(review): the result is not NULL-checked, unlike the other
+	   osm_node_get_physp_ptr() callers here; presumably switch port 0
+	   is always initialized - confirm */
+	p_physp = osm_node_get_physp_ptr(p_node, 0);
+	/* update DR path of already initialized switch port 0 */
+	path = osm_physp_get_dr_path_ptr(p_physp);
+	osm_dr_path_init(path, p_smp->hop_count, p_smp->initial_path);
+
+	context.si_context.node_guid = osm_node_get_node_guid(p_node);
+	context.si_context.set_method = FALSE;
+	context.si_context.light_sweep = FALSE;
+	context.si_context.lft_top_change = FALSE;
+
+	/* Request a SwitchInfo attribute */
+	status = osm_req_get(sm, path, IB_MAD_ATTR_SWITCH_INFO, 0, TRUE, 0,
+			     0, CL_DISP_MSGID_NONE, &context);
+	if (status != IB_SUCCESS)
+		/* continue despite error */
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D06: "
+			"Failure initiating SwitchInfo request (%s)\n",
+			ib_get_err_str(status));
+
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/**********************************************************************
+ The plock must be held before calling this function.
+**********************************************************************/
+/*
+ * Processing of an already known switch: probe it only when its
+ * discovery_count is exactly 1.
+ */
+static void ni_rcv_process_existing_switch(IN osm_sm_t * sm,
+					   IN osm_node_t * p_node,
+					   IN const osm_madw_t * p_madw)
+{
+	OSM_LOG_ENTER(sm->p_log);
+
+	/*
+	   If this switch has already been probed during this sweep,
+	   then don't bother reprobing it.  ni_rcv_process_existing
+	   increments discovery_count before calling us, so a value
+	   of 1 means this is the first encounter.
+	 */
+	if (p_node->discovery_count == 1)
+		ni_rcv_process_switch(sm, p_node, p_madw);
+
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/**********************************************************************
+ The plock must be held before calling this function.
+**********************************************************************/
+/*
+ * First-time processing of a switch: run the common switch processing
+ * and, when the NodeInfo context node GUID is zero (our own node),
+ * remember the SM's port GUID in the subnet object.
+ */
+static void ni_rcv_process_new_switch(IN osm_sm_t * sm, IN osm_node_t * p_node,
+				      IN const osm_madw_t * p_madw)
+{
+	OSM_LOG_ENTER(sm->p_log);
+
+	ni_rcv_process_switch(sm, p_node, p_madw);
+
+	/*
+	   A node guid of 0 is the corner case that indicates
+	   we discovered our own node.  Initialize the subnet
+	   object with the SM's own port guid.
+	 */
+	if (osm_madw_get_ni_context_ptr(p_madw)->node_guid == 0)
+		sm->p_subn->sm_port_guid = p_node->node_info.port_guid;
+
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/**********************************************************************
+ The plock must NOT be held before calling this function.
+**********************************************************************/
+/*
+ * Handle a NodeInfo response from a node that is not yet in the subnet
+ * database: create the node and port objects, register the port GUID,
+ * its alias GUID and (for routers) the router object, insert the node,
+ * set up the link to the neighbor and issue the follow-up requests
+ * (NodeDescription, then PortInfo or SwitchInfo depending on node type).
+ *
+ * NOTE(review): the banner above says the plock must not be held, but
+ * osm_ni_rcv_process calls this function after CL_PLOCK_EXCL_ACQUIRE;
+ * the banner looks stale.
+ */
+static void ni_rcv_process_new(IN osm_sm_t * sm, IN const osm_madw_t * p_madw)
+{
+	osm_node_t *p_node;
+	osm_node_t *p_node_check;
+	osm_port_t *p_port;
+	osm_port_t *p_port_check;
+	osm_router_t *p_rtr = NULL;
+	osm_router_t *p_rtr_check;
+	cl_qmap_t *p_rtr_guid_tbl;
+	ib_node_info_t *p_ni;
+	ib_smp_t *p_smp;
+	osm_ni_context_t *p_ni_context;
+	osm_alias_guid_t *p_alias_guid, *p_alias_guid_check;
+	uint8_t port_num;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	p_smp = osm_madw_get_smp_ptr(p_madw);
+	p_ni = ib_smp_get_payload_ptr(p_smp);
+	p_ni_context = osm_madw_get_ni_context_ptr(p_madw);
+	port_num = ib_node_info_get_local_port_num(p_ni);
+
+	osm_dump_smp_dr_path_v2(sm->p_log, p_smp, FILE_ID, OSM_LOG_VERBOSE);
+
+	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+		"Discovered new %s node,"
+		"\n\t\t\t\tGUID 0x%" PRIx64 ", TID 0x%" PRIx64 "\n",
+		ib_get_node_type_str(p_ni->node_type),
+		cl_ntoh64(p_ni->node_guid), cl_ntoh64(p_smp->trans_id));
+
+	if (PF(port_num > p_ni->num_ports)) {
+		/* leading space in " is non-compliant" separates it from the GUID */
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0A: "
+			"New %s node GUID 0x%" PRIx64 " is non-compliant and "
+			"is being ignored since the "
+			"local port num %u > num ports %u\n",
+			ib_get_node_type_str(p_ni->node_type),
+			cl_ntoh64(p_ni->node_guid), port_num,
+			p_ni->num_ports);
+		goto Exit;
+	}
+
+	p_node = osm_node_new(p_madw);
+	if (PF(p_node == NULL)) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D07: "
+			"Unable to create new node object\n");
+		goto Exit;
+	}
+
+	/*
+	   Create a new port object to represent this node's physical
+	   ports in the port table.
+	 */
+	p_port = osm_port_new(p_ni, p_node);
+	if (PF(p_port == NULL)) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D14: "
+			"Unable to create new port object\n");
+		osm_node_delete(&p_node);
+		goto Exit;
+	}
+
+	/*
+	   Add the new port object to the database.
+	 */
+	p_port_check =
+	    (osm_port_t *) cl_qmap_insert(&sm->p_subn->port_guid_tbl,
+					  p_ni->port_guid, &p_port->map_item);
+	if (PF(p_port_check != p_port)) {
+		/*
+		   We should never be here!
+		   Somehow, this port GUID already exists in the table.
+		 */
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D15: "
+			"Duplicate Port GUID 0x%" PRIx64
+			"! Found by the two directed routes:\n",
+			cl_ntoh64(p_ni->port_guid));
+		osm_dump_dr_path_v2(sm->p_log,
+				    osm_physp_get_dr_path_ptr(p_port->p_physp),
+				    FILE_ID, OSM_LOG_ERROR);
+		osm_dump_dr_path_v2(sm->p_log,
+				    osm_physp_get_dr_path_ptr(p_port_check->
+							      p_physp),
+				    FILE_ID, OSM_LOG_ERROR);
+		/* neither object was inserted anywhere, so both can be freed */
+		osm_port_delete(&p_port);
+		osm_node_delete(&p_node);
+		goto Exit;
+	}
+
+	p_alias_guid = osm_alias_guid_new(p_ni->port_guid,
+					  p_port);
+	if (PF(!p_alias_guid)) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D18: "
+			"alias guid memory allocation failed"
+			" for port GUID 0x%" PRIx64 "\n",
+			cl_ntoh64(p_ni->port_guid));
+		goto alias_done2;
+	}
+
+	/* insert into alias guid table */
+	p_alias_guid_check =
+	    (osm_alias_guid_t *) cl_qmap_insert(&sm->p_subn->alias_port_guid_tbl,
+						p_alias_guid->alias_guid,
+						&p_alias_guid->map_item);
+	if (p_alias_guid_check != p_alias_guid) {
+		/* alias GUID is a duplicate */
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D19: "
+			"Duplicate alias port GUID 0x%" PRIx64 "\n",
+			cl_ntoh64(p_ni->port_guid));
+		osm_alias_guid_delete(&p_alias_guid);
+	}
+
+alias_done2:
+	/* If we are a master, then this means the port is new on the subnet.
+	   Mark it as new - need to send trap 64 on these ports.
+	   The condition that we are master is true, since if we are in discovering
+	   state (meaning we woke up from standby or we are just initializing),
+	   then these ports may be new to us, but are not new on the subnet.
+	   If we are master, then the subnet as we know it is the updated one,
+	   and any new ports we encounter should cause trap 64. C14-72.1.1 */
+	if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER)
+		p_port->is_new = 1;
+
+	/* If there were RouterInfo or other router attribute,
+	   this would be elsewhere */
+	if (p_ni->node_type == IB_NODE_TYPE_ROUTER) {
+		if (PF((p_rtr = osm_router_new(p_port)) == NULL))
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1A: "
+				"Unable to create new router object\n");
+		else {
+			p_rtr_guid_tbl = &sm->p_subn->rtr_guid_tbl;
+			p_rtr_check =
+			    (osm_router_t *) cl_qmap_insert(p_rtr_guid_tbl,
+							    p_ni->port_guid,
+							    &p_rtr->map_item);
+			if (PF(p_rtr_check != p_rtr)) {
+				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1B: "
+					"Unable to add port GUID:0x%016" PRIx64
+					" to router table\n",
+					cl_ntoh64(p_ni->port_guid));
+				/* not inserted: free it instead of leaking it */
+				osm_router_delete(&p_rtr);
+			}
+		}
+	}
+
+	p_node_check =
+	    (osm_node_t *) cl_qmap_insert(&sm->p_subn->node_guid_tbl,
+					  p_ni->node_guid, &p_node->map_item);
+	if (PF(p_node_check != p_node)) {
+		/*
+		   This node must have been inserted by another thread.
+		   This is unexpected, but is not an error.
+		   We can simply clean-up, since the other thread will
+		   see this processing through to completion.
+		 */
+		/* NOTE(review): p_port (already in port_guid_tbl) was created
+		   from the node deleted here - confirm this path cannot leave
+		   a stale node reference behind */
+		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+			"Discovery race detected at node 0x%" PRIx64 "\n",
+			cl_ntoh64(p_ni->node_guid));
+		osm_node_delete(&p_node);
+		p_node = p_node_check;
+		ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
+		goto Exit;
+	} else
+		ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
+
+	p_node->discovery_count++;
+	ni_rcv_get_node_desc(sm, p_node, p_madw);
+
+	switch (p_ni->node_type) {
+	case IB_NODE_TYPE_CA:
+	case IB_NODE_TYPE_ROUTER:
+		ni_rcv_process_new_ca_or_router(sm, p_node, p_madw);
+		break;
+	case IB_NODE_TYPE_SWITCH:
+		ni_rcv_process_new_switch(sm, p_node, p_madw);
+		break;
+	default:
+		/* NOTE(review): ERR 0D16 is also used by osm_ni_rcv_process
+		   for "Got Zero Node GUID" - one of them needs its own code */
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: "
+			"Unknown node type %u with GUID 0x%" PRIx64 "\n",
+			p_ni->node_type, cl_ntoh64(p_ni->node_guid));
+		break;
+	}
+
+Exit:
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/**********************************************************************
+ The plock must be held before calling this function.
+**********************************************************************/
+/*
+ * Handle a NodeInfo response from a node that is already in the subnet
+ * database: bump its discovery count, run the type specific processing,
+ * set up the link to the neighbor and refresh the cached NodeInfo.
+ * Responses whose local port number exceeds num_ports are ignored.
+ */
+static void ni_rcv_process_existing(IN osm_sm_t * sm, IN osm_node_t * p_node,
+				    IN const osm_madw_t * p_madw)
+{
+	ib_node_info_t *p_ni;
+	ib_smp_t *p_smp;
+	osm_ni_context_t *p_ni_context;
+	uint8_t port_num;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	p_smp = osm_madw_get_smp_ptr(p_madw);
+	p_ni = ib_smp_get_payload_ptr(p_smp);
+	p_ni_context = osm_madw_get_ni_context_ptr(p_madw);
+	port_num = ib_node_info_get_local_port_num(p_ni);
+
+	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+		"Rediscovered %s node 0x%" PRIx64 " TID 0x%" PRIx64
+		", discovered %u times already\n",
+		ib_get_node_type_str(p_ni->node_type),
+		cl_ntoh64(p_ni->node_guid),
+		cl_ntoh64(p_smp->trans_id), p_node->discovery_count);
+
+	if (PF(port_num > p_ni->num_ports)) {
+		/* leading space in " is non-compliant" separates it from the GUID */
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0C: "
+			"Existing %s node GUID 0x%" PRIx64 " is non-compliant "
+			"and is being ignored since the "
+			"local port num %u > num ports %u\n",
+			ib_get_node_type_str(p_ni->node_type),
+			cl_ntoh64(p_ni->node_guid), port_num,
+			p_ni->num_ports);
+		goto Exit;
+	}
+
+	/*
+	   Count this encounter.  ni_rcv_process_existing_switch uses
+	   the count to probe a switch only on its first encounter.
+	 */
+	p_node->discovery_count++;
+
+	switch (p_ni->node_type) {
+	case IB_NODE_TYPE_CA:
+	case IB_NODE_TYPE_ROUTER:
+		ni_rcv_process_existing_ca_or_router(sm, p_node, p_madw);
+		break;
+
+	case IB_NODE_TYPE_SWITCH:
+		ni_rcv_process_existing_switch(sm, p_node, p_madw);
+		break;
+
+	default:
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D09: "
+			"Unknown node type %u with GUID 0x%" PRIx64 "\n",
+			p_ni->node_type, cl_ntoh64(p_ni->node_guid));
+		break;
+	}
+
+	if (p_ni->sys_guid != p_node->node_info.sys_guid) {
+		OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Updated SysImageGUID: 0x%"
+			PRIx64 " for node 0x%" PRIx64 "\n",
+			cl_ntoh64(p_ni->sys_guid),
+			cl_ntoh64(p_ni->node_guid));
+	}
+	ni_rcv_set_links(sm, p_node, port_num, p_ni_context);
+	/* replace the cached NodeInfo with the one just received */
+	p_node->node_info = *p_ni;
+
+Exit:
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/*
+ * Entry point for a received NodeInfo MAD.
+ *   context - the osm_sm_t this receiver belongs to
+ *   data    - the osm_madw_t wrapping the received SMP
+ *
+ * Responses with a zero node or port GUID, or with a non-zero MAD
+ * status, are dropped.  Otherwise the node is looked up by GUID and
+ * handled as new or existing while the plock is held exclusively.
+ */
+void osm_ni_rcv_process(IN void *context, IN void *data)
+{
+	osm_sm_t *sm = context;
+	osm_madw_t *p_madw = data;
+	ib_node_info_t *p_ni;
+	ib_smp_t *p_smp;
+	osm_node_t *p_node;
+
+	CL_ASSERT(sm);
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	CL_ASSERT(p_madw);
+
+	p_smp = osm_madw_get_smp_ptr(p_madw);
+	p_ni = ib_smp_get_payload_ptr(p_smp);
+
+	CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_NODE_INFO);
+
+	if (PF(p_ni->node_guid == 0)) {
+		/* NOTE(review): ERR 0D16 is also used by ni_rcv_process_new
+		   for "Unknown node type" - one of them needs its own code */
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: "
+			"Got Zero Node GUID! Found on the directed route:\n");
+		osm_dump_smp_dr_path_v2(sm->p_log, p_smp, FILE_ID, OSM_LOG_ERROR);
+		goto Exit;
+	}
+
+	if (PF(p_ni->port_guid == 0)) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D17: "
+			"Got Zero Port GUID! Found on the directed route:\n");
+		osm_dump_smp_dr_path_v2(sm->p_log, p_smp, FILE_ID, OSM_LOG_ERROR);
+		goto Exit;
+	}
+
+	if (ib_smp_get_status(p_smp)) {
+		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+			"MAD status 0x%x received\n",
+			cl_ntoh16(ib_smp_get_status(p_smp)));
+		goto Exit;
+	}
+
+	/*
+	   Determine if this node has already been discovered,
+	   and process accordingly.
+	   During processing of this node, hold the exclusive lock.
+	 */
+
+	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
+	p_node = osm_get_node_by_guid(sm->p_subn, p_ni->node_guid);
+
+	osm_dump_node_info_v2(sm->p_log, p_ni, FILE_ID, OSM_LOG_DEBUG);
+
+	if (!p_node)
+		ni_rcv_process_new(sm, p_madw);
+	else
+		ni_rcv_process_existing(sm, p_node, p_madw);
+
+	CL_PLOCK_RELEASE(sm->p_lock);
+
+Exit:
+	OSM_LOG_EXIT(sm->p_log);
+}
diff --git a/opensm/osm_opensm.c b/opensm/osm_opensm.c
new file mode 100644
index 0000000..34721e3
--- /dev/null
+++ b/opensm/osm_opensm.c
@@ -0,0 +1,918 @@
+/*
+ * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2010 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * Copyright (c) 2009-2011 ZIH, TU Dresden, Federal Republic of Germany. All rights reserved.
+ * Copyright (C) 2012-2017 Tokyo Institute of Technology. All rights reserved.
+ * Copyright (c) 2019 Fabriscale Technologies AS. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_opensm_t. + * This object represents the opensm super object. + * This object is part of the opensm family of objects. 
+ */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_OPENSM_C +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * built-in routing engine setup functions + */ +extern int osm_ucast_minhop_setup(struct osm_routing_engine *, osm_opensm_t *); +extern int osm_ucast_updn_setup(struct osm_routing_engine *, osm_opensm_t *); +extern int osm_ucast_dnup_setup(struct osm_routing_engine *, osm_opensm_t *); +extern int osm_ucast_file_setup(struct osm_routing_engine *, osm_opensm_t *); +extern int osm_ucast_ftree_setup(struct osm_routing_engine *, osm_opensm_t *); +extern int osm_ucast_lash_setup(struct osm_routing_engine *, osm_opensm_t *); +extern int osm_ucast_dor_setup(struct osm_routing_engine *, osm_opensm_t *); +extern int osm_ucast_torus2QoS_setup(struct osm_routing_engine *, osm_opensm_t *); +extern int osm_ucast_nue_setup(struct osm_routing_engine *, osm_opensm_t *); +extern int osm_ucast_sssp_setup(struct osm_routing_engine *, osm_opensm_t *); +extern int osm_ucast_dfsssp_setup(struct osm_routing_engine *, osm_opensm_t *); + +/* + * Local types + */ + +typedef struct builtin_routing_engine_module { + const char *name; + osm_routing_engine_type_t type; + int (*setup)(struct osm_routing_engine *re, struct osm_opensm *osm); +} builtin_routing_engine_module_t; + +typedef struct routing_engine_module { + char *name; + osm_routing_engine_type_t type; + int (*setup)(struct osm_routing_engine *re, struct osm_opensm *osm); + void *context; +} routing_engine_module_t; + + +/* + * Local variables + */ +static const char *unknown_routing_engine_name = "unknown"; + +static cl_list_t routing_modules; + +static osm_routing_engine_type_t last_external_routing_engine_type = + OSM_ROUTING_ENGINE_TYPE_EXTERNAL; + +static builtin_routing_engine_module_t static_routing_modules[] = { + { + "none", + 
OSM_ROUTING_ENGINE_TYPE_NONE, + NULL + }, + { + "minhop", + OSM_ROUTING_ENGINE_TYPE_MINHOP, + osm_ucast_minhop_setup + }, + { + "updn", + OSM_ROUTING_ENGINE_TYPE_UPDN, + osm_ucast_updn_setup + }, + { + "dnup", + OSM_ROUTING_ENGINE_TYPE_DNUP, + osm_ucast_dnup_setup + }, + { + "file", + OSM_ROUTING_ENGINE_TYPE_FILE, + osm_ucast_file_setup + }, + { + "ftree", + OSM_ROUTING_ENGINE_TYPE_FTREE, + osm_ucast_ftree_setup + }, + { + "lash", + OSM_ROUTING_ENGINE_TYPE_LASH, + osm_ucast_lash_setup + }, + { + "dor", + OSM_ROUTING_ENGINE_TYPE_DOR, + osm_ucast_dor_setup + }, + { + "torus-2QoS", + OSM_ROUTING_ENGINE_TYPE_TORUS_2QOS, + osm_ucast_torus2QoS_setup + }, + { + "nue", + OSM_ROUTING_ENGINE_TYPE_NUE, + osm_ucast_nue_setup + }, + { + "dfsssp", + OSM_ROUTING_ENGINE_TYPE_DFSSSP, + osm_ucast_dfsssp_setup + }, + { + "sssp", + OSM_ROUTING_ENGINE_TYPE_SSSP, + osm_ucast_sssp_setup + } +}; + +/* + * Forward declarations + */ + +static cl_status_t _match_routing_engine_type( + IN const void *const p_object, IN void *context); + +static cl_status_t _match_routing_engine_str( + IN const void *const p_object, IN void *context); + +static void append_routing_engine( + osm_opensm_t *osm, struct osm_routing_engine *routing_engine); + +static struct osm_routing_engine *setup_routing_engine( + osm_opensm_t *osm, const char *name); + +static void dump_routing_engine( + IN void *const p_object, IN void *context); + +static void dump_routing_engines( + IN osm_opensm_t *osm); + +static void setup_routing_engines( + osm_opensm_t *osm, const char *engine_names); + +static cl_status_t register_builtin_routing_engine( + IN osm_opensm_t *osm, + IN const builtin_routing_engine_module_t *module); + +static cl_status_t register_routing_engine( + IN osm_opensm_t *osm, + IN const routing_engine_module_t *module); + +static void destroy_routing_engines( + osm_opensm_t *osm); + +static void __free_routing_module( + void *p_object, void *context); + +static const char *routing_engine_type( + IN 
osm_routing_engine_type_t type); + +/** ========================================================================= + */ + +cl_status_t osm_register_external_routing_engine( + IN osm_opensm_t *osm, + IN const external_routing_engine_module_t *module, + IN void *context) +{ + cl_status_t status; + routing_engine_module_t *copy = NULL; + + if (!osm || !module) + return CL_INVALID_PARAMETER; + + OSM_LOG(&osm->log, OSM_LOG_VERBOSE, + "Assign type '%d' to external routing engine with name: \'%s\'\n", + last_external_routing_engine_type, + module->name); + + copy = (routing_engine_module_t *) malloc(sizeof(routing_engine_module_t)); + if (!copy) { + OSM_LOG(&osm->log, OSM_LOG_ERROR, "memory allocation failed\n"); + return CL_INSUFFICIENT_MEMORY; + } + + copy->name = strdup(module->name); + if (!copy->name) { + OSM_LOG(&osm->log, OSM_LOG_ERROR, "memory allocation failed\n"); + __free_routing_module(copy, NULL); + return CL_INSUFFICIENT_MEMORY; + } + + copy->setup = module->setup; + copy->type = last_external_routing_engine_type++; + copy->context = context; + + status = register_routing_engine(osm, copy); + if (status != CL_SUCCESS) + __free_routing_module(copy, NULL); + return status; +} + +cl_status_t register_builtin_routing_engine( + IN osm_opensm_t *osm, + IN const builtin_routing_engine_module_t *module) +{ + cl_status_t status; + routing_engine_module_t *copy; + + if (!osm || !module) + return CL_INVALID_PARAMETER; + + copy = (routing_engine_module_t *) malloc(sizeof(routing_engine_module_t)); + if (!copy) { + OSM_LOG(&osm->log, OSM_LOG_ERROR, "memory allocation failed\n"); + return CL_INSUFFICIENT_MEMORY; + } + + copy->name = strdup(module->name); + if (!copy->name) { + OSM_LOG(&osm->log, OSM_LOG_ERROR, "memory allocation failed\n"); + __free_routing_module(copy, NULL); + return CL_INSUFFICIENT_MEMORY; + } + + copy->setup = module->setup; + copy->type = module->type; + copy->context = NULL; + + status = register_routing_engine(osm, copy); + if (status != CL_SUCCESS) 
+ __free_routing_module(copy, NULL); + return status; +} + +cl_status_t register_routing_engine( + IN osm_opensm_t *osm, + IN const routing_engine_module_t *module) +{ + cl_status_t status; + osm_routing_engine_type_t existing_type, new_type; + const char *existing_routing_engine_type, *new_routing_engine_type; + const char *existing_routing_engine_name, *new_name; + + new_type = module->type; + new_name = module->name; + new_routing_engine_type = routing_engine_type(new_type); + + /* check if another routine engine has already been registered with the same name */ + existing_type = osm_routing_engine_type(new_name); + if (existing_type != OSM_ROUTING_ENGINE_TYPE_UNKNOWN) { + existing_routing_engine_type = routing_engine_type(existing_type); + OSM_LOG(&osm->log, OSM_LOG_ERROR, + "Failed to register %s routing engine with name \'%s\': " + "%s routing engine with same name was already registered with type: '%d'\n", + new_routing_engine_type, + new_name, + existing_routing_engine_type, + existing_type); + return CL_DUPLICATE; + } + /* check if another routine engine has already been registed with the same type */ + existing_routing_engine_name = osm_routing_engine_type_str(new_type); + if (strcmp(existing_routing_engine_name, unknown_routing_engine_name) != 0) { + existing_type = new_type; + existing_routing_engine_type = routing_engine_type(existing_type); + OSM_LOG(&osm->log, OSM_LOG_ERROR, + "Failed to register %s routing engine with name \'%s\': " + "%s routing engine with type '%d' " + "was already registered with name: \'%s\'\n", + new_routing_engine_type, + new_name, + existing_routing_engine_type, + existing_type, + existing_routing_engine_name); + return CL_DUPLICATE; + } + + OSM_LOG(&osm->log, OSM_LOG_VERBOSE, + "Register %s routine engine with name: \'%s\' and type: '%d'\n", + new_routing_engine_type, + new_name, + new_type); + + status = cl_list_insert_tail(&routing_modules, module); + return status; +} + +static cl_status_t _match_routing_engine_type( + 
IN const void *const p_object, IN void *context) +{ + osm_routing_engine_type_t type; + routing_engine_module_t *module; + + type = (osm_routing_engine_type_t) context; + module = (routing_engine_module_t *) p_object; + + return module->type == type ? CL_SUCCESS : CL_NOT_FOUND; +} + +const char *osm_routing_engine_type_str(IN osm_routing_engine_type_t type) +{ + cl_list_iterator_t iter; + routing_engine_module_t *module; + + iter = cl_list_find_from_head( + &routing_modules, _match_routing_engine_type, (void *)type); + + if (iter != cl_list_end(&routing_modules)) { + module = (routing_engine_module_t *) cl_list_obj(iter); + return module->name; + } + return unknown_routing_engine_name; +} + +static cl_status_t _match_routing_engine_str( + IN const void *const p_object, IN void *context) +{ + const char *name = (char *) context; + routing_engine_module_t *module; + + name = (char *) context; + module = (routing_engine_module_t *) p_object; + + /* For legacy reasons, consider a NULL pointer and the string + * "null" as the minhop routing engine. 
+ */ + if (!name || !strcasecmp(name, "null")) + name = "minhop"; + + if (strcasecmp(module->name, name) == 0) + return CL_SUCCESS; + else + return CL_NOT_FOUND; +} + +osm_routing_engine_type_t osm_routing_engine_type(IN const char *str) +{ + cl_list_iterator_t iter; + routing_engine_module_t *module; + + iter = cl_list_find_from_head( + &routing_modules, _match_routing_engine_str, (void *)str); + + if (iter != cl_list_end(&routing_modules)) { + module = (routing_engine_module_t *) cl_list_obj(iter); + return module->type; + } + + return OSM_ROUTING_ENGINE_TYPE_UNKNOWN; +} + +static void append_routing_engine(osm_opensm_t *osm, + struct osm_routing_engine *routing_engine) +{ + struct osm_routing_engine *r; + + routing_engine->next = NULL; + + if (!osm->routing_engine_list) { + osm->routing_engine_list = routing_engine; + return; + } + + r = osm->routing_engine_list; + while (r->next) + r = r->next; + + r->next = routing_engine; +} + +static struct osm_routing_engine *setup_routing_engine(osm_opensm_t *osm, + const char *name) +{ + struct osm_routing_engine *re; + routing_engine_module_t *m; + cl_list_iterator_t iter; + + if (!strcmp(name, "no_fallback")) { + osm->no_fallback_routing_engine = TRUE; + return NULL; + } + + for (iter = cl_list_head(&routing_modules); + iter != cl_list_end(&routing_modules); + iter = cl_list_next(iter)) { + m = (routing_engine_module_t *)cl_list_obj(iter); + if (!strcmp(m->name, name)) { + re = malloc(sizeof(struct osm_routing_engine)); + if (!re) { + OSM_LOG(&osm->log, OSM_LOG_VERBOSE, + "memory allocation failed\n"); + return NULL; + } + memset(re, 0, sizeof(struct osm_routing_engine)); + + re->name = m->name; + re->context = m->context; + + OSM_LOG(&osm->log, OSM_LOG_VERBOSE, + "setup of routing engine \'%s\' ...\n", name); + + re->type = osm_routing_engine_type(m->name); + if (m->setup(re, osm)) { + OSM_LOG(&osm->log, OSM_LOG_VERBOSE, + "setup of routing" + " engine \'%s\' failed\n", name); + free(re); + return NULL; + } + 
OSM_LOG(&osm->log, OSM_LOG_DEBUG, + "\'%s\' routing engine set up\n", re->name); + if (re->type == OSM_ROUTING_ENGINE_TYPE_MINHOP) + osm->default_routing_engine = re; + return re; + } + } + + OSM_LOG(&osm->log, OSM_LOG_ERROR, + "cannot find or setup routing engine \'%s\'\n", name); + return NULL; +} + +static void setup_routing_engines(osm_opensm_t *osm, const char *engine_names) +{ + char *name, *str, *p; + struct osm_routing_engine *re; + + dump_routing_engines(osm); + + if (engine_names && *engine_names) { + str = strdup(engine_names); + name = strtok_r(str, ", \t\n", &p); + while (name && *name) { + re = setup_routing_engine(osm, name); + if (re) + append_routing_engine(osm, re); + else + OSM_LOG(&osm->log, OSM_LOG_ERROR, + "Failed to setup routing engine \'%s\'\n", name); + name = strtok_r(NULL, ", \t\n", &p); + } + free(str); + } + if (!osm->default_routing_engine) + setup_routing_engine(osm, "minhop"); +} + +static void dump_routing_engine(IN void *const p_object, IN void *context) +{ + osm_opensm_t *osm; + routing_engine_module_t *module; + + osm = (osm_opensm_t *) context; + module = (routing_engine_module_t *) p_object; + + OSM_LOG(&osm->log, OSM_LOG_VERBOSE, + " name: %s - Type: %d\n", + module->name, module->type); +} + +static void dump_routing_engines(IN osm_opensm_t *osm) +{ + cl_list_apply_func( + &routing_modules, + dump_routing_engine, + (void *) osm); +} + +static const char *routing_engine_type(IN osm_routing_engine_type_t type) +{ + return type < OSM_ROUTING_ENGINE_TYPE_UNKNOWN ? 
+ "built-in" : "external"; +} + +void osm_routing_modules_construct(IN osm_opensm_t *p_osm) +{ + size_t i, len; + + len = sizeof(static_routing_modules) / + sizeof(builtin_routing_engine_module_t); + + cl_list_construct(&routing_modules); + cl_list_init(&routing_modules, len); + for (i = 0; i < len; i++) { + register_builtin_routing_engine( + p_osm, &(static_routing_modules[i])); + } +} + +static void __free_routing_module(void *p_object, void *context) +{ + routing_engine_module_t *p_module; + + p_module = (routing_engine_module_t *) p_object; + if (p_module) { + if (p_module->name) + free(p_module->name); + free(p_module); + } +} + +void osm_routing_modules_destroy(IN osm_opensm_t *p_osm) +{ + cl_list_apply_func(&routing_modules, __free_routing_module, p_osm); + cl_list_remove_all(&routing_modules); + cl_list_destroy(&routing_modules); +} + +void osm_opensm_construct(IN osm_opensm_t * p_osm) +{ + memset(p_osm, 0, sizeof(*p_osm)); + p_osm->osm_version = OSM_VERSION; + osm_routing_modules_construct(p_osm); + osm_subn_construct(&p_osm->subn); + osm_db_construct(&p_osm->db); + osm_log_construct(&p_osm->log); +} + +void osm_opensm_construct_finish(IN osm_opensm_t * p_osm) +{ + osm_sm_construct(&p_osm->sm); + osm_sa_construct(&p_osm->sa); + osm_mad_pool_construct(&p_osm->mad_pool); + p_osm->mad_pool_constructed = TRUE; + osm_vl15_construct(&p_osm->vl15); + p_osm->vl15_constructed = TRUE; +} + +static void destroy_routing_engines(osm_opensm_t *osm) +{ + struct osm_routing_engine *r, *next; + + next = osm->routing_engine_list; + while (next) { + r = next; + next = r->next; + if (r != osm->default_routing_engine) { + if (r->destroy) + r->destroy(r->context); + free(r); + } else /* do not free default_routing_engine */ + r->next = NULL; + } + osm->routing_engine_list = NULL; + + r = osm->default_routing_engine; + if (r) { + if (r->destroy) + r->destroy(r->context); + free(r); + osm->default_routing_engine = NULL; + } +} + +static void destroy_plugins(osm_opensm_t *osm) +{ 
/*
 * First stage of OpenSM teardown.
 *
 * Sets the global exit flag, clears the IS_SM capability bit if the SM was
 * bound, shuts down the running components (PerfMgr when compiled in,
 * congestion control, SM, SA, VL15, dispatchers), optionally dumps the SA
 * database, and then destroys routing engines, plugins, SA, SM, the routing
 * module registry, PerfMgr and congestion control.  The order of the calls
 * below is significant and must not be changed casually.
 */
void osm_opensm_destroy(IN osm_opensm_t * p_osm)
{
	/* in case of shutdown through exit proc - no ^C */
	osm_exit_flag = TRUE;

	/*
	 * First of all, clear the is_sm bit.
	 * Only done when a bind handle exists, i.e. the SM was bound.
	 */
	if (p_osm->sm.mad_ctrl.h_bind)
		osm_vendor_set_sm(p_osm->sm.mad_ctrl.h_bind, FALSE);

#ifdef ENABLE_OSM_PERF_MGR
	/* Shutdown the PerfMgr */
	osm_perfmgr_shutdown(&p_osm->perfmgr);
#endif				/* ENABLE_OSM_PERF_MGR */

	osm_congestion_control_shutdown(&p_osm->cc);

	/* shut down the SM
	 * - make sure the SM sweeper thread exited
	 * - unbind from QP0 messages
	 */
	osm_sm_shutdown(&p_osm->sm);

	/* shut down the SA
	 * - unbind from QP1 messages
	 */
	osm_sa_shutdown(&p_osm->sa);

	/* cleanup all messages on VL15 fifo that were not sent yet */
	osm_vl15_shutdown(&p_osm->vl15, &p_osm->mad_pool);

	/* shut down the dispatcher - so no new messages cross */
	cl_disp_shutdown(&p_osm->disp);
	/* the SA set/delete dispatcher exists only in multi-threaded mode */
	if (p_osm->sa_set_disp_initialized)
		cl_disp_shutdown(&p_osm->sa_set_disp);

	/* dump SA DB - only when this SM is master and dumping is enabled */
	if ((p_osm->sm.p_subn->sm_state == IB_SMINFO_STATE_MASTER) &&
	    p_osm->subn.opt.sa_db_dump)
		osm_sa_db_file_dump(p_osm);

	/* do the destruction in reverse order as init */
	destroy_routing_engines(p_osm);
	destroy_plugins(p_osm);
	osm_sa_destroy(&p_osm->sa);
	osm_sm_destroy(&p_osm->sm);
	osm_routing_modules_destroy(p_osm);
#ifdef ENABLE_OSM_PERF_MGR
	osm_perfmgr_destroy(&p_osm->perfmgr);
#endif				/* ENABLE_OSM_PERF_MGR */
	osm_congestion_control_destroy(&p_osm->cc);
}
+ osm_mad_pool_destroy(&p_osm->mad_pool); + p_osm->vl15_constructed = FALSE; + p_osm->mad_pool_constructed = FALSE; + osm_vendor_delete(&p_osm->p_vendor); + osm_subn_destroy(&p_osm->subn); + cl_disp_destroy(&p_osm->disp); + if (p_osm->sa_set_disp_initialized) + cl_disp_destroy(&p_osm->sa_set_disp); +#ifdef HAVE_LIBPTHREAD + pthread_cond_destroy(&p_osm->stats.cond); + pthread_mutex_destroy(&p_osm->stats.mutex); +#else + cl_event_destroy(&p_osm->stats.event); +#endif + if (p_osm->node_name_map) + close_node_name_map(p_osm->node_name_map); + cl_plock_destroy(&p_osm->lock); + + osm_log_destroy(&p_osm->log); +} + +static void load_plugins(osm_opensm_t *osm, const char *plugin_names) +{ + osm_epi_plugin_t *epi; + char *p_names, *name, *p; + + p_names = strdup(plugin_names); + name = strtok_r(p_names, ", \t\n", &p); + while (name && *name) { + epi = osm_epi_construct(osm, name); + if (!epi) + osm_log_v2(&osm->log, OSM_LOG_ERROR, FILE_ID, + "ERR 1000: cannot load plugin \'%s\'\n", + name); + else + cl_qlist_insert_tail(&osm->plugin_list, &epi->list); + name = strtok_r(NULL, " \t\n", &p); + } + free(p_names); +} + +ib_api_status_t osm_opensm_init(IN osm_opensm_t * p_osm, + IN const osm_subn_opt_t * p_opt) +{ + ib_api_status_t status; + + /* Can't use log macros here, since we're initializing the log */ + osm_opensm_construct(p_osm); + + if (p_opt->daemon) + p_osm->log.daemon = 1; + + status = osm_log_init_v2(&p_osm->log, p_opt->force_log_flush, + p_opt->log_flags, p_opt->log_file, + p_opt->log_max_size, p_opt->accum_log_file); + if (status != IB_SUCCESS) + return status; + p_osm->log.log_prefix = p_opt->log_prefix; + + /* If there is a log level defined - add the OSM_VERSION to it */ + osm_log_v2(&p_osm->log, + osm_log_get_level(&p_osm->log) & (OSM_LOG_SYS ^ 0xFF), + FILE_ID, "%s\n", p_osm->osm_version); + /* Write the OSM_VERSION to the SYS_LOG */ + osm_log_v2(&p_osm->log, OSM_LOG_SYS, FILE_ID, "%s\n", p_osm->osm_version); /* Format Waived */ + + OSM_LOG(&p_osm->log, 
/*
 * Second stage of OpenSM initialization.
 *
 * Constructs and initializes, in this order: the MAD pool, VL15, the SM,
 * the SA, the event plugins, the PerfMgr (when compiled in), congestion
 * control, the routing engines and the node name map.  The first step that
 * does not return IB_SUCCESS aborts the sequence and its status is
 * returned; later steps are then not executed.
 *
 * p_osm - OpenSM object to finish initializing.
 * p_opt - subnet options the components are configured from.
 */
ib_api_status_t osm_opensm_init_finish(IN osm_opensm_t * p_osm,
				       IN const osm_subn_opt_t * p_opt)
{
	ib_api_status_t status;

	osm_opensm_construct_finish(p_osm);

	p_osm->subn.sm_port_guid = p_opt->guid;

	status = osm_mad_pool_init(&p_osm->mad_pool);
	if (status != IB_SUCCESS)
		goto Exit;

	status = osm_vl15_init(&p_osm->vl15, p_osm->p_vendor,
			       &p_osm->log, &p_osm->stats, &p_osm->subn,
			       p_opt->max_wire_smps, p_opt->max_wire_smps2,
			       p_opt->max_smps_timeout);
	if (status != IB_SUCCESS)
		goto Exit;

	status = osm_sm_init(&p_osm->sm, &p_osm->subn, &p_osm->db,
			     p_osm->p_vendor, &p_osm->mad_pool, &p_osm->vl15,
			     &p_osm->log, &p_osm->stats, &p_osm->disp,
			     &p_osm->lock);
	if (status != IB_SUCCESS)
		goto Exit;

	/* in single threaded mode no separate SA set dispatcher is passed */
	status = osm_sa_init(&p_osm->sm, &p_osm->sa, &p_osm->subn,
			     p_osm->p_vendor, &p_osm->mad_pool, &p_osm->log,
			     &p_osm->stats, &p_osm->disp,
			     p_opt->single_thread ? NULL : &p_osm->sa_set_disp,
			     &p_osm->lock);
	if (status != IB_SUCCESS)
		goto Exit;

	/*
	 * NOTE(review): plugin_list is initialized only when all steps above
	 * succeeded - confirm teardown copes with an uninitialized list.
	 */
	cl_qlist_init(&p_osm->plugin_list);

	/* plugin load failures are logged by load_plugins() but not fatal */
	if (p_opt->event_plugin_name)
		load_plugins(p_osm, p_opt->event_plugin_name);

#ifdef ENABLE_OSM_PERF_MGR
	status = osm_perfmgr_init(&p_osm->perfmgr, p_osm, p_opt);
	if (status != IB_SUCCESS)
		goto Exit;
#endif				/* ENABLE_OSM_PERF_MGR */

	status = osm_congestion_control_init(&p_osm->cc,
					     p_osm, p_opt);
	if (status != IB_SUCCESS)
		goto Exit;

	/* may be switched on again by a "no_fallback" entry in the list */
	p_osm->no_fallback_routing_engine = FALSE;

	setup_routing_engines(p_osm, p_opt->routing_engine_names);

	p_osm->routing_engine_used = NULL;

	p_osm->node_name_map = open_node_name_map(p_opt->node_name_map_name);

Exit:
	/* only the closing bracket of the function trace is logged here */
	OSM_LOG(&p_osm->log, OSM_LOG_FUNCS, "]\n");	/* Format Waived */
	return status;
}
PRIx64 "\n", + cl_ntoh64(guid)); + osm_vendor_set_sm(p_osm->sm.mad_ctrl.h_bind, TRUE); + +Exit: + OSM_LOG_EXIT(&p_osm->log); + return status; +} + +void osm_opensm_report_event(osm_opensm_t *osm, osm_epi_event_id_t event_id, + void *event_data) +{ + cl_list_item_t *item; + + for (item = cl_qlist_head(&osm->plugin_list); + !osm_exit_flag && item != cl_qlist_end(&osm->plugin_list); + item = cl_qlist_next(item)) { + osm_epi_plugin_t *p = (osm_epi_plugin_t *)item; + if (p->impl->report) + p->impl->report(p->plugin_data, event_id, event_data); + } +} diff --git a/opensm/osm_perfmgr.c b/opensm/osm_perfmgr.c new file mode 100644 index 0000000..6f4444d --- /dev/null +++ b/opensm/osm_perfmgr.c @@ -0,0 +1,2034 @@ +/* + * Copyright (c) 2007 The Regents of the University of California. + * Copyright (c) 2007-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2009,2010 HNR Consulting. All rights reserved. + * Copyright (c) 2013 Lawrence Livermore National Security. All rights reserved. + * Copyright (c) 2011-2014 Mellanox Technologies LTD. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
/*
 * Running statistics of MAD round-trip times, in microseconds.
 *
 * slowest_us starts at 0: DBL_MIN is the smallest *positive* double, not a
 * "minus infinity" sentinel, so it is not a meaningful initial maximum.
 */
struct {
	double fastest_us;
	double slowest_us;
	double avg_us;
	uint64_t num;
} perfmgr_mad_stats = {
	.fastest_us = DBL_MAX,
	.slowest_us = 0.0,
	.avg_us = 0.0,
	.num = 0
};

/*
 * Fold one measured duration into perfmgr_mad_stats.
 *
 * diff - elapsed time; tv_usec must be something which can fit in a
 *        suseconds_t.
 */
static inline void update_mad_stats(struct timeval *diff)
{
	/* convert in floating point so the product cannot overflow time_t */
	double elapsed_us = ((double)diff->tv_sec * 1000000.0) +
			    (double)diff->tv_usec;

	if (elapsed_us < perfmgr_mad_stats.fastest_us)
		perfmgr_mad_stats.fastest_us = elapsed_us;
	if (elapsed_us > perfmgr_mad_stats.slowest_us)
		perfmgr_mad_stats.slowest_us = elapsed_us;

	perfmgr_mad_stats.avg_us =
	    ((perfmgr_mad_stats.avg_us * perfmgr_mad_stats.num) + elapsed_us)
	    / (perfmgr_mad_stats.num + 1);
	perfmgr_mad_stats.num++;
}

/* Reset perfmgr_mad_stats to its initial (no samples) state. */
static inline void clear_mad_stats(void)
{
	perfmgr_mad_stats.fastest_us = DBL_MAX;
	perfmgr_mad_stats.slowest_us = 0.0;
	perfmgr_mad_stats.avg_us = 0;
	perfmgr_mad_stats.num = 0;
}
diff_time(struct timeval *before, struct timeval *after, + struct timeval *diff) +{ + struct timeval tmp = *after; + if (tmp.tv_usec < before->tv_usec) { + tmp.tv_sec--; + tmp.tv_usec += 1000000; + } + diff->tv_sec = tmp.tv_sec - before->tv_sec; + diff->tv_usec = tmp.tv_usec - before->tv_usec; +} +#endif + +/********************************************************************** + * Internal helper functions + **********************************************************************/ +static void init_monitored_nodes(osm_perfmgr_t * pm) +{ + cl_qmap_init(&pm->monitored_map); + pm->remove_list = NULL; + cl_event_construct(&pm->sig_query); + cl_event_init(&pm->sig_query, FALSE); +} + +static void mark_for_removal(osm_perfmgr_t * pm, monitored_node_t * node) +{ + if (pm->remove_list) { + node->next = pm->remove_list; + pm->remove_list = node; + } else { + node->next = NULL; + pm->remove_list = node; + } +} + +static void remove_marked_nodes(osm_perfmgr_t * pm) +{ + while (pm->remove_list) { + monitored_node_t *next = pm->remove_list->next; + unsigned port; + + cl_qmap_remove_item(&pm->monitored_map, + (cl_map_item_t *) (pm->remove_list)); + + if (pm->rm_nodes) + perfmgr_db_delete_entry(pm->db, pm->remove_list->guid); + else + perfmgr_db_mark_active(pm->db, pm->remove_list->guid, FALSE); + + if (pm->remove_list->name) + free(pm->remove_list->name); + + for (port = pm->remove_list->esp0 ? 
/*
 * Account for one completed (answered or failed) PerfMgr query.
 *
 * Decrements pm->outstanding_queries; when the counter reads zero and the
 * sweep is in the post-processing state, the state is moved to sleep under
 * pm->lock.  pm->sig_query is signalled on every call.
 */
static inline void decrement_outstanding_queries(osm_perfmgr_t * pm)
{
	cl_atomic_dec(&pm->outstanding_queries);

	/*
	 * NOTE(review): the counter is re-read here rather than using the
	 * value returned by cl_atomic_dec(), so decrement and test are not
	 * one atomic step - confirm this is acceptable for concurrent callers.
	 */
	if (!pm->outstanding_queries) {
		cl_spinlock_acquire(&pm->lock);
		if (pm->sweep_state == PERFMGR_SWEEP_POST_PROCESSING) {
			pm->sweep_state = PERFMGR_SWEEP_SLEEP;
			OSM_LOG(pm->log, OSM_LOG_INFO,
				"PM sweep state exiting Post Processing\n");
		}
		cl_spinlock_release(&pm->lock);
	}

	/* signalled unconditionally, not only when the counter reached zero */
	cl_event_signal(&pm->sig_query);
}
/*
 * Send-error callback for PerfMgr MADs.
 *
 * bind_context - the osm_perfmgr_t that was registered as bind context.
 * p_madw       - the request MAD wrapper that failed; its context carries
 *                the node GUID and port the query was addressed to.
 *
 * Logs the failure, and when redirection is enabled and the MAD timed out,
 * resets the cached per-port data (keeping orig_lid).  In every case the
 * MAD is returned to the pool and the outstanding query count is
 * decremented.
 */
static void perfmgr_mad_send_err_callback(void *bind_context,
					  osm_madw_t * p_madw)
{
	osm_perfmgr_t *pm = (osm_perfmgr_t *) bind_context;
	osm_madw_context_t *context = &p_madw->context;
	uint64_t node_guid = context->perfmgr_context.node_guid;
	uint8_t port = context->perfmgr_context.port;
	cl_map_item_t *p_node;
	monitored_node_t *p_mon_node;
	ib_net16_t orig_lid;

	OSM_LOG_ENTER(pm->log);

	/*
	 * get the monitored node struct to have the printable name
	 * for log messages
	 */
	if ((p_node = cl_qmap_get(&pm->monitored_map, node_guid)) ==
	    cl_qmap_end(&pm->monitored_map)) {
		OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5415: GUID 0x%016"
			PRIx64 " not found in monitored map\n", node_guid);
		goto Exit;
	}
	p_mon_node = (monitored_node_t *) p_node;

	OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5402: %s (0x%" PRIx64
		") port %u LID %u TID 0x%" PRIx64 "\n",
		p_mon_node->name, p_mon_node->guid, port,
		cl_ntoh16(p_madw->mad_addr.dest_lid),
		cl_ntoh64(p_madw->p_mad->trans_id));

	if (pm->subn->opt.perfmgr_redir && p_madw->status == IB_TIMEOUT) {
		/*
		 * Take the OpenSM lock before touching the per-port data
		 * (the node itself was already looked up above).
		 */
		cl_plock_acquire(&pm->osm->lock);
		/* Now, validate port number */
		if (port >= p_mon_node->num_ports) {
			/* release the lock before logging and bailing out */
			cl_plock_release(&pm->osm->lock);
			OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5416: "
				"Invalid port num %u for %s (GUID 0x%016"
				PRIx64 ") num ports %u\n", port,
				p_mon_node->name, p_mon_node->guid,
				p_mon_node->num_ports);
			goto Exit;
		}
		/*
		 * Clear redirection info for this port except orig_lid.
		 * The memset wipes the whole monitored_port_t entry, so
		 * orig_lid and valid are restored afterwards.
		 */
		orig_lid = p_mon_node->port[port].orig_lid;
		memset(&p_mon_node->port[port], 0, sizeof(monitored_port_t));
		p_mon_node->port[port].orig_lid = orig_lid;
		p_mon_node->port[port].valid = TRUE;
		cl_plock_release(&pm->osm->lock);
	}

Exit:
	/* common exit: the failed request always goes back to the pool */
	osm_mad_pool_put(pm->mad_pool, p_madw);

	decrement_outstanding_queries(pm);

	OSM_LOG_EXIT(pm->log);
}
**********************************************************************/ +static void perfmgr_mad_unbind(osm_perfmgr_t * pm) +{ + OSM_LOG_ENTER(pm->log); + if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) { + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5405: No previous bind\n"); + goto Exit; + } + osm_vendor_unbind(pm->bind_handle); +Exit: + OSM_LOG_EXIT(pm->log); +} + +/********************************************************************** + * Given a monitored node and a port, return the qp + **********************************************************************/ +static ib_net32_t get_qp(monitored_node_t * mon_node, uint8_t port) +{ + ib_net32_t qp = IB_QP1; + + if (mon_node && mon_node->num_ports && port < mon_node->num_ports && + mon_node->port[port].redirection && mon_node->port[port].qp) + qp = mon_node->port[port].qp; + + return qp; +} + +static ib_net16_t get_base_lid(osm_node_t * p_node, uint8_t port) +{ + switch (p_node->node_info.node_type) { + case IB_NODE_TYPE_CA: + case IB_NODE_TYPE_ROUTER: + return osm_node_get_base_lid(p_node, port); + case IB_NODE_TYPE_SWITCH: + return osm_node_get_base_lid(p_node, 0); + default: + return 0; + } +} + +/********************************************************************** + * Given a node, a port, and an optional monitored node, + * return the lid appropriate to query that port + **********************************************************************/ +static ib_net16_t get_lid(osm_node_t * p_node, uint8_t port, + monitored_node_t * mon_node) +{ + if (mon_node && mon_node->num_ports && port < mon_node->num_ports && + mon_node->port[port].lid) + return mon_node->port[port].lid; + + return get_base_lid(p_node, port); +} + +/********************************************************************** + * Build a Performance Management class MAD + **********************************************************************/ +static osm_madw_t *perfmgr_build_mad(osm_perfmgr_t * perfmgr, + ib_net16_t dest_lid, + uint8_t sl, + ib_net32_t 
dest_qp, + uint16_t pkey_ix, + uint8_t mad_method, + ib_net16_t attr_id, + osm_madw_context_t * p_context, + ib_perfmgt_mad_t ** p_pm_mad) +{ + ib_perfmgt_mad_t *pm_mad = NULL; + osm_madw_t *p_madw = NULL; + + OSM_LOG_ENTER(perfmgr->log); + + p_madw = osm_mad_pool_get(perfmgr->mad_pool, perfmgr->bind_handle, + MAD_BLOCK_SIZE, NULL); + if (p_madw == NULL) + return NULL; + + pm_mad = osm_madw_get_perfmgt_mad_ptr(p_madw); + + /* build the mad */ + pm_mad->header.base_ver = 1; + pm_mad->header.mgmt_class = IB_MCLASS_PERF; + pm_mad->header.class_ver = 1; + pm_mad->header.method = mad_method; + pm_mad->header.status = 0; + pm_mad->header.class_spec = 0; + pm_mad->header.trans_id = + cl_hton64((uint64_t) cl_atomic_inc(&perfmgr->trans_id) & + (uint64_t) (0xFFFFFFFF)); + if (perfmgr->trans_id == 0) + pm_mad->header.trans_id = + cl_hton64((uint64_t) cl_atomic_inc(&perfmgr->trans_id) & + (uint64_t) (0xFFFFFFFF)); + pm_mad->header.attr_id = attr_id; + pm_mad->header.resv = 0; + pm_mad->header.attr_mod = 0; + + p_madw->mad_addr.dest_lid = dest_lid; + p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp; + p_madw->mad_addr.addr_type.gsi.remote_qkey = + cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY); + p_madw->mad_addr.addr_type.gsi.pkey_ix = pkey_ix; + p_madw->mad_addr.addr_type.gsi.service_level = sl; + p_madw->mad_addr.addr_type.gsi.global_route = FALSE; + p_madw->resp_expected = TRUE; + + if (p_context) + p_madw->context = *p_context; + + if (p_pm_mad) + *p_pm_mad = pm_mad; + + OSM_LOG_EXIT(perfmgr->log); + + return (p_madw); +} + +/********************************************************************** + * Send a Performance Management class MAD + **********************************************************************/ +static ib_api_status_t perfmgr_send_mad(osm_perfmgr_t *perfmgr, + osm_madw_t * const p_madw) +{ + cl_status_t sts; + ib_api_status_t status = osm_vendor_send(perfmgr->bind_handle, p_madw, + TRUE); + if (status == IB_SUCCESS) { + /* pause thread if there are too many 
outstanding requests */ + cl_atomic_inc(&(perfmgr->outstanding_queries)); + while (perfmgr->outstanding_queries > + (int32_t)perfmgr->max_outstanding_queries) { + cl_spinlock_acquire(&perfmgr->lock); + if (perfmgr->sweep_state == PERFMGR_SWEEP_SLEEP) { + perfmgr->sweep_state = PERFMGR_SWEEP_POST_PROCESSING; + OSM_LOG(perfmgr->log, OSM_LOG_INFO, + "PM sweep state going into Post Processing\n"); + } else if (perfmgr->sweep_state == PERFMGR_SWEEP_ACTIVE) + perfmgr->sweep_state = PERFMGR_SWEEP_SUSPENDED; + cl_spinlock_release(&perfmgr->lock); +wait: + sts = cl_event_wait_on(&perfmgr->sig_query, + EVENT_NO_TIMEOUT, TRUE); + if (sts != CL_SUCCESS) + goto wait; + + cl_spinlock_acquire(&perfmgr->lock); + if (perfmgr->sweep_state == PERFMGR_SWEEP_SUSPENDED) + perfmgr->sweep_state = PERFMGR_SWEEP_ACTIVE; + cl_spinlock_release(&perfmgr->lock); + } + } + return (status); +} + + +/********************************************************************** + * Form and send the PortCounters MAD for a single port + **********************************************************************/ +static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, + ib_net16_t dest_lid, + ib_net32_t dest_qp, uint16_t pkey_ix, + uint8_t port, uint8_t mad_method, + uint16_t counter_select, + uint8_t counter_select2, + osm_madw_context_t * p_context, + uint8_t sl) +{ + ib_api_status_t status = IB_SUCCESS; + ib_port_counters_t *port_counter = NULL; + ib_perfmgt_mad_t *pm_mad = NULL; + osm_madw_t *p_madw = NULL; + + OSM_LOG_ENTER(perfmgr->log); + + p_context->perfmgr_context.mad_attr_id = IB_MAD_ATTR_PORT_CNTRS; + p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix, + mad_method, IB_MAD_ATTR_PORT_CNTRS, p_context, + &pm_mad); + if (p_madw == NULL) + return IB_INSUFFICIENT_MEMORY; + + port_counter = (ib_port_counters_t *) & pm_mad->data; + memset(port_counter, 0, sizeof(*port_counter)); + port_counter->port_select = port; + port_counter->counter_select = cl_hton16(counter_select); + 
port_counter->counter_select2 = counter_select2; + + status = perfmgr_send_mad(perfmgr, p_madw); + + OSM_LOG_EXIT(perfmgr->log); + return status; +} + +/********************************************************************** + * sweep the node_guid_tbl and collect the node guids to be tracked + **********************************************************************/ +static void collect_guids(cl_map_item_t * p_map_item, void *context) +{ + osm_node_t *node = (osm_node_t *) p_map_item; + uint64_t node_guid = cl_ntoh64(node->node_info.node_guid); + osm_perfmgr_t *pm = (osm_perfmgr_t *) context; + monitored_node_t *mon_node = NULL; + uint32_t num_ports; + unsigned port; + + OSM_LOG_ENTER(pm->log); + + if (cl_qmap_get(&pm->monitored_map, node_guid) == + cl_qmap_end(&pm->monitored_map)) { + + if (pm->ignore_cas && + (node->node_info.node_type == IB_NODE_TYPE_CA)) + goto Exit; + + /* if not already in map add it */ + num_ports = osm_node_get_num_physp(node); + mon_node = malloc(sizeof(*mon_node) + + sizeof(monitored_port_t) * num_ports); + if (!mon_node) { + OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5406: " + "malloc failed: not handling node %s" + "(GUID 0x%" PRIx64 ")\n", node->print_desc, + node_guid); + goto Exit; + } + memset(mon_node, 0, + sizeof(*mon_node) + sizeof(monitored_port_t) * num_ports); + mon_node->guid = node_guid; + mon_node->name = strdup(node->print_desc); + mon_node->num_ports = num_ports; + mon_node->node_type = node->node_info.node_type; + /* check for enhanced switch port 0 */ + mon_node->esp0 = (node->sw && + ib_switch_info_is_enhanced_port0(&node->sw-> + switch_info)); + for (port = mon_node->esp0 ? 
0 : 1; port < num_ports; port++) {
+			monitored_port_t *mon_port = &mon_node->port[port];
+			osm_physp_t *p_physp = &node->physp_table[port];
+			osm_physp_t *p_remote_physp = p_physp->p_remote_physp;
+
+			mon_port->orig_lid = 0;
+			mon_port->valid = FALSE;
+			if (osm_physp_is_valid(p_physp)) {
+				mon_port->orig_lid = get_base_lid(node, port);
+				mon_port->valid = TRUE;
+			}
+			mon_port->remote_valid = FALSE;
+			mon_port->remote_name = NULL;
+			if (p_remote_physp && osm_physp_is_valid(p_remote_physp)) {
+				osm_node_t *p_remote_node = p_remote_physp->p_node;
+				mon_port->remote_valid = TRUE;
+				mon_port->remote_guid = p_remote_node->node_info.node_guid;
+				mon_port->remote_name = strdup(p_remote_node->print_desc);
+				mon_port->remote_port = p_remote_physp->port_num;
+			}
+		}
+
+		cl_qmap_insert(&pm->monitored_map, node_guid,
+			       (cl_map_item_t *) mon_node);
+	}
+
+Exit:
+	OSM_LOG_EXIT(pm->log);
+}
+
+/**********************************************************************
+ * Form and send the ClassPortInfo MAD for a single port
+ *
+ * Records IB_MAD_ATTR_CLASS_PORT_INFO in p_context, builds a GET MAD
+ * with perfmgr_build_mad() and hands it to perfmgr_send_mad().
+ *
+ * pm        - PerfMgr object (log and MAD resources)
+ * dest_lid  - destination LID, passed through to perfmgr_build_mad()
+ * dest_qp   - destination QP, passed through to perfmgr_build_mad()
+ * pkey_ix   - P_Key index, passed through to perfmgr_build_mad()
+ * port      - accepted for signature symmetry with the other senders;
+ *             not referenced by this function
+ * p_context - MAD context; its perfmgr_context.mad_attr_id is updated
+ * sl        - service level, passed through to perfmgr_build_mad()
+ *
+ * Returns IB_INSUFFICIENT_MEMORY when no MAD could be built, otherwise
+ * the status reported by perfmgr_send_mad().
+ **********************************************************************/
+static ib_api_status_t perfmgr_send_cpi_mad(osm_perfmgr_t * pm,
+					    ib_net16_t dest_lid,
+					    ib_net32_t dest_qp,
+					    uint16_t pkey_ix,
+					    uint8_t port,
+					    osm_madw_context_t * p_context,
+					    uint8_t sl)
+{
+	ib_api_status_t status = IB_SUCCESS;
+	osm_madw_t *p_madw = NULL;
+
+	OSM_LOG_ENTER(pm->log);
+
+	p_context->perfmgr_context.mad_attr_id = IB_MAD_ATTR_CLASS_PORT_INFO;
+	p_madw = perfmgr_build_mad(pm, dest_lid, sl, dest_qp,
+				   pkey_ix, IB_MAD_METHOD_GET,
+				   IB_MAD_ATTR_CLASS_PORT_INFO, p_context,
+				   NULL);
+	if (p_madw == NULL) {
+		/* leave through Exit so the enter/exit trace stays paired */
+		status = IB_INSUFFICIENT_MEMORY;
+		goto Exit;
+	}
+
+	status = perfmgr_send_mad(pm, p_madw);
+
+Exit:
+	OSM_LOG_EXIT(pm->log);
+	return status;
+}
+
+/**********************************************************************
+ * return if some form of PortCountersExtended (PCE || PCE NoIETF) are supported
+ **********************************************************************/
+static inline boolean_t
pce_supported(monitored_node_t *mon_node, uint8_t port)
+{
+	const monitored_port_t *p_mon_port = &mon_node->port[port];
+
+	/* cap_mask is only consulted when cpi_valid is set */
+	if (!p_mon_port->cpi_valid)
+		return FALSE;
+
+	/* either the full (IETF) or the NoIETF extended-width bit is enough */
+	return (p_mon_port->cap_mask & IB_PM_EXT_WIDTH_SUPPORTED) ||
+	       (p_mon_port->cap_mask & IB_PM_EXT_WIDTH_NOIETF_SUP);
+}
+
+/**********************************************************************
+ * Report whether the port advertises PortCounters.XmitWait:
+ * true only when cpi_valid is set and IB_PM_PC_XMIT_WAIT_SUP is
+ * present in cap_mask.
+ **********************************************************************/
+static inline boolean_t xmit_wait_supported(monitored_node_t *mon_node, uint8_t port)
+{
+	const monitored_port_t *p_mon_port = &mon_node->port[port];
+
+	if (!p_mon_port->cpi_valid)
+		return FALSE;
+
+	return (p_mon_port->cap_mask & IB_PM_PC_XMIT_WAIT_SUP) != 0;
+}
+
+/**********************************************************************
+ * Report whether "full" PortCountersExtended (IETF) is indicated:
+ * true only when cpi_valid is set and IB_PM_EXT_WIDTH_SUPPORTED is
+ * present in cap_mask.
+ **********************************************************************/
+static inline boolean_t ietf_supported(monitored_node_t *mon_node, uint8_t port)
+{
+	const monitored_port_t *p_mon_port = &mon_node->port[port];
+
+	if (!p_mon_port->cpi_valid)
+		return FALSE;
+
+	return (p_mon_port->cap_mask & IB_PM_EXT_WIDTH_SUPPORTED) != 0;
+}
+
+/**********************************************************************
+ * Form and send the PortCountersExtended MAD for a single port
+ **********************************************************************/
+static ib_api_status_t perfmgr_send_pce_mad(osm_perfmgr_t * perfmgr,
+					    ib_net16_t dest_lid,
+					    ib_net32_t dest_qp,
+					    uint16_t pkey_ix,
+					    uint8_t port, uint8_t mad_method,
+					    osm_madw_context_t * p_context,
+					    uint8_t sl)
+{
+	ib_api_status_t status = IB_SUCCESS;
+	ib_port_counters_ext_t *port_counter_ext = NULL;
+	ib_perfmgt_mad_t *pm_mad = NULL;
+	osm_madw_t *p_madw = NULL;
+
+	OSM_LOG_ENTER(perfmgr->log);
+
+	p_context->perfmgr_context.mad_attr_id = IB_MAD_ATTR_PORT_CNTRS_EXT;
+	p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix,
+				   mad_method, IB_MAD_ATTR_PORT_CNTRS_EXT, p_context,
+				   &pm_mad);
+	if (p_madw == NULL)
+		return
IB_INSUFFICIENT_MEMORY; + + port_counter_ext = (ib_port_counters_ext_t *) & pm_mad->data; + memset(port_counter_ext, 0, sizeof(*port_counter_ext)); + port_counter_ext->port_select = port; + port_counter_ext->counter_select = cl_hton16(0x00FF); + + status = perfmgr_send_mad(perfmgr, p_madw); + + OSM_LOG_EXIT(perfmgr->log); + return status; +} + +/********************************************************************** + * query the Port Counters of all the nodes in the subnet + **********************************************************************/ +static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context) +{ + ib_api_status_t status = IB_SUCCESS; + osm_perfmgr_t *pm = context; + osm_node_t *node = NULL; + monitored_node_t *mon_node = (monitored_node_t *) p_map_item; + osm_madw_context_t mad_context; + uint64_t node_guid = 0; + ib_net32_t remote_qp; + uint8_t port, num_ports = 0; + + OSM_LOG_ENTER(pm->log); + + cl_plock_acquire(&pm->osm->lock); + node = osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid)); + if (!node) { + OSM_LOG(pm->log, OSM_LOG_ERROR, + "ERR 5407: Node \"%s\" (guid 0x%" PRIx64 + ") no longer exists so removing from PerfMgr monitoring\n", + mon_node->name, mon_node->guid); + mark_for_removal(pm, mon_node); + goto Exit; + } + + num_ports = osm_node_get_num_physp(node); + node_guid = cl_ntoh64(node->node_info.node_guid); + + /* make sure there is a database object ready to store this info */ + if (perfmgr_db_create_entry(pm->db, node_guid, mon_node->esp0, + num_ports, node->print_desc) != + PERFMGR_EVENT_DB_SUCCESS) { + OSM_LOG(pm->log, OSM_LOG_ERROR, + "ERR 5408: DB create entry failed for 0x%" + PRIx64 " (%s) : %s\n", node_guid, node->print_desc, + strerror(errno)); + goto Exit; + } + + perfmgr_db_mark_active(pm->db, node_guid, TRUE); + + /* issue the query for each port */ + for (port = mon_node->esp0 ? 
0 : 1; port < num_ports; port++) { + ib_net16_t lid; + + if (!osm_node_get_physp_ptr(node, port)) + continue; + + if (!mon_node->port[port].valid) + continue; + + lid = get_lid(node, port, mon_node); + if (lid == 0) { + OSM_LOG(pm->log, OSM_LOG_DEBUG, "WARN: node 0x%" PRIx64 + " port %d (%s): port out of range, skipping\n", + cl_ntoh64(node->node_info.node_guid), port, + node->print_desc); + continue; + } + + remote_qp = get_qp(mon_node, port); + + mad_context.perfmgr_context.node_guid = node_guid; + mad_context.perfmgr_context.port = port; + mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_GET; + + if (pm->query_cpi && !mon_node->port[port].cpi_valid) { + status = perfmgr_send_cpi_mad(pm, lid, remote_qp, + mon_node->port[port].pkey_ix, + port, &mad_context, + 0); /* FIXME SL != 0 */ + if (status != IB_SUCCESS) + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5410: " + "Failed to issue ClassPortInfo query " + "for node 0x%" PRIx64 + " port %d (%s)\n", + node->node_info.node_guid, port, + node->print_desc); + if (mon_node->node_type == IB_NODE_TYPE_SWITCH) + goto Exit; /* only need to issue 1 CPI query + for switches */ + } else { + +#ifdef ENABLE_OSM_PERF_MGR_PROFILE + gettimeofday(&mad_context.perfmgr_context.query_start, NULL); +#endif + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%" + PRIx64 " port %d (lid %u) (%s)\n", + node_guid, port, cl_ntoh16(lid), + node->print_desc); + status = perfmgr_send_pc_mad(pm, lid, remote_qp, + mon_node->port[port].pkey_ix, + port, IB_MAD_METHOD_GET, + 0xffff, + 1, + &mad_context, + 0); /* FIXME SL != 0 */ + if (status != IB_SUCCESS) + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: " + "Failed to issue port counter query for node 0x%" + PRIx64 " port %d (%s)\n", + node->node_info.node_guid, port, + node->print_desc); + + if (pce_supported(mon_node, port)) { + +#if ENABLE_OSM_PERF_MGR_PROFILE + gettimeofday(&mad_context.perfmgr_context.query_start, NULL); +#endif + status = perfmgr_send_pce_mad(pm, lid, remote_qp, + 
mon_node->port[port].pkey_ix, + port, + IB_MAD_METHOD_GET, + &mad_context, + 0); /* FIXME SL != 0 */ + if (status != IB_SUCCESS) + OSM_LOG(pm->log, OSM_LOG_ERROR, + "ERR 5417: Failed to issue " + "port counter query for " + "node 0x%" PRIx64 " port " + "%d (%s)\n", + node->node_info.node_guid, + port, + node->print_desc); + } + } + } +Exit: + cl_plock_release(&pm->osm->lock); + OSM_LOG_EXIT(pm->log); +} + +/********************************************************************** + * Discovery stuff + * This code should not be here, but merged with main OpenSM + **********************************************************************/ +extern int wait_for_pending_transactions(osm_stats_t * stats); +extern void osm_drop_mgr_process(IN osm_sm_t * sm); + +static int sweep_hop_1(osm_sm_t * sm) +{ + ib_api_status_t status = IB_SUCCESS; + osm_madw_context_t context; + osm_node_t *p_node; + osm_port_t *p_port; + osm_dr_path_t hop_1_path; + ib_net64_t port_guid; + uint8_t port_num; + uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX]; + uint8_t num_ports; + osm_physp_t *p_ext_physp; + + port_guid = sm->p_subn->sm_port_guid; + + p_port = osm_get_port_by_guid(sm->p_subn, port_guid); + if (!p_port) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 5481: No SM port object\n"); + return -1; + } + + p_node = p_port->p_node; + port_num = ib_node_info_get_local_port_num(&p_node->node_info); + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Probing hop 1 on local port %u\n", port_num); + + memset(path_array, 0, sizeof(path_array)); + /* the hop_1 operations depend on the type of our node. 
+ * Currently - legal nodes that can host SM are SW and CA */ + switch (osm_node_get_type(p_node)) { + case IB_NODE_TYPE_CA: + case IB_NODE_TYPE_ROUTER: + memset(&context, 0, sizeof(context)); + context.ni_context.node_guid = osm_node_get_node_guid(p_node); + context.ni_context.port_num = port_num; + + path_array[1] = port_num; + + osm_dr_path_init(&hop_1_path, 1, path_array); + CL_PLOCK_ACQUIRE(sm->p_lock); + status = osm_req_get(sm, &hop_1_path, IB_MAD_ATTR_NODE_INFO, 0, + TRUE, 0, 0, CL_DISP_MSGID_NONE, &context); + CL_PLOCK_RELEASE(sm->p_lock); + + if (status != IB_SUCCESS) + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 5482: " + "Request for NodeInfo failed\n"); + break; + + case IB_NODE_TYPE_SWITCH: + /* Need to go over all the ports of the switch, and send a node_info + * from them. This doesn't include the port 0 of the switch, which + * hosts the SM. + * Note: We'll send another switchInfo on port 0, since if no ports + * are connected, we still want to get some response, and have the + * subnet come up. 
+		 */
+		num_ports = osm_node_get_num_physp(p_node);
+		for (port_num = 0; port_num < num_ports; port_num++) {
+			/* go through the port only if the port is not DOWN */
+			p_ext_physp = osm_node_get_physp_ptr(p_node, port_num);
+			if (!p_ext_physp || ib_port_info_get_port_state
+			    (&p_ext_physp->port_info) <= IB_LINK_DOWN)
+				continue;
+
+			memset(&context, 0, sizeof(context));
+			context.ni_context.node_guid =
+			    osm_node_get_node_guid(p_node);
+			context.ni_context.port_num = port_num;
+
+			path_array[1] = port_num;
+
+			osm_dr_path_init(&hop_1_path, 1, path_array);
+			CL_PLOCK_ACQUIRE(sm->p_lock);
+			status = osm_req_get(sm, &hop_1_path,
+					     IB_MAD_ATTR_NODE_INFO, 0, TRUE, 0,
+					     0, CL_DISP_MSGID_NONE, &context);
+			CL_PLOCK_RELEASE(sm->p_lock);
+
+			if (status != IB_SUCCESS)
+				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 5484: "
+					"Request for NodeInfo failed\n");
+		}
+		break;
+
+	default:
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR,
+			"ERR 5483: Unknown node type %d\n",
+			osm_node_get_type(p_node));
+	}
+
+	return status;
+}
+
+/**********************************************************************
+ * Report whether the SM's own port should be treated as down.
+ *
+ * Returns 1 when the SM port GUID is still zero, when no port object
+ * exists for that GUID, or when the port's physical state is
+ * IB_LINK_DOWN.  Returns 0 otherwise, including for a switch whose
+ * port 0 is a base (non-enhanced) port 0.
+ *
+ * The port object is only examined while sm->p_lock is held, so it
+ * cannot be torn down between the lookup and the state read.
+ **********************************************************************/
+static unsigned is_sm_port_down(osm_sm_t * sm)
+{
+	ib_net64_t port_guid;
+	osm_port_t *p_port;
+	unsigned down;
+
+	port_guid = sm->p_subn->sm_port_guid;
+	if (port_guid == 0)
+		return 1;
+
+	CL_PLOCK_ACQUIRE(sm->p_lock);
+	p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
+	if (!p_port) {
+		CL_PLOCK_RELEASE(sm->p_lock);
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 5485: "
+			"SM port with GUID:%016" PRIx64 " is unknown\n",
+			cl_ntoh64(port_guid));
+		return 1;
+	}
+
+	if (p_port->p_node->sw &&
+	    !ib_switch_info_is_enhanced_port0(&p_port->p_node->sw->switch_info))
+		down = 0;	/* base SP0 */
+	else
+		down = osm_physp_get_port_state(p_port->p_physp) == IB_LINK_DOWN;
+
+	/* release only after the last access to p_port */
+	CL_PLOCK_RELEASE(sm->p_lock);
+
+	return down;
+}
+
+static int sweep_hop_0(osm_sm_t * sm)
+{
+	ib_api_status_t status;
+	osm_dr_path_t dr_path;
+	osm_bind_handle_t h_bind;
+	uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX];
+
+	memset(path_array, 0, sizeof(path_array));
+
+	h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl);
+	if (h_bind ==
OSM_BIND_INVALID_HANDLE) { + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "No bound ports\n"); + return -1; + } + + osm_dr_path_init(&dr_path, 0, path_array); + CL_PLOCK_ACQUIRE(sm->p_lock); + status = osm_req_get(sm, &dr_path, IB_MAD_ATTR_NODE_INFO, 0, + TRUE, 0, 0, CL_DISP_MSGID_NONE, NULL); + CL_PLOCK_RELEASE(sm->p_lock); + + if (status != IB_SUCCESS) + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 5486: Request for NodeInfo failed\n"); + + return status; +} + +static void reset_node_count(cl_map_item_t * p_map_item, void *cxt) +{ + osm_node_t *p_node = (osm_node_t *) p_map_item; + p_node->discovery_count = 0; + + memset(p_node->physp_discovered, 0, + sizeof(uint8_t) * p_node->physp_tbl_size); +} + +static void reset_port_count(cl_map_item_t * p_map_item, void *cxt) +{ + osm_port_t *p_port = (osm_port_t *) p_map_item; + p_port->discovery_count = 0; +} + +static void reset_switch_count(cl_map_item_t * p_map_item, void *cxt) +{ + osm_switch_t *p_sw = (osm_switch_t *) p_map_item; + p_sw->need_update = 0; +} + +static int perfmgr_discovery(osm_opensm_t * osm) +{ + int ret; + + CL_PLOCK_ACQUIRE(&osm->lock); + cl_qmap_apply_func(&osm->subn.node_guid_tbl, reset_node_count, NULL); + cl_qmap_apply_func(&osm->subn.port_guid_tbl, reset_port_count, NULL); + cl_qmap_apply_func(&osm->subn.sw_guid_tbl, reset_switch_count, NULL); + CL_PLOCK_RELEASE(&osm->lock); + + osm->subn.in_sweep_hop_0 = TRUE; + + ret = sweep_hop_0(&osm->sm); + if (ret) + goto _exit; + + if (wait_for_pending_transactions(&osm->stats)) + goto _exit; + + if (is_sm_port_down(&osm->sm)) { + OSM_LOG(&osm->log, OSM_LOG_VERBOSE, "SM port is down\n"); + goto _drop; + } + + osm->subn.in_sweep_hop_0 = FALSE; + + ret = sweep_hop_1(&osm->sm); + if (ret) + goto _exit; + + if (wait_for_pending_transactions(&osm->stats)) + goto _exit; + +_drop: + osm_drop_mgr_process(&osm->sm); + +_exit: + return ret; +} + +/********************************************************************** + * Main PerfMgr processor - query the performance counters 
+ **********************************************************************/ +void osm_perfmgr_process(osm_perfmgr_t * pm) +{ +#ifdef ENABLE_OSM_PERF_MGR_PROFILE + struct timeval before, after; +#endif + + if (pm->state != PERFMGR_STATE_ENABLED) + return; + + cl_spinlock_acquire(&pm->lock); + if (pm->sweep_state == PERFMGR_SWEEP_ACTIVE || + pm->sweep_state == PERFMGR_SWEEP_SUSPENDED || + pm->sweep_state == PERFMGR_SWEEP_POST_PROCESSING) { + cl_spinlock_release(&pm->lock); + OSM_LOG(pm->log, OSM_LOG_INFO, + "PM sweep state %d, skipping sweep\n", + pm->sweep_state); + return; + } + + pm->sweep_state = PERFMGR_SWEEP_ACTIVE; + cl_spinlock_release(&pm->lock); + + if (pm->subn->sm_state == IB_SMINFO_STATE_STANDBY || + pm->subn->sm_state == IB_SMINFO_STATE_NOTACTIVE) + perfmgr_discovery(pm->subn->p_osm); + + /* if redirection enabled, determine local port */ + if (pm->subn->opt.perfmgr_redir && pm->local_port == -1) { + osm_node_t *p_node; + osm_port_t *p_port; + + CL_PLOCK_ACQUIRE(pm->sm->p_lock); + p_port = osm_get_port_by_guid(pm->subn, pm->port_guid); + if (p_port) { + p_node = p_port->p_node; + CL_ASSERT(p_node); + pm->local_port = + ib_node_info_get_local_port_num(&p_node->node_info); + } else + OSM_LOG(pm->log, OSM_LOG_ERROR, + "ERR 5487: No PerfMgr port object for " + "port GUID 0x%" PRIx64 "\n", + cl_ntoh64(pm->port_guid)); + CL_PLOCK_RELEASE(pm->sm->p_lock); + } + +#ifdef ENABLE_OSM_PERF_MGR_PROFILE + gettimeofday(&before, NULL); +#endif + /* With the global lock held, collect the node guids */ + /* FIXME we should be able to track SA notices + * and not have to sweep the node_guid_tbl each pass + */ + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Gathering PerfMgr stats\n"); + cl_plock_acquire(&pm->osm->lock); + cl_qmap_apply_func(&pm->subn->node_guid_tbl, collect_guids, pm); + cl_plock_release(&pm->osm->lock); + + /* then for each node query their counters */ + cl_qmap_apply_func(&pm->monitored_map, perfmgr_query_counters, pm); + + /* clean out any nodes found to be removed 
during the sweep */ + remove_marked_nodes(pm); + +#ifdef ENABLE_OSM_PERF_MGR_PROFILE + gettimeofday(&after, NULL); + diff_time(&before, &after, &after); + osm_log_v2(pm->log, OSM_LOG_INFO, FILE_ID, + "PerfMgr total sweep time : %ld.%06ld s\n" + " fastest mad : %g us\n" + " slowest mad : %g us\n" + " average mad : %g us\n", + after.tv_sec, after.tv_usec, perfmgr_mad_stats.fastest_us, + perfmgr_mad_stats.slowest_us, perfmgr_mad_stats.avg_us); + clear_mad_stats(); +#endif + + cl_spinlock_acquire(&pm->lock); + pm->sweep_state = PERFMGR_SWEEP_SLEEP; + cl_spinlock_release(&pm->lock); +} + +/********************************************************************** + * PerfMgr timer - loop continuously and signal SM to run PerfMgr + * processor if enabled + **********************************************************************/ +static void perfmgr_sweep(void *arg) +{ + osm_perfmgr_t *pm = arg; + + osm_sm_signal(pm->sm, OSM_SIGNAL_PERFMGR_SWEEP); + cl_timer_start(&pm->sweep_timer, pm->sweep_time_s * 1000); +} + +void osm_perfmgr_shutdown(osm_perfmgr_t * pm) +{ + OSM_LOG_ENTER(pm->log); + cl_timer_stop(&pm->sweep_timer); + cl_disp_unregister(pm->pc_disp_h); + perfmgr_mad_unbind(pm); + OSM_LOG_EXIT(pm->log); +} + +void osm_perfmgr_destroy(osm_perfmgr_t * pm) +{ + OSM_LOG_ENTER(pm->log); + perfmgr_db_destroy(pm->db); + cl_timer_destroy(&pm->sweep_timer); + OSM_LOG_EXIT(pm->log); +} + +/********************************************************************** + * Detect if someone else on the network could have cleared the counters + * without us knowing. This is easy to detect because the counters never + * wrap but are "sticky". + * + * The one time this will not work is if the port is getting errors fast + * enough to have the reading overtake the previous reading. In this case, + * counters will be missed. 
+ **********************************************************************/ +static void perfmgr_check_oob_clear(osm_perfmgr_t * pm, + monitored_node_t * mon_node, uint8_t port, + perfmgr_db_err_reading_t * cr) +{ + perfmgr_db_err_reading_t prev_err; + + if (perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_err) + != PERFMGR_EVENT_DB_SUCCESS) { + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Failed to find previous " + "error reading for %s (guid 0x%" PRIx64 ") port %u\n", + mon_node->name, mon_node->guid, port); + return; + } + + OSM_LOG(pm->log, OSM_LOG_DEBUG, + "Errors vs previous node %s (0x%" PRIx64 ") port %u\n" + "SE: %"PRIu64" ?< %"PRIu64"\n" + "LE: %"PRIu64" ?< %"PRIu64"\n" + "LD: %"PRIu64" ?< %"PRIu64"\n" + "RE: %"PRIu64" ?< %"PRIu64"\n" + "RPE: %"PRIu64" ?< %"PRIu64"\n" + "SRE: %"PRIu64" ?< %"PRIu64"\n" + "XD: %"PRIu64" ?< %"PRIu64"\n" + "XCE: %"PRIu64" ?< %"PRIu64"\n" + "RCE: %"PRIu64" ?< %"PRIu64"\n" + "LI: %"PRIu64" ?< %"PRIu64"\n" + "BO: %"PRIu64" ?< %"PRIu64"\n" + "VL15: %"PRIu64" ?< %"PRIu64"\n" + "XW: %"PRIu64" ?< %"PRIu64"\n" + , + mon_node->name, mon_node->guid, port, + cr->symbol_err_cnt, prev_err.symbol_err_cnt, + cr->link_err_recover, prev_err.link_err_recover, + cr->link_downed, prev_err.link_downed, + cr->rcv_err, prev_err.rcv_err, + cr->rcv_rem_phys_err, prev_err.rcv_rem_phys_err, + cr->rcv_switch_relay_err, prev_err.rcv_switch_relay_err, + cr->xmit_discards, prev_err.xmit_discards, + cr->xmit_constraint_err, prev_err.xmit_constraint_err, + cr->rcv_constraint_err, prev_err.rcv_constraint_err, + cr->link_integrity, prev_err.link_integrity, + cr->buffer_overrun, prev_err.buffer_overrun, + cr->vl15_dropped, prev_err.vl15_dropped, + cr->xmit_wait, prev_err.xmit_wait); + + if (cr->symbol_err_cnt < prev_err.symbol_err_cnt || + cr->link_err_recover < prev_err.link_err_recover || + cr->link_downed < prev_err.link_downed || + cr->rcv_err < prev_err.rcv_err || + cr->rcv_rem_phys_err < prev_err.rcv_rem_phys_err || + cr->rcv_switch_relay_err < 
prev_err.rcv_switch_relay_err ||
+	    cr->xmit_discards < prev_err.xmit_discards ||
+	    cr->xmit_constraint_err < prev_err.xmit_constraint_err ||
+	    cr->rcv_constraint_err < prev_err.rcv_constraint_err ||
+	    cr->link_integrity < prev_err.link_integrity ||
+	    cr->buffer_overrun < prev_err.buffer_overrun ||
+	    cr->vl15_dropped < prev_err.vl15_dropped ||
+	    cr->xmit_wait < prev_err.xmit_wait) {
+		OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 540A: "
+			"Detected an out of band error clear "
+			"on %s (0x%" PRIx64 ") port %u\n",
+			mon_node->name, mon_node->guid, port);
+		perfmgr_db_clear_prev_err(pm->db, mon_node->guid, port);
+	}
+}
+
+/**********************************************************************
+ * Overflow predicates: each returns non-zero once a counter value has
+ * reached its "close to overflowing" threshold.
+ *
+ * For the 8/16/32/64-bit variants the threshold is the type maximum
+ * minus a quarter of that maximum.  The 16/32/64-bit variants take the
+ * value in network byte order and convert it before comparing.  The
+ * 4-bit variant uses the fixed threshold 10.
+ **********************************************************************/
+static int counter_overflow_4(uint8_t val)
+{
+	const uint8_t threshold = 10;
+
+	return val >= threshold;
+}
+
+static int counter_overflow_8(uint8_t val)
+{
+	const uint8_t threshold = UINT8_MAX - (UINT8_MAX / 4);
+
+	return val >= threshold;
+}
+
+static int counter_overflow_16(ib_net16_t val)
+{
+	const uint16_t threshold = UINT16_MAX - (UINT16_MAX / 4);
+
+	return cl_ntoh16(val) >= threshold;
+}
+
+static int counter_overflow_32(ib_net32_t val)
+{
+	const uint32_t threshold = UINT32_MAX - (UINT32_MAX / 4);
+
+	return cl_ntoh32(val) >= threshold;
+}
+
+static int counter_overflow_64(ib_net64_t val)
+{
+	const uint64_t threshold = UINT64_MAX - (UINT64_MAX / 4);
+
+	return cl_ntoh64(val) >= threshold;
+}
+
+/**********************************************************************
+ * Check if the port counters have overflowed and if so issue a clear
+ * MAD to the port
+ **********************************************************************/
+static void perfmgr_check_overflow(osm_perfmgr_t * pm,
+				   monitored_node_t * mon_node, int16_t pkey_ix,
+				   uint8_t port, ib_port_counters_t * pc,
+				   boolean_t xmit_wait_sup)
+{
+	osm_madw_context_t mad_context;
+	ib_api_status_t status;
+	ib_net32_t remote_qp;
+	uint16_t counter_select;
+	uint8_t counter_select2;
+
+	OSM_LOG_ENTER(pm->log);
+
+	if
(counter_overflow_16(pc->symbol_err_cnt) || + counter_overflow_8(pc->link_err_recover) || + counter_overflow_8(pc->link_downed) || + counter_overflow_16(pc->rcv_err) || + counter_overflow_16(pc->rcv_rem_phys_err) || + counter_overflow_16(pc->rcv_switch_relay_err) || + counter_overflow_16(pc->xmit_discards) || + counter_overflow_8(pc->xmit_constraint_err) || + counter_overflow_8(pc->rcv_constraint_err) || + counter_overflow_4(PC_LINK_INT(pc->link_int_buffer_overrun)) || + counter_overflow_4(PC_BUF_OVERRUN(pc->link_int_buffer_overrun)) || + counter_overflow_16(pc->vl15_dropped) || + (xmit_wait_sup && counter_overflow_32(pc->xmit_wait)) || + (!pce_supported(mon_node, port) && + (counter_overflow_32(pc->xmit_data) || + counter_overflow_32(pc->rcv_data) || + counter_overflow_32(pc->xmit_pkts) || + counter_overflow_32(pc->rcv_pkts)))) { + osm_node_t *p_node = NULL; + ib_net16_t lid = 0; + + if (!mon_node->port[port].valid) + goto Exit; + + osm_log_v2(pm->log, OSM_LOG_VERBOSE, FILE_ID, + "PerfMgr: Counter overflow: %s (0x%" PRIx64 + ") port %d; clearing counters\n", + mon_node->name, mon_node->guid, port); + + cl_plock_acquire(&pm->osm->lock); + p_node = + osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid)); + lid = get_lid(p_node, port, mon_node); + cl_plock_release(&pm->osm->lock); + if (lid == 0) { + OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 540C: " + "Failed to clear counters for %s (0x%" + PRIx64 ") port %d; failed to get lid\n", + mon_node->name, mon_node->guid, port); + goto Exit; + } + + remote_qp = get_qp(NULL, port); + + mad_context.perfmgr_context.node_guid = mon_node->guid; + mad_context.perfmgr_context.port = port; + mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET; + + /* apparently some HW uses the same counters for the 32 and 64 + * bit versions and a clear of them in the PortCounters + * attribute also clears the ExtendedPortCounters equivalant + * counters + */ + if (pce_supported(mon_node, port)) + counter_select = 0x0fff; + else + 
counter_select = 0xffff; + + if (xmit_wait_sup) + counter_select2 = 1; + else + counter_select2 = 0; + + status = perfmgr_send_pc_mad(pm, lid, remote_qp, pkey_ix, + port, IB_MAD_METHOD_SET, + counter_select, + counter_select2, + &mad_context, + 0); /* FIXME SL != 0 */ + if (status != IB_SUCCESS) + OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5411: " + "Failed to send clear counters MAD for %s (0x%" + PRIx64 ") port %d\n", + mon_node->name, mon_node->guid, port); + + perfmgr_db_clear_prev_err(pm->db, mon_node->guid, port); + if (!pce_supported(mon_node, port)) + perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port); + } + +Exit: + OSM_LOG_EXIT(pm->log); +} + +/********************************************************************** + * Check if the port counters have overflowed and if so issue a clear + * MAD to the port + **********************************************************************/ +static void perfmgr_check_pce_overflow(osm_perfmgr_t * pm, + monitored_node_t * mon_node, + int16_t pkey_ix, + uint8_t port, + ib_port_counters_ext_t * pc) +{ + osm_madw_context_t mad_context; + ib_api_status_t status; + ib_net32_t remote_qp; + + OSM_LOG_ENTER(pm->log); + + if (counter_overflow_64(pc->xmit_data) || + counter_overflow_64(pc->rcv_data) || + counter_overflow_64(pc->xmit_pkts) || + counter_overflow_64(pc->rcv_pkts) || + (ietf_supported(mon_node, port) && + (counter_overflow_64(pc->unicast_xmit_pkts) || + counter_overflow_64(pc->unicast_rcv_pkts) || + counter_overflow_64(pc->multicast_xmit_pkts) || + counter_overflow_64(pc->multicast_rcv_pkts)))) { + osm_node_t *p_node = NULL; + ib_net16_t lid = 0; + + if (!mon_node->port[port].valid) + goto Exit; + + osm_log(pm->log, OSM_LOG_VERBOSE, + "PerfMgr: PortCountersExtended overflow: %s (0x%" + PRIx64 ") port %d; clearing counters\n", + mon_node->name, mon_node->guid, port); + + cl_plock_acquire(&pm->osm->lock); + p_node = + osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid)); + lid = get_lid(p_node, port, 
mon_node); + cl_plock_release(&pm->osm->lock); + if (lid == 0) { + OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5418: " + "Failed to clear counters for %s (0x%" + PRIx64 ") port %d; failed to get lid\n", + mon_node->name, mon_node->guid, port); + goto Exit; + } + + remote_qp = get_qp(NULL, port); + + mad_context.perfmgr_context.node_guid = mon_node->guid; + mad_context.perfmgr_context.port = port; + mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET; + /* clear port counters */ + status = perfmgr_send_pce_mad(pm, lid, remote_qp, pkey_ix, + port, IB_MAD_METHOD_SET, + &mad_context, + 0); /* FIXME SL != 0 */ + if (status != IB_SUCCESS) + OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5419: " + "Failed to send clear counters MAD for %s (0x%" + PRIx64 ") port %d\n", + mon_node->name, mon_node->guid, port); + + perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port); + } + +Exit: + OSM_LOG_EXIT(pm->log); +} + +/********************************************************************** + * Check values for logging of errors + **********************************************************************/ +static void perfmgr_log_errors(osm_perfmgr_t * pm, + monitored_node_t * mon_node, uint8_t port, + perfmgr_db_err_reading_t * reading) +{ + perfmgr_db_err_reading_t prev_read; + perfmgr_db_err_t err = + perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_read); + uint64_t cur, prev; + + if (err != PERFMGR_EVENT_DB_SUCCESS) { + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Failed to find previous " + "reading for %s (0x%" PRIx64 ") port %u\n", + mon_node->name, mon_node->guid, port); + return; + } + +#define LOG_ERR_CNT(errname, errnum, counter_name) \ + if (reading->counter_name > prev_read.counter_name) { \ + if (mon_node->port[port].remote_valid == TRUE) \ + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR %s: " \ + "%s : %" PRIu64 " : node " \ + "\"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u " \ + "connected to \"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n", \ + errnum, errname, \ + 
reading->counter_name - prev_read.counter_name, \ + mon_node->name, mon_node->guid, port, \ + mon_node->port[port].remote_name, \ + mon_node->port[port].remote_guid, \ + mon_node->port[port].remote_port); \ + else \ + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR %s: " \ + "%s : %" PRIu64 " : node " \ + "\"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n", \ + errnum, errname, \ + reading->counter_name - prev_read.counter_name, \ + mon_node->name, mon_node->guid, port); \ + } + + LOG_ERR_CNT("SymbolErrorCounter", "5431", symbol_err_cnt); + LOG_ERR_CNT("LinkErrorRecoveryCounter", "5432", link_err_recover); + LOG_ERR_CNT("LinkDownedCounter", "5433", link_downed); + LOG_ERR_CNT("PortRcvErrors", "5434", rcv_err); + LOG_ERR_CNT("PortRcvRemotePhysicalErrors", "5435", rcv_rem_phys_err); + LOG_ERR_CNT("PortRcvSwitchRelayErrors", "5436", rcv_switch_relay_err); + LOG_ERR_CNT("PortXmitDiscards", "5437", xmit_discards); + LOG_ERR_CNT("PortXmitConstraintErrors", "5438", xmit_constraint_err); + LOG_ERR_CNT("PortRcvConstraintErrors", "5439", rcv_constraint_err); + LOG_ERR_CNT("LocalLinkIntegrityErrors", "543A", link_integrity); + LOG_ERR_CNT("ExcessiveBufferOverrunErrors", "543B", buffer_overrun); + LOG_ERR_CNT("VL15Dropped", "543C", vl15_dropped); + + cur = reading->xmit_wait; + prev = prev_read.xmit_wait; + if (pm->xmit_wait_log && cur > prev && + (cur - prev) >= pm->xmit_wait_threshold) { + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 543D: XmitWait : %" PRIu64 + " : node \"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n", + cur - prev, mon_node->name, mon_node->guid, port); + } +} + +static int16_t validate_redir_pkey(osm_perfmgr_t *pm, ib_net16_t pkey) +{ + int16_t pkey_ix = -1; + osm_port_t *p_port; + osm_pkey_tbl_t *p_pkey_tbl; + ib_net16_t *p_orig_pkey; + uint16_t block; + uint8_t index; + + OSM_LOG_ENTER(pm->log); + + CL_PLOCK_ACQUIRE(pm->sm->p_lock); + p_port = osm_get_port_by_guid(pm->subn, pm->port_guid); + if (!p_port) { + CL_PLOCK_RELEASE(pm->sm->p_lock); + OSM_LOG(pm->log, OSM_LOG_ERROR, + 
"ERR 541E: No PerfMgr port object\n"); + goto Exit; + } + if (p_port->p_physp && osm_physp_is_valid(p_port->p_physp)) { + p_pkey_tbl = &p_port->p_physp->pkeys; + if (!p_pkey_tbl) { + CL_PLOCK_RELEASE(pm->sm->p_lock); + OSM_LOG(pm->log, OSM_LOG_VERBOSE, + "No PKey table found for PerfMgr port\n"); + goto Exit; + } + p_orig_pkey = cl_map_get(&p_pkey_tbl->keys, + ib_pkey_get_base(pkey)); + if (!p_orig_pkey) { + CL_PLOCK_RELEASE(pm->sm->p_lock); + OSM_LOG(pm->log, OSM_LOG_VERBOSE, + "PKey 0x%x not found for PerfMgr port\n", + cl_ntoh16(pkey)); + goto Exit; + } + if (osm_pkey_tbl_get_block_and_idx(p_pkey_tbl, p_orig_pkey, + &block, &index) == IB_SUCCESS) { + CL_PLOCK_RELEASE(pm->sm->p_lock); + pkey_ix = block * IB_NUM_PKEY_ELEMENTS_IN_BLOCK + index; + } else { + CL_PLOCK_RELEASE(pm->sm->p_lock); + OSM_LOG(pm->log, OSM_LOG_ERROR, + "ERR 541F: Failed to obtain P_Key 0x%04x " + "block and index for PerfMgr port\n", + cl_ntoh16(pkey)); + } + } else { + CL_PLOCK_RELEASE(pm->sm->p_lock); + OSM_LOG(pm->log, OSM_LOG_ERROR, + "ERR 5420: Local PerfMgt port physp invalid\n"); + } + +Exit: + OSM_LOG_EXIT(pm->log); + return pkey_ix; +} + +static boolean_t handle_redirect(osm_perfmgr_t *pm, + ib_class_port_info_t *cpi, + monitored_node_t *p_mon_node, + uint8_t port, + osm_madw_context_t *mad_context) +{ + char gid_str[INET6_ADDRSTRLEN]; + ib_api_status_t status; + boolean_t valid = TRUE; + int16_t pkey_ix = 0; + uint8_t mad_method; + + OSM_LOG(pm->log, OSM_LOG_VERBOSE, + "Redirection to LID %u GID %s QP 0x%x received\n", + cl_ntoh16(cpi->redir_lid), + inet_ntop(AF_INET6, cpi->redir_gid.raw, gid_str, + sizeof gid_str), cl_ntoh32(cpi->redir_qp)); + + if (!pm->subn->opt.perfmgr_redir) { + OSM_LOG(pm->log, OSM_LOG_VERBOSE, + "Redirection requested but disabled\n"); + valid = FALSE; + } + + /* valid redirection ? 
*/ + if (cpi->redir_lid == 0) { + if (!ib_gid_is_notzero(&cpi->redir_gid)) { + OSM_LOG(pm->log, OSM_LOG_VERBOSE, + "Invalid redirection " + "(both redirect LID and GID are zero)\n"); + valid = FALSE; + } + } + if (cpi->redir_qp == 0) { + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectQP\n"); + valid = FALSE; + } + if (cpi->redir_pkey == 0) { + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectP_Key\n"); + valid = FALSE; + } + if (cpi->redir_qkey != IB_QP1_WELL_KNOWN_Q_KEY) { + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectQ_Key\n"); + valid = FALSE; + } + + pkey_ix = validate_redir_pkey(pm, cpi->redir_pkey); + if (pkey_ix == -1) { + OSM_LOG(pm->log, OSM_LOG_VERBOSE, + "Index for Pkey 0x%x not found\n", + cl_ntoh16(cpi->redir_pkey)); + valid = FALSE; + } + + if (cpi->redir_lid == 0) { + /* GID redirection: get PathRecord information */ + OSM_LOG(pm->log, OSM_LOG_VERBOSE, + "GID redirection not currently supported\n"); + goto Exit; + } + + if (!valid) + goto Exit; + + /* LID redirection support (easier than GID redirection) */ + cl_plock_acquire(&pm->osm->lock); + p_mon_node->port[port].redirection = TRUE; + p_mon_node->port[port].valid = valid; + memcpy(&p_mon_node->port[port].gid, &cpi->redir_gid, + sizeof(ib_gid_t)); + p_mon_node->port[port].lid = cpi->redir_lid; + p_mon_node->port[port].qp = cpi->redir_qp; + p_mon_node->port[port].pkey = cpi->redir_pkey; + if (pkey_ix != -1) + p_mon_node->port[port].pkey_ix = pkey_ix; + cl_plock_release(&pm->osm->lock); + + /* either */ + if (pm->query_cpi) + { + /* issue a CPI query to the redirected location */ + mad_method = IB_MAD_METHOD_GET; + p_mon_node->port[port].cpi_valid = FALSE; + status = perfmgr_send_cpi_mad(pm, cpi->redir_lid, + cpi->redir_qp, pkey_ix, + port, mad_context, + 0); /* FIXME SL != 0 */ + } else { + /* reissue the original query to the redirected location */ + uint8_t counter_select2; + + if (xmit_wait_supported(p_mon_node, port)) + counter_select2 = 1; + else + counter_select2 = 0; + + 
mad_method = mad_context->perfmgr_context.mad_method; + if (mad_context->perfmgr_context.mad_attr_id + == IB_MAD_ATTR_PORT_CNTRS) { + status = perfmgr_send_pc_mad(pm, cpi->redir_lid, + cpi->redir_qp, + pkey_ix, port, + mad_method, + 0xffff, + counter_select2, + mad_context, + 0); /* FIXME SL != 0 */ + } else { + status = perfmgr_send_pce_mad(pm, cpi->redir_lid, + cpi->redir_qp, + pkey_ix, port, + mad_method, + mad_context, + 0); /* FIXME SL != 0 */ + } + } + if (status != IB_SUCCESS) + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5414: " + "Failed to send redirected MAD " + "with method 0x%x for node %s " + "(NodeGuid 0x%" PRIx64 ") port %d\n", + mad_method, p_mon_node->name, p_mon_node->guid, port); +Exit: + return (valid); +} + +/********************************************************************** + * Detect if someone else on the network could have cleared the counters + * without us knowing. This is easy to detect because the counters never + * wrap but are "sticky" PortCountersExtended version. + * + * The one time this will not work is if the port is getting errors fast + * enough to have the reading overtake the previous reading. In this case, + * counters will be missed. 
+ **********************************************************************/
+static void perfmgr_check_data_cnt_oob_clear(osm_perfmgr_t * pm,
+					     monitored_node_t * mon_node,
+					     uint8_t port,
+					     perfmgr_db_data_cnt_reading_t * dc)
+{
+	/* last data counter reading stored in the DB for this node/port */
+	perfmgr_db_data_cnt_reading_t last;
+	/* set when any compared counter is lower than in the stored reading */
+	boolean_t went_backwards;
+
+	/* nothing to compare against when the DB has no previous reading */
+	if (perfmgr_db_get_prev_dc(pm->db, mon_node->guid, port, &last)
+	    != PERFMGR_EVENT_DB_SUCCESS) {
+		OSM_LOG(pm->log, OSM_LOG_VERBOSE,
+			"Failed to find previous data count "
+			"reading for %s (0x%" PRIx64 ") port %u\n",
+			mon_node->name, mon_node->guid, port);
+		return;
+	}
+
+	OSM_LOG(pm->log, OSM_LOG_DEBUG,
+		"Data vs previous node %s (0x%" PRIx64 ") port %u\n"
+		"TX: %"PRIu64" ?< %"PRIu64"\n"
+		"RX: %"PRIu64" ?< %"PRIu64"\n"
+		"TXP: %"PRIu64" ?< %"PRIu64"\n"
+		"RXP: %"PRIu64" ?< %"PRIu64"\n"
+		"UTXP: %"PRIu64" ?< %"PRIu64"\n"
+		"URXP: %"PRIu64" ?< %"PRIu64"\n"
+		"MTXP: %"PRIu64" ?< %"PRIu64"\n"
+		"MRXP: %"PRIu64" ?< %"PRIu64"\n"
+		,
+		mon_node->name, mon_node->guid, port,
+		dc->xmit_data, last.xmit_data,
+		dc->rcv_data, last.rcv_data,
+		dc->xmit_pkts, last.xmit_pkts,
+		dc->rcv_pkts, last.rcv_pkts,
+		dc->unicast_xmit_pkts, last.unicast_xmit_pkts,
+		dc->unicast_rcv_pkts, last.unicast_rcv_pkts,
+		dc->multicast_xmit_pkts, last.multicast_xmit_pkts,
+		dc->multicast_rcv_pkts, last.multicast_rcv_pkts);
+
+	/* the four basic data counters are always compared */
+	went_backwards = (dc->xmit_data < last.xmit_data ||
+			  dc->rcv_data < last.rcv_data ||
+			  dc->xmit_pkts < last.xmit_pkts ||
+			  dc->rcv_pkts < last.rcv_pkts);
+
+	/*
+	 * the unicast/multicast counters are only compared when
+	 * ietf_supported() reports them for this port
+	 */
+	if (!went_backwards && ietf_supported(mon_node, port))
+		went_backwards =
+		    (dc->unicast_xmit_pkts < last.unicast_xmit_pkts ||
+		     dc->unicast_rcv_pkts < last.unicast_rcv_pkts ||
+		     dc->multicast_xmit_pkts < last.multicast_xmit_pkts ||
+		     dc->multicast_rcv_pkts < last.multicast_rcv_pkts);
+
+	if (!went_backwards)
+		return;
+
+	/* a counter dropped below its stored value: report and reset the baseline */
+	OSM_LOG(pm->log, OSM_LOG_ERROR,
+		"PerfMgr: ERR 540B: Detected an out of band data counter "
+		"clear on node %s (0x%" PRIx64 ") port %u\n",
+		mon_node->name, mon_node->guid, port);
+
+	perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port);
+}
+
+/********************************************************************** + * The dispatcher uses a thread pool which will call this function when + * there is a thread available to process the mad received on the wire + **********************************************************************/ +static void pc_recv_process(void *context, void *data) +{ + osm_perfmgr_t *pm = context; + osm_madw_t *p_madw = data; + osm_madw_context_t *mad_context = &p_madw->context; + ib_mad_t *p_mad = osm_madw_get_mad_ptr(p_madw); + uint64_t node_guid = mad_context->perfmgr_context.node_guid; + uint8_t port = mad_context->perfmgr_context.port; + perfmgr_db_err_reading_t err_reading; + perfmgr_db_data_cnt_reading_t data_reading; + cl_map_item_t *p_node; + monitored_node_t *p_mon_node; + ib_class_port_info_t *cpi = NULL; + + OSM_LOG_ENTER(pm->log); + + /* + * get the monitored node struct to have the printable name + * for log messages + */ + if ((p_node = cl_qmap_get(&pm->monitored_map, node_guid)) == + cl_qmap_end(&pm->monitored_map)) { + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5412: GUID 0x%016" + PRIx64 " not found in monitored map\n", node_guid); + goto Exit; + } + p_mon_node = (monitored_node_t *) p_node; + + OSM_LOG(pm->log, OSM_LOG_VERBOSE, + "Processing received MAD status 0x%x context 0x%" + PRIx64 " port %u\n", cl_ntoh16(p_mad->status), node_guid, port); + + CL_ASSERT(p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS || + p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS_EXT || + p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO); + + cl_plock_acquire(&pm->osm->lock); + /* validate port number */ + if (port >= p_mon_node->num_ports) { + cl_plock_release(&pm->osm->lock); + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5413: " + "Invalid port num %d for GUID 0x%016" + PRIx64 " num ports %d\n", port, node_guid, + p_mon_node->num_ports); + goto Exit; + } + cl_plock_release(&pm->osm->lock); + + /* capture CLASS_PORT_INFO data */ + if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) { + boolean_t cpi_valid = TRUE; + + 
cpi = (ib_class_port_info_t *) & + (osm_madw_get_perfmgt_mad_ptr(p_madw)->data); + + /* Response could be redirection (IBM eHCA PMA does this) */ + if (p_mad->status & IB_MAD_STATUS_REDIRECT) + cpi_valid = handle_redirect(pm, cpi, p_mon_node, port, + mad_context); + + if (pm->query_cpi && cpi_valid) { + cl_plock_acquire(&pm->osm->lock); + if (p_mon_node->node_type == IB_NODE_TYPE_SWITCH) { + unsigned i; + for (i = p_mon_node->esp0 ? 0 : 1; + i < p_mon_node->num_ports; + i++) { + p_mon_node->port[i].cap_mask = cpi->cap_mask; + p_mon_node->port[i].cpi_valid = cpi_valid; + } + } else { + p_mon_node->port[port].cap_mask = cpi->cap_mask; + p_mon_node->port[port].cpi_valid = cpi_valid; + } + cl_plock_release(&pm->osm->lock); + } + goto Exit; + } + + if (p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS_EXT) { + ib_port_counters_ext_t *ext_wire_read = + (ib_port_counters_ext_t *) + &osm_madw_get_perfmgt_mad_ptr(p_madw)->data; + + /* convert wire data to perfmgr data counter reading */ + perfmgr_db_fill_data_cnt_read_pce(ext_wire_read, &data_reading, + ietf_supported(p_mon_node, + port)); + + /* add counter */ + if (mad_context->perfmgr_context.mad_method + == IB_MAD_METHOD_GET) { + /* detect an out of band clear on the port */ + perfmgr_check_data_cnt_oob_clear(pm, p_mon_node, port, + &data_reading); + + perfmgr_db_add_dc_reading(pm->db, node_guid, port, + &data_reading, + ietf_supported(p_mon_node, + port)); + } else { + perfmgr_db_clear_prev_dc(pm->db, node_guid, port); + } + + perfmgr_check_pce_overflow(pm, p_mon_node, + p_mon_node->port[port].pkey_ix, + port, ext_wire_read); + } else { + boolean_t pce_sup = pce_supported(p_mon_node, port); + boolean_t xmit_wait_sup = xmit_wait_supported(p_mon_node, port); + ib_port_counters_t *wire_read = + (ib_port_counters_t *) + &osm_madw_get_perfmgt_mad_ptr(p_madw)->data; + + perfmgr_db_fill_err_read(wire_read, &err_reading, xmit_wait_sup); + if (!pce_sup) + perfmgr_db_fill_data_cnt_read_pc(wire_read, &data_reading); + + if 
(mad_context->perfmgr_context.mad_method == IB_MAD_METHOD_GET) { + /* detect an out of band clear on the port */ + perfmgr_check_oob_clear(pm, p_mon_node, port, &err_reading); + if (!pce_sup) + perfmgr_check_data_cnt_oob_clear(pm, p_mon_node, port, + &data_reading); + + /* log errors from this reading */ + if (pm->subn->opt.perfmgr_log_errors) + perfmgr_log_errors(pm, p_mon_node, port, &err_reading); + + perfmgr_db_add_err_reading(pm->db, node_guid, port, + &err_reading); + if (!pce_sup) + perfmgr_db_add_dc_reading(pm->db, node_guid, port, + &data_reading, 0); + } else { + perfmgr_db_clear_prev_err(pm->db, node_guid, port); + if (!pce_sup) + perfmgr_db_clear_prev_dc(pm->db, node_guid, port); + } + + perfmgr_check_overflow(pm, p_mon_node, p_mon_node->port[port].pkey_ix, + port, wire_read, xmit_wait_sup); + + } + +#ifdef ENABLE_OSM_PERF_MGR_PROFILE + do { + struct timeval proc_time; + gettimeofday(&proc_time, NULL); + diff_time(&p_madw->context.perfmgr_context.query_start, + &proc_time, &proc_time); + update_mad_stats(&proc_time); + } while (0); +#endif + +Exit: + osm_mad_pool_put(pm->mad_pool, p_madw); + + OSM_LOG_EXIT(pm->log); +} + +/********************************************************************** + * Initialize the PerfMgr object + **********************************************************************/ +ib_api_status_t osm_perfmgr_init(osm_perfmgr_t * pm, osm_opensm_t * osm, + const osm_subn_opt_t * p_opt) +{ + ib_api_status_t status; + + OSM_LOG_ENTER(&osm->log); + + OSM_LOG(&osm->log, OSM_LOG_VERBOSE, "Initializing PerfMgr\n"); + + memset(pm, 0, sizeof(*pm)); + + pm->subn = &osm->subn; + pm->sm = &osm->sm; + pm->log = &osm->log; + pm->mad_pool = &osm->mad_pool; + pm->vendor = osm->p_vendor; + pm->trans_id = PERFMGR_INITIAL_TID_VALUE; + pm->state = + p_opt->perfmgr ? 
PERFMGR_STATE_ENABLED : PERFMGR_STATE_DISABLE; + pm->sweep_state = PERFMGR_SWEEP_SLEEP; + cl_spinlock_init(&pm->lock); + pm->sweep_time_s = p_opt->perfmgr_sweep_time_s; + pm->max_outstanding_queries = p_opt->perfmgr_max_outstanding_queries; + pm->ignore_cas = p_opt->perfmgr_ignore_cas; + pm->osm = osm; + pm->local_port = -1; + + status = cl_timer_init(&pm->sweep_timer, perfmgr_sweep, pm); + if (status != IB_SUCCESS) + goto Exit; + + status = IB_INSUFFICIENT_RESOURCES; + pm->db = perfmgr_db_construct(pm); + if (!pm->db) { + pm->state = PERFMGR_STATE_NO_DB; + goto Exit; + } + + pm->pc_disp_h = cl_disp_register(&osm->disp, OSM_MSG_MAD_PORT_COUNTERS, + pc_recv_process, pm); + if (pm->pc_disp_h == CL_DISP_INVALID_HANDLE) { + perfmgr_db_destroy(pm->db); + goto Exit; + } + + init_monitored_nodes(pm); + + if (pm->state == PERFMGR_STATE_ENABLED) + cl_timer_start(&pm->sweep_timer, pm->sweep_time_s * 1000); + + pm->rm_nodes = p_opt->perfmgr_rm_nodes; + pm->query_cpi = p_opt->perfmgr_query_cpi; + pm->xmit_wait_log = p_opt->perfmgr_xmit_wait_log; + pm->xmit_wait_threshold = p_opt->perfmgr_xmit_wait_threshold; + status = IB_SUCCESS; +Exit: + OSM_LOG_EXIT(pm->log); + return status; +} + +/********************************************************************** + * Clear the counters from the db + **********************************************************************/ +void osm_perfmgr_clear_counters(osm_perfmgr_t * pm) +{ + /** + * FIXME todo issue clear on the fabric? 
+ */ + perfmgr_db_clear_counters(pm->db); + osm_log_v2(pm->log, OSM_LOG_INFO, FILE_ID, "PerfMgr counters cleared\n"); +} + +/******************************************************************* + * Dump the DB information to the file specified + *******************************************************************/ +void osm_perfmgr_dump_counters(osm_perfmgr_t * pm, perfmgr_db_dump_t dump_type) +{ + char path[256]; + char *file_name; + if (pm->subn->opt.event_db_dump_file) + file_name = pm->subn->opt.event_db_dump_file; + else { + snprintf(path, sizeof(path), "%s/%s", + pm->subn->opt.dump_files_dir, + OSM_PERFMGR_DEFAULT_DUMP_FILE); + file_name = path; + } + if (perfmgr_db_dump(pm->db, file_name, dump_type) != 0) + OSM_LOG(pm->log, OSM_LOG_ERROR, "Failed to dump file %s : %s", + file_name, strerror(errno)); +} + +/******************************************************************* + * Print the DB information to the fp specified + *******************************************************************/ +void osm_perfmgr_print_counters(osm_perfmgr_t * pm, char *nodename, FILE * fp, + char *port, int err_only) +{ + if (nodename) { + char *end = NULL; + uint64_t guid = strtoull(nodename, &end, 0); + if (nodename + strlen(nodename) != end) + perfmgr_db_print_by_name(pm->db, nodename, fp, port, + err_only); + else + perfmgr_db_print_by_guid(pm->db, guid, fp, port, + err_only); + } else + perfmgr_db_print_all(pm->db, fp, err_only); +} + +void osm_perfmgr_update_nodename(osm_perfmgr_t *pm, uint64_t node_guid, + char *nodename) +{ + if (pm->db) + perfmgr_db_update_name(pm->db, node_guid, nodename); +} +#endif /* ENABLE_OSM_PERF_MGR */ diff --git a/opensm/osm_perfmgr_db.c b/opensm/osm_perfmgr_db.c new file mode 100644 index 0000000..e57b804 --- /dev/null +++ b/opensm/osm_perfmgr_db.c @@ -0,0 +1,1098 @@ +/* + * Copyright (c) 2008-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2007 The Regents of the University of California. + * Copyright (c) 2009 HNR Consulting. 
All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#ifdef ENABLE_OSM_PERF_MGR + +#include +#include +#include +#include +#include + +#include +#define FILE_ID OSM_FILE_PERFMGR_DB_C +#include +#include +#include + +static void free_node(db_node_t * node); + +/** ========================================================================= + */ +perfmgr_db_t *perfmgr_db_construct(osm_perfmgr_t *perfmgr) +{ + perfmgr_db_t *db = malloc(sizeof(*db)); + if (!db) + return NULL; + + cl_qmap_init(&db->pc_data); + cl_plock_construct(&db->lock); + cl_plock_init(&db->lock); + db->perfmgr = perfmgr; + return db; +} + +/** ========================================================================= + */ +void perfmgr_db_destroy(perfmgr_db_t * db) +{ + cl_map_item_t *item, *next_item; + + if (db) { + item = cl_qmap_head(&db->pc_data); + while (item != cl_qmap_end(&db->pc_data)) { + next_item = cl_qmap_next(item); + free_node((db_node_t *)item); + item = next_item; + } + cl_plock_destroy(&db->lock); + free(db); + } +} + +/********************************************************************** + * Internal call db->lock should be held when calling + **********************************************************************/ +static inline db_node_t *get(perfmgr_db_t * db, uint64_t guid) +{ + cl_map_item_t *rc = cl_qmap_get(&db->pc_data, guid); + const cl_map_item_t *end = cl_qmap_end(&db->pc_data); + + if (rc == end) + return NULL; + return (db_node_t *) rc; +} + +static inline perfmgr_db_err_t bad_node_port(db_node_t * node, uint8_t port) +{ + if (!node) + return PERFMGR_EVENT_DB_GUIDNOTFOUND; + if (port >= node->num_ports || (!node->esp0 && port == 0)) + return PERFMGR_EVENT_DB_PORTNOTFOUND; + + return PERFMGR_EVENT_DB_SUCCESS; +} + +static inline void mark_port_valid(db_node_t * node, uint8_t port) +{ + node->ports[port].valid = TRUE; +} + +/** ========================================================================= + */ +static db_node_t *malloc_node(uint64_t guid, 
boolean_t esp0, + uint8_t num_ports, char *name) +{ + int i = 0; + time_t cur_time = 0; + db_node_t *rc = malloc(sizeof(*rc)); + if (!rc) + return NULL; + + rc->ports = calloc(num_ports, sizeof(db_port_t)); + if (!rc->ports) + goto free_rc; + rc->num_ports = num_ports; + rc->node_guid = guid; + rc->esp0 = esp0; + + cur_time = time(NULL); + for (i = 0; i < num_ports; i++) { + rc->ports[i].last_reset = cur_time; + rc->ports[i].err_previous.time = cur_time; + rc->ports[i].dc_previous.time = cur_time; + rc->ports[i].valid = FALSE; + } + snprintf(rc->node_name, sizeof(rc->node_name), "%s", name); + rc->active = FALSE; + + return rc; + +free_rc: + free(rc); + return NULL; +} + +/** ========================================================================= + */ +static void free_node(db_node_t * node) +{ + if (!node) + return; + if (node->ports) + free(node->ports); + free(node); +} + +/* insert nodes to the database */ +static perfmgr_db_err_t insert(perfmgr_db_t * db, db_node_t * node) +{ + cl_map_item_t *rc = cl_qmap_insert(&db->pc_data, node->node_guid, + (cl_map_item_t *) node); + + if ((void *)rc != (void *)node) + return PERFMGR_EVENT_DB_FAIL; + return PERFMGR_EVENT_DB_SUCCESS; +} + +perfmgr_db_err_t +perfmgr_db_create_entry(perfmgr_db_t * db, uint64_t guid, boolean_t esp0, + uint8_t num_ports, char *name) +{ + perfmgr_db_err_t rc = PERFMGR_EVENT_DB_SUCCESS; + + cl_plock_excl_acquire(&db->lock); + if (!get(db, guid)) { + db_node_t *pc_node = malloc_node(guid, esp0, num_ports, + name); + if (!pc_node) { + rc = PERFMGR_EVENT_DB_NOMEM; + goto Exit; + } + if (insert(db, pc_node)) { + free_node(pc_node); + rc = PERFMGR_EVENT_DB_FAIL; + goto Exit; + } + } +Exit: + cl_plock_release(&db->lock); + return rc; +} + +perfmgr_db_err_t +perfmgr_db_update_name(perfmgr_db_t * db, uint64_t node_guid, char *name) +{ + db_node_t *node = NULL; + + cl_plock_excl_acquire(&db->lock); + node = get(db, node_guid); + if (node) + snprintf(node->node_name, sizeof(node->node_name), "%s", name); 
+	cl_plock_release(&db->lock);
+	return (PERFMGR_EVENT_DB_SUCCESS);
+}
+
+/*
+ * Remove the node with the given GUID from the DB and free it.
+ * Returns PERFMGR_EVENT_DB_GUIDNOTFOUND when no such node is stored.
+ * NOTE(review): unlike the functions around it this one does not take
+ * db->lock itself - presumably the caller holds it; confirm.
+ */
+perfmgr_db_err_t
+perfmgr_db_delete_entry(perfmgr_db_t * db, uint64_t guid)
+{
+	cl_map_item_t * rc = cl_qmap_remove(&db->pc_data, guid);
+
+	if (rc == cl_qmap_end(&db->pc_data))
+		return(PERFMGR_EVENT_DB_GUIDNOTFOUND);
+
+	db_node_t *pc_node = (db_node_t *)rc;
+	free_node(pc_node);
+	return(PERFMGR_EVENT_DB_SUCCESS);
+}
+
+/*
+ * Delete every node whose active flag is FALSE.
+ *
+ * db  - database to purge
+ * cnt - optional; receives the number of nodes deleted
+ *
+ * Returns PERFMGR_EVENT_DB_SUCCESS, or PERFMGR_EVENT_DB_NOMEM when the
+ * temporary GUID list could not be grown; in that case nothing is
+ * deleted and *cnt is set to 0.
+ * NOTE(review): db->lock is not taken here either - confirm the caller
+ * holds it.
+ */
+perfmgr_db_err_t
+perfmgr_db_delete_inactive(perfmgr_db_t * db, unsigned *cnt)
+{
+	perfmgr_db_err_t rc = PERFMGR_EVENT_DB_SUCCESS;
+	int i = 0;
+	int num = 0;
+	uint64_t * guid_list = NULL;
+	uint64_t * grown = NULL;
+	cl_map_item_t * p_map_item = cl_qmap_head(&db->pc_data);
+
+	/*
+	 * First pass: collect the GUIDs, so that the map is not modified
+	 * while it is being walked.  An empty map simply skips the loop.
+	 */
+	while (p_map_item != cl_qmap_end(&db->pc_data)) {
+		db_node_t *n = (db_node_t *)p_map_item;
+		if (n->active == FALSE) {
+			/*
+			 * realloc() leaves the old block untouched on
+			 * failure, so keep the old pointer until the
+			 * call succeeded and free it otherwise.
+			 */
+			grown = realloc(guid_list,
+					sizeof(*guid_list) * (num+1));
+			if (!grown) {
+				free(guid_list);
+				num = 0;
+				rc = PERFMGR_EVENT_DB_NOMEM;
+				goto Done;
+			}
+			guid_list = grown;
+			guid_list[num] = n->node_guid;
+			num++;
+		}
+		p_map_item = cl_qmap_next(p_map_item);
+	}
+
+	/* second pass: drop the collected nodes */
+	for (i = 0 ; i < num; i++)
+		perfmgr_db_delete_entry(db, guid_list[i]);
+
+	free(guid_list);
+
+Done:
+	if (cnt)
+		*cnt = num;
+
+	return(rc);
+}
+
+/*
+ * Set the active flag of the node with the given GUID, under the
+ * exclusive DB lock.  An unknown GUID is ignored; the result is always
+ * PERFMGR_EVENT_DB_SUCCESS.
+ */
+perfmgr_db_err_t
+perfmgr_db_mark_active(perfmgr_db_t *db, uint64_t guid, boolean_t active)
+{
+	db_node_t *node = NULL;
+
+	cl_plock_excl_acquire(&db->lock);
+	node = get(db, guid);
+	if (node)
+		node->active = active;
+	cl_plock_release(&db->lock);
+	return (PERFMGR_EVENT_DB_SUCCESS);
+}
+
+
+/**********************************************************************
+ * Dump a reading vs the previous reading to the log (debug level)
+ **********************************************************************/
+static inline void
+debug_dump_err_reading(perfmgr_db_t * db, uint64_t guid, uint8_t port_num,
+		       db_port_t * port, perfmgr_db_err_reading_t * cur)
+{
+	osm_log_t *log = db->perfmgr->log;
+
+	if (!OSM_LOG_IS_ACTIVE_V2(log, OSM_LOG_DEBUG))
+
return; /* optimize this a bit */ + + osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID, + "GUID 0x%" PRIx64 " Port %u:\n", guid, port_num); + osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID, + "sym %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", + cur->symbol_err_cnt, port->err_previous.symbol_err_cnt, + port->err_total.symbol_err_cnt); + osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID, + "ler %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", + cur->link_err_recover, port->err_previous.link_err_recover, + port->err_total.link_err_recover); + osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID, + "ld %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", + cur->link_downed, port->err_previous.link_downed, + port->err_total.link_downed); + osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID, + "re %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", cur->rcv_err, + port->err_previous.rcv_err, port->err_total.rcv_err); + osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID, + "rrp %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", + cur->rcv_rem_phys_err, port->err_previous.rcv_rem_phys_err, + port->err_total.rcv_rem_phys_err); + osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID, + "rsr %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", + cur->rcv_switch_relay_err, + port->err_previous.rcv_switch_relay_err, + port->err_total.rcv_switch_relay_err); + osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID, + "xd %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", + cur->xmit_discards, port->err_previous.xmit_discards, + port->err_total.xmit_discards); + osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID, + "xce %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", + cur->xmit_constraint_err, + port->err_previous.xmit_constraint_err, + port->err_total.xmit_constraint_err); + osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID, + "rce %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", + cur->rcv_constraint_err, port->err_previous.rcv_constraint_err, + port->err_total.rcv_constraint_err); + osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID, + "li %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", + cur->link_integrity, port->err_previous.link_integrity, + 
port->err_total.link_integrity); + osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID, + "bo %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", + cur->buffer_overrun, port->err_previous.buffer_overrun, + port->err_total.buffer_overrun); + osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID, + "vld %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", + cur->vl15_dropped, port->err_previous.vl15_dropped, + port->err_total.vl15_dropped); + osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID, + "xw %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", + cur->xmit_wait, port->err_previous.xmit_wait, + port->err_total.xmit_wait); +} + +/********************************************************************** + * perfmgr_db_err_reading_t functions + **********************************************************************/ +perfmgr_db_err_t +perfmgr_db_add_err_reading(perfmgr_db_t * db, uint64_t guid, uint8_t port, + perfmgr_db_err_reading_t * reading) +{ + db_port_t *p_port = NULL; + db_node_t *node = NULL; + perfmgr_db_err_reading_t *previous = NULL; + perfmgr_db_err_t rc = PERFMGR_EVENT_DB_SUCCESS; + osm_epi_pe_event_t epi_pe_data; + + cl_plock_excl_acquire(&db->lock); + node = get(db, guid); + if ((rc = bad_node_port(node, port)) != PERFMGR_EVENT_DB_SUCCESS) + goto Exit; + + mark_port_valid(node, port); + + p_port = &(node->ports[port]); + previous = &(node->ports[port].err_previous); + + debug_dump_err_reading(db, guid, port, p_port, reading); + + epi_pe_data.time_diff_s = (reading->time - previous->time); + osm_epi_create_port_id(&epi_pe_data.port_id, guid, port, + node->node_name); + + /* calculate changes from previous reading */ + epi_pe_data.symbol_err_cnt = + (reading->symbol_err_cnt - previous->symbol_err_cnt); + p_port->err_total.symbol_err_cnt += epi_pe_data.symbol_err_cnt; + epi_pe_data.link_err_recover = + (reading->link_err_recover - previous->link_err_recover); + p_port->err_total.link_err_recover += epi_pe_data.link_err_recover; + epi_pe_data.link_downed = + (reading->link_downed - previous->link_downed); + 
p_port->err_total.link_downed += epi_pe_data.link_downed; + epi_pe_data.rcv_err = (reading->rcv_err - previous->rcv_err); + p_port->err_total.rcv_err += epi_pe_data.rcv_err; + epi_pe_data.rcv_rem_phys_err = + (reading->rcv_rem_phys_err - previous->rcv_rem_phys_err); + p_port->err_total.rcv_rem_phys_err += epi_pe_data.rcv_rem_phys_err; + epi_pe_data.rcv_switch_relay_err = + (reading->rcv_switch_relay_err - previous->rcv_switch_relay_err); + p_port->err_total.rcv_switch_relay_err += + epi_pe_data.rcv_switch_relay_err; + epi_pe_data.xmit_discards = + (reading->xmit_discards - previous->xmit_discards); + p_port->err_total.xmit_discards += epi_pe_data.xmit_discards; + epi_pe_data.xmit_constraint_err = + (reading->xmit_constraint_err - previous->xmit_constraint_err); + p_port->err_total.xmit_constraint_err += + epi_pe_data.xmit_constraint_err; + epi_pe_data.rcv_constraint_err = + (reading->rcv_constraint_err - previous->rcv_constraint_err); + p_port->err_total.rcv_constraint_err += epi_pe_data.rcv_constraint_err; + epi_pe_data.link_integrity = + (reading->link_integrity - previous->link_integrity); + p_port->err_total.link_integrity += epi_pe_data.link_integrity; + epi_pe_data.buffer_overrun = + (reading->buffer_overrun - previous->buffer_overrun); + p_port->err_total.buffer_overrun += epi_pe_data.buffer_overrun; + epi_pe_data.vl15_dropped = + (reading->vl15_dropped - previous->vl15_dropped); + p_port->err_total.vl15_dropped += epi_pe_data.vl15_dropped; + epi_pe_data.xmit_wait = + (reading->xmit_wait - previous->xmit_wait); + p_port->err_total.xmit_wait += epi_pe_data.xmit_wait; + + p_port->err_previous = *reading; + + /* mark the time this total was updated */ + p_port->err_total.time = reading->time; + + osm_opensm_report_event(db->perfmgr->osm, OSM_EVENT_ID_PORT_ERRORS, + &epi_pe_data); + +Exit: + cl_plock_release(&db->lock); + return rc; +} + +perfmgr_db_err_t perfmgr_db_get_prev_err(perfmgr_db_t * db, uint64_t guid, + uint8_t port, + perfmgr_db_err_reading_t * 
reading) +{ + db_node_t *node = NULL; + perfmgr_db_err_t rc = PERFMGR_EVENT_DB_SUCCESS; + + cl_plock_acquire(&db->lock); + + node = get(db, guid); + if ((rc = bad_node_port(node, port)) != PERFMGR_EVENT_DB_SUCCESS) + goto Exit; + + *reading = node->ports[port].err_previous; + +Exit: + cl_plock_release(&db->lock); + return rc; +} + +perfmgr_db_err_t +perfmgr_db_clear_prev_err(perfmgr_db_t * db, uint64_t guid, uint8_t port) +{ + db_node_t *node = NULL; + perfmgr_db_err_reading_t *previous = NULL; + perfmgr_db_err_t rc = PERFMGR_EVENT_DB_SUCCESS; + + cl_plock_excl_acquire(&db->lock); + node = get(db, guid); + if ((rc = bad_node_port(node, port)) != PERFMGR_EVENT_DB_SUCCESS) + goto Exit; + + previous = &(node->ports[port].err_previous); + + memset(previous, 0, sizeof(*previous)); + node->ports[port].err_previous.time = time(NULL); + +Exit: + cl_plock_release(&db->lock); + return rc; +} + +static inline void +debug_dump_dc_reading(perfmgr_db_t * db, uint64_t guid, uint8_t port_num, + db_port_t * port, perfmgr_db_data_cnt_reading_t * cur) +{ + osm_log_t *log = db->perfmgr->log; + if (!OSM_LOG_IS_ACTIVE_V2(log, OSM_LOG_DEBUG)) + return; + + osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID, + "xd %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", + cur->xmit_data, port->dc_previous.xmit_data, + port->dc_total.xmit_data); + osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID, + "rd %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", cur->rcv_data, + port->dc_previous.rcv_data, port->dc_total.rcv_data); + osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID, + "xp %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", + cur->xmit_pkts, port->dc_previous.xmit_pkts, + port->dc_total.xmit_pkts); + osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID, + "rp %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", cur->rcv_pkts, + port->dc_previous.rcv_pkts, port->dc_total.rcv_pkts); +} + +/********************************************************************** + * perfmgr_db_data_cnt_reading_t functions + 
**********************************************************************/ +perfmgr_db_err_t +perfmgr_db_add_dc_reading(perfmgr_db_t * db, uint64_t guid, uint8_t port, + perfmgr_db_data_cnt_reading_t * reading, + int ietf_sup) +{ + db_port_t *p_port = NULL; + db_node_t *node = NULL; + perfmgr_db_data_cnt_reading_t *previous = NULL; + perfmgr_db_err_t rc = PERFMGR_EVENT_DB_SUCCESS; + osm_epi_dc_event_t epi_dc_data; + + cl_plock_excl_acquire(&db->lock); + node = get(db, guid); + if ((rc = bad_node_port(node, port)) != PERFMGR_EVENT_DB_SUCCESS) + goto Exit; + + mark_port_valid(node, port); + + p_port = &node->ports[port]; + previous = &node->ports[port].dc_previous; + + debug_dump_dc_reading(db, guid, port, p_port, reading); + + epi_dc_data.time_diff_s = reading->time - previous->time; + osm_epi_create_port_id(&epi_dc_data.port_id, guid, port, + node->node_name); + + /* calculate changes from previous reading */ + epi_dc_data.xmit_data = reading->xmit_data - previous->xmit_data; + p_port->dc_total.xmit_data += epi_dc_data.xmit_data; + epi_dc_data.rcv_data = reading->rcv_data - previous->rcv_data; + p_port->dc_total.rcv_data += epi_dc_data.rcv_data; + epi_dc_data.xmit_pkts = reading->xmit_pkts - previous->xmit_pkts; + p_port->dc_total.xmit_pkts += epi_dc_data.xmit_pkts; + epi_dc_data.rcv_pkts = reading->rcv_pkts - previous->rcv_pkts; + p_port->dc_total.rcv_pkts += epi_dc_data.rcv_pkts; + + if (ietf_sup) + { + epi_dc_data.unicast_xmit_pkts = + reading->unicast_xmit_pkts - previous->unicast_xmit_pkts; + p_port->dc_total.unicast_xmit_pkts += epi_dc_data.unicast_xmit_pkts; + epi_dc_data.unicast_rcv_pkts = + reading->unicast_rcv_pkts - previous->unicast_rcv_pkts; + p_port->dc_total.unicast_rcv_pkts += epi_dc_data.unicast_rcv_pkts; + epi_dc_data.multicast_xmit_pkts = + reading->multicast_xmit_pkts - previous->multicast_xmit_pkts; + p_port->dc_total.multicast_xmit_pkts += epi_dc_data.multicast_xmit_pkts; + epi_dc_data.multicast_rcv_pkts = + reading->multicast_rcv_pkts - 
previous->multicast_rcv_pkts; + p_port->dc_total.multicast_rcv_pkts += epi_dc_data.multicast_rcv_pkts; + } + + p_port->dc_previous = *reading; + + /* mark the time this total was updated */ + p_port->dc_total.time = reading->time; + + osm_opensm_report_event(db->perfmgr->osm, + OSM_EVENT_ID_PORT_DATA_COUNTERS, &epi_dc_data); + +Exit: + cl_plock_release(&db->lock); + return rc; +} + +perfmgr_db_err_t perfmgr_db_get_prev_dc(perfmgr_db_t * db, uint64_t guid, + uint8_t port, + perfmgr_db_data_cnt_reading_t * reading) +{ + db_node_t *node = NULL; + perfmgr_db_err_t rc = PERFMGR_EVENT_DB_SUCCESS; + + cl_plock_acquire(&db->lock); + + node = get(db, guid); + if ((rc = bad_node_port(node, port)) != PERFMGR_EVENT_DB_SUCCESS) + goto Exit; + + *reading = node->ports[port].dc_previous; + +Exit: + cl_plock_release(&db->lock); + return rc; +} + +perfmgr_db_err_t +perfmgr_db_clear_prev_dc(perfmgr_db_t * db, uint64_t guid, uint8_t port) +{ + db_node_t *node = NULL; + perfmgr_db_data_cnt_reading_t *previous = NULL; + perfmgr_db_err_t rc = PERFMGR_EVENT_DB_SUCCESS; + + cl_plock_excl_acquire(&db->lock); + node = get(db, guid); + if ((rc = bad_node_port(node, port)) != PERFMGR_EVENT_DB_SUCCESS) + goto Exit; + + previous = &node->ports[port].dc_previous; + + memset(previous, 0, sizeof(*previous)); + node->ports[port].dc_previous.time = time(NULL); + +Exit: + cl_plock_release(&db->lock); + return rc; +} + +static void clear_counters(cl_map_item_t * const p_map_item, void *context) +{ + db_node_t *node = (db_node_t *) p_map_item; + int i = 0; + time_t ts = time(NULL); + + for (i = 0; i < node->num_ports; i++) { + node->ports[i].err_total.symbol_err_cnt = 0; + node->ports[i].err_total.link_err_recover = 0; + node->ports[i].err_total.link_downed = 0; + node->ports[i].err_total.rcv_err = 0; + node->ports[i].err_total.rcv_rem_phys_err = 0; + node->ports[i].err_total.rcv_switch_relay_err = 0; + node->ports[i].err_total.xmit_discards = 0; + node->ports[i].err_total.xmit_constraint_err = 0; + 
node->ports[i].err_total.rcv_constraint_err = 0; + node->ports[i].err_total.link_integrity = 0; + node->ports[i].err_total.buffer_overrun = 0; + node->ports[i].err_total.vl15_dropped = 0; + node->ports[i].err_total.xmit_wait = 0; + node->ports[i].err_total.time = ts; + + node->ports[i].dc_total.xmit_data = 0; + node->ports[i].dc_total.rcv_data = 0; + node->ports[i].dc_total.xmit_pkts = 0; + node->ports[i].dc_total.rcv_pkts = 0; + node->ports[i].dc_total.unicast_xmit_pkts = 0; + node->ports[i].dc_total.unicast_rcv_pkts = 0; + node->ports[i].dc_total.multicast_xmit_pkts = 0; + node->ports[i].dc_total.multicast_rcv_pkts = 0; + node->ports[i].dc_total.time = ts; + + node->ports[i].last_reset = ts; + } +} + +/********************************************************************** + * Clear all the counters from the db + **********************************************************************/ +void perfmgr_db_clear_counters(perfmgr_db_t * db) +{ + cl_plock_excl_acquire(&db->lock); + cl_qmap_apply_func(&db->pc_data, clear_counters, (void *)db); + cl_plock_release(&db->lock); +#if 0 + if (db->db_impl->clear_counters) + db->db_impl->clear_counters(db->db_data); +#endif +} + +/********************************************************************** + * Output a tab delimited output of the port counters + **********************************************************************/ +static void dump_node_mr(db_node_t * node, FILE * fp) +{ + int i = 0; + + fprintf(fp, "\nName\tGUID\tActive\tPort\tLast Reset\t" + "Last Error Update\tLast Data Update\t" + "%s\t%s\t" + "%s\t%s\t%s\t%s\t%s\t%s\t%s\t" + "%s\t%s\t%s\t%s\t%s\t%s\t%s\t" + "%s\t%s\t%s\t%s\t%s\n", + "symbol_err_cnt", + "link_err_recover", + "link_downed", + "rcv_err", + "rcv_rem_phys_err", + "rcv_switch_relay_err", + "xmit_discards", + "xmit_constraint_err", + "rcv_constraint_err", + "link_int_err", + "buf_overrun_err", + "vl15_dropped", + "xmit_wait", + "xmit_data", + "rcv_data", + "xmit_pkts", + "rcv_pkts", + 
"unicast_xmit_pkts", + "unicast_rcv_pkts", + "multicast_xmit_pkts", + "multicast_rcv_pkts"); + for (i = (node->esp0) ? 0 : 1; i < node->num_ports; i++) { + char lr[128]; + char *last_reset = ctime_r(&node->ports[i].last_reset, lr); + char leu[128]; + char *last_err_update = ctime_r(&node->ports[i].err_total.time, leu); + char ldu[128]; + char *last_data_update = ctime_r(&node->ports[i].dc_total.time, ldu); + + if (!node->ports[i].valid) + continue; + + last_reset[strlen(last_reset) - 1] = '\0'; /* remove \n */ + last_err_update[strlen(last_err_update) - 1] = '\0'; /* remove \n */ + last_data_update[strlen(last_data_update) - 1] = '\0'; /* remove \n */ + + fprintf(fp, + "%s\t0x%" PRIx64 "\t%s\t%d\t%s\t%s\t%s\t%" PRIu64 "\t%" PRIu64 "\t" + "%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t" + "%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t" "%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 "\t" "%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 "\t" "%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\n", node->node_name, + node->node_guid, + node->active ? 
"TRUE" : "FALSE", + i, last_reset, last_err_update, last_data_update, + node->ports[i].err_total.symbol_err_cnt, + node->ports[i].err_total.link_err_recover, + node->ports[i].err_total.link_downed, + node->ports[i].err_total.rcv_err, + node->ports[i].err_total.rcv_rem_phys_err, + node->ports[i].err_total.rcv_switch_relay_err, + node->ports[i].err_total.xmit_discards, + node->ports[i].err_total.xmit_constraint_err, + node->ports[i].err_total.rcv_constraint_err, + node->ports[i].err_total.link_integrity, + node->ports[i].err_total.buffer_overrun, + node->ports[i].err_total.vl15_dropped, + node->ports[i].err_total.xmit_wait, + node->ports[i].dc_total.xmit_data, + node->ports[i].dc_total.rcv_data, + node->ports[i].dc_total.xmit_pkts, + node->ports[i].dc_total.rcv_pkts, + node->ports[i].dc_total.unicast_xmit_pkts, + node->ports[i].dc_total.unicast_rcv_pkts, + node->ports[i].dc_total.multicast_xmit_pkts, + node->ports[i].dc_total.multicast_rcv_pkts); + } +} + +static void dump_hr_dc(FILE *fp, uint64_t val64, int data) +{ + const char *unit = ""; + uint64_t tmp = val64; + float val = 0.0; + int ui = 0; + uint64_t div = 1; + + tmp /= 1024; + while (tmp) { + ui++; + tmp /= 1024; + div *= 1024; + } + + val = (float)(val64); + if (data) { + val *= 4; + if (val/div > 1024) { + ui++; + div *= 1024; + } + } + val /= div; + + switch (ui) { + case 1: + unit = "K"; + break; + case 2: + unit = "M"; + break; + case 3: + unit = "G"; + break; + case 4: + unit = "T"; + break; + case 5: + unit = "P"; + break; + case 6: + unit = "E"; + break; + } + + fprintf(fp, " (%5.3f%s%s)\n", val, unit, data ? "B" : ""); +} + +/********************************************************************** + * Output a human readable output of the port counters + **********************************************************************/ +static void dump_node_hr(db_node_t * node, FILE * fp, char *port, int err_only) +{ + int i = (node->esp0) ? 
0 : 1; + int num_ports = node->num_ports; + + if (port) { + char *end = NULL; + int p = strtoul(port, &end, 0); + if (port + strlen(port) == end && p >= i && p < num_ports) { + i = p; + num_ports = p+1; + } else { + fprintf(fp, "Warning: \"%s\" is not a valid port\n", port); + } + } + for (/* set above */; i < num_ports; i++) { + char lr[128]; + char *last_reset = ctime_r(&node->ports[i].last_reset, lr); + char leu[128]; + char *last_err_update = ctime_r(&node->ports[i].err_total.time, leu); + char ldu[128]; + char *last_data_update = ctime_r(&node->ports[i].dc_total.time, ldu); + + if (!node->ports[i].valid) + continue; + + last_reset[strlen(last_reset) - 1] = '\0'; /* remove \n */ + last_err_update[strlen(last_err_update) - 1] = '\0'; /* remove \n */ + last_data_update[strlen(last_data_update) - 1] = '\0'; /* remove \n */ + + perfmgr_db_err_reading_t *err = &node->ports[i].err_total; + + if (err_only + && err->symbol_err_cnt == 0 + && err->link_err_recover == 0 + && err->link_downed == 0 + && err->rcv_err == 0 + && err->rcv_rem_phys_err == 0 + && err->rcv_switch_relay_err == 0 + && err->xmit_discards == 0 + && err->xmit_constraint_err == 0 + && err->rcv_constraint_err == 0 + && err->link_integrity == 0 + && err->buffer_overrun == 0 + && err->vl15_dropped == 0 + && err->xmit_wait == 0) + continue; + + fprintf(fp, "\"%s\" 0x%" PRIx64 " active %s port %d\n" + " Last Reset : %s\n" + " Last Error Update : %s\n", + node->node_name, node->node_guid, + node->active ? 
"TRUE":"FALSE", i, last_reset, + last_err_update); + + if (!err_only || err->symbol_err_cnt != 0) + fprintf(fp, " symbol_err_cnt : %" PRIu64 "\n", + err->symbol_err_cnt); + if (!err_only || err->link_err_recover != 0) + fprintf(fp, " link_err_recover : %" PRIu64 "\n", + err->link_err_recover); + if (!err_only || err->link_downed != 0) + fprintf(fp, " link_downed : %" PRIu64 "\n", + err->link_downed); + if (!err_only || err->rcv_err != 0) + fprintf(fp, " rcv_err : %" PRIu64 "\n", + err->rcv_err); + if (!err_only || err->rcv_rem_phys_err != 0) + fprintf(fp, " rcv_rem_phys_err : %" PRIu64 "\n", + err->rcv_rem_phys_err); + if (!err_only || err->rcv_switch_relay_err != 0) + fprintf(fp, " rcv_switch_relay_err : %" PRIu64 "\n", + err->rcv_switch_relay_err); + if (!err_only || err->xmit_discards != 0) + fprintf(fp, " xmit_discards : %" PRIu64 "\n", + err->xmit_discards); + if (!err_only || err->xmit_constraint_err != 0) + fprintf(fp, " xmit_constraint_err : %" PRIu64 "\n", + err->xmit_constraint_err); + if (!err_only || err->rcv_constraint_err != 0) + fprintf(fp, " rcv_constraint_err : %" PRIu64 "\n", + err->rcv_constraint_err); + if (!err_only || err->link_integrity != 0) + fprintf(fp, " link_integrity_err : %" PRIu64 "\n", + err->link_integrity); + if (!err_only || err->buffer_overrun != 0) + fprintf(fp, " buf_overrun_err : %" PRIu64 "\n", + err->buffer_overrun); + if (!err_only || err->vl15_dropped != 0) + fprintf(fp, " vl15_dropped : %" PRIu64 "\n", + err->vl15_dropped); + if (!err_only || err->xmit_wait != 0) + fprintf(fp, " xmit_wait : %" PRIu64 "\n", + err->xmit_wait); + + if (err_only) + continue; + + fprintf(fp, " Last Data Update : %s\n", + last_data_update); + fprintf(fp, " xmit_data : %" PRIu64, + node->ports[i].dc_total.xmit_data); + dump_hr_dc(fp, node->ports[i].dc_total.xmit_data, 1); + fprintf(fp, " rcv_data : %" PRIu64, + node->ports[i].dc_total.rcv_data); + dump_hr_dc(fp, node->ports[i].dc_total.rcv_data, 1); + fprintf(fp, " xmit_pkts : %" PRIu64, + 
node->ports[i].dc_total.xmit_pkts); + dump_hr_dc(fp, node->ports[i].dc_total.xmit_pkts, 0); + fprintf(fp, " rcv_pkts : %" PRIu64, + node->ports[i].dc_total.rcv_pkts); + dump_hr_dc(fp, node->ports[i].dc_total.rcv_pkts, 0); + fprintf(fp, " unicast_xmit_pkts : %" PRIu64, + node->ports[i].dc_total.unicast_xmit_pkts); + dump_hr_dc(fp, node->ports[i].dc_total.unicast_xmit_pkts, 0); + fprintf(fp, " unicast_rcv_pkts : %" PRIu64, + node->ports[i].dc_total.unicast_rcv_pkts); + dump_hr_dc(fp, node->ports[i].dc_total.unicast_rcv_pkts, 0); + fprintf(fp, " multicast_xmit_pkts : %" PRIu64, + node->ports[i].dc_total.multicast_xmit_pkts); + dump_hr_dc(fp, node->ports[i].dc_total.multicast_xmit_pkts, 0); + fprintf(fp, " multicast_rcv_pkts : %" PRIu64, + node->ports[i].dc_total.multicast_rcv_pkts); + dump_hr_dc(fp, node->ports[i].dc_total.multicast_rcv_pkts, 0); + + } +} + +/* Define a context for the __db_dump callback */ +typedef struct { + FILE *fp; + perfmgr_db_dump_t dump_type; +} dump_context_t; + +static void db_dump(cl_map_item_t * const p_map_item, void *context) +{ + db_node_t *node = (db_node_t *) p_map_item; + dump_context_t *c = (dump_context_t *) context; + FILE *fp = c->fp; + + switch (c->dump_type) { + case PERFMGR_EVENT_DB_DUMP_MR: + dump_node_mr(node, fp); + break; + case PERFMGR_EVENT_DB_DUMP_HR: + default: + dump_node_hr(node, fp, NULL, 0); + break; + } +} + +/********************************************************************** + * print all node data to fp + **********************************************************************/ +void +perfmgr_db_print_all(perfmgr_db_t * db, FILE *fp, int err_only) +{ + cl_map_item_t *item; + db_node_t *node; + + cl_plock_acquire(&db->lock); + item = cl_qmap_head(&db->pc_data); + while (item != cl_qmap_end(&db->pc_data)) { + node = (db_node_t *)item; + dump_node_hr(node, fp, NULL, err_only); + item = cl_qmap_next(item); + } + cl_plock_release(&db->lock); +} + 
+/**********************************************************************
+ * print the data of the node named "nodename" to fp
+ *
+ * Node names are compared with strcmp(); only the first matching node
+ * is printed.  port and err_only are passed unchanged to
+ * dump_node_hr().  When no node matches, a "not found" line is written
+ * to fp instead.
+ **********************************************************************/
+void
+perfmgr_db_print_by_name(perfmgr_db_t * db, char *nodename, FILE *fp,
+			 char *port, int err_only)
+{
+	cl_map_item_t *item;
+	db_node_t *node;
+
+	cl_plock_acquire(&db->lock);
+
+	/* find the node */
+	item = cl_qmap_head(&db->pc_data);
+	while (item != cl_qmap_end(&db->pc_data)) {
+		node = (db_node_t *)item;
+		if (strcmp(node->node_name, nodename) == 0) {
+			dump_node_hr(node, fp, port, err_only);
+			goto done;
+		}
+		item = cl_qmap_next(item);
+	}
+
+	fprintf(fp, "Node %s not found...\n", nodename);
+done:
+	cl_plock_release(&db->lock);
+}
+
+/**********************************************************************
+ * print the data of the node with GUID "nodeguid" to fp
+ *
+ * The node is looked up directly in db->pc_data with cl_qmap_get().
+ * port and err_only are passed unchanged to dump_node_hr().  When the
+ * GUID is not in the map, a "not found" line is written to fp instead.
+ **********************************************************************/
+void
+perfmgr_db_print_by_guid(perfmgr_db_t * db, uint64_t nodeguid, FILE *fp,
+			 char *port, int err_only)
+{
+	cl_map_item_t *node;
+
+	cl_plock_acquire(&db->lock);
+
+	node = cl_qmap_get(&db->pc_data, nodeguid);
+	if (node != cl_qmap_end(&db->pc_data))
+		dump_node_hr((db_node_t *)node, fp, port, err_only);
+	else
+		fprintf(fp, "Node 0x%" PRIx64 " not found...\n", nodeguid);
+
+	cl_plock_release(&db->lock);
+}
+
+/**********************************************************************
+ * dump the data to the file "file"
+ *
+ * The file is opened with mode "w+" and every node in db->pc_data is
+ * written through db_dump() in the format selected by dump_type.
+ *
+ * Returns PERFMGR_EVENT_DB_FAIL when the file cannot be opened, when
+ * the stream recorded a write error, or when fclose() fails;
+ * PERFMGR_EVENT_DB_SUCCESS otherwise.
+ **********************************************************************/
+perfmgr_db_err_t
+perfmgr_db_dump(perfmgr_db_t * db, char *file, perfmgr_db_dump_t dump_type)
+{
+	dump_context_t context;
+	perfmgr_db_err_t rc = PERFMGR_EVENT_DB_SUCCESS;
+
+	context.fp = fopen(file, "w+");
+	if (!context.fp)
+		return PERFMGR_EVENT_DB_FAIL;
+	context.dump_type = dump_type;
+
+	cl_plock_acquire(&db->lock);
+	cl_qmap_apply_func(&db->pc_data, db_dump, (void *)&context);
+	cl_plock_release(&db->lock);
+
+	/*
+	 * The dump callbacks return void and so cannot report a failed
+	 * fprintf().  Check the stream's error flag, and the final flush
+	 * performed by fclose(), before claiming success.
+	 */
+	if (ferror(context.fp))
+		rc = PERFMGR_EVENT_DB_FAIL;
+	if (fclose(context.fp) != 0)
+		rc = PERFMGR_EVENT_DB_FAIL;
+	return rc;
+}
+
+/**********************************************************************
+ * Fill in the various DB objects from their wire counter parts
+ **********************************************************************/
+
+/*
+ * Fill an error reading from a PortCounters wire structure.
+ *
+ * xmit_wait is taken from the wire only when xmit_wait_sup is set and
+ * is stored as 0 otherwise.  The reading is stamped with time(NULL).
+ */
+void
+perfmgr_db_fill_err_read(ib_port_counters_t * wire_read,
+			 perfmgr_db_err_reading_t * reading,
+			 boolean_t xmit_wait_sup)
+{
+	reading->symbol_err_cnt = cl_ntoh16(wire_read->symbol_err_cnt);
+	reading->link_err_recover = wire_read->link_err_recover;
+	reading->link_downed = wire_read->link_downed;
+	reading->rcv_err = cl_ntoh16(wire_read->rcv_err);
+	reading->rcv_rem_phys_err = cl_ntoh16(wire_read->rcv_rem_phys_err);
+	reading->rcv_switch_relay_err =
+	    cl_ntoh16(wire_read->rcv_switch_relay_err);
+	reading->xmit_discards = cl_ntoh16(wire_read->xmit_discards);
+	reading->xmit_constraint_err = wire_read->xmit_constraint_err;
+	reading->rcv_constraint_err = wire_read->rcv_constraint_err;
+	/* both values are extracted from the same wire field */
+	reading->link_integrity =
+	    PC_LINK_INT(wire_read->link_int_buffer_overrun);
+	reading->buffer_overrun =
+	    PC_BUF_OVERRUN(wire_read->link_int_buffer_overrun);
+	reading->vl15_dropped = cl_ntoh16(wire_read->vl15_dropped);
+	if (xmit_wait_sup)
+		reading->xmit_wait = cl_ntoh32(wire_read->xmit_wait);
+	else
+		reading->xmit_wait = 0;
+	reading->time = time(NULL);
+}
+
+/*
+ * Fill a data counter reading from a PortCounters wire structure
+ * (32 bit counters).  The unicast/multicast packet counters are not
+ * read from this structure and are stored as 0.  The reading is
+ * stamped with time(NULL).
+ */
+void
+perfmgr_db_fill_data_cnt_read_pc(ib_port_counters_t * wire_read,
+				 perfmgr_db_data_cnt_reading_t * reading)
+{
+	reading->xmit_data = cl_ntoh32(wire_read->xmit_data);
+	reading->rcv_data = cl_ntoh32(wire_read->rcv_data);
+	reading->xmit_pkts = cl_ntoh32(wire_read->xmit_pkts);
+	reading->rcv_pkts = cl_ntoh32(wire_read->rcv_pkts);
+	reading->unicast_xmit_pkts = 0;
+	reading->unicast_rcv_pkts = 0;
+	reading->multicast_xmit_pkts = 0;
+	reading->multicast_rcv_pkts = 0;
+	reading->time = time(NULL);
+}
+
+/*
+ * Fill a data counter reading from a PortCountersExtended wire
+ * structure (64 bit counters).  The unicast/multicast packet counters
+ * are taken from the wire only when ietf_sup is set and are stored as
+ * 0 otherwise, so every field of *reading is defined on return.  The
+ * reading is stamped with time(NULL).
+ */
+void
+perfmgr_db_fill_data_cnt_read_pce(ib_port_counters_ext_t * wire_read,
+				  perfmgr_db_data_cnt_reading_t * reading,
+				  int ietf_sup)
+{
+	reading->xmit_data = cl_ntoh64(wire_read->xmit_data);
+	reading->rcv_data = cl_ntoh64(wire_read->rcv_data);
+	reading->xmit_pkts = cl_ntoh64(wire_read->xmit_pkts);
+	reading->rcv_pkts = cl_ntoh64(wire_read->rcv_pkts);
+	if (ietf_sup)
+	{
+		reading->unicast_xmit_pkts = cl_ntoh64(wire_read->unicast_xmit_pkts);
+		reading->unicast_rcv_pkts = cl_ntoh64(wire_read->unicast_rcv_pkts);
+		reading->multicast_xmit_pkts =
+		    cl_ntoh64(wire_read->multicast_xmit_pkts);
+		reading->multicast_rcv_pkts = cl_ntoh64(wire_read->multicast_rcv_pkts);
+	} else {
+		/*
+		 * Match perfmgr_db_fill_data_cnt_read_pc(): never hand
+		 * back uninitialized counters, the caller may copy the
+		 * whole structure.
+		 */
+		reading->unicast_xmit_pkts = 0;
+		reading->unicast_rcv_pkts = 0;
+		reading->multicast_xmit_pkts = 0;
+		reading->multicast_rcv_pkts = 0;
+	}
+	reading->time = time(NULL);
+}
+#endif /* ENABLE_OSM_PERF_MGR */
diff --git a/opensm/osm_pkey.c b/opensm/osm_pkey.c new file mode 100644 index 0000000..f14144d --- /dev/null +++ b/opensm/osm_pkey.c @@ -0,0 +1,586 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2012 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of opensm pkey manipulation functions. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_PKEY_C +#include +#include +#include +#include +#include +#include + +void osm_pkey_tbl_construct(IN osm_pkey_tbl_t * p_pkey_tbl) +{ + cl_map_construct(&p_pkey_tbl->accum_pkeys); + cl_ptr_vector_construct(&p_pkey_tbl->blocks); + cl_ptr_vector_construct(&p_pkey_tbl->new_blocks); + cl_map_construct(&p_pkey_tbl->keys); +} + +void osm_pkey_tbl_destroy(IN osm_pkey_tbl_t * p_pkey_tbl) +{ + ib_pkey_table_t *p_block; + uint16_t num_blocks, i; + + num_blocks = (uint16_t) (cl_ptr_vector_get_size(&p_pkey_tbl->blocks)); + for (i = 0; i < num_blocks; i++) + if ((p_block = cl_ptr_vector_get(&p_pkey_tbl->blocks, i))) + free(p_block); + cl_ptr_vector_destroy(&p_pkey_tbl->blocks); + + num_blocks = + (uint16_t) (cl_ptr_vector_get_size(&p_pkey_tbl->new_blocks)); + for (i = 0; i < num_blocks; i++) + if ((p_block = cl_ptr_vector_get(&p_pkey_tbl->new_blocks, i))) + free(p_block); + cl_ptr_vector_destroy(&p_pkey_tbl->new_blocks); + + cl_map_remove_all(&p_pkey_tbl->accum_pkeys); + cl_map_destroy(&p_pkey_tbl->accum_pkeys); + + cl_map_remove_all(&p_pkey_tbl->keys); + cl_map_destroy(&p_pkey_tbl->keys); +} + +ib_api_status_t osm_pkey_tbl_init(IN osm_pkey_tbl_t * p_pkey_tbl) +{ + cl_map_init(&p_pkey_tbl->accum_pkeys, 1); + cl_ptr_vector_init(&p_pkey_tbl->blocks, 0, 1); + cl_ptr_vector_init(&p_pkey_tbl->new_blocks, 0, 1); + cl_map_init(&p_pkey_tbl->keys, 1); + cl_qlist_init(&p_pkey_tbl->pending); + p_pkey_tbl->last_pkey_idx = 0; + p_pkey_tbl->used_blocks = 0; + p_pkey_tbl->max_blocks = 0; 
+ p_pkey_tbl->rcv_blocks_cnt = 0; + p_pkey_tbl->indx0_pkey = 0; + return IB_SUCCESS; +} + +void osm_pkey_tbl_init_new_blocks(IN const osm_pkey_tbl_t * p_pkey_tbl) +{ + ib_pkey_table_t *p_block; + size_t b, num_blocks = cl_ptr_vector_get_size(&p_pkey_tbl->new_blocks); + + for (b = 0; b < num_blocks; b++) + if ((p_block = cl_ptr_vector_get(&p_pkey_tbl->new_blocks, b))) + memset(p_block, 0, sizeof(*p_block)); +} + +ib_api_status_t osm_pkey_tbl_set(IN osm_pkey_tbl_t * p_pkey_tbl, + IN uint16_t block, IN ib_pkey_table_t * p_tbl, + IN boolean_t allow_both_pkeys) +{ + uint16_t b, i; + ib_pkey_table_t *p_pkey_block; + uint16_t *p_prev_pkey; + ib_net16_t pkey, pkey_base; + + /* make sure the block is allocated */ + if (cl_ptr_vector_get_size(&p_pkey_tbl->blocks) > block) + p_pkey_block = + (ib_pkey_table_t *) cl_ptr_vector_get(&p_pkey_tbl->blocks, + block); + else + p_pkey_block = NULL; + + if (!p_pkey_block) { + p_pkey_block = + (ib_pkey_table_t *) malloc(sizeof(ib_pkey_table_t)); + if (!p_pkey_block) + return IB_ERROR; + memset(p_pkey_block, 0, sizeof(ib_pkey_table_t)); + cl_ptr_vector_set(&p_pkey_tbl->blocks, block, p_pkey_block); + } + + /* sets the block values */ + memcpy(p_pkey_block, p_tbl, sizeof(ib_pkey_table_t)); + + /* + NOTE: as the spec does not require uniqueness of PKeys in + tables there is no other way but to refresh the entire keys map. + + Moreover, if the same key exists but with full membership it should + have precedence over the key with limited membership ! 
+ */ + cl_map_remove_all(&p_pkey_tbl->keys); + + for (b = 0; b < cl_ptr_vector_get_size(&p_pkey_tbl->blocks); b++) { + + p_pkey_block = cl_ptr_vector_get(&p_pkey_tbl->blocks, b); + if (!p_pkey_block) + continue; + + for (i = 0; i < IB_NUM_PKEY_ELEMENTS_IN_BLOCK; i++) { + pkey = p_pkey_block->pkey_entry[i]; + if (ib_pkey_is_invalid(pkey)) + continue; + + if (allow_both_pkeys) + pkey_base = pkey; + else + pkey_base = ib_pkey_get_base(pkey); + + /* + If allow_both_pkeys is FALSE, + ignore the PKey Full Member bit in the key but store + the pointer to the table element as the map value + */ + p_prev_pkey = cl_map_get(&p_pkey_tbl->keys, pkey_base); + + /* we only insert if no previous or it is not full member and allow_both_pkeys is FALSE */ + if ((p_prev_pkey == NULL) || + (allow_both_pkeys == FALSE && + cl_ntoh16(*p_prev_pkey) < cl_ntoh16(pkey))) + cl_map_insert(&p_pkey_tbl->keys, pkey_base, + &(p_pkey_block->pkey_entry[i]) + ); + } + } + return IB_SUCCESS; +} + +/* + Store the given pkey (along with it's overall index) in the accum_pkeys array. 
+*/
+cl_status_t osm_pkey_tbl_set_accum_pkeys(IN osm_pkey_tbl_t * p_pkey_tbl,
+					 IN uint16_t pkey,
+					 IN uint16_t pkey_idx)
+{
+	/* the map stores pkey_idx + 1: a 0 value means "not found" */
+	uintptr_t biased_idx = pkey_idx + 1;
+
+	/* last_pkey_idx is kept one past the highest index recorded */
+	if (p_pkey_tbl->last_pkey_idx <= pkey_idx)
+		p_pkey_tbl->last_pkey_idx = pkey_idx + 1;
+
+	/* drop an existing entry for this pkey before inserting the new one */
+	if (cl_map_get(&p_pkey_tbl->accum_pkeys, pkey) != NULL)
+		cl_map_remove(&p_pkey_tbl->accum_pkeys, pkey);
+
+	if (cl_map_insert(&p_pkey_tbl->accum_pkeys, pkey,
+			  (void *) biased_idx) == NULL)
+		return CL_INSUFFICIENT_MEMORY;
+
+	return CL_SUCCESS;
+}
+
+/*
+ * Recompute last_pkey_idx from the accum_pkeys map: it is set to the
+ * largest value stored in the map (values are stored as index + 1),
+ * or to 0 when the map is empty.
+ */
+void osm_pkey_find_last_accum_pkey_index(IN osm_pkey_tbl_t * p_pkey_tbl)
+{
+	cl_map_iterator_t it;
+	uint16_t biased_idx, highest = 0;
+
+	for (it = cl_map_head(&p_pkey_tbl->accum_pkeys);
+	     it != cl_map_end(&p_pkey_tbl->accum_pkeys);
+	     it = cl_map_next(it)) {
+		void *obj = cl_map_obj(it);
+
+		CL_ASSERT(obj);
+		/* the stored "pointer" is really the biased index */
+		biased_idx = (uint16_t) (uintptr_t) obj;
+		if (biased_idx > highest)
+			highest = biased_idx;
+	}
+	p_pkey_tbl->last_pkey_idx = highest;
+}
+
+/* + Store the given pkey in the "new" blocks array. + Also, make sure the regular block exists.
+*/ +ib_api_status_t osm_pkey_tbl_set_new_entry(IN osm_pkey_tbl_t * p_pkey_tbl, + IN uint16_t block_idx, + IN uint8_t pkey_idx, + IN uint16_t pkey) +{ + ib_pkey_table_t *p_block; + + if (!(p_block = osm_pkey_tbl_new_block_get(p_pkey_tbl, block_idx))) { + p_block = (ib_pkey_table_t *) malloc(sizeof(ib_pkey_table_t)); + if (!p_block) + return IB_ERROR; + memset(p_block, 0, sizeof(ib_pkey_table_t)); + cl_ptr_vector_set(&p_pkey_tbl->new_blocks, block_idx, p_block); + } + + p_block->pkey_entry[pkey_idx] = pkey; + if (p_pkey_tbl->used_blocks <= block_idx) + p_pkey_tbl->used_blocks = block_idx + 1; + + return IB_SUCCESS; +} + +boolean_t osm_pkey_find_next_free_entry(IN osm_pkey_tbl_t * p_pkey_tbl, + OUT uint16_t * p_block_idx, + OUT uint8_t * p_pkey_idx) +{ + ib_pkey_table_t *p_new_block; + + CL_ASSERT(p_block_idx); + CL_ASSERT(p_pkey_idx); + + while (*p_block_idx < p_pkey_tbl->max_blocks) { + if (*p_pkey_idx > IB_NUM_PKEY_ELEMENTS_IN_BLOCK - 1) { + *p_pkey_idx = 0; + (*p_block_idx)++; + if (*p_block_idx >= p_pkey_tbl->max_blocks) + return FALSE; + } + + p_new_block = + osm_pkey_tbl_new_block_get(p_pkey_tbl, *p_block_idx); + + if (!p_new_block || + ib_pkey_is_invalid(p_new_block->pkey_entry[*p_pkey_idx])) + return TRUE; + else + (*p_pkey_idx)++; + } + return FALSE; +} + +ib_api_status_t osm_pkey_tbl_get_block_and_idx(IN osm_pkey_tbl_t * p_pkey_tbl, + IN uint16_t * p_pkey, + OUT uint16_t * p_block_idx, + OUT uint8_t * p_pkey_idx) +{ + uint16_t num_of_blocks; + uint16_t block_index; + ib_pkey_table_t *block; + + CL_ASSERT(p_block_idx != NULL); + CL_ASSERT(p_pkey_idx != NULL); + + num_of_blocks = (uint16_t) cl_ptr_vector_get_size(&p_pkey_tbl->blocks); + for (block_index = 0; block_index < num_of_blocks; block_index++) { + block = osm_pkey_tbl_block_get(p_pkey_tbl, block_index); + if ((block->pkey_entry <= p_pkey) && + (p_pkey < + block->pkey_entry + IB_NUM_PKEY_ELEMENTS_IN_BLOCK)) { + *p_block_idx = block_index; + *p_pkey_idx = (uint8_t) (p_pkey - block->pkey_entry); + 
return IB_SUCCESS; + } + } + return IB_NOT_FOUND; +} + +static boolean_t match_pkey(IN const ib_net16_t * pkey1, + IN const ib_net16_t * pkey2) +{ + + /* if neither pkey is full member - this is not a match */ + if (!(ib_pkey_is_full_member(*pkey1) || ib_pkey_is_full_member(*pkey2))) + return FALSE; + + /* compare if the bases are the same. if they are - then + this is a match */ + if (ib_pkey_get_base(*pkey1) != ib_pkey_get_base(*pkey2)) + return FALSE; + + return TRUE; +} + +boolean_t osm_physp_share_this_pkey(IN const osm_physp_t * p_physp1, + IN const osm_physp_t * p_physp2, + IN ib_net16_t pkey, + IN boolean_t allow_both_pkeys) +{ + ib_net16_t *pkey1, *pkey2; + ib_net16_t full_pkey, limited_pkey; + + if (allow_both_pkeys) { + full_pkey = pkey | IB_PKEY_TYPE_MASK; + limited_pkey = pkey & ~IB_PKEY_TYPE_MASK; + pkey1 = cl_map_get(&(osm_physp_get_pkey_tbl(p_physp1))->keys, + full_pkey); + if (!pkey1) + pkey1 = cl_map_get(&(osm_physp_get_pkey_tbl(p_physp1))->keys, + limited_pkey); + pkey2 = cl_map_get(&(osm_physp_get_pkey_tbl(p_physp2))->keys, + full_pkey); + if (!pkey2) + pkey2 = cl_map_get(&(osm_physp_get_pkey_tbl(p_physp2))->keys, + limited_pkey); + } else { + pkey1 = cl_map_get(&(osm_physp_get_pkey_tbl(p_physp1))->keys, + ib_pkey_get_base(pkey)); + pkey2 = cl_map_get(&(osm_physp_get_pkey_tbl(p_physp2))->keys, + ib_pkey_get_base(pkey)); + } + return (pkey1 && pkey2 && match_pkey(pkey1, pkey2)); +} + +ib_net16_t osm_physp_find_common_pkey(IN const osm_physp_t * p_physp1, + IN const osm_physp_t * p_physp2, + IN boolean_t allow_both_pkeys) +{ + ib_net16_t *pkey1, *pkey2; + uint64_t pkey1_base, pkey2_base; + const osm_pkey_tbl_t *pkey_tbl1, *pkey_tbl2; + cl_map_iterator_t map_iter1, map_iter2; + + pkey_tbl1 = osm_physp_get_pkey_tbl(p_physp1); + pkey_tbl2 = osm_physp_get_pkey_tbl(p_physp2); + + map_iter1 = cl_map_head(&pkey_tbl1->keys); + map_iter2 = cl_map_head(&pkey_tbl2->keys); + + /* we rely on the fact the map are sorted by pkey */ + while ((map_iter1 != 
cl_map_end(&pkey_tbl1->keys)) && + (map_iter2 != cl_map_end(&pkey_tbl2->keys))) { + pkey1 = (ib_net16_t *) cl_map_obj(map_iter1); + pkey2 = (ib_net16_t *) cl_map_obj(map_iter2); + + if (match_pkey(pkey1, pkey2)) + return *pkey1; + + /* advance the lower value if they are not equal */ + pkey1_base = cl_map_key(map_iter1); + pkey2_base = cl_map_key(map_iter2); + if (pkey2_base == pkey1_base) { + map_iter1 = cl_map_next(map_iter1); + map_iter2 = cl_map_next(map_iter2); + } else if (pkey2_base < pkey1_base) + map_iter2 = cl_map_next(map_iter2); + else + map_iter1 = cl_map_next(map_iter1); + } + + if (!allow_both_pkeys) + return 0; + + /* + When using allow_both_pkeys, the keys in pkey tables are the + pkey value including membership bit. + Therefore, in order to complete the search, we also need to + compare port 1's full pkeys with port 2's limited pkeys, and + port 1's limited pkeys with port 2's full pkeys. + */ + + map_iter1 = cl_map_head(&pkey_tbl1->keys); + map_iter2 = cl_map_head(&pkey_tbl2->keys); + + /* comparing pkey_tbl1 full with pkey_tbl2 limited */ + while ((map_iter1 != cl_map_end(&pkey_tbl1->keys)) && + (map_iter2 != cl_map_end(&pkey_tbl2->keys))) { + pkey1 = (ib_net16_t *) cl_map_obj(map_iter1); + pkey2 = (ib_net16_t *) cl_map_obj(map_iter2); + + if (!ib_pkey_is_full_member(*pkey1)) { + map_iter1 = cl_map_next(map_iter1); + continue; + } + if (ib_pkey_is_full_member(*pkey2)) { + map_iter2 = cl_map_next(map_iter2); + continue; + } + + if (match_pkey(pkey1, pkey2)) + return *pkey1; + + /* advance the lower value if they are not equal */ + pkey1_base = ib_pkey_get_base(cl_map_key(map_iter1)); + pkey2_base = ib_pkey_get_base(cl_map_key(map_iter2)); + if (pkey2_base == pkey1_base) { + map_iter1 = cl_map_next(map_iter1); + map_iter2 = cl_map_next(map_iter2); + } else if (pkey2_base < pkey1_base) + map_iter2 = cl_map_next(map_iter2); + else + map_iter1 = cl_map_next(map_iter1); + } + + map_iter1 = cl_map_head(&pkey_tbl1->keys); + map_iter2 = 
cl_map_head(&pkey_tbl2->keys); + + /* comparing pkey_tbl1 limited with pkey_tbl2 full */ + while ((map_iter1 != cl_map_end(&pkey_tbl1->keys)) && + (map_iter2 != cl_map_end(&pkey_tbl2->keys))) { + pkey1 = (ib_net16_t *) cl_map_obj(map_iter1); + pkey2 = (ib_net16_t *) cl_map_obj(map_iter2); + + if (ib_pkey_is_full_member(*pkey1)) { + map_iter1 = cl_map_next(map_iter1); + continue; + } + if (!ib_pkey_is_full_member(*pkey2)) { + map_iter2 = cl_map_next(map_iter2); + continue; + } + + if (match_pkey(pkey1, pkey2)) + return *pkey1; + + /* advance the lower value if they are not equal */ + pkey1_base = ib_pkey_get_base(cl_map_key(map_iter1)); + pkey2_base = ib_pkey_get_base(cl_map_key(map_iter2)); + if (pkey2_base == pkey1_base) { + map_iter1 = cl_map_next(map_iter1); + map_iter2 = cl_map_next(map_iter2); + } else if (pkey2_base < pkey1_base) + map_iter2 = cl_map_next(map_iter2); + else + map_iter1 = cl_map_next(map_iter1); + } + + return 0; +} + +boolean_t osm_physp_share_pkey(IN osm_log_t * p_log, + IN const osm_physp_t * p_physp_1, + IN const osm_physp_t * p_physp_2, + IN boolean_t allow_both_pkeys) +{ + const osm_pkey_tbl_t *pkey_tbl1, *pkey_tbl2; + + if (p_physp_1 == p_physp_2) + return TRUE; + + pkey_tbl1 = osm_physp_get_pkey_tbl(p_physp_1); + pkey_tbl2 = osm_physp_get_pkey_tbl(p_physp_2); + + /* + The spec: 10.9.2 does not require each phys port to have PKey Table. + So actually if it does not, we need to use the default port instead. 
+ + HACK: meanwhile we will ignore the check + */ + if (cl_is_map_empty(&pkey_tbl1->keys) + || cl_is_map_empty(&pkey_tbl2->keys)) + return TRUE; + + return + !ib_pkey_is_invalid(osm_physp_find_common_pkey + (p_physp_1, p_physp_2, allow_both_pkeys)); +} + +boolean_t osm_port_share_pkey(IN osm_log_t * p_log, + IN const osm_port_t * p_port_1, + IN const osm_port_t * p_port_2, + IN boolean_t allow_both_pkeys) +{ + + osm_physp_t *p_physp1, *p_physp2; + boolean_t ret; + + OSM_LOG_ENTER(p_log); + + if (!p_port_1 || !p_port_2) { + ret = FALSE; + goto Exit; + } + + p_physp1 = p_port_1->p_physp; + p_physp2 = p_port_2->p_physp; + + if (!p_physp1 || !p_physp2) { + ret = FALSE; + goto Exit; + } + + ret = osm_physp_share_pkey(p_log, p_physp1, p_physp2, allow_both_pkeys); + +Exit: + OSM_LOG_EXIT(p_log); + return ret; +} + +boolean_t osm_physp_has_pkey(IN osm_log_t * p_log, IN ib_net16_t pkey, + IN const osm_physp_t * p_physp) +{ + ib_net16_t *p_pkey, pkey_base; + const osm_pkey_tbl_t *pkey_tbl; + boolean_t res = FALSE; + + OSM_LOG_ENTER(p_log); + + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Search for PKey: 0x%04x\n", cl_ntoh16(pkey)); + + /* if the pkey given is an invalid pkey - return TRUE. */ + if (ib_pkey_is_invalid(pkey)) { + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Given invalid PKey - we treat it loosely and allow it\n"); + res = TRUE; + goto Exit; + } + + pkey_base = ib_pkey_get_base(pkey); + + pkey_tbl = osm_physp_get_pkey_tbl(p_physp); + + p_pkey = cl_map_get(&pkey_tbl->keys, pkey_base); + if (p_pkey) { + res = TRUE; + OSM_LOG(p_log, OSM_LOG_DEBUG, + "PKey 0x%04x was found\n", cl_ntoh16(pkey)); + } else + OSM_LOG(p_log, OSM_LOG_DEBUG, + "PKey 0x%04x was not found\n", cl_ntoh16(pkey)); + +Exit: + OSM_LOG_EXIT(p_log); + return res; +} + +void osm_pkey_tbl_set_indx0_pkey(IN osm_log_t * p_log, IN ib_net16_t pkey, + IN boolean_t full, + OUT osm_pkey_tbl_t * p_pkey_tbl) +{ + p_pkey_tbl->indx0_pkey = (full == TRUE) ? 
+ pkey | cl_hton16(0x8000) : pkey; + OSM_LOG(p_log, OSM_LOG_DEBUG, "pkey 0x%04x set at indx0\n", + cl_ntoh16(p_pkey_tbl->indx0_pkey)); +} diff --git a/opensm/osm_pkey_mgr.c b/opensm/osm_pkey_mgr.c new file mode 100644 index 0000000..3d71832 --- /dev/null +++ b/opensm/osm_pkey_mgr.c @@ -0,0 +1,906 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of the P_Key Manager (Partition Manager). 
+ * This is part of the OpenSM. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_PKEY_MGR_C +#include +#include +#include +#include + +static void clear_accum_pkey_index(osm_pkey_tbl_t * p_pkey_tbl, + uint16_t pkey_index); + +/* + The max number of pkeys/pkey blocks for a physical port is located + in a different place for switch external ports (SwitchInfo) and the + rest of the ports (NodeInfo). +*/ +static uint16_t +pkey_mgr_get_physp_max_pkeys(IN const osm_physp_t * p_physp) +{ + osm_node_t *p_node = osm_physp_get_node_ptr(p_physp); + uint16_t num_pkeys = 0; + + if (!p_node->sw || (osm_physp_get_port_num(p_physp) == 0)) + num_pkeys = cl_ntoh16(p_node->node_info.partition_cap); + else + num_pkeys = cl_ntoh16(p_node->sw->switch_info.enforce_cap); + return num_pkeys; +} + +static uint16_t +pkey_mgr_get_physp_max_blocks(IN const osm_physp_t * p_physp) +{ + return ((pkey_mgr_get_physp_max_pkeys(p_physp) + 31) / 32); +} + +/* + * Insert new pending pkey entry to the specific port pkey table + * pending pkeys. New entries are inserted at the back. 
+ */ +static void +pkey_mgr_process_physical_port(IN osm_log_t * p_log, + IN osm_sm_t * sm, + IN const ib_net16_t pkey, + IN osm_physp_t * p_physp) +{ + osm_node_t *p_node = osm_physp_get_node_ptr(p_physp); + osm_pkey_tbl_t *p_pkey_tbl; + ib_net16_t *p_orig_pkey; + osm_pending_pkey_t *p_pending; + + p_pkey_tbl = &p_physp->pkeys; + p_pending = (osm_pending_pkey_t *) calloc(1, sizeof(osm_pending_pkey_t)); + if (!p_pending) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0502: " + "Failed to allocate new pending pkey entry for node " + "0x%016" PRIx64 " port %u\n", + cl_ntoh64(osm_node_get_node_guid(p_node)), + osm_physp_get_port_num(p_physp)); + return; + } + p_pending->pkey = pkey; + if (sm->p_subn->opt.allow_both_pkeys) + p_orig_pkey = cl_map_get(&p_pkey_tbl->keys, pkey); + else + p_orig_pkey = cl_map_get(&p_pkey_tbl->keys, + ib_pkey_get_base(pkey)); + + if (!p_orig_pkey) { + p_pending->is_new = TRUE; + } else { + CL_ASSERT(ib_pkey_get_base(*p_orig_pkey) == + ib_pkey_get_base(pkey)); + p_pending->is_new = FALSE; + if (osm_pkey_tbl_get_block_and_idx(p_pkey_tbl, p_orig_pkey, + &p_pending->block, + &p_pending->index) != + IB_SUCCESS) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0503: " + "Failed to obtain P_Key 0x%04x block and index " + "for node 0x%016" PRIx64 " port %u\n", + cl_ntoh16(ib_pkey_get_base(pkey)), + cl_ntoh64(osm_node_get_node_guid(p_node)), + osm_physp_get_port_num(p_physp)); + free(p_pending); + return; + } + if (p_physp->pkeys.indx0_pkey) { + /* + * Remove the pkey that should be at index 0 from + * accum pkey if current position is not index 0 + */ + if (((sm->p_subn->opt.allow_both_pkeys && + pkey == p_physp->pkeys.indx0_pkey) || + (!sm->p_subn->opt.allow_both_pkeys && + ib_pkey_get_base(pkey) == ib_pkey_get_base(p_physp->pkeys.indx0_pkey))) && + (p_pending->block != 0 || p_pending->index != 0)) { + p_pending->is_new = TRUE; + clear_accum_pkey_index(p_pkey_tbl, + p_pending->block * + IB_NUM_PKEY_ELEMENTS_IN_BLOCK + + p_pending->index); + } + + if 
(p_pending->block == 0 && p_pending->index == 0) { + /* Move the pkey away from index 0 */ + if ((sm->p_subn->opt.allow_both_pkeys && + pkey != p_physp->pkeys.indx0_pkey) || + (!sm->p_subn->opt.allow_both_pkeys && + ib_pkey_get_base(pkey) != ib_pkey_get_base(p_physp->pkeys.indx0_pkey))) { + p_pending->is_new = TRUE; + clear_accum_pkey_index(p_pkey_tbl, 0); + } + } + } else { + /* If index 0 is occupied by non-default, it should reoccupied by pkey 0x7FFF */ + if (p_pending->block == 0 && p_pending->index == 0) { + if (ib_pkey_get_base(pkey) != IB_DEFAULT_PARTIAL_PKEY) { + p_pending->is_new = TRUE; + clear_accum_pkey_index(p_pkey_tbl, 0); + } + /* Need to move default pkey to index 0 */ + } else if ((sm->p_subn->opt.allow_both_pkeys && + pkey == IB_DEFAULT_PKEY) || + (!sm->p_subn->opt.allow_both_pkeys && + ib_pkey_get_base(pkey) == IB_DEFAULT_PARTIAL_PKEY)) { + p_pending->is_new = TRUE; + clear_accum_pkey_index(p_pkey_tbl, + p_pending->block * + IB_NUM_PKEY_ELEMENTS_IN_BLOCK + + p_pending->index); + } + } + + } + if (p_pending->is_new == TRUE) + cl_qlist_insert_tail(&p_pkey_tbl->pending, + (cl_list_item_t *) p_pending); + else + cl_qlist_insert_head(&p_pkey_tbl->pending, + (cl_list_item_t *) p_pending); + + OSM_LOG(p_log, OSM_LOG_DEBUG, + "pkey 0x%04x was %s for node 0x%016" PRIx64 " port %u\n", + cl_ntoh16(pkey), p_pending->is_new ? "inserted" : "updated", + cl_ntoh64(osm_node_get_node_guid(p_node)), + osm_physp_get_port_num(p_physp)); +} + +static void +pkey_mgr_process_partition_table(osm_log_t * p_log, osm_sm_t * sm, + const osm_prtn_t * p_prtn, + const boolean_t full) +{ + const cl_map_t *p_tbl = + full ? 
&p_prtn->full_guid_tbl : &p_prtn->part_guid_tbl; + cl_map_iterator_t i, i_next; + ib_net16_t pkey = p_prtn->pkey; + osm_physp_t *p_physp; + + if (full) + pkey |= cl_hton16(0x8000); + + i_next = cl_map_head(p_tbl); + while (i_next != cl_map_end(p_tbl)) { + i = i_next; + i_next = cl_map_next(i); + p_physp = cl_map_obj(i); + if (p_physp) + pkey_mgr_process_physical_port(p_log, sm, pkey, + p_physp); + } +} + +static ib_api_status_t +pkey_mgr_update_pkey_entry(IN osm_sm_t * sm, + IN const osm_physp_t * p_physp, + IN const ib_pkey_table_t * block, + IN const uint16_t block_index) +{ + osm_madw_context_t context; + osm_node_t *p_node = osm_physp_get_node_ptr(p_physp); + osm_physp_t *physp0; + uint32_t attr_mod; + ib_net64_t m_key; + + context.pkey_context.node_guid = osm_node_get_node_guid(p_node); + context.pkey_context.port_guid = osm_physp_get_port_guid(p_physp); + context.pkey_context.set_method = TRUE; + attr_mod = block_index; + if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && + osm_physp_get_port_num(p_physp) != 0) { + attr_mod |= osm_physp_get_port_num(p_physp) << 16; + physp0 = osm_node_get_physp_ptr(p_node, 0); + m_key = ib_port_info_get_m_key(&physp0->port_info); + } else + m_key = ib_port_info_get_m_key(&p_physp->port_info); + return osm_req_set(sm, osm_physp_get_dr_path_ptr(p_physp), + (uint8_t *) block, sizeof(*block), + IB_MAD_ATTR_P_KEY_TABLE, + cl_hton32(attr_mod), FALSE, m_key, + 0, CL_DISP_MSGID_NONE, &context); +} + +static ib_api_status_t +pkey_mgr_enforce_partition(IN osm_log_t * p_log, osm_sm_t * sm, + IN osm_physp_t * p_physp, + IN osm_partition_enforce_type_enum enforce_type) +{ + osm_madw_context_t context; + uint8_t payload[IB_SMP_DATA_SIZE]; + ib_port_info_t *p_pi; + ib_net64_t m_key; + osm_physp_t *physp0; + ib_api_status_t status; + uint8_t enforce_bits; + + p_pi = &p_physp->port_info; + + if (enforce_type == OSM_PARTITION_ENFORCE_TYPE_BOTH) + enforce_bits = 0xc; + else if (enforce_type == OSM_PARTITION_ENFORCE_TYPE_IN) + enforce_bits 
= 0x8; + else + enforce_bits = 0x4; + + if ((p_pi->vl_enforce & 0xc) == enforce_bits * + (enforce_type != OSM_PARTITION_ENFORCE_TYPE_OFF)) { + OSM_LOG(p_log, OSM_LOG_DEBUG, + "No need to update PortInfo for " + "node 0x%016" PRIx64 " port %u (%s)\n", + cl_ntoh64(osm_node_get_node_guid + (osm_physp_get_node_ptr(p_physp))), + osm_physp_get_port_num(p_physp), + p_physp->p_node->print_desc); + return IB_SUCCESS; + } + + memcpy(payload, p_pi, sizeof(ib_port_info_t)); + + p_pi = (ib_port_info_t *) payload; + p_pi->vl_enforce &= ~0xc; + if (enforce_type != OSM_PARTITION_ENFORCE_TYPE_OFF) + p_pi->vl_enforce |= enforce_bits; + + p_pi->state_info2 = 0; + ib_port_info_set_port_state(p_pi, IB_LINK_NO_CHANGE); + + physp0 = osm_node_get_physp_ptr(p_physp->p_node, 0); + m_key = ib_port_info_get_m_key(&physp0->port_info); + + context.pi_context.node_guid = + osm_node_get_node_guid(osm_physp_get_node_ptr(p_physp)); + context.pi_context.port_guid = osm_physp_get_port_guid(p_physp); + context.pi_context.set_method = TRUE; + context.pi_context.light_sweep = FALSE; + context.pi_context.active_transition = FALSE; + context.pi_context.client_rereg = FALSE; + + status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_physp), + payload, sizeof(payload), + IB_MAD_ATTR_PORT_INFO, + cl_hton32(osm_physp_get_port_num(p_physp)), + FALSE, m_key, + 0, CL_DISP_MSGID_NONE, &context); + if (status != IB_SUCCESS) + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0511: " + "Failed to set PortInfo for " + "node 0x%016" PRIx64 " port %u (%s)\n", + cl_ntoh64(osm_node_get_node_guid + (osm_physp_get_node_ptr(p_physp))), + osm_physp_get_port_num(p_physp), + p_physp->p_node->print_desc); + else + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Set PortInfo for node 0x%016" PRIx64 " port %u (%s)\n", + cl_ntoh64(osm_node_get_node_guid + (osm_physp_get_node_ptr(p_physp))), + osm_physp_get_port_num(p_physp), + p_physp->p_node->print_desc); + return status; +} + +static void clear_accum_pkey_index(osm_pkey_tbl_t * p_pkey_tbl, + uint16_t 
pkey_index) +{ + uint16_t pkey_idx_bias, pkey_idx; + void *ptr; + uintptr_t pkey_idx_ptr; + cl_map_iterator_t map_iter, map_iter_temp; + + map_iter = cl_map_head(&p_pkey_tbl->accum_pkeys); + + pkey_idx_bias = pkey_index + 1; // adjust for pkey index bias in accum_pkeys + + while (map_iter != cl_map_end(&p_pkey_tbl->accum_pkeys)) { + map_iter_temp = cl_map_next(map_iter); + ptr = (uint16_t *) cl_map_obj(map_iter); + CL_ASSERT(ptr); + pkey_idx_ptr = (uintptr_t) ptr; + pkey_idx = pkey_idx_ptr; + if (pkey_idx == pkey_idx_bias) { + cl_map_remove_item(&p_pkey_tbl->accum_pkeys, map_iter); + if (p_pkey_tbl->last_pkey_idx == pkey_idx) + osm_pkey_find_last_accum_pkey_index(p_pkey_tbl); + break; + } + map_iter = map_iter_temp; + } +} + +static int last_accum_pkey_index(osm_pkey_tbl_t * p_pkey_tbl, + uint16_t * p_block_idx, + uint8_t * p_pkey_idx) +{ + if (p_pkey_tbl->last_pkey_idx) { + *p_block_idx = (p_pkey_tbl->last_pkey_idx - 1) / IB_NUM_PKEY_ELEMENTS_IN_BLOCK; + *p_pkey_idx = (p_pkey_tbl->last_pkey_idx - 1) % IB_NUM_PKEY_ELEMENTS_IN_BLOCK; + return 1; + } + + return 0; +} + +static int pkey_mgr_update_port(osm_log_t * p_log, osm_sm_t * sm, + const osm_port_t * const p_port) +{ + osm_physp_t *p_physp; + osm_node_t *p_node; + ib_pkey_table_t *block, *new_block; + osm_pkey_tbl_t *p_pkey_tbl; + uint16_t block_index; + uint8_t pkey_index; + uint16_t last_free_block_index = 0; + uint8_t last_free_pkey_index = 0; + uint16_t num_of_blocks; + uint16_t max_num_of_blocks; + ib_api_status_t status; + osm_pending_pkey_t *p_pending; + boolean_t found; + ib_pkey_table_t empty_block; + int ret = 0, full = 0; + void *ptr; + uintptr_t pkey_idx_ptr; + uint16_t pkey_idx; + + p_physp = p_port->p_physp; + if (!p_physp) + return FALSE; + + memset(&empty_block, 0, sizeof(ib_pkey_table_t)); + + p_node = osm_physp_get_node_ptr(p_physp); + p_pkey_tbl = &p_physp->pkeys; + num_of_blocks = osm_pkey_tbl_get_num_blocks(p_pkey_tbl); + max_num_of_blocks = pkey_mgr_get_physp_max_blocks(p_physp); + if 
(p_pkey_tbl->max_blocks > max_num_of_blocks) { + OSM_LOG(p_log, OSM_LOG_INFO, + "Max number of blocks reduced from %u to %u " + "for node 0x%016" PRIx64 " port %u (%s)\n", + p_pkey_tbl->max_blocks, max_num_of_blocks, + cl_ntoh64(osm_node_get_node_guid(p_node)), + osm_physp_get_port_num(p_physp), + p_physp->p_node->print_desc); + } + p_pkey_tbl->max_blocks = max_num_of_blocks; + + osm_pkey_tbl_init_new_blocks(p_pkey_tbl); + p_pkey_tbl->used_blocks = 0; + + /* + process every pending pkey in order - + first must be "updated" last are "new" + */ + p_pending = + (osm_pending_pkey_t *) cl_qlist_remove_head(&p_pkey_tbl->pending); + while (p_pending != + (osm_pending_pkey_t *) cl_qlist_end(&p_pkey_tbl->pending)) { + + found = FALSE; + ptr = NULL; + + if (p_pending->is_new == FALSE) { + block_index = p_pending->block; + pkey_index = p_pending->index; + found = TRUE; + } else { + ptr = cl_map_get(&p_pkey_tbl->accum_pkeys,p_pending->pkey); + if (ptr != NULL) { + pkey_idx_ptr = (uintptr_t) ptr; + pkey_idx = pkey_idx_ptr; + pkey_idx--; /* adjust pkey index for bias */ + block_index = pkey_idx / IB_NUM_PKEY_ELEMENTS_IN_BLOCK; + pkey_index = pkey_idx % IB_NUM_PKEY_ELEMENTS_IN_BLOCK; + + if (((sm->p_subn->opt.allow_both_pkeys && + p_pending->pkey == p_physp->pkeys.indx0_pkey) || + (!sm->p_subn->opt.allow_both_pkeys && + ib_pkey_get_base(p_pending->pkey) == ib_pkey_get_base(p_physp->pkeys.indx0_pkey))) || + ((p_pending->pkey != p_physp->pkeys.indx0_pkey && + pkey_idx == 0))) { + clear_accum_pkey_index(p_pkey_tbl, pkey_idx); + cl_qlist_insert_tail(&p_pkey_tbl->pending, + (cl_list_item_t *)p_pending); + p_pending = + (osm_pending_pkey_t *) cl_qlist_remove_head(&p_pkey_tbl->pending); + continue; + } else + found = TRUE; + } + + if (!found) { + if (!p_pkey_tbl->indx0_pkey && + ((sm->p_subn->opt.allow_both_pkeys && + p_pending->pkey == IB_DEFAULT_PKEY) || + (!sm->p_subn->opt.allow_both_pkeys && + ib_pkey_get_base(p_pending->pkey) == IB_DEFAULT_PARTIAL_PKEY))) { + block_index = 0; + 
pkey_index = 0; + } else if ((sm->p_subn->opt.allow_both_pkeys && + p_pending->pkey == p_pkey_tbl->indx0_pkey) || + (!sm->p_subn->opt.allow_both_pkeys && + ib_pkey_get_base(p_pending->pkey) == + ib_pkey_get_base(p_pkey_tbl->indx0_pkey))) { + block_index = 0; + pkey_index = 0; + } else if (last_accum_pkey_index(p_pkey_tbl, + &last_free_block_index, + &last_free_pkey_index)) { + block_index = last_free_block_index; + pkey_index = last_free_pkey_index + 1; + if (pkey_index >= IB_NUM_PKEY_ELEMENTS_IN_BLOCK) { + block_index++; + pkey_index -= IB_NUM_PKEY_ELEMENTS_IN_BLOCK; + } + } else { + block_index = 0; + pkey_index = 1; + } + + if (block_index * IB_NUM_PKEY_ELEMENTS_IN_BLOCK + pkey_index >= pkey_mgr_get_physp_max_pkeys(p_physp)) { + if ((sm->p_subn->opt.allow_both_pkeys && + p_pending->pkey != IB_DEFAULT_PKEY) || + (!sm->p_subn->opt.allow_both_pkeys && + ib_pkey_get_base(p_pending->pkey) != IB_DEFAULT_PARTIAL_PKEY)) { + last_free_block_index = 0; + last_free_pkey_index = 1; + found = osm_pkey_find_next_free_entry(p_pkey_tbl, &last_free_block_index, &last_free_pkey_index); + } else + found = FALSE; + if (!found) + full = 1; + else { + block_index = last_free_block_index; + pkey_index = last_free_pkey_index; + if (block_index * IB_NUM_PKEY_ELEMENTS_IN_BLOCK + pkey_index >= pkey_mgr_get_physp_max_pkeys(p_physp)) { + full = 1; + found = FALSE; + } else { + OSM_LOG(p_log, OSM_LOG_INFO, + "Reusing PKeyTable block index %u pkey index %u " + "for pkey 0x%x on 0x%016" PRIx64 " port %u (%s)\n", + block_index, + pkey_index, + cl_ntoh16(p_pending->pkey), + cl_ntoh64(osm_node_get_node_guid(p_node)), + osm_physp_get_port_num(p_physp), + p_physp->p_node->print_desc); + + clear_accum_pkey_index(p_pkey_tbl, block_index * IB_NUM_PKEY_ELEMENTS_IN_BLOCK + pkey_index); + } + } + if (full) + OSM_LOG(p_log, OSM_LOG_ERROR, + "ERR 0512: " + "Failed to set PKey 0x%04x because Pkey table is full " + "for node 0x%016" PRIx64 " port %u (%s)\n", + cl_ntoh16(p_pending->pkey), + 
cl_ntoh64(osm_node_get_node_guid(p_node)), + osm_physp_get_port_num(p_physp), + p_physp->p_node->print_desc); + } else + found = TRUE; + } + } + + if (found) { + if (IB_SUCCESS != + osm_pkey_tbl_set_new_entry(p_pkey_tbl, block_index, + pkey_index, + p_pending->pkey)) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0505: " + "Failed to set PKey 0x%04x in block %u idx %u " + "for node 0x%016" PRIx64 " port %u (%s)\n", + cl_ntoh16(p_pending->pkey), block_index, + pkey_index, + cl_ntoh64(osm_node_get_node_guid + (p_node)), + osm_physp_get_port_num(p_physp), + p_physp->p_node->print_desc); + } + if (ptr == NULL && + CL_SUCCESS != + osm_pkey_tbl_set_accum_pkeys(p_pkey_tbl, + p_pending->pkey, + block_index * IB_NUM_PKEY_ELEMENTS_IN_BLOCK + pkey_index)) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0508: " + "Failed to set accum_pkeys PKey 0x%04x " + "in block %u idx %u for node 0x%016" + PRIx64 " port %u (%s)\n", + cl_ntoh16(p_pending->pkey), block_index, + pkey_index, + cl_ntoh64(osm_node_get_node_guid(p_node)), + osm_physp_get_port_num(p_physp), + p_physp->p_node->print_desc); + } + } + free(p_pending); + p_pending = + (osm_pending_pkey_t *) cl_qlist_remove_head(&p_pkey_tbl-> + pending); + } + + p_pkey_tbl->indx0_pkey = 0; + /* now look for changes and store */ + for (block_index = 0; block_index < num_of_blocks; block_index++) { + block = osm_pkey_tbl_block_get(p_pkey_tbl, block_index); + new_block = osm_pkey_tbl_new_block_get(p_pkey_tbl, block_index); + if (!new_block) + new_block = &empty_block; + if (block && !memcmp(new_block, block, sizeof(*block))) + continue; + + status = + pkey_mgr_update_pkey_entry(sm, p_physp, new_block, + block_index); + if (status == IB_SUCCESS) + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Updated pkey table block %u for node 0x%016" + PRIx64 " port %u (%s)\n", block_index, + cl_ntoh64(osm_node_get_node_guid(p_node)), + osm_physp_get_port_num(p_physp), + p_physp->p_node->print_desc); + else { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0506: " + 
"pkey_mgr_update_pkey_entry() failed to update " + "pkey table block %u for node 0x%016" PRIx64 + " port %u (%s)\n", block_index, + cl_ntoh64(osm_node_get_node_guid(p_node)), + osm_physp_get_port_num(p_physp), + p_physp->p_node->print_desc); + ret = -1; + } + } + + return ret; +} + +static int last_used_pkey_index(const osm_port_t * const p_port, + const osm_pkey_tbl_t * p_pkey_tbl, + uint16_t * p_last_index) +{ + ib_pkey_table_t *last_block; + uint16_t index, last_index = 0; + + CL_ASSERT(p_last_index); + + last_block = osm_pkey_tbl_new_block_get(p_pkey_tbl, + p_pkey_tbl->used_blocks - 1); + if (!last_block) + return 1; + + if (p_pkey_tbl->used_blocks == p_pkey_tbl->max_blocks) + last_index = cl_ntoh16(p_port->p_node->node_info.partition_cap) % IB_NUM_PKEY_ELEMENTS_IN_BLOCK; + if (last_index == 0) + last_index = IB_NUM_PKEY_ELEMENTS_IN_BLOCK; + index = last_index; + do { + index--; + if (!ib_pkey_is_invalid(last_block->pkey_entry[index])) + break; + } while (index != 0); + + *p_last_index = index; + return 0; +} + +static int update_peer_block(osm_log_t * p_log, osm_sm_t * sm, + osm_physp_t * peer, + osm_pkey_tbl_t * p_peer_pkey_tbl, + ib_pkey_table_t * new_peer_block, + uint16_t peer_block_idx, osm_node_t * p_node) +{ + int ret = 0; + ib_pkey_table_t *peer_block; + + peer_block = osm_pkey_tbl_block_get(p_peer_pkey_tbl, peer_block_idx); + if (!peer_block || + memcmp(peer_block, new_peer_block, sizeof(*peer_block))) { + if (pkey_mgr_update_pkey_entry(sm, peer, new_peer_block, + peer_block_idx) != IB_SUCCESS) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0509: " + "pkey_mgr_update_pkey_entry() failed to update " + "pkey table block %u for node 0x%016" + PRIx64 " port %u (%s)\n", + peer_block_idx, + cl_ntoh64(osm_node_get_node_guid(p_node)), + osm_physp_get_port_num(peer), + p_node->print_desc); + ret = -1; + } + } + + return ret; +} + +static int new_pkey_exists(osm_pkey_tbl_t * p_pkey_tbl, ib_net16_t pkey) +{ + uint16_t num_blocks; + uint16_t block_index; + 
ib_pkey_table_t *block; + uint16_t pkey_idx; + + num_blocks = (uint16_t) cl_ptr_vector_get_size(&p_pkey_tbl->new_blocks); + for (block_index = 0; block_index < num_blocks; block_index++) { + block = osm_pkey_tbl_new_block_get(p_pkey_tbl, block_index); + if (!block) + continue; + + for (pkey_idx = 0; pkey_idx < IB_NUM_PKEY_ELEMENTS_IN_BLOCK; + pkey_idx++) { + if (block->pkey_entry[pkey_idx] == pkey) + return 1; + } + } + return 0; +} + +static int pkey_mgr_update_peer_port(osm_log_t * p_log, osm_sm_t * sm, + const osm_subn_t * p_subn, + const osm_port_t * const p_port, + osm_partition_enforce_type_enum enforce_type) +{ + osm_physp_t *p_physp, *peer; + osm_node_t *p_node; + ib_pkey_table_t *block; + const osm_pkey_tbl_t *p_pkey_tbl; + osm_pkey_tbl_t *p_peer_pkey_tbl; + uint16_t block_index, peer_block_idx; + uint16_t peer_max_blocks; + uint16_t last_index; + ib_pkey_table_t new_peer_block; + uint16_t pkey_idx, peer_pkey_idx; + ib_net16_t pkey, full_pkey; + int ret = 0, loop_exit = 0; + + p_physp = p_port->p_physp; + if (!p_physp) + return -1; + peer = osm_physp_get_remote(p_physp); + if (!peer) + return -1; + p_node = osm_physp_get_node_ptr(peer); + if (!p_node->sw || !p_node->sw->switch_info.enforce_cap) + return 0; + + if (enforce_type == OSM_PARTITION_ENFORCE_TYPE_OFF) { + pkey_mgr_enforce_partition(p_log, sm, peer, OSM_PARTITION_ENFORCE_TYPE_OFF); + return ret; + } + + p_pkey_tbl = osm_physp_get_pkey_tbl(p_physp); + peer_max_blocks = pkey_mgr_get_physp_max_blocks(peer); + p_peer_pkey_tbl = &peer->pkeys; + peer_block_idx = 0; + peer_pkey_idx = 0; + for (block_index = 0; block_index < p_pkey_tbl->used_blocks; + block_index++) { + if (loop_exit) + break; + block = osm_pkey_tbl_new_block_get(p_pkey_tbl, block_index); + if (!block) + continue; + for (pkey_idx = 0; pkey_idx < IB_NUM_PKEY_ELEMENTS_IN_BLOCK; + pkey_idx++) { + pkey = block->pkey_entry[pkey_idx]; + if (ib_pkey_is_invalid(pkey)) + continue; + if (!ib_pkey_is_full_member(pkey)) { + full_pkey = pkey | 
IB_PKEY_TYPE_MASK; + if (new_pkey_exists(&p_physp->pkeys, full_pkey)) + continue; + } + new_peer_block.pkey_entry[peer_pkey_idx] = pkey; + if (peer_block_idx >= peer_max_blocks) { + loop_exit = 1; + break; + } + if (++peer_pkey_idx == IB_NUM_PKEY_ELEMENTS_IN_BLOCK) { + if (update_peer_block(p_log, sm, peer, + p_peer_pkey_tbl, + &new_peer_block, + peer_block_idx, p_node)) + ret = -1; + peer_pkey_idx = 0; + peer_block_idx++; + } + } + } + + if (peer_block_idx < peer_max_blocks) { + if (peer_pkey_idx) { + /* Handle partial last block */ + for (; peer_pkey_idx < IB_NUM_PKEY_ELEMENTS_IN_BLOCK; + peer_pkey_idx++) + new_peer_block.pkey_entry[peer_pkey_idx] = 0; + if (update_peer_block(p_log, sm, peer, p_peer_pkey_tbl, + &new_peer_block, peer_block_idx, + p_node)) + ret = -1; + } else + peer_block_idx--; + + p_peer_pkey_tbl->used_blocks = peer_block_idx + 1; + if (p_peer_pkey_tbl->used_blocks == peer_max_blocks) { + /* Is last used pkey index beyond switch peer port capacity ? */ + if (!last_used_pkey_index(p_port, p_peer_pkey_tbl, + &last_index)) { + last_index += peer_block_idx * IB_NUM_PKEY_ELEMENTS_IN_BLOCK; + if (cl_ntoh16(p_node->sw->switch_info.enforce_cap) <= last_index) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0507: " + "Not enough pkey entries (%u <= %u) on switch 0x%016" + PRIx64 " port %u (%s). 
Clearing Enforcement bit\n", + cl_ntoh16(p_node->sw->switch_info.enforce_cap), + last_index, + cl_ntoh64(osm_node_get_node_guid(p_node)), + osm_physp_get_port_num(peer), + p_node->print_desc); + enforce_type = OSM_PARTITION_ENFORCE_TYPE_OFF; + ret = -1; + } + } + } + } else { + p_peer_pkey_tbl->used_blocks = peer_max_blocks; + enforce_type = OSM_PARTITION_ENFORCE_TYPE_OFF; + } + + if (!ret) + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Pkey table was successfully updated for node 0x%016" + PRIx64 " port %u (%s)\n", + cl_ntoh64(osm_node_get_node_guid(p_node)), + osm_physp_get_port_num(peer), p_node->print_desc); + + if (pkey_mgr_enforce_partition(p_log, sm, peer, enforce_type)) + ret = -1; + + return ret; +} + +int osm_pkey_mgr_process(IN osm_opensm_t * p_osm) +{ + cl_qmap_t *p_tbl; + cl_map_item_t *p_next; + osm_prtn_t *p_prtn; + osm_port_t *p_port; + osm_switch_t *p_sw; + osm_physp_t *p_physp; + osm_pkey_tbl_t *p_pkey_tbl; + osm_node_t *p_remote_node; + uint8_t i; + int ret = 0; + + CL_ASSERT(p_osm); + + OSM_LOG_ENTER(&p_osm->log); + + CL_PLOCK_EXCL_ACQUIRE(&p_osm->lock); + + if (osm_prtn_make_partitions(&p_osm->log, &p_osm->subn) != IB_SUCCESS) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "ERR 0510: " + "osm_prtn_make_partitions() failed\n"); + ret = -1; + goto _err; + } + + if (!p_osm->subn.opt.keep_pkey_indexes) { + p_tbl = &p_osm->subn.port_guid_tbl; + p_next = cl_qmap_head(p_tbl); + while (p_next != cl_qmap_end(p_tbl)) { + p_port = (osm_port_t *) p_next; + p_next = cl_qmap_next(p_next); + if (!(p_physp = p_port->p_physp)) + continue; + p_pkey_tbl = &p_physp->pkeys; + cl_map_remove_all(&p_pkey_tbl->keys); + cl_map_remove_all(&p_pkey_tbl->accum_pkeys); + p_pkey_tbl->last_pkey_idx=0; + } + } + + /* populate the pending pkey entries by scanning all partitions */ + p_tbl = &p_osm->subn.prtn_pkey_tbl; + p_next = cl_qmap_head(p_tbl); + while (p_next != cl_qmap_end(p_tbl)) { + p_prtn = (osm_prtn_t *) p_next; + p_next = cl_qmap_next(p_next); + 
pkey_mgr_process_partition_table(&p_osm->log, &p_osm->sm, + p_prtn, FALSE); + pkey_mgr_process_partition_table(&p_osm->log, &p_osm->sm, + p_prtn, TRUE); + } + + /* calculate and set new pkey tables */ + p_tbl = &p_osm->subn.port_guid_tbl; + p_next = cl_qmap_head(p_tbl); + while (p_next != cl_qmap_end(p_tbl)) { + p_port = (osm_port_t *) p_next; + p_next = cl_qmap_next(p_next); + if (pkey_mgr_update_port(&p_osm->log, &p_osm->sm, p_port)) + ret = -1; + if ((osm_node_get_type(p_port->p_node) != IB_NODE_TYPE_SWITCH) + && pkey_mgr_update_peer_port(&p_osm->log, &p_osm->sm, + &p_osm->subn, p_port, + p_osm->subn.opt.part_enforce_enum)) + ret = -1; + } + + /* clear partition enforcement on inter-switch links */ + p_tbl = &p_osm->subn.sw_guid_tbl; + p_next = cl_qmap_head(p_tbl); + while (p_next != cl_qmap_end(p_tbl)) { + p_sw = (osm_switch_t *) p_next; + p_next = cl_qmap_next(p_next); + for (i = 1; i < p_sw->num_ports; i++) { + p_physp = osm_node_get_physp_ptr(p_sw->p_node, i); + if (p_physp && p_physp->p_remote_physp) + p_remote_node = p_physp->p_remote_physp->p_node; + else + continue; + + if (osm_node_get_type(p_remote_node) != IB_NODE_TYPE_SWITCH) + continue; + + if(! (p_physp->port_info.vl_enforce & 0xc )) + continue; + + /* clear partition enforcement */ + if (pkey_mgr_enforce_partition(&p_osm->log, &p_osm->sm, p_physp, OSM_PARTITION_ENFORCE_TYPE_OFF)) + ret = -1; + } + } +_err: + CL_PLOCK_RELEASE(&p_osm->lock); + OSM_LOG_EXIT(&p_osm->log); + return ret; +} diff --git a/opensm/osm_pkey_rcv.c b/opensm/osm_pkey_rcv.c new file mode 100644 index 0000000..b64ed7b --- /dev/null +++ b/opensm/osm_pkey_rcv.c @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_PKEY_RCV_C +#include +#include +#include +#include +#include +#include + +/* + * WE ONLY RECEIVE GET or SET responses + */ +void osm_pkey_rcv_process(IN void *context, IN void *data) +{ + osm_sm_t *sm = context; + osm_madw_t *p_madw = data; + ib_pkey_table_t *p_pkey_tbl; + ib_smp_t *p_smp; + osm_port_t *p_port; + osm_physp_t *p_physp; + osm_node_t *p_node; + osm_pkey_context_t *p_context; + ib_net64_t port_guid; + ib_net64_t node_guid; + uint8_t port_num; + uint16_t block_num; + + CL_ASSERT(sm); + + OSM_LOG_ENTER(sm->p_log); + + CL_ASSERT(p_madw); + + p_smp = osm_madw_get_smp_ptr(p_madw); + + p_context = osm_madw_get_pkey_context_ptr(p_madw); + p_pkey_tbl = ib_smp_get_payload_ptr(p_smp); + + port_guid = p_context->port_guid; + node_guid = p_context->node_guid; + + CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_P_KEY_TABLE); + + if (ib_smp_get_status(p_smp)) { + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "MAD status 0x%x received\n", + cl_ntoh16(ib_smp_get_status(p_smp))); + goto Exit2; + } + + cl_plock_excl_acquire(sm->p_lock); + p_port = osm_get_port_by_guid(sm->p_subn, port_guid); + if (!p_port) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4806: " + "No port object for port with GUID 0x%" PRIx64 + "\n\t\t\t\tfor parent node GUID 0x%" PRIx64 + ", TID 0x%" PRIx64 "\n", cl_ntoh64(port_guid), + cl_ntoh64(node_guid), cl_ntoh64(p_smp->trans_id)); + goto Exit; + } + + p_node = p_port->p_node; + CL_ASSERT(p_node); + + block_num = (uint16_t) ((cl_ntoh32(p_smp->attr_mod)) & 0x0000FFFF); + /* in case of a non switch node the attr modifier should be ignored */ + if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) { + port_num = + (uint8_t) (((cl_ntoh32(p_smp->attr_mod)) & 0x00FF0000) >> + 16); + p_physp = osm_node_get_physp_ptr(p_node, port_num); + } else { + p_physp = p_port->p_physp; + port_num = p_physp->port_num; + } + + /* + We do not care 
if this is a result of a set or get - + all we want is to update the subnet. + */ + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Got GetResp(PKey) block:%u port_num %u with GUID 0x%" + PRIx64 " for parent node GUID 0x%" PRIx64 ", TID 0x%" + PRIx64 "\n", block_num, port_num, cl_ntoh64(port_guid), + cl_ntoh64(node_guid), cl_ntoh64(p_smp->trans_id)); + + /* + Determine if we encountered a new Physical Port. + If so, ignore it. + */ + if (!p_physp) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4807: " + "Got invalid port number %u\n", port_num); + goto Exit; + } + + osm_dump_pkey_block_v2(sm->p_log, port_guid, block_num, port_num, + p_pkey_tbl, FILE_ID, OSM_LOG_DEBUG); + + osm_physp_set_pkey_tbl(sm->p_log, sm->p_subn, + p_physp, p_pkey_tbl, block_num, + p_context->set_method); + +Exit: + cl_plock_release(sm->p_lock); + +Exit2: + OSM_LOG_EXIT(sm->p_log); +} diff --git a/opensm/osm_port.c b/opensm/osm_port.c new file mode 100644 index 0000000..7b1d58f --- /dev/null +++ b/opensm/osm_port.c @@ -0,0 +1,793 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (C) 2012-2017 Tokyo Institute of Technology. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_physp_t. + * This object represents an Infiniband Port. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_PORT_C +#include +#include +#include +#include +#include +#include + +void osm_physp_construct(IN osm_physp_t * p_physp) +{ + memset(p_physp, 0, sizeof(*p_physp)); + osm_dr_path_construct(&p_physp->dr_path); + cl_ptr_vector_construct(&p_physp->slvl_by_port); + osm_pkey_tbl_construct(&p_physp->pkeys); +} + +void osm_physp_destroy(IN osm_physp_t * p_physp) +{ + size_t num_slvl, i; + + /* the physp might be uninitialized */ + if (p_physp->port_guid) { + if (p_physp->p_guids) + free(p_physp->p_guids); + + /* free the SL2VL Tables */ + num_slvl = cl_ptr_vector_get_size(&p_physp->slvl_by_port); + for (i = 0; i < num_slvl; i++) + free(cl_ptr_vector_get(&p_physp->slvl_by_port, i)); + cl_ptr_vector_destroy(&p_physp->slvl_by_port); + + /* free the P_Key Tables */ + osm_pkey_tbl_destroy(&p_physp->pkeys); + + memset(p_physp, 0, sizeof(*p_physp)); + osm_dr_path_construct(&p_physp->dr_path); /* clear dr_path */ + } +} + +void osm_physp_init(IN osm_physp_t * p_physp, 
IN ib_net64_t port_guid, + IN uint8_t port_num, IN const struct osm_node *p_node, + IN osm_bind_handle_t h_bind, IN uint8_t hop_count, + IN const uint8_t * p_initial_path) +{ + uint16_t num_slvl, i; + ib_slvl_table_t *p_slvl; + + CL_ASSERT(p_node); + + osm_physp_construct(p_physp); + p_physp->port_guid = port_guid; + p_physp->port_num = port_num; + p_physp->healthy = TRUE; + p_physp->need_update = 2; + p_physp->p_node = (struct osm_node *)p_node; + + osm_dr_path_init(&p_physp->dr_path, hop_count, p_initial_path); + + /* allocate enough SL2VL tables */ + if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) + /* we need node num ports + 1 SL2VL tables */ + num_slvl = osm_node_get_num_physp(p_node) + 1; + else + /* An end node - we need only one SL2VL */ + num_slvl = 1; + + cl_ptr_vector_init(&p_physp->slvl_by_port, num_slvl, 1); + for (i = 0; i < num_slvl; i++) { + p_slvl = (ib_slvl_table_t *) malloc(sizeof(ib_slvl_table_t)); + if (!p_slvl) + break; + memset(p_slvl, 0, sizeof(ib_slvl_table_t)); + cl_ptr_vector_set(&p_physp->slvl_by_port, i, p_slvl); + } + + /* initialize the pkey table */ + osm_pkey_tbl_init(&p_physp->pkeys); +} + +void osm_port_delete(IN OUT osm_port_t ** pp_port) +{ + free(*pp_port); + *pp_port = NULL; +} + +osm_port_t *osm_port_new(IN const ib_node_info_t * p_ni, + IN osm_node_t * p_parent_node) +{ + osm_port_t *p_port; + ib_net64_t port_guid; + osm_physp_t *p_physp; + uint8_t port_num; + + p_port = malloc(sizeof(*p_port)); + if (!p_port) + return NULL; + + memset(p_port, 0, sizeof(*p_port)); + cl_qlist_init(&p_port->mcm_list); + p_port->p_node = (struct osm_node *)p_parent_node; + port_guid = p_ni->port_guid; + p_port->guid = port_guid; + port_num = p_ni->node_type == IB_NODE_TYPE_SWITCH ? + 0 : ib_node_info_get_local_port_num(p_ni); + + /* + Get the pointers to the physical node objects "owned" by this + logical port GUID. + For switches, port '0' is owned; for HCA's and routers, + only the singular part that has this GUID is owned. 
+ */ + p_physp = osm_node_get_physp_ptr(p_parent_node, port_num); + if (!p_physp) { + free(p_port); + return NULL; + } + + CL_ASSERT(port_guid == osm_physp_get_port_guid(p_physp)); + p_port->p_physp = p_physp; + + return p_port; +} + +void osm_port_get_lid_range_ho(IN const osm_port_t * p_port, + IN uint16_t * p_min_lid, IN uint16_t * p_max_lid) +{ + uint8_t lmc; + + *p_min_lid = cl_ntoh16(osm_port_get_base_lid(p_port)); + lmc = osm_port_get_lmc(p_port); + *p_max_lid = (uint16_t) (*p_min_lid + (1 << lmc) - 1); +} + +uint8_t osm_physp_calc_link_mtu(IN osm_log_t * p_log, + IN const osm_physp_t * p_physp, + IN uint8_t current_mtu) +{ + const osm_physp_t *p_remote_physp; + uint8_t mtu; + uint8_t remote_mtu; + + OSM_LOG_ENTER(p_log); + + p_remote_physp = osm_physp_get_remote(p_physp); + if (p_remote_physp) { + /* use the available MTU */ + mtu = ib_port_info_get_mtu_cap(&p_physp->port_info); + + remote_mtu = + ib_port_info_get_mtu_cap(&p_remote_physp->port_info); + + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Remote port 0x%016" PRIx64 " port = %u : " + "MTU = %u. This Port MTU: %u\n", + cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)), + osm_physp_get_port_num(p_remote_physp), + remote_mtu, mtu); + + if (mtu != remote_mtu) { + if (mtu > remote_mtu) + mtu = remote_mtu; + if (mtu != current_mtu) + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "MTU mismatch between ports." + "\n\t\t\t\tPort 0x%016" PRIx64 ", port %u" + " and port 0x%016" PRIx64 ", port %u." + "\n\t\t\t\tUsing lower MTU of %u\n", + cl_ntoh64(osm_physp_get_port_guid(p_physp)), + osm_physp_get_port_num(p_physp), + cl_ntoh64(osm_physp_get_port_guid + (p_remote_physp)), + osm_physp_get_port_num(p_remote_physp), mtu); + } + } else + mtu = ib_port_info_get_neighbor_mtu(&p_physp->port_info); + + if (mtu == 0) { + OSM_LOG(p_log, OSM_LOG_DEBUG, "ERR 4101: " + "Invalid MTU = 0. 
Forcing correction to 256\n"); + mtu = 1; + } + + OSM_LOG_EXIT(p_log); + return mtu; +} + +uint8_t osm_physp_calc_link_op_vls(IN osm_log_t * p_log, + IN const osm_subn_t * p_subn, + IN const osm_physp_t * p_physp, + IN uint8_t current_op_vls) +{ + const osm_physp_t *p_remote_physp; + uint8_t op_vls; + uint8_t remote_op_vls; + + OSM_LOG_ENTER(p_log); + + p_remote_physp = osm_physp_get_remote(p_physp); + if (p_remote_physp) { + /* use the available VLCap */ + op_vls = ib_port_info_get_vl_cap(&p_physp->port_info); + + remote_op_vls = + ib_port_info_get_vl_cap(&p_remote_physp->port_info); + + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Remote port 0x%016" PRIx64 " port = 0x%X : " + "VL_CAP = %u. This port VL_CAP = %u\n", + cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)), + osm_physp_get_port_num(p_remote_physp), + remote_op_vls, op_vls); + + if (op_vls != remote_op_vls) { + if (op_vls > remote_op_vls) + op_vls = remote_op_vls; + if (op_vls != current_op_vls) + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "OP_VLS mismatch between ports." + "\n\t\t\t\tPort 0x%016" PRIx64 ", port 0x%X" + " and port 0x%016" PRIx64 ", port 0x%X." + "\n\t\t\t\tUsing lower OP_VLS of %u\n", + cl_ntoh64(osm_physp_get_port_guid(p_physp)), + osm_physp_get_port_num(p_physp), + cl_ntoh64(osm_physp_get_port_guid + (p_remote_physp)), + osm_physp_get_port_num(p_remote_physp), op_vls); + } + } else + op_vls = ib_port_info_get_op_vls(&p_physp->port_info); + + if (op_vls == 0) { + /* for non compliant implementations */ + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Invalid OP_VLS = 0. 
Forcing correction to 1 (VL0)\n"); + op_vls = 1; + } + + /* support user limitation of max_op_vls */ + if (op_vls > p_subn->opt.max_op_vls) + op_vls = p_subn->opt.max_op_vls; + + OSM_LOG_EXIT(p_log); + return op_vls; +} + +static inline uint64_t ptr_to_key(void const *p) +{ + uint64_t k = 0; + + memcpy(&k, p, sizeof(void *)); + return k; +} + +#if 0 +static inline void *key_to_ptr(uint64_t k) +{ + void *p = 0; + + memcpy(&p, &k, sizeof(void *)); + return p; +} +#endif + +/********************************************************************** + Traverse the fabric from the SM node following the DR path given and + add every phys port traversed to the map. Avoid tracking the first and + last phys ports (going into the first switch and into the target port). + **********************************************************************/ +static cl_status_t physp_get_dr_physp_set(IN osm_log_t * p_log, + IN osm_subn_t const *p_subn, + IN osm_dr_path_t const *p_path, + OUT cl_map_t * p_physp_map) +{ + osm_port_t *p_port; + osm_physp_t *p_physp; + osm_node_t *p_node; + uint8_t hop; + cl_status_t status = CL_SUCCESS; + + OSM_LOG_ENTER(p_log); + + /* find the OSM node */ + p_port = osm_get_port_by_guid(p_subn, p_subn->sm_port_guid); + if (!p_port) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4103: " + "Failed to find the SM own port by guid\n"); + status = CL_ERROR; + goto Exit; + } + + /* get the node of the SM */ + p_node = p_port->p_node; + + /* + traverse the path adding the nodes to the table + start after the first dummy hop and stop just before the + last one + */ + for (hop = 1; hop < p_path->hop_count - 1; hop++) { + /* go out using the phys port of the path */ + p_physp = osm_node_get_physp_ptr(p_node, p_path->path[hop]); + + /* make sure we got a valid port and it has a remote port */ + if (!p_physp) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4104: " + "DR Traversal stopped on invalid port at hop:%u\n", + hop); + status = CL_ERROR; + goto Exit; + } + + /* we track the ports we 
go out along the path */ + if (hop > 1) + cl_map_insert(p_physp_map, ptr_to_key(p_physp), NULL); + + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Traversed through node: 0x%016" PRIx64 + " port:%u\n", + cl_ntoh64(p_node->node_info.node_guid), + p_path->path[hop]); + + if (!(p_physp = osm_physp_get_remote(p_physp))) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4106: " + "DR Traversal stopped on missing remote physp at hop:%u\n", + hop); + status = CL_ERROR; + goto Exit; + } + + p_node = osm_physp_get_node_ptr(p_physp); + } + +Exit: + OSM_LOG_EXIT(p_log); + return status; +} + +static void physp_update_new_dr_path(IN osm_physp_t const *p_dest_physp, + IN cl_map_t * p_visited_map, + IN osm_bind_handle_t * h_bind) +{ + cl_list_t tmpPortsList; + osm_physp_t *p_physp, *p_src_physp = NULL; + uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX]; + uint8_t i = 0; + osm_dr_path_t *p_dr_path; + + cl_list_construct(&tmpPortsList); + cl_list_init(&tmpPortsList, 10); + + cl_list_insert_head(&tmpPortsList, p_dest_physp); + /* get the output port where we need to come from */ + p_physp = (osm_physp_t *) cl_map_get(p_visited_map, + ptr_to_key(p_dest_physp)); + while (p_physp != NULL) { + cl_list_insert_head(&tmpPortsList, p_physp); + /* get the input port through where we reached the output port */ + p_src_physp = p_physp; + p_physp = (osm_physp_t *) cl_map_get(p_visited_map, + ptr_to_key(p_physp)); + /* if we reached a null p_physp - this means we are at the begining + of the path. Break. 
*/ + if (p_physp == NULL) + break; + /* get the output port */ + p_physp = (osm_physp_t *) cl_map_get(p_visited_map, + ptr_to_key(p_physp)); + } + + memset(path_array, 0, sizeof(path_array)); + p_physp = (osm_physp_t *) cl_list_remove_head(&tmpPortsList); + while (p_physp != NULL) { + i++; + path_array[i] = p_physp->port_num; + p_physp = (osm_physp_t *) cl_list_remove_head(&tmpPortsList); + } + if (p_src_physp) { + p_dr_path = osm_physp_get_dr_path_ptr(p_src_physp); + osm_dr_path_init(p_dr_path, i, path_array); + } + + cl_list_destroy(&tmpPortsList); +} + +void osm_physp_replace_dr_path_with_alternate_dr_path(IN osm_log_t * p_log, + IN osm_subn_t const + *p_subn, IN osm_physp_t const + *p_dest_physp, + IN osm_bind_handle_t * + h_bind) +{ + cl_map_t physp_map; + cl_map_t visited_map; + osm_dr_path_t *p_dr_path; + cl_list_t *p_currPortsList; + cl_list_t *p_nextPortsList; + osm_port_t *p_port; + osm_physp_t *p_physp, *p_remote_physp; + ib_net64_t port_guid; + boolean_t next_list_is_full = TRUE, reached_dest = FALSE; + uint8_t num_ports, port_num; + + p_nextPortsList = (cl_list_t *) malloc(sizeof(cl_list_t)); + if (!p_nextPortsList) + return; + + /* + initialize the map of all port participating in current dr path + not including first and last switches + */ + cl_map_construct(&physp_map); + cl_map_init(&physp_map, 4); + cl_map_construct(&visited_map); + cl_map_init(&visited_map, 4); + p_dr_path = osm_physp_get_dr_path_ptr(p_dest_physp); + physp_get_dr_physp_set(p_log, p_subn, p_dr_path, &physp_map); + + /* + BFS from OSM port until we find the target physp but avoid + going through mapped ports + */ + cl_list_construct(p_nextPortsList); + cl_list_init(p_nextPortsList, 10); + + port_guid = p_subn->sm_port_guid; + + CL_ASSERT(port_guid); + + p_port = osm_get_port_by_guid(p_subn, port_guid); + if (!p_port) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4105: No SM port object\n"); + goto Exit; + } + + /* + HACK: We are assuming SM is running on HCA, so when getting the default + 
port we'll get the port connected to the rest of the subnet. If SM is + running on SWITCH - we should try to get a dr path from all switch ports. + */ + p_physp = p_port->p_physp; + + CL_ASSERT(p_physp); + + cl_list_insert_tail(p_nextPortsList, p_physp); + + while (next_list_is_full == TRUE) { + next_list_is_full = FALSE; + p_currPortsList = p_nextPortsList; + p_nextPortsList = (cl_list_t *) malloc(sizeof(cl_list_t)); + if (!p_nextPortsList) { + p_nextPortsList = p_currPortsList; + goto Exit; + } + cl_list_construct(p_nextPortsList); + cl_list_init(p_nextPortsList, 10); + p_physp = (osm_physp_t *) cl_list_remove_head(p_currPortsList); + while (p_physp != NULL) { + /* If we are in a switch - need to go out through all + the other physical ports of the switch */ + num_ports = osm_node_get_num_physp(p_physp->p_node); + + for (port_num = 1; port_num < num_ports; port_num++) { + if (osm_node_get_type(p_physp->p_node) == + IB_NODE_TYPE_SWITCH) + p_remote_physp = + osm_node_get_physp_ptr(p_physp-> + p_node, + port_num); + else + /* this is HCA or router - the remote port is just the port connected + on the other side */ + p_remote_physp = + p_physp->p_remote_physp; + + /* + make sure that all of the following occurred: + 1. The port isn't NULL + 2. This is not the port we came from + 3. The port is not in the physp_map + 4. This port haven't been visited before + */ + if (p_remote_physp && + p_remote_physp != p_physp && + cl_map_get(&physp_map, + ptr_to_key(p_remote_physp)) + == NULL + && cl_map_get(&visited_map, + ptr_to_key + (p_remote_physp)) == NULL) { + /* Insert the port into the visited_map, and save its source port */ + cl_map_insert(&visited_map, + ptr_to_key + (p_remote_physp), + p_physp); + + /* Is this the p_dest_physp? 
*/ + if (p_remote_physp == p_dest_physp) { + /* update the new dr path */ + physp_update_new_dr_path + (p_dest_physp, &visited_map, + h_bind); + reached_dest = TRUE; + break; + } + + /* add the p_remote_physp to the nextPortsList */ + cl_list_insert_tail(p_nextPortsList, + p_remote_physp); + next_list_is_full = TRUE; + } + } + + p_physp = (osm_physp_t *) + cl_list_remove_head(p_currPortsList); + if (reached_dest == TRUE) { + /* free the rest of the currPortsList */ + while (p_physp != NULL) + p_physp = (osm_physp_t *) + cl_list_remove_head + (p_currPortsList); + /* free the nextPortsList, if items were added to it */ + p_physp = (osm_physp_t *) + cl_list_remove_head(p_nextPortsList); + while (p_physp != NULL) + p_physp = (osm_physp_t *) + cl_list_remove_head + (p_nextPortsList); + next_list_is_full = FALSE; + } + } + cl_list_destroy(p_currPortsList); + free(p_currPortsList); + } + + /* cleanup */ +Exit: + cl_list_destroy(p_nextPortsList); + free(p_nextPortsList); + cl_map_destroy(&physp_map); + cl_map_destroy(&visited_map); +} + +boolean_t osm_link_is_healthy(IN const osm_physp_t * p_physp) +{ + osm_physp_t *p_remote_physp; + + CL_ASSERT(p_physp); + p_remote_physp = p_physp->p_remote_physp; + if (p_remote_physp != NULL) + return ((p_physp->healthy) & (p_remote_physp->healthy)); + /* the other side is not known - consider the link as healthy */ + return TRUE; +} + +boolean_t osm_link_is_throttled(IN osm_physp_t * p_physp, + IN const boolean_t subn_has_fdr10_enabled) +{ + osm_physp_t *p_remote; + uint8_t speed_physp, speed_remote, width_physp, width_remote; + uint8_t highest_speed, highest_width; + boolean_t physp_has_extended_speeds_capability; + boolean_t remote_has_extended_speeds_capability; + ib_port_info_t *p_physp_info, *p_remote_info; + + CL_ASSERT(p_physp); + p_remote = p_physp->p_remote_physp; + + /* the other side is not known - consider the link as unthrottled */ + if (!p_remote) + return FALSE; + + /* only SP0 (and not Sw Ext.) 
have a valid CapabilityMask */ + if (osm_node_get_type(p_physp->p_node) == IB_NODE_TYPE_SWITCH) + p_physp_info = + &(osm_node_get_physp_ptr(p_physp->p_node, 0)->port_info); + else + p_physp_info = &p_physp->port_info; + if (osm_node_get_type(p_remote->p_node) == IB_NODE_TYPE_SWITCH) + p_remote_info = + &(osm_node_get_physp_ptr(p_remote->p_node, 0)->port_info); + else + p_remote_info = &p_remote->port_info; + + physp_has_extended_speeds_capability = + p_physp_info->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS; + remote_has_extended_speeds_capability = + p_remote_info->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS; + + /* reset again to the original port_info */ + p_physp_info = &p_physp->port_info; + p_remote_info = &p_remote->port_info; + + /* first determine the enabled link speed/width of both sides */ + speed_physp = + (physp_has_extended_speeds_capability ? + ib_port_info_get_link_speed_ext_enabled(p_physp_info) << 4 : 0) + + (subn_has_fdr10_enabled ? + (p_physp->ext_port_info.link_speed_enabled & FDR10) << 3 : 0) + + ib_port_info_get_link_speed_enabled(p_physp_info); + width_physp = p_physp_info->link_width_enabled; + + speed_remote = + (remote_has_extended_speeds_capability ? + ib_port_info_get_link_speed_ext_enabled(p_remote_info) << 4 : 0) + + (subn_has_fdr10_enabled ? + (p_remote->ext_port_info.link_speed_enabled & FDR10) << 3 : 0) + + ib_port_info_get_link_speed_enabled(p_remote_info); + width_remote = p_remote_info->link_width_enabled; + + highest_speed = ib_get_highest_link_speed(speed_physp & speed_remote); + highest_width = ib_get_highest_link_width(width_physp & width_remote); + + /* and now determine the currently active link speed/width */ + speed_physp = + (physp_has_extended_speeds_capability ? + ib_port_info_get_link_speed_ext_active(p_physp_info) << 4 : 0) + + (subn_has_fdr10_enabled ? 
+ (p_physp->ext_port_info.link_speed_active & FDR10) << 3 : 0) + + ib_port_info_get_link_speed_active(p_physp_info); + speed_physp = ib_get_highest_link_speed(speed_physp); + width_physp = p_physp_info->link_width_active; + + speed_remote = + (remote_has_extended_speeds_capability ? + ib_port_info_get_link_speed_ext_active(p_remote_info) << 4 : 0) + + (subn_has_fdr10_enabled ? + (p_remote->ext_port_info.link_speed_active & FDR10) << 3 : 0) + + ib_port_info_get_link_speed_active(p_remote_info); + speed_remote = ib_get_highest_link_speed(speed_remote); + width_remote = p_remote_info->link_width_active; + + /* check if the link supports same speed in both directions + and whether or not it runs at maximum speed/width which is + enabled by both ends (if not then its considered 'throttled') + */ + if (speed_physp != speed_remote || speed_physp != highest_speed || + width_physp != width_remote || width_physp != highest_width) + return TRUE; + + return FALSE; +} + +void osm_physp_set_pkey_tbl(IN osm_log_t * p_log, IN const osm_subn_t * p_subn, + IN osm_physp_t * p_physp, + IN ib_pkey_table_t * p_pkey_tbl, + IN uint16_t block_num, + IN boolean_t is_set) +{ + uint16_t max_blocks; + + CL_ASSERT(p_pkey_tbl); + /* + (14.2.5.7) - the block number valid values are 0-2047, and are + further limited by the size of the P_Key table specified by + the PartitionCap on the node. + */ + if (!p_physp->p_node->sw || p_physp->port_num == 0) + /* + The maximum blocks is defined in the node info: partition cap + for CA, router, and switch management ports. + */ + max_blocks = + (cl_ntoh16(p_physp->p_node->node_info.partition_cap) + + IB_NUM_PKEY_ELEMENTS_IN_BLOCK - 1) + / IB_NUM_PKEY_ELEMENTS_IN_BLOCK; + else + /* + This is a switch, and not a management port. The maximum + blocks is defined in the switch info: partition enforcement + cap. 
+ */ + max_blocks = + (cl_ntoh16(p_physp->p_node->sw->switch_info.enforce_cap) + + IB_NUM_PKEY_ELEMENTS_IN_BLOCK - + 1) / IB_NUM_PKEY_ELEMENTS_IN_BLOCK; + + if (block_num >= max_blocks) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4108: " + "Got illegal update for block number:%u max:%u " + "for GUID: %" PRIx64 " port number:%u\n", + block_num, max_blocks, + cl_ntoh64(p_physp->p_node->node_info.node_guid), + p_physp->port_num); + return; + } + + /* decrement block received counter */ + if(!is_set) + p_physp->pkeys.rcv_blocks_cnt--; + osm_pkey_tbl_set(&p_physp->pkeys, block_num, p_pkey_tbl, + p_subn->opt.allow_both_pkeys); +} + +osm_alias_guid_t *osm_alias_guid_new(IN const ib_net64_t alias_guid, + IN osm_port_t *p_base_port) +{ + osm_alias_guid_t *p_alias_guid; + + p_alias_guid = calloc(1, sizeof(*p_alias_guid)); + if (p_alias_guid) { + p_alias_guid->alias_guid = alias_guid; + p_alias_guid->p_base_port = p_base_port; + } + return p_alias_guid; +} + +void osm_alias_guid_delete(IN OUT osm_alias_guid_t ** pp_alias_guid) +{ + free(*pp_alias_guid); + *pp_alias_guid = NULL; +} + +void osm_physp_set_port_info(IN osm_physp_t * p_physp, + IN const ib_port_info_t * p_pi, + IN const struct osm_sm * p_sm) +{ + CL_ASSERT(p_pi); + CL_ASSERT(osm_physp_is_valid(p_physp)); + + if (ib_port_info_get_port_state(p_pi) == IB_LINK_DOWN) { + /* If PortState is down, only copy PortState */ + /* and PortPhysicalState per C14-24-2.1 */ + ib_port_info_set_port_state(&p_physp->port_info, IB_LINK_DOWN); + ib_port_info_set_port_phys_state + (ib_port_info_get_port_phys_state(p_pi), + &p_physp->port_info); + } else { + p_physp->port_info = *p_pi; + + /* The MKey in p_pi can only be considered valid if it's + * for a HCA/router or switch port 0, and it's either + * non-zero or the MKeyProtect bits are also zero. 
+ */ + if ((osm_node_get_type(p_physp->p_node) != + IB_NODE_TYPE_SWITCH || p_physp->port_num == 0) && + (p_pi->m_key != 0 || ib_port_info_get_mpb(p_pi) == 0)) + osm_db_guid2mkey_set(p_sm->p_subn->p_g2m, + cl_ntoh64(p_physp->port_guid), + cl_ntoh64(p_pi->m_key)); + } +} diff --git a/opensm/osm_port_info_rcv.c b/opensm/osm_port_info_rcv.c new file mode 100644 index 0000000..d3eca8c --- /dev/null +++ b/opensm/osm_port_info_rcv.c @@ -0,0 +1,825 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2012 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_pi_rcv_t. + * This object represents the PortInfo Receiver object. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_PORT_INFO_RCV_C +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void pi_rcv_check_and_fix_lid(osm_log_t * log, ib_port_info_t * pi, + osm_physp_t * p) +{ + if (PF(cl_ntoh16(pi->base_lid) > IB_LID_UCAST_END_HO)) { + OSM_LOG(log, OSM_LOG_ERROR, "ERR 0F04: " + "Got invalid base LID %u from the network. " + "Corrected to %u\n", cl_ntoh16(pi->base_lid), + cl_ntoh16(p->port_info.base_lid)); + pi->base_lid = p->port_info.base_lid; + } +} + +static void pi_rcv_process_endport(IN osm_sm_t * sm, IN osm_physp_t * p_physp, + IN const ib_port_info_t * p_pi) +{ + osm_madw_context_t context; + ib_api_status_t status; + ib_net64_t port_guid; + int extended; + uint8_t rate, mtu, mpb; + unsigned data_vls; + cl_qmap_t *p_sm_tbl; + osm_remote_sm_t *p_sm; + + OSM_LOG_ENTER(sm->p_log); + + port_guid = osm_physp_get_port_guid(p_physp); + + /* HACK extended port 0 should be handled too! 
*/ + if (osm_physp_get_port_num(p_physp) != 0 && + ib_port_info_get_port_state(p_pi) != IB_LINK_DOWN) { + /* track the minimal endport MTU, rate, and operational VLs */ + mtu = ib_port_info_get_mtu_cap(p_pi); + if (mtu < sm->p_subn->min_ca_mtu) { + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Setting endport minimal MTU to:%u defined by port:0x%" + PRIx64 "\n", mtu, cl_ntoh64(port_guid)); + sm->p_subn->min_ca_mtu = mtu; + } + + extended = p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS; + rate = ib_port_info_compute_rate(p_pi, extended); + if (ib_path_compare_rates(rate, sm->p_subn->min_ca_rate) < 0) { + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Setting endport minimal rate to:%u defined by port:0x%" + PRIx64 "\n", rate, cl_ntoh64(port_guid)); + sm->p_subn->min_ca_rate = rate; + } + + data_vls = 1U << (ib_port_info_get_vl_cap(p_pi) - 1); + if (data_vls > 1U << (sm->p_subn->opt.max_op_vls - 1)) + data_vls = 1U << (sm->p_subn->opt.max_op_vls - 1); + if (data_vls >= IB_MAX_NUM_VLS) + data_vls = IB_MAX_NUM_VLS - 1; + if ((uint8_t)data_vls < sm->p_subn->min_data_vls) { + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Setting endport minimal data VLs to:%u defined by port:0x%" + PRIx64 "\n", data_vls, cl_ntoh64(port_guid)); + sm->p_subn->min_data_vls = data_vls; + } + } + + /* Check M_Key vs M_Key protect, can we control the port ? */ + mpb = ib_port_info_get_mpb(p_pi); + if (mpb > 0 && p_pi->m_key == 0) { + OSM_LOG(sm->p_log, OSM_LOG_INFO, + "Port 0x%" PRIx64 " has unknown M_Key, protection level %u\n", + cl_ntoh64(port_guid), mpb); + } + + if (port_guid != sm->p_subn->sm_port_guid) { + p_sm_tbl = &sm->p_subn->sm_guid_tbl; + if (p_pi->capability_mask & IB_PORT_CAP_IS_SM) { + /* + * Before querying the SM - we want to make sure we + * clean its state, so if the querying fails we + * recognize that this SM is not active. 
+ */ + p_sm = + (osm_remote_sm_t *) cl_qmap_get(p_sm_tbl, + port_guid); + if (p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_tbl)) + /* clean it up */ + p_sm->smi.pri_state = + 0xF0 & p_sm->smi.pri_state; + if (sm->p_subn->opt.ignore_other_sm) + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Ignoring SM on port 0x%" PRIx64 "\n", + cl_ntoh64(port_guid)); + else { + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Detected another SM. Requesting SMInfo " + "from port 0x%" PRIx64 "\n", + cl_ntoh64(port_guid)); + + /* + This port indicates it's an SM and + it's not our own port. + Acquire the SMInfo Attribute. + */ + memset(&context, 0, sizeof(context)); + context.smi_context.set_method = FALSE; + context.smi_context.port_guid = port_guid; + status = osm_req_get(sm, + osm_physp_get_dr_path_ptr + (p_physp), + IB_MAD_ATTR_SM_INFO, 0, + FALSE, + ib_port_info_get_m_key(&p_physp->port_info), + 0, CL_DISP_MSGID_NONE, + &context); + + if (status != IB_SUCCESS) + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 0F05: " + "Failure requesting SMInfo (%s) " + "from port 0x%" PRIx64 "\n", + ib_get_err_str(status), + cl_ntoh64(port_guid)); + } + } else { + p_sm = + (osm_remote_sm_t *) cl_qmap_remove(p_sm_tbl, + port_guid); + if (p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_tbl)) + free(p_sm); + } + } + + OSM_LOG_EXIT(sm->p_log); +} + +/********************************************************************** + The plock must be held before calling this function. 
+**********************************************************************/
+/*
+ * Handle a PortInfo received for switch port 0.
+ *
+ * Stores the (LID-checked) PortInfo in p_physp, then issues one PortInfo
+ * Get per external port (1 .. num_physp - 1) over port 0's directed-route
+ * path, and finally runs the common endport processing on port 0 itself.
+ * A failed request is logged and the remaining ports are still queried.
+ */
+static void pi_rcv_process_switch_port0(IN osm_sm_t * sm,
+					IN osm_node_t * p_node,
+					IN osm_physp_t * p_physp,
+					IN ib_port_info_t * p_pi)
+{
+	osm_madw_context_t ctx;
+	ib_api_status_t rc;
+	uint8_t port_count;
+	uint8_t ext_port;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	/* port 0 is flagged for update: tell the subnet to ignore its LFTs */
+	if (p_physp->need_update) {
+		sm->p_subn->ignore_existing_lfts = TRUE;
+	}
+
+	pi_rcv_check_and_fix_lid(sm->p_log, p_pi, p_physp);
+
+	/* Update the PortInfo attribute */
+	osm_physp_set_port_info(p_physp, p_pi, sm);
+
+	/*
+	 * Base switch port 0: PortState is not used on BSP0, but just in
+	 * case it is DOWN take the complete PortInfo anyway.
+	 */
+	if (p_node->sw &&
+	    !ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info)) {
+		p_physp->port_info = *p_pi;
+	}
+
+	/* Context shared by every PortInfo request sent below */
+	ctx.pi_context.client_rereg = FALSE;
+	ctx.pi_context.active_transition = FALSE;
+	ctx.pi_context.light_sweep = FALSE;
+	ctx.pi_context.set_method = FALSE;
+	ctx.pi_context.port_guid = osm_physp_get_port_guid(p_physp);
+	ctx.pi_context.node_guid = osm_node_get_node_guid(p_node);
+
+	/* Now, query PortInfo for the switch external ports */
+	port_count = osm_node_get_num_physp(p_node);
+	ext_port = 1;
+	while (ext_port < port_count) {
+		rc = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
+				 IB_MAD_ATTR_PORT_INFO, cl_hton32(ext_port),
+				 FALSE,
+				 ib_port_info_get_m_key(&p_physp->port_info),
+				 0, CL_DISP_MSGID_NONE, &ctx);
+		if (rc != IB_SUCCESS) {
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F16: "
+				"Failure initiating PortInfo request (%s)\n",
+				ib_get_err_str(rc));
+		}
+		ext_port++;
+	}
+
+	pi_rcv_process_endport(sm, p_physp, p_pi);
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/**********************************************************************
+ The plock must be held before calling this function.
+**********************************************************************/ +static void pi_rcv_process_switch_ext_port(IN osm_sm_t * sm, + IN osm_node_t * p_node, + IN osm_physp_t * p_physp, + IN ib_port_info_t * p_pi) +{ + ib_api_status_t status = IB_SUCCESS; + osm_madw_context_t context; + osm_physp_t *p_remote_physp, *physp0; + osm_node_t *p_remote_node; + ib_net64_t m_key; + unsigned data_vls; + uint8_t port_num; + uint8_t remote_port_num; + osm_dr_path_t path; + int mlnx_epi_supported = 0; + + OSM_LOG_ENTER(sm->p_log); + + /* + Check the state of the physical port. + If there appears to be something on the other end of the wire, + then ask for NodeInfo. Ignore the switch management port. + */ + port_num = osm_physp_get_port_num(p_physp); + + if (sm->p_subn->opt.fdr10) + mlnx_epi_supported = is_mlnx_ext_port_info_supported( + ib_node_info_get_vendor_id(&p_node->node_info), + p_node->node_info.device_id); + + /* if in_sweep_hop_0 is TRUE, then this means the SM is on the switch, + and we got switchInfo of our local switch. Do not continue + probing through the switch. 
*/ + switch (ib_port_info_get_port_state(p_pi)) { + case IB_LINK_DOWN: + p_remote_physp = osm_physp_get_remote(p_physp); + if (p_remote_physp) { + p_remote_node = + osm_physp_get_node_ptr(p_remote_physp); + remote_port_num = + osm_physp_get_port_num(p_remote_physp); + + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Unlinking local node 0x%" PRIx64 + ", port %u" + "\n\t\t\t\tand remote node 0x%" PRIx64 + ", port %u\n", + cl_ntoh64(osm_node_get_node_guid + (p_node)), port_num, + cl_ntoh64(osm_node_get_node_guid + (p_remote_node)), + remote_port_num); + + if (sm->ucast_mgr.cache_valid) + osm_ucast_cache_add_link(&sm->ucast_mgr, + p_physp, + p_remote_physp); + + osm_node_unlink(p_node, (uint8_t) port_num, + p_remote_node, + (uint8_t) remote_port_num); + + } + break; + + case IB_LINK_INIT: + case IB_LINK_ARMED: + case IB_LINK_ACTIVE: + physp0 = osm_node_get_physp_ptr(p_node, 0); + if (mlnx_epi_supported) { + m_key = ib_port_info_get_m_key(&physp0->port_info); + + context.pi_context.node_guid = osm_node_get_node_guid(p_node); + context.pi_context.port_guid = osm_physp_get_port_guid(p_physp); + context.pi_context.set_method = FALSE; + context.pi_context.light_sweep = FALSE; + context.pi_context.active_transition = FALSE; + context.pi_context.client_rereg = FALSE; + status = osm_req_get(sm, + osm_physp_get_dr_path_ptr(p_physp), + IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO, + cl_hton32(port_num), FALSE, m_key, + 0, CL_DISP_MSGID_NONE, &context); + if (status != IB_SUCCESS) + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F11: " + "Failure initiating MLNX ExtPortInfo request (%s)\n", + ib_get_err_str(status)); + } + if (sm->p_subn->in_sweep_hop_0 == FALSE) { + /* + To avoid looping forever, only probe the port if it + is NOT the port that responded to the SMP. + + Request node info from the other end of this link: + 1) Copy the current path from the parent node. + 2) Extend the path to the next hop thru this port. 
+ 3) Request node info with the new path + + */ + if (p_pi->local_port_num != + osm_physp_get_port_num(p_physp)) { + path = *osm_physp_get_dr_path_ptr(p_physp); + + if (osm_dr_path_extend(&path, + osm_physp_get_port_num + (p_physp))) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 0F08: " + "DR path with hop count %d couldn't be extended\n", + path.hop_count); + break; + } + + memset(&context, 0, sizeof(context)); + context.ni_context.node_guid = + osm_node_get_node_guid(p_node); + context.ni_context.port_num = + osm_physp_get_port_num(p_physp); + + status = osm_req_get(sm, &path, + IB_MAD_ATTR_NODE_INFO, 0, + TRUE, 0, 0, + CL_DISP_MSGID_NONE, + &context); + + if (status != IB_SUCCESS) + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 0F02: " + "Failure initiating NodeInfo request (%s)\n", + ib_get_err_str(status)); + } else + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Skipping SMP responder port %u\n", + p_pi->local_port_num); + } + break; + + default: + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F03: " + "Unknown link state = %u, port = %u\n", + ib_port_info_get_port_state(p_pi), + p_pi->local_port_num); + break; + } + + if (ib_port_info_get_port_state(p_pi) > IB_LINK_INIT && p_node->sw && + !ib_switch_info_get_state_change(&p_node->sw->switch_info) && + p_node->sw->need_update == 1) + p_node->sw->need_update = 0; + + if (p_physp->need_update) + sm->p_subn->ignore_existing_lfts = TRUE; + + /* + Update the PortInfo attribute. 
+ */ + osm_physp_set_port_info(p_physp, p_pi, sm); + + if (ib_port_info_get_port_state(p_pi) == IB_LINK_DOWN) + goto Exit; + + p_remote_physp = osm_physp_get_remote(p_physp); + if (p_remote_physp) { + p_remote_node = osm_physp_get_node_ptr(p_remote_physp); + if (p_remote_node->sw) { + data_vls = 1U << (ib_port_info_get_vl_cap(p_pi) - 1); + if (data_vls > 1U << (sm->p_subn->opt.max_op_vls - 1)) + data_vls = 1U << (sm->p_subn->opt.max_op_vls - 1); + if (data_vls >= IB_MAX_NUM_VLS) + data_vls = IB_MAX_NUM_VLS - 1; + if ((uint8_t)data_vls < sm->p_subn->min_sw_data_vls) { + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Setting switch port minimal data VLs " + "to:%u defined by node:0x%" + PRIx64 ", port:%u\n", data_vls, + cl_ntoh64(osm_node_get_node_guid(p_node)), + port_num); + sm->p_subn->min_sw_data_vls = data_vls; + } + } + } + +Exit: + OSM_LOG_EXIT(sm->p_log); +} + /* Process a Get() PortInfo response for a CA or router port: check/fix the LID, store the new PortInfo on the physp, then run the endport processing. p_node is intentionally unused. */ +static void pi_rcv_process_ca_or_router_port(IN osm_sm_t * sm, + IN osm_node_t * p_node, + IN osm_physp_t * p_physp, + IN ib_port_info_t * p_pi) +{ + OSM_LOG_ENTER(sm->p_log); + + UNUSED_PARAM(p_node); + + pi_rcv_check_and_fix_lid(sm->p_log, p_pi, p_physp); + + osm_physp_set_port_info(p_physp, p_pi, sm); + + pi_rcv_process_endport(sm, p_physp, p_pi); + + OSM_LOG_EXIT(sm->p_log); +} + /* Vendor ID compared against the NodeInfo vendor ID in get_pkey_table() to apply the enforce_cap workaround. */ +#define IBM_VENDOR_ID (0x5076) +static void get_pkey_table(IN osm_log_t * p_log, IN osm_sm_t * sm, + IN osm_node_t * p_node, IN osm_physp_t * p_physp) +{ + /* Issue one Get(P_KeyTable) per block for p_physp. The block count comes from NodeInfo partition_cap for non-switch nodes and switch port 0, otherwise from SwitchInfo enforce_cap (nothing is requested when that is 0). Stops at the first request that fails to start. */ + osm_madw_context_t context; + ib_api_status_t status; + osm_dr_path_t path; + osm_physp_t *physp0; + ib_net64_t m_key; + uint8_t port_num; + uint16_t block_num, max_blocks; + uint32_t attr_mod_ho; + + OSM_LOG_ENTER(p_log); + + path = *osm_physp_get_dr_path_ptr(p_physp); + + context.pkey_context.node_guid = osm_node_get_node_guid(p_node); + context.pkey_context.port_guid = osm_physp_get_port_guid(p_physp); + context.pkey_context.set_method = FALSE; + + port_num = p_physp->port_num; + + if (!p_node->sw || port_num == 0) + /* The maximum blocks is defined by the node info 
partition cap + for CA, router, and switch management ports. */ + max_blocks = + (cl_ntoh16(p_node->node_info.partition_cap) + + IB_NUM_PKEY_ELEMENTS_IN_BLOCK - 1) + / IB_NUM_PKEY_ELEMENTS_IN_BLOCK; + else { + /* This is a switch, and not a management port. The maximum blocks + is defined in the switch info partition enforcement cap. */ + + /* Check for IBM eHCA firmware defect in reporting partition enforcement cap */ + if (cl_ntoh32(ib_node_info_get_vendor_id(&p_node->node_info)) == + IBM_VENDOR_ID) + p_node->sw->switch_info.enforce_cap = 0; + + /* Bail out if this is a switch with no partition enforcement capability */ + if (cl_ntoh16(p_node->sw->switch_info.enforce_cap) == 0) + goto Exit; + + max_blocks = (cl_ntoh16(p_node->sw->switch_info.enforce_cap) + + IB_NUM_PKEY_ELEMENTS_IN_BLOCK - + 1) / IB_NUM_PKEY_ELEMENTS_IN_BLOCK; + } + + p_physp->pkeys.rcv_blocks_cnt = max_blocks; + for (block_num = 0; block_num < max_blocks; block_num++) { + if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH || + osm_physp_get_port_num(p_physp) == 0) { + attr_mod_ho = block_num; + m_key = ib_port_info_get_m_key(&p_physp->port_info); + } else { + attr_mod_ho = block_num | (port_num << 16); /* external switch port: port number goes above bit 16, M_Key is read from port 0 below */ + physp0 = osm_node_get_physp_ptr(p_node, 0); + m_key = ib_port_info_get_m_key(&physp0->port_info); + } + status = osm_req_get(sm, &path, IB_MAD_ATTR_P_KEY_TABLE, + cl_hton32(attr_mod_ho), FALSE, + m_key, 0, CL_DISP_MSGID_NONE, &context); + + if (status != IB_SUCCESS) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0F12: " + "Failure initiating PKeyTable request (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + } + +Exit: + OSM_LOG_EXIT(p_log); +} + /* Thin wrapper: despite the name, the only table requested here is the P_Key table (via get_pkey_table). */ +static void pi_rcv_get_pkey_slvl_vla_tables(IN osm_sm_t * sm, + IN osm_node_t * p_node, + IN osm_physp_t * p_physp) +{ + OSM_LOG_ENTER(sm->p_log); + + get_pkey_table(sm->p_log, sm, p_node, p_physp); + + OSM_LOG_EXIT(sm->p_log); +} + /* Returns 1 when the port should be updated: link is not DOWN and either the subnet needs an update, the physp need_update is above 1, or the link is in INIT. Returns 0 otherwise. */ +static int osm_pi_rcv_update_self(IN osm_sm_t *sm, IN osm_physp_t *p_physp, + IN ib_port_info_t *p_pi) +{ + if 
(ib_port_info_get_port_state(p_pi) == IB_LINK_DOWN) + return 0; + + if (sm->p_subn->need_update || p_physp->need_update > 1 || + ib_port_info_get_port_state(p_pi) == IB_LINK_INIT) + return 1; + + return 0; +} + /* Handle the response to a PortInfo Set(): on a non-zero MAD status dump the PortInfo (INFO level for status 0x1c during an ACTIVE transition, ERROR level otherwise); on success store the returned PortInfo on the physp. */ +static void pi_rcv_process_set(IN osm_sm_t * sm, IN osm_node_t * p_node, + IN uint8_t port_num, IN osm_madw_t * p_madw) +{ + osm_physp_t *p_physp; + ib_net64_t port_guid; + ib_smp_t *p_smp; + ib_port_info_t *p_pi; + osm_pi_context_t *p_context; + osm_log_level_t level; + + OSM_LOG_ENTER(sm->p_log); + + p_context = osm_madw_get_pi_context_ptr(p_madw); + + CL_ASSERT(p_node); + + p_physp = osm_node_get_physp_ptr(p_node, port_num); + CL_ASSERT(p_physp); + + port_guid = osm_physp_get_port_guid(p_physp); + + p_smp = osm_madw_get_smp_ptr(p_madw); + p_pi = ib_smp_get_payload_ptr(p_smp); + + /* check for error */ + if (cl_ntoh16(p_smp->status) & 0x7fff) { + /* If port already ACTIVE, don't treat status 7 as error */ + if (p_context->active_transition && + (cl_ntoh16(p_smp->status) & 0x7fff) == 0x1c) { + level = OSM_LOG_INFO; + OSM_LOG(sm->p_log, OSM_LOG_INFO, + "Received error status 0x%x for SetResp() during ACTIVE transition\n", + cl_ntoh16(p_smp->status) & 0x7fff); + /* Should there be a subsequent Get to validate that port is ACTIVE ? 
*/ + } else { + level = OSM_LOG_ERROR; + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F10: " + "Received error status for SetResp()\n"); + } + osm_dump_port_info_v2(sm->p_log, osm_node_get_node_guid(p_node), + port_guid, port_num, p_pi, FILE_ID, level); + } else + osm_physp_set_port_info(p_physp, p_pi, sm); + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Received logical SetResp() for GUID 0x%" PRIx64 + ", port num %u" + "\n\t\t\t\tfor parent node GUID 0x%" PRIx64 + " TID 0x%" PRIx64 "\n", + cl_ntoh64(port_guid), port_num, + cl_ntoh64(osm_node_get_node_guid(p_node)), + cl_ntoh64(p_smp->trans_id)); + + + OSM_LOG_EXIT(sm->p_log); +} + /* Returns the need_update value for a CA/router port based on its neighbor: 1 if the remote node is a switch whose SwitchInfo state-change bit is set, otherwise the physp's current need_update. */ +static int osm_pi_rcv_update_neighbor(IN osm_physp_t *p_physp) +{ + osm_physp_t *p_rem_physp = p_physp->p_remote_physp; + osm_node_t *p_node; + + /* + * Our own port - this is the only case where CA port + * is discovered before its neighbor port + */ + if (!p_rem_physp) + return p_physp->need_update; + + p_node = osm_physp_get_node_ptr(p_rem_physp); + CL_ASSERT(p_node); + + /* CA/RTR to CA/RTR connection */ + if (!p_node->sw) + return p_physp->need_update; + + return (ib_switch_info_get_state_change(&p_node->sw->switch_info) ? 1 : p_physp->need_update); +} + /* Receive handler for PortInfo MADs. context is the osm_sm_t and data the osm_madw_t. Light-sweep responses only flag a heavy sweep; Set() responses go to pi_rcv_process_set(); Get() responses update the physp per node type and may request its P_Key table. */ +void osm_pi_rcv_process(IN void *context, IN void *data) +{ + osm_sm_t *sm = context; + osm_madw_t *p_madw = data; + ib_port_info_t *p_pi; + ib_smp_t *p_smp; + osm_port_t *p_port; + osm_physp_t *p_physp; + osm_dr_path_t *p_dr_path; + osm_node_t *p_node; + osm_pi_context_t *p_context; + ib_net64_t port_guid, node_guid; + uint8_t port_num; + + CL_ASSERT(sm); + + OSM_LOG_ENTER(sm->p_log); + + CL_ASSERT(p_madw); + + p_smp = osm_madw_get_smp_ptr(p_madw); + p_context = osm_madw_get_pi_context_ptr(p_madw); + p_pi = ib_smp_get_payload_ptr(p_smp); + + CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_PORT_INFO); + + /* + * Attribute modifier has already been validated upon MAD receive, + * which means that port_num has to be valid - it originated from + * the request attribute modifier. 
+ */ + port_num = (uint8_t) cl_ntoh32(p_smp->attr_mod); + + port_guid = p_context->port_guid; + node_guid = p_context->node_guid; + + osm_dump_port_info_v2(sm->p_log, node_guid, port_guid, port_num, p_pi, + FILE_ID, OSM_LOG_DEBUG); + + /* On receipt of client reregister, clear the reregister bit so + reregistering won't be sent again and again */ + if (p_context->set_method && + (ib_port_info_get_client_rereg(p_pi) || p_context->client_rereg)) { + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Client reregister received on response\n"); + ib_port_info_set_client_rereg(p_pi, 0); + p_context->client_rereg = FALSE; + } + + /* + we might get a response during a light sweep looking for a change in + the status of a remote port that did not respond in earlier sweeps. + So if the context of the Get was light_sweep - we do not need to + do anything with the response - just flag that we need a heavy sweep + */ + if (p_context->light_sweep == TRUE) { + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Got light sweep response from remote port of parent node " + "GUID 0x%" PRIx64 " port 0x%016" PRIx64 + ", Commencing heavy sweep\n", + cl_ntoh64(node_guid), cl_ntoh64(port_guid)); + sm->p_subn->force_heavy_sweep = TRUE; + sm->p_subn->ignore_existing_lfts = TRUE; + goto Exit; /* the lock has not been acquired yet on this path */ + } + + CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); /* every later jump to Exit releases this lock first */ + p_port = osm_get_port_by_guid(sm->p_subn, port_guid); + if (PF(!p_port)) { + CL_PLOCK_RELEASE(sm->p_lock); + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F06: " + "No port object for port with GUID 0x%" PRIx64 + "\n\t\t\t\tfor parent node GUID 0x%" PRIx64 + ", TID 0x%" PRIx64 "\n", + cl_ntoh64(port_guid), + cl_ntoh64(node_guid), cl_ntoh64(p_smp->trans_id)); + goto Exit; + } + + p_node = p_port->p_node; + CL_ASSERT(p_node); + + if (PF(p_pi->local_port_num > p_node->node_info.num_ports)) { + CL_PLOCK_RELEASE(sm->p_lock); + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F15: " + "Received PortInfo for port GUID 0x%" PRIx64 " is " + "non-compliant and is being ignored since the " + "local port num 
%u > num ports %u\n", + cl_ntoh64(port_guid), p_pi->local_port_num, + p_node->node_info.num_ports); + goto Exit; + } + + /* + If we were setting the PortInfo, then receiving + this attribute was not part of sweeping the subnet. + In this case, just update the PortInfo attribute. + + In an unfortunate blunder, the IB spec defines the + return method for Set() as a GetResp(). Thus, we can't + use the method (what would have been SetResp()) to determine + our course of action. So, we have to carry this extra + boolean around to determine if we were doing Get() or Set(). + */ + if (p_context->set_method) + pi_rcv_process_set(sm, p_node, port_num, p_madw); + else { + + /* + This PortInfo arrived because we did a Get() method, + most likely due to a subnet sweep in progress. + */ + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Discovered port num %u with GUID 0x%" PRIx64 + " for parent node GUID 0x%" PRIx64 + ", TID 0x%" PRIx64 "\n", + port_num, cl_ntoh64(port_guid), + cl_ntoh64(node_guid), cl_ntoh64(p_smp->trans_id)); + + p_physp = osm_node_get_physp_ptr(p_node, port_num); + + CL_ASSERT(p_physp); + + /* Update the directed route path to this port + in case the old path is no longer usable. 
*/ + p_dr_path = osm_physp_get_dr_path_ptr(p_physp); + osm_dr_path_init(p_dr_path, p_smp->hop_count, + p_smp->initial_path); + + p_physp->need_update = osm_pi_rcv_update_self(sm, p_physp, p_pi); + + switch (osm_node_get_type(p_node)) { + case IB_NODE_TYPE_CA: + case IB_NODE_TYPE_ROUTER: + if (!p_node->physp_discovered[port_num]) { + p_port->discovery_count++; + p_node->physp_discovered[port_num] = 1; + } + p_physp->need_update = osm_pi_rcv_update_neighbor(p_physp); /* may replace the value computed by osm_pi_rcv_update_self() above */ + pi_rcv_process_ca_or_router_port(sm, p_node, p_physp, + p_pi); + break; + case IB_NODE_TYPE_SWITCH: + if (!p_node->physp_discovered[port_num]) { + p_port->discovery_count++; + p_node->physp_discovered[port_num] = 1; + } + if (port_num == 0) + pi_rcv_process_switch_port0(sm, p_node, + p_physp, p_pi); + else + pi_rcv_process_switch_ext_port(sm, p_node, + p_physp, p_pi); + + if (ib_port_info_get_port_state(p_pi) == IB_LINK_DOWN) { + CL_PLOCK_RELEASE(sm->p_lock); + goto Exit; + } + break; + default: + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F07: " + "Unknown node type %u with GUID 0x%" PRIx64 + "\n", osm_node_get_type(p_node), + cl_ntoh64(node_guid)); + break; + } + + /* + Get the tables on the physp. + */ + if (p_physp->need_update || (p_node->sw && + p_node->sw->need_update)) + pi_rcv_get_pkey_slvl_vla_tables(sm, p_node, p_physp); + + } + + CL_PLOCK_RELEASE(sm->p_lock); + +Exit: + /* + Release the lock before jumping here!! + */ + OSM_LOG_EXIT(sm->p_log); +} diff --git a/opensm/osm_prtn.c b/opensm/osm_prtn.c new file mode 100644 index 0000000..a203dff --- /dev/null +++ b/opensm/osm_prtn.c @@ -0,0 +1,449 @@ +/* + * Copyright (c) 2006-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2012 Mellanox Technologies LTD. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_prtn_t. + * This object represents an IBA partition. + * This object is part of the opensm family of objects. 
+ */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_PRTN_C +#include +#include +#include +#include +#include +#include +#include + +extern int osm_prtn_config_parse_file(osm_log_t * p_log, osm_subn_t * p_subn, + const char *file_name); + +static uint16_t global_pkey_counter; /* last value handed out by generate_pkey(), host order; reset to 0 by osm_prtn_make_partitions() */ + /* Allocate a zeroed partition for pkey with the default SL and two empty GUID maps (full and limited members). The name is copied from name (truncated to fit) or, when name is NULL or empty, set to the 4-digit hex P_Key. Returns NULL if malloc fails. */ +osm_prtn_t *osm_prtn_new(IN const char *name, IN uint16_t pkey) +{ + osm_prtn_t *p = malloc(sizeof(*p)); + if (!p) + return NULL; + + memset(p, 0, sizeof(*p)); + p->pkey = pkey; + p->sl = OSM_DEFAULT_SL; + p->mgrps = NULL; + p->nmgrps = 0; + cl_map_construct(&p->full_guid_tbl); + cl_map_init(&p->full_guid_tbl, 32); + cl_map_construct(&p->part_guid_tbl); + cl_map_init(&p->part_guid_tbl, 32); + + if (name && *name) { + strncpy(p->name, name, sizeof(p->name) - 1); + p->name[sizeof(p->name) - 1] = '\0'; + } else + snprintf(p->name, sizeof(p->name), "%04x", cl_ntoh16(pkey)); + + return p; +} + /* Destroy both GUID maps, clean up every multicast group tracked by the partition, free it and set *pp_prtn to NULL. */ +void osm_prtn_delete(IN osm_subn_t * p_subn, IN OUT osm_prtn_t ** pp_prtn) +{ + char gid_str[INET6_ADDRSTRLEN]; + int i = 0; + osm_prtn_t *p = *pp_prtn; + + cl_map_remove_all(&p->full_guid_tbl); + cl_map_destroy(&p->full_guid_tbl); + cl_map_remove_all(&p->part_guid_tbl); + cl_map_destroy(&p->part_guid_tbl); + + if (p->mgrps) { + /* Clean up mgrps */ + for (i = 0; i < p->nmgrps; i++) { + /* osm_mgrp_cleanup will not delete + * "well_known" groups */ + p->mgrps[i]->well_known = FALSE; + OSM_LOG(&p_subn->p_osm->log, OSM_LOG_DEBUG, + "removing mgroup %s from partition (0x%x)\n", + inet_ntop(AF_INET6, + p->mgrps[i]->mcmember_rec.mgid.raw, + gid_str, sizeof gid_str), + cl_ntoh16(p->pkey)); /* pkey is stored in network order; convert to host order for printing like the other logs */ + osm_mgrp_cleanup(p_subn, p->mgrps[i]); + } + + free(p->mgrps); + } + + free(p); + *pp_prtn = NULL; +} + /* Add the port with the given GUID to partition p as a full (full == TRUE) or limited member, replacing any previous membership. Returns IB_SUCCESS (also when the port or its physp is not found, which is only logged) or IB_INSUFFICIENT_MEMORY if the map insert fails. */ +ib_api_status_t osm_prtn_add_port(osm_log_t * p_log, osm_subn_t * p_subn, + osm_prtn_t * p, ib_net64_t guid, + boolean_t full, boolean_t indx0) +{ + ib_api_status_t status = IB_SUCCESS; + cl_map_t *p_tbl; + 
osm_port_t *p_port; + osm_physp_t *p_physp; + + p_port = osm_get_port_by_guid(p_subn, guid); + if (!p_port) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "port 0x%" PRIx64 " not found\n", cl_ntoh64(guid)); + return status; + } + + p_physp = p_port->p_physp; + if (!p_physp) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "no physical for port 0x%" PRIx64 "\n", + cl_ntoh64(guid)); + return status; + } + /* Set the pkey to be inserted to block 0 index 0 */ + if (indx0) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, "Setting pkey 0x%04x at indx0 " + "for port 0x%" PRIx64 "\n", + cl_ntoh16(p->pkey), cl_ntoh64(guid)); + osm_pkey_tbl_set_indx0_pkey(p_log, p->pkey, full, + &p_physp->pkeys); + } else if (ib_pkey_get_base(p_physp->pkeys.indx0_pkey) == + ib_pkey_get_base(p->pkey)) + p_physp->pkeys.indx0_pkey = 0; /* this pkey is no longer requested at index 0 */ + + p_tbl = (full == TRUE) ? &p->full_guid_tbl : &p->part_guid_tbl; + + if (p_subn->opt.allow_both_pkeys) { + if (cl_map_remove(p_tbl, guid)) + OSM_LOG(p_log, OSM_LOG_VERBOSE, "port 0x%" PRIx64 + " already in partition \'%s\' (0x%04x) full %d." + " Will overwrite\n", + cl_ntoh64(guid), p->name, cl_ntoh16(p->pkey), + full); + } else { + if (cl_map_remove(&p->part_guid_tbl, guid) || + cl_map_remove(&p->full_guid_tbl, guid)) + OSM_LOG(p_log, OSM_LOG_VERBOSE, "port 0x%" PRIx64 + " already in partition \'%s\' (0x%04x)." 
+ " Will overwrite\n", + cl_ntoh64(guid), p->name, cl_ntoh16(p->pkey)); + } + + if (cl_map_insert(p_tbl, guid, p_physp) == NULL) + return IB_INSUFFICIENT_MEMORY; + + return status; +} + /* Add every port in the subnet port GUID table whose node type equals type (all ports when type is 0) to partition p. Stops at the first failing osm_prtn_add_port() and returns its status. */ +ib_api_status_t osm_prtn_add_all(osm_log_t * p_log, osm_subn_t * p_subn, + osm_prtn_t * p, unsigned type, + boolean_t full, boolean_t indx0) +{ + cl_qmap_t *p_port_tbl = &p_subn->port_guid_tbl; + cl_map_item_t *p_item; + osm_port_t *p_port; + ib_api_status_t status = IB_SUCCESS; + + p_item = cl_qmap_head(p_port_tbl); + while (p_item != cl_qmap_end(p_port_tbl)) { + p_port = (osm_port_t *) p_item; + p_item = cl_qmap_next(p_item); + if (!type || osm_node_get_type(p_port->p_node) == type) { + status = osm_prtn_add_port(p_log, p_subn, p, + osm_port_get_guid(p_port), + full, indx0); + if (status != IB_SUCCESS) + goto _err; + } + } + +_err: + return status; +} + /* Record mgrp in p->mgrps (nothing to do if it is already there) and mark it well_known. If the array cannot be grown the group is cleaned up and IB_ERROR is returned. */ +static ib_api_status_t +track_mgrp_w_partition(osm_log_t *p_log, osm_prtn_t *p, osm_mgrp_t *mgrp, + osm_subn_t *p_subn, const ib_gid_t *mgid, + ib_net16_t pkey) +{ + char gid_str[INET6_ADDRSTRLEN]; + osm_mgrp_t **tmp; + int i = 0; + + /* check if we are already tracking this group */ + for (i = 0; i < p->nmgrps; i++) + if (p->mgrps[i] == mgrp) + return (IB_SUCCESS); + + /* otherwise add it to our list */ + tmp = realloc(p->mgrps, (p->nmgrps +1) * sizeof(*p->mgrps)); + if (tmp) { + p->mgrps = tmp; + p->mgrps[p->nmgrps] = mgrp; + p->nmgrps++; + } else { + OSM_LOG(p_log, OSM_LOG_ERROR, + "realloc error to create MC group (%s) in " + "partition (pkey 0x%04x)\n", + inet_ntop(AF_INET6, mgid->raw, + gid_str, sizeof gid_str), + cl_ntoh16(pkey)); + mgrp->well_known = FALSE; + osm_mgrp_cleanup(p_subn, mgrp); + return (IB_ERROR); + } + mgrp->well_known = TRUE; + return (IB_SUCCESS); +} + /* Find or create the multicast group described by the arguments for partition p, using the partition P_Key with the full-membership bit set, and track it on the partition. A scope of 0 selects OSM_DEFAULT_MGRP_SCOPE. Returns IB_ERROR if the group cannot be created or tracked. */ +ib_api_status_t osm_prtn_add_mcgroup(osm_log_t * p_log, osm_subn_t * p_subn, + osm_prtn_t * p, uint8_t rate, uint8_t mtu, + uint8_t sl, uint8_t scope, uint32_t Q_Key, + uint8_t tclass, uint32_t FlowLabel, + const ib_gid_t *mgid) +{ + char 
gid_str[INET6_ADDRSTRLEN]; + ib_member_rec_t mc_rec; + ib_net64_t comp_mask; + ib_net16_t pkey; + osm_mgrp_t *mgrp; + osm_sa_t *p_sa = &p_subn->p_osm->sa; + uint8_t hop_limit; + + pkey = p->pkey | cl_hton16(0x8000); + if (!scope) + scope = OSM_DEFAULT_MGRP_SCOPE; + hop_limit = (scope == IB_MC_SCOPE_LINK_LOCAL) ? 0 : IB_HOPLIMIT_MAX; + + memset(&mc_rec, 0, sizeof(mc_rec)); + + mc_rec.mgid = *mgid; + + mc_rec.qkey = CL_HTON32(Q_Key); + mc_rec.mtu = mtu | (IB_PATH_SELECTOR_EXACTLY << 6); + mc_rec.tclass = tclass; + mc_rec.pkey = pkey; + mc_rec.rate = rate | (IB_PATH_SELECTOR_EXACTLY << 6); + mc_rec.pkt_life = p_subn->opt.subnet_timeout; + mc_rec.sl_flow_hop = ib_member_set_sl_flow_hop(sl, FlowLabel, hop_limit); + /* Scope in MCMemberRecord (if present) needs to be consistent with MGID */ + mc_rec.scope_state = + ib_member_set_scope_state(scope, IB_MC_REC_STATE_FULL_MEMBER); + ib_mgid_set_scope(&mc_rec.mgid, scope); + + /* don't update rate, mtu */ + comp_mask = IB_MCR_COMPMASK_MTU | IB_MCR_COMPMASK_MTU_SEL | + IB_MCR_COMPMASK_RATE | IB_MCR_COMPMASK_RATE_SEL; + mgrp = osm_mcmr_rcv_find_or_create_new_mgrp(p_sa, comp_mask, &mc_rec); + if (!mgrp) { + OSM_LOG(p_log, OSM_LOG_ERROR, + "Failed to create MC group (%s) with pkey 0x%04x\n", + inet_ntop(AF_INET6, mgid->raw, gid_str, sizeof gid_str), + cl_ntoh16(pkey)); + return IB_ERROR; + } + + return (track_mgrp_w_partition(p_log, p, mgrp, p_subn, mgid, pkey)); +} + /* Return the next P_Key (network order) that is not yet in the partition table, counting up from the last generated value and staying below the default partial P_Key. Returns 0 when no value is left. */ +static uint16_t generate_pkey(osm_subn_t * p_subn) +{ + uint16_t pkey; + + cl_qmap_t *m = &p_subn->prtn_pkey_tbl; + while (global_pkey_counter < cl_ntoh16(IB_DEFAULT_PARTIAL_PKEY) - 1) { + pkey = ++global_pkey_counter; + pkey = cl_hton16(pkey); + if (cl_qmap_get(m, pkey) == cl_qmap_end(m)) + return pkey; + } + return 0; +} + /* Linear search of the partition table for a partition whose name matches name (compared over at most sizeof(p->name) bytes). Returns NULL if none matches. */ +osm_prtn_t *osm_prtn_find_by_name(osm_subn_t * p_subn, const char *name) +{ + cl_map_item_t *p_next; + osm_prtn_t *p; + + p_next = cl_qmap_head(&p_subn->prtn_pkey_tbl); + while (p_next != cl_qmap_end(&p_subn->prtn_pkey_tbl)) { + p = 
(osm_prtn_t *) p_next; + p_next = cl_qmap_next(&p->map_item); + if (!strncmp(p->name, name, sizeof(p->name))) + return p; + } + + return NULL; +} + /* Return the partition for pkey (membership bit masked off), creating it when needed. With pkey 0 an existing partition called name is reused, otherwise a P_Key is generated. If the P_Key is already in the table the existing partition is returned instead of the new one. name may be NULL. Returns NULL on failure. */ +osm_prtn_t *osm_prtn_make_new(osm_log_t * p_log, osm_subn_t * p_subn, + const char *name, uint16_t pkey) +{ + osm_prtn_t *p = NULL, *p_check; + + pkey &= cl_hton16((uint16_t) ~ 0x8000); + if (!pkey) { + if (name && (p = osm_prtn_find_by_name(p_subn, name))) + return p; + if (!(pkey = generate_pkey(p_subn))) + return NULL; + } + + p = osm_prtn_new(name, pkey); + if (!p) { + OSM_LOG(p_log, OSM_LOG_ERROR, "Unable to create" + " partition \'%s\' (0x%04x)\n", name ? name : "", cl_ntoh16(pkey)); /* name may be NULL; never hand NULL to %s */ + return NULL; + } + + p_check = (osm_prtn_t *) cl_qmap_insert(&p_subn->prtn_pkey_tbl, + p->pkey, &p->map_item); + if (p != p_check) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, "Duplicated partition" + " definition: \'%s\' (0x%04x) prev name \'%s\'" + ". Will use it\n", + p->name, cl_ntoh16(pkey), p_check->name); /* p->name is always set by osm_prtn_new(), unlike name which may be NULL */ + osm_prtn_delete(p_subn, &p); + p = p_check; + } + + return p; +} + /* Build the "Default" partition: add all ports (as full members only when no_config is TRUE), make the SM port a full member, and with no_config also create the IPoIB broadcast group. Returns IB_UNKNOWN_ERROR if the partition cannot be created, otherwise the status of the port additions. */ +static ib_api_status_t prtn_make_default(osm_log_t * p_log, osm_subn_t * p_subn, + boolean_t no_config) +{ + ib_api_status_t status = IB_UNKNOWN_ERROR; + osm_prtn_t *p; + + p = osm_prtn_make_new(p_log, p_subn, "Default", + IB_DEFAULT_PARTIAL_PKEY); + if (!p) + goto _err; + status = osm_prtn_add_all(p_log, p_subn, p, 0, no_config, FALSE); + if (status != IB_SUCCESS) + goto _err; + cl_map_remove(&p->part_guid_tbl, p_subn->sm_port_guid); + status = + osm_prtn_add_port(p_log, p_subn, p, p_subn->sm_port_guid, TRUE, FALSE); + + /* ipv4 broadcast group */ + if (no_config) + osm_prtn_add_mcgroup(p_log, p_subn, p, OSM_DEFAULT_MGRP_RATE, + OSM_DEFAULT_MGRP_MTU, OSM_DEFAULT_SL, + 0, OSM_IPOIB_BROADCAST_MGRP_QKEY, 0, 0, + &osm_ipoib_broadcast_mgid); + +_err: + return status; +} + /* Rebuild all partitions: clear the membership maps, recreate the default partition, parse the partition configuration file if it is accessible, then drop partitions left without members. If the file is not fully processed the whole procedure is repeated once without it. */ +ib_api_status_t osm_prtn_make_partitions(osm_log_t * p_log, osm_subn_t * p_subn) +{ + struct stat statbuf; + const char *file_name; + boolean_t is_config = TRUE; + boolean_t is_wrong_config = FALSE; + 
ib_api_status_t status = IB_SUCCESS; + cl_map_item_t *p_next; + osm_prtn_t *p; + + file_name = p_subn->opt.partition_config_file ? + p_subn->opt.partition_config_file : OSM_DEFAULT_PARTITION_CONFIG_FILE; + if (stat(file_name, &statbuf)) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, "Partition configuration " + "%s is not accessible (%s)\n", file_name, + strerror(errno)); + is_config = FALSE; + } + +retry_default: + /* clean up current port maps */ + p_next = cl_qmap_head(&p_subn->prtn_pkey_tbl); + while (p_next != cl_qmap_end(&p_subn->prtn_pkey_tbl)) { + p = (osm_prtn_t *) p_next; + p_next = cl_qmap_next(&p->map_item); + cl_map_remove_all(&p->part_guid_tbl); + cl_map_remove_all(&p->full_guid_tbl); + } + + global_pkey_counter = 0; + + status = prtn_make_default(p_log, p_subn, !is_config); + if (status != IB_SUCCESS) + goto _err; + + if (is_config && osm_prtn_config_parse_file(p_log, p_subn, file_name)) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, "Partition configuration " + "was not fully processed\n"); + is_wrong_config = TRUE; + } + + /* and now clean up empty partitions */ + p_next = cl_qmap_head(&p_subn->prtn_pkey_tbl); + while (p_next != cl_qmap_end(&p_subn->prtn_pkey_tbl)) { + p = (osm_prtn_t *) p_next; + p_next = cl_qmap_next(&p->map_item); + if (cl_map_count(&p->part_guid_tbl) == 0 && + cl_map_count(&p->full_guid_tbl) == 0) { + cl_qmap_remove_item(&p_subn->prtn_pkey_tbl, + (cl_map_item_t *) p); + osm_prtn_delete(p_subn, &p); + } + } + + if (is_config && is_wrong_config) { + OSM_LOG(p_log, OSM_LOG_ERROR, "Partition configuration " + "in error; retrying with default config\n"); + is_config = FALSE; /* the second pass skips the file, so this retry happens at most once */ + goto retry_default; + } + +_err: + return status; +} diff --git a/opensm/osm_prtn_config.c b/opensm/osm_prtn_config.c new file mode 100644 index 0000000..a7293bc --- /dev/null +++ b/opensm/osm_prtn_config.c @@ -0,0 +1,853 @@ +/* + * Copyright (c) 2006-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2012-2015 Mellanox Technologies LTD. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +/* + * Abstract: + * Implementation of opensm partition management configuration + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include + +#include +#include +#define FILE_ID OSM_FILE_PRTN_CONFIG_C +#include +#include +#include +#include +#include + /* Membership kind requested for the ports of a partition. */ +typedef enum { + LIMITED, + FULL, + BOTH +} membership_t; + /* Template MGID of the IPoIB IPv4 broadcast group; partition_create() copies it and overwrites the P_Key bytes. */ +const ib_gid_t osm_ipoib_broadcast_mgid = { + { + 0xff, /* multicast field */ + 0x12, /* non-permanent bit, link local scope */ + 0x40, 0x1b, /* IPv4 signature */ + 0xff, 0xff, /* 16 bits of P_Key (to be filled in) */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 48 bits of zeros */ + 0xff, 0xff, 0xff, 0xff, /* 32 bit IPv4 broadcast address */ + }, +}; + /* Multicast group parameters; scope_mask has bit N set for every requested scope N. */ +struct group_flags { + unsigned mtu, rate, sl, scope_mask; + uint32_t Q_Key; + uint8_t TClass; + uint32_t FlowLabel; +}; + /* A multicast group to pre-create: its MGID plus the parameters to create it with. */ +struct precreate_mgroup { + ib_gid_t mgid; + struct group_flags flags; +}; + /* Parser state for the partition definition currently being processed. */ +struct part_conf { + osm_log_t *p_log; + osm_subn_t *p_subn; + osm_prtn_t *p_prtn; + unsigned is_ipoib; + struct group_flags flags; + membership_t membership; + boolean_t indx0; +}; + +extern osm_prtn_t *osm_prtn_make_new(osm_log_t * p_log, osm_subn_t * p_subn, + const char *name, uint16_t pkey); +extern ib_api_status_t osm_prtn_add_all(osm_log_t * p_log, osm_subn_t * p_subn, + osm_prtn_t * p, unsigned type, + boolean_t full, boolean_t indx0); +extern ib_api_status_t osm_prtn_add_port(osm_log_t * p_log, + osm_subn_t * p_subn, osm_prtn_t * p, + ib_net64_t guid, boolean_t full, + boolean_t indx0); + +ib_api_status_t osm_prtn_add_mcgroup(osm_log_t * p_log, osm_subn_t * p_subn, + osm_prtn_t * p, uint8_t rate, uint8_t mtu, + uint8_t sl, uint8_t scope, uint32_t Q_Key, + uint8_t TClass, uint32_t FlowLabel, + const ib_gid_t *mgid); + + /* TRUE when mgid is byte-for-byte equal to the IPoIB broadcast template MGID. */ +static inline boolean_t mgid_is_broadcast(const ib_gid_t *mgid) +{ + return (memcmp(mgid, &osm_ipoib_broadcast_mgid, + sizeof(osm_ipoib_broadcast_mgid)) == 0); +} + /* TRUE when the MGID signature field is 0x401b or 0x601b. */ +static inline boolean_t mgid_is_ip(const ib_gid_t 
*mgid) +{ + ib_net16_t ipsig = mgid->ip_multicast.signature; + return (ipsig == cl_hton16(0x401b) || ipsig == cl_hton16(0x601b)); +} + /* TRUE when the P_Key embedded in the group MGID is acceptable for the partition: the broadcast template, a zero (wild card) P_Key, or the partition P_Key with the full-membership bit. Logs an error and returns FALSE otherwise. */ +static inline boolean_t ip_mgroup_pkey_ok(struct part_conf *conf, + struct precreate_mgroup *group) +{ + ib_net16_t mpkey = group->mgid.ip_multicast.p_key; + char gid_str[INET6_ADDRSTRLEN]; + + if (mgid_is_broadcast(&group->mgid) + /* user requested "wild card" of pkey */ + || mpkey == 0x0000 + /* user was smart enough to match */ + || mpkey == (conf->p_prtn->pkey | cl_hton16(0x8000))) + return (TRUE); + + OSM_LOG(conf->p_log, OSM_LOG_ERROR, + "IP MC group (%s) specified with invalid pkey 0x%04x " + "for partition pkey = 0x%04x (%s)\n", + inet_ntop(AF_INET6, group->mgid.raw, gid_str, sizeof gid_str), + cl_ntoh16(mpkey), cl_ntoh16(conf->p_prtn->pkey), conf->p_prtn->name); + return (FALSE); +} + /* TRUE when the group rate equals the partition rate; logs an error and returns FALSE otherwise. */ +static inline boolean_t ip_mgroup_rate_ok(struct part_conf *conf, + struct precreate_mgroup *group) +{ + char gid_str[INET6_ADDRSTRLEN]; + + if (group->flags.rate == conf->flags.rate) + return (TRUE); + + OSM_LOG(conf->p_log, OSM_LOG_ERROR, + "IP MC group (%s) specified with invalid rate (%d): " + "partition pkey = 0x%04x (%s) " + "[Partition broadcast group rate = %d]\n", + inet_ntop(AF_INET6, group->mgid.raw, gid_str, sizeof gid_str), + group->flags.rate, cl_ntoh16(conf->p_prtn->pkey), + conf->p_prtn->name, conf->flags.rate); + return (FALSE); +} + /* TRUE when the group MTU equals the partition MTU; logs an error and returns FALSE otherwise. */ +static inline boolean_t ip_mgroup_mtu_ok(struct part_conf *conf, + struct precreate_mgroup *group) +{ + char gid_str[INET6_ADDRSTRLEN]; + + if (group->flags.mtu == conf->flags.mtu) + return (TRUE); + + OSM_LOG(conf->p_log, OSM_LOG_ERROR, + "IP MC group (%s) specified with invalid mtu (%d): " + "partition pkey = 0x%04x (%s) " + "[Partition broadcast group mtu = %d]\n", + inet_ntop(AF_INET6, group->mgid.raw, gid_str, sizeof gid_str), + group->flags.mtu, cl_ntoh16(conf->p_prtn->pkey), + conf->p_prtn->name, conf->flags.mtu); + return (FALSE); +} + /* Create the multicast group on the current partition: once with scope 0 (default scope) when no scope was requested, otherwise once for every scope whose bit is set in scope_mask. */ +static void __create_mgrp(struct part_conf *conf, 
struct precreate_mgroup *group) +{ + unsigned int scope; + + if (!group->flags.scope_mask) { + osm_prtn_add_mcgroup(conf->p_log, conf->p_subn, conf->p_prtn, + (uint8_t) group->flags.rate, + (uint8_t) group->flags.mtu, + group->flags.sl, + 0, + group->flags.Q_Key, + group->flags.TClass, + group->flags.FlowLabel, + &group->mgid); + } else { + for (scope = 0; scope < 16; scope++) { + if (((1<<scope) & group->flags.scope_mask) == 0) + continue; + + osm_prtn_add_mcgroup(conf->p_log, conf->p_subn, conf->p_prtn, + (uint8_t)group->flags.rate, + (uint8_t)group->flags.mtu, + (uint8_t)group->flags.sl, + (uint8_t)scope, + group->flags.Q_Key, + group->flags.TClass, + group->flags.FlowLabel, + &group->mgid); + } + } +} + /* Create (or look up) the partition named name with P_Key id and store it in conf->p_prtn. A name starting with a digit and no id is treated as the id. Applies the SL and, for IPoIB partitions, creates the broadcast group. Returns 0 on success, -1 if the id does not parse or the partition cannot be created. */ +static int partition_create(unsigned lineno, struct part_conf *conf, + char *name, char *id, char *flag, char *flag_val) +{ + ib_net16_t pkey; + + if (!id && name && isdigit((unsigned char)*name)) { + id = name; + name = NULL; + } + + if (id) { + char *end; + + pkey = cl_hton16((uint16_t)strtoul(id, &end, 0)); + if (end == id || *end) + return -1; + } else + pkey = 0; + + conf->p_prtn = osm_prtn_make_new(conf->p_log, conf->p_subn, + name, pkey); + if (!conf->p_prtn) + return -1; + + if (!conf->p_subn->opt.qos && conf->flags.sl != OSM_DEFAULT_SL) { + OSM_LOG(conf->p_log, OSM_LOG_DEBUG, "Overriding SL %d" + " to default SL %d on partition %s" + " as QoS is not enabled.\n", + conf->flags.sl, OSM_DEFAULT_SL, conf->p_prtn->name); /* name may have been reset to NULL above; the partition name is always valid here */ + conf->flags.sl = OSM_DEFAULT_SL; + } + conf->p_prtn->sl = (uint8_t) conf->flags.sl; + + if (conf->is_ipoib) { + struct precreate_mgroup broadcast_mgroup; + memset(&broadcast_mgroup, 0, sizeof(broadcast_mgroup)); + broadcast_mgroup.mgid = osm_ipoib_broadcast_mgid; + pkey = CL_HTON16(0x8000) | conf->p_prtn->pkey; + memcpy(&broadcast_mgroup.mgid.raw[4], &pkey , sizeof(pkey)); + broadcast_mgroup.flags.mtu = conf->flags.mtu; + broadcast_mgroup.flags.rate = conf->flags.rate; + broadcast_mgroup.flags.sl = conf->flags.sl; + broadcast_mgroup.flags.Q_Key = conf->flags.Q_Key ? 
+ conf->flags.Q_Key : + OSM_IPOIB_BROADCAST_MGRP_QKEY; + broadcast_mgroup.flags.TClass = conf->flags.TClass; + broadcast_mgroup.flags.FlowLabel = conf->flags.FlowLabel; + __create_mgrp(conf, &broadcast_mgroup); + } + + return 0; +} + /* Parse val with strtoul (base auto-detected) and return the result. Logs a parse warning when val holds no digits or has trailing characters. val must not be NULL. */ +static unsigned long int verify_val(unsigned lineno, osm_log_t *p_log, + char *flag, char *val) +{ + char *end; + unsigned long int ret = strtoul(val, &end, 0); + if (end == val || *end) + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "PARSE WARN: line %d: " + "suspicious val=(%s) detected. " + "flag=(%s)\n", lineno, val, flag); + return ret; +} + +/* returns 1 if processed 0 if _not_ */ +static int parse_group_flag(unsigned lineno, osm_log_t * p_log, + struct group_flags *flags, + char *flag, char *val) +{ + int rc = 0; + int len = strlen(flag); + if (!strncmp(flag, "mtu", len)) { + rc = 1; + if (!val || (flags->mtu = verify_val(lineno, p_log, flag, val)) == 0) + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "PARSE WARN: line %d: " + "flag \'mtu\' requires valid value" + " - skipped\n", lineno); + } else if (!strncmp(flag, "rate", len)) { + rc = 1; + if (!val || (flags->rate = verify_val(lineno, p_log, flag, val)) == 0) + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "PARSE WARN: line %d: " + "flag \'rate\' requires valid value" + " - skipped\n", lineno); + } else if (!strncmp(flag, "scope", len)) { + unsigned int scope; + rc = 1; + if (!val || (scope = verify_val(lineno, p_log, flag, val)) == 0 || scope > 0xF) + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "PARSE WARN: line %d: " + "flag \'scope\' requires valid value" + " - skipped\n", lineno); + else + flags->scope_mask |= (1<Q_Key = verify_val(lineno, p_log, flag, val)) == 0) + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "PARSE WARN: line %d: " + "flag \'Q_Key\' requires valid value" + " - using '0'\n", lineno); + } else if (!strncmp(flag, "TClass", strlen(flag))) { + rc = 1; + if (!val || (flags->TClass = verify_val(lineno, p_log, flag, val)) == 0) + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "PARSE WARN: line %d: " + "flag \'TClass\' requires valid 
value" + " - using '0'\n", lineno); + } else if (!strncmp(flag, "sl", len)) { + unsigned sl; + char *end; + rc = 1; + + if (!val || !*val || (sl = strtoul(val, &end, 0)) > 15 || + (*end && !isspace(*end))) + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "PARSE WARN: line %d: " + "flag \'sl\' requires valid value" + " - skipped\n", lineno); + else + flags->sl = sl; + } else if (!strncmp(flag, "FlowLabel", len)) { + uint32_t FlowLabel; + char *end; + rc = 1; + + if (!val || !*val || + (FlowLabel = strtoul(val, &end, 0)) > 0xFFFFF || + (*end && !isspace(*end))) + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "PARSE WARN: line %d: " + "flag \'FlowLabel\' requires valid value" + " - skipped\n", lineno); + else + flags->FlowLabel = FlowLabel; + } + + return rc; +} + +static int partition_add_flag(unsigned lineno, struct part_conf *conf, + char *flag, char *val) +{ + int len = strlen(flag); + + /* ipoib gc group flags are processed here. */ + if (parse_group_flag(lineno, conf->p_log, &conf->flags, flag, val)) + return 0; + + /* partition flags go here. 
*/ + if (!strncmp(flag, "ipoib", len)) { + conf->is_ipoib = 1; + } else if (!strncmp(flag, "defmember", len)) { + if (!val || (strncmp(val, "limited", strlen(val)) + && strncmp(val, "both", strlen(val)) + && strncmp(val, "full", strlen(val)))) + OSM_LOG(conf->p_log, OSM_LOG_VERBOSE, + "PARSE WARN: line %d: " + "flag \'defmember\' requires valid value (limited or full or both)" + " - skipped\n", lineno); + else { + if (!strncmp(val, "full", strlen(val))) + conf->membership = FULL; + else if (!strncmp(val, "both", strlen(val))) + conf->membership = BOTH; + else + conf->membership = LIMITED; + } + } else if (!strcmp(flag, "indx0")) + conf->indx0 = TRUE; + else { + OSM_LOG(conf->p_log, OSM_LOG_VERBOSE, + "PARSE WARN: line %d: " + "unrecognized partition flag \'%s\'" + " - ignored\n", lineno, flag); + } + return 0; +} +static void manage_membership_change(struct part_conf *conf, osm_prtn_t * p, + unsigned type, membership_t membership, + ib_net64_t guid) +{ + cl_map_t *p_tbl; + cl_map_iterator_t p_next, p_item; + osm_physp_t *p_physp; + + /* In allow_both_pkeys mode */ + /* if membership of the PKEY is set to FULL */ + /* need to clean up the part_guid_tbl table entry for this guid */ + /* if membership of the PKEY is set to LIMITED */ + /* need to clean up the full_guid_tbl table entry for this guid */ + /* as it could be populated because of previous definitions */ + + if (!conf->p_subn->opt.allow_both_pkeys || membership == BOTH) + return; + + switch (type){ + /* ALL = 0 */ + case 0: + cl_map_remove_all(membership == LIMITED ? + &p->full_guid_tbl : &p->part_guid_tbl); + break; + /* specific GUID */ + case 0xFF: + cl_map_remove(membership == LIMITED ? + &p->full_guid_tbl : &p->part_guid_tbl, + cl_hton64(guid)); + break; + + case IB_NODE_TYPE_CA: + case IB_NODE_TYPE_SWITCH: + case IB_NODE_TYPE_ROUTER: + p_tbl = (membership == LIMITED) ? 
+ &p->full_guid_tbl : &p->part_guid_tbl; + + p_next = cl_map_head(p_tbl); + while (p_next != cl_map_end(p_tbl)) { + p_item = p_next; + p_next = cl_map_next(p_item); + p_physp = (osm_physp_t *) cl_map_obj(p_item); + if (osm_node_get_type(p_physp->p_node) == type) + cl_map_remove_item(p_tbl, p_item); + } + break; + default: + break; + + } +} +static int partition_add_all(struct part_conf *conf, osm_prtn_t * p, + unsigned type, membership_t membership) +{ + manage_membership_change(conf, p, type, membership, 0); + + if (membership != LIMITED && + osm_prtn_add_all(conf->p_log, conf->p_subn, p, type, TRUE, conf->indx0) != IB_SUCCESS) + return -1; + if ((membership == LIMITED || + (membership == BOTH && conf->p_subn->opt.allow_both_pkeys)) && + osm_prtn_add_all(conf->p_log, conf->p_subn, p, type, FALSE, conf->indx0) != IB_SUCCESS) + return -1; + return 0; +} + +static int partition_add_port(unsigned lineno, struct part_conf *conf, + char *name, char *flag) +{ + osm_prtn_t *p = conf->p_prtn; + ib_net64_t guid; + membership_t membership = conf->membership; + + if (!name || !*name || !strncmp(name, "NONE", strlen(name))) + return 0; + + if (flag) { + /* reset default membership to limited */ + membership = LIMITED; + if (!strncmp(flag, "full", strlen(flag))) + membership = FULL; + else if (!strncmp(flag, "both", strlen(flag))) + membership = BOTH; + else if (strncmp(flag, "limited", strlen(flag))) { + OSM_LOG(conf->p_log, OSM_LOG_VERBOSE, + "PARSE WARN: line %d: " + "unrecognized port flag \'%s\'." 
/*
 * Split a "name[=value]" token in place.
 *
 * str  - writable, NUL-terminated token; it is modified: the '=' and any
 *        blanks (space, tab, newline) trailing the name or the value are
 *        overwritten with NUL bytes
 * name - receives a pointer to the name with surrounding blanks removed
 *        (never NULL on return, but may point to an empty string)
 * val  - receives a pointer to the trimmed value, or NULL when the token
 *        contains no '='
 *
 * Returns the number of bytes of str that were consumed, counting the
 * terminating NUL of the token.
 */
static int parse_name_token(char *str, char **name, char **val)
{
	static const char blanks[] = " \t\n";
	char *key, *value, *after_eq, *eq, *tail;
	int consumed;

	*name = NULL;
	*val = NULL;

	/* name part: skip leading blanks, cut at '=' when there is one */
	key = str + strspn(str, blanks);

	after_eq = NULL;
	eq = strchr(key, '=');
	if (eq) {
		*eq = '\0';
		after_eq = eq + 1;
	}

	/* bytes up to and including the NUL that now ends the name part */
	consumed = strlen(str) + 1;

	for (tail = key + strlen(key);
	     tail != key && (*tail == '\0' || *tail == ' ' ||
			     *tail == '\t' || *tail == '\n');
	     tail--)
		*tail = '\0';

	*name = key;

	if (!after_eq)
		return consumed;

	/* value part: same treatment, measured from just after the '=' */
	value = after_eq + strspn(after_eq, blanks);
	tail = value + strlen(value);
	consumed += (int)(tail - after_eq) + 1;

	for (; tail != value && (*tail == '\0' || *tail == ' ' ||
				 *tail == '\t' || *tail == '\n');
	     tail--)
		*tail = '\0';

	*val = value;

	return consumed;
}
*/ + if (osm_get_mgrp_by_mgid(conf->p_subn, &mgroup.mgid)) + goto error; + + /* create the group */ + __create_mgrp(conf, &mgroup); + +error: + return ret; +} + +static struct part_conf *new_part_conf(osm_log_t * p_log, osm_subn_t * p_subn) +{ + static struct part_conf part; + struct part_conf *conf = ∂ + + memset(conf, 0, sizeof(*conf)); + conf->p_log = p_log; + conf->p_subn = p_subn; + conf->p_prtn = NULL; + conf->is_ipoib = 0; + conf->flags.sl = OSM_DEFAULT_SL; + conf->flags.rate = OSM_DEFAULT_MGRP_RATE; + conf->flags.mtu = OSM_DEFAULT_MGRP_MTU; + conf->membership = LIMITED; + conf->indx0 = FALSE; + return conf; +} + +static int flush_part_conf(struct part_conf *conf) +{ + memset(conf, 0, sizeof(*conf)); + return 0; +} + +static int parse_part_conf(struct part_conf *conf, char *str, int lineno) +{ + int ret, len = 0; + char *name, *id, *flag, *flval; + char *q, *p; + + p = str; + if (*p == '\t' || *p == '\0' || *p == '\n') + p++; + + len += (int)(p - str); + str = p; + + if (conf->p_prtn) + goto skip_header; + + q = strchr(p, ':'); + if (!q) { + OSM_LOG(conf->p_log, OSM_LOG_ERROR, "PARSE ERROR: line %d: " + "no partition definition found\n", lineno); + fprintf(stderr, "\nPARSE ERROR: line %d: " + "no partition definition found\n", lineno); + return -1; + } + + *q++ = '\0'; + str = q; + + name = id = flag = flval = NULL; + + q = strchr(p, ','); + if (q) + *q = '\0'; + + ret = parse_name_token(p, &name, &id); + p += ret; + len += ret; + + while (q) { + flag = flval = NULL; + q = strchr(p, ','); + if (q) + *q++ = '\0'; + ret = parse_name_token(p, &flag, &flval); + if (!flag) { + OSM_LOG(conf->p_log, OSM_LOG_ERROR, + "PARSE ERROR: line %d: " + "bad partition flags\n", lineno); + fprintf(stderr, "\nPARSE ERROR: line %d: " + "bad partition flags\n", lineno); + return -1; + } + p += ret; + len += ret; + partition_add_flag(lineno, conf, flag, flval); + } + + if (p != str || (partition_create(lineno, conf, + name, id, flag, flval) < 0)) { + OSM_LOG(conf->p_log, 
OSM_LOG_ERROR, "PARSE ERROR: line %d: " + "bad partition definition\n", lineno); + fprintf(stderr, "\nPARSE ERROR: line %d: " + "bad partition definition\n", lineno); + return -1; + } + +skip_header: + do { + name = flag = NULL; + q = strchr(p, ','); + if (q) + *q++ = '\0'; + ret = parse_name_token(p, &name, &flag); + len += ret; + + if (strcmp(name, "mgid") == 0) { + /* parse an mgid line if specified. */ + len += mgroup_create(p+ret, flag, lineno, conf); + goto done; /* We're done: this consumes the line */ + } + if (partition_add_port(lineno, conf, name, flag) < 0) { + OSM_LOG(conf->p_log, OSM_LOG_ERROR, + "PARSE ERROR: line %d: " + "bad PortGUID\n", lineno); + fprintf(stderr, "PARSE ERROR: line %d: " + "bad PortGUID\n", lineno); + return -1; + } + p += ret; + } while (q); + +done: + return len; +} + +/** + * @return 1 on error, 0 on success + */ +int osm_prtn_config_parse_file(osm_log_t * p_log, osm_subn_t * p_subn, + const char *file_name) +{ + char line[4096]; + struct part_conf *conf = NULL; + FILE *file; + int lineno; + int is_parse_success; + + line[0] = '\0'; + file = fopen(file_name, "r"); + if (!file) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Cannot open config file \'%s\': %s\n", + file_name, strerror(errno)); + return -1; + } + + lineno = 0; + + is_parse_success = 0; + + while (fgets(line, sizeof(line) - 1, file) != NULL) { + char *q, *p = line; + + lineno++; + + p = line; + + q = strchr(p, '#'); + if (q) + *q = '\0'; + + do { + int len; + while (*p == ' ' || *p == '\t' || *p == '\n') + p++; + if (*p == '\0') + break; + + if (!conf && !(conf = new_part_conf(p_log, p_subn))) { + OSM_LOG(p_log, OSM_LOG_ERROR, + "PARSE ERROR: line %d: " + "internal: cannot create config\n", + lineno); + fprintf(stderr, + "PARSE ERROR: line %d: " + "internal: cannot create config\n", + lineno); + is_parse_success = -1; + break; + } + + q = strchr(p, ';'); + if (q) + *q = '\0'; + + len = parse_part_conf(conf, p, lineno); + if (len < 0) { + is_parse_success = -1; + break; + } 
+ + is_parse_success = 1; + + p += len; + + if (q) { + flush_part_conf(conf); + conf = NULL; + } + } while (q); + + if (is_parse_success == -1) + break; + } + + fclose(file); + + return (is_parse_success == 1) ? 0 : 1; +} diff --git a/opensm/osm_qos.c b/opensm/osm_qos.c new file mode 100644 index 0000000..58b8730 --- /dev/null +++ b/opensm/osm_qos.c @@ -0,0 +1,668 @@ +/* + * Copyright (c) 2006-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2010-2015 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +/* + * Abstract: + * Implementation of OpenSM QoS infrastructure primitives + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include + +#include +#include +#include +#include +#define FILE_ID OSM_FILE_QOS_C +#include +#include +#include + +struct qos_config { + uint8_t max_vls; + uint8_t vl_high_limit; + ib_vl_arb_table_t vlarb_high[2]; + ib_vl_arb_table_t vlarb_low[2]; + ib_slvl_table_t sl2vl; +}; + +typedef struct qos_mad_item { + cl_list_item_t list_item; + osm_madw_t *p_madw; +} qos_mad_item_t; + +typedef struct qos_mad_list { + cl_list_item_t list_item; + cl_qlist_t port_mad_list; +} qos_mad_list_t; + +static void qos_build_config(struct qos_config *cfg, + osm_qos_options_t * opt, + osm_qos_options_t * dflt); + +/* + * QoS primitives + */ + +static qos_mad_item_t *osm_qos_mad_create(IN osm_sm_t * sm, + IN osm_physp_t * p, + IN uint32_t data_size, + IN uint8_t * p_data, + IN ib_net16_t attr_id, + IN uint32_t attr_mod) + +{ + qos_mad_item_t *p_mad; + osm_madw_context_t context; + osm_madw_t *p_madw; + osm_node_t *p_node; + osm_physp_t *physp0; + ib_net64_t m_key; + uint32_t timeout = 0; + + p_node = osm_physp_get_node_ptr(p); + if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && + osm_physp_get_port_num(p) != 0) { + physp0 = osm_node_get_physp_ptr(p_node, 0); + m_key = ib_port_info_get_m_key(&physp0->port_info); + } else + m_key = ib_port_info_get_m_key(&p->port_info); + + switch (attr_id){ + case IB_MAD_ATTR_SLVL_TABLE: + context.slvl_context.node_guid = osm_node_get_node_guid(p_node); + context.slvl_context.port_guid = osm_physp_get_port_guid(p); + context.slvl_context.set_method = TRUE; + if ((attr_mod & 0x30000) != 0) /* optimized ? 
*/ + timeout = sm->p_subn->opt.long_transaction_timeout; + break; + case IB_MAD_ATTR_VL_ARBITRATION: + context.vla_context.node_guid = osm_node_get_node_guid(p_node); + context.vla_context.port_guid = osm_physp_get_port_guid(p); + context.vla_context.set_method = TRUE; + break; + default: + return NULL; + } + + p_mad = (qos_mad_item_t *) malloc(sizeof(*p_mad)); + if (!p_mad) + return NULL; + + memset(p_mad, 0, sizeof(*p_mad)); + + p_madw = osm_prepare_req_set(sm, osm_physp_get_dr_path_ptr(p), + p_data, data_size, + attr_id, cl_hton32(attr_mod), + FALSE, m_key, + timeout, CL_DISP_MSGID_NONE, &context); + + if (p_madw == NULL) { + free(p_mad); + return NULL; + } + p_mad->p_madw = p_madw; + return p_mad; +} + +static void osm_qos_mad_delete(qos_mad_item_t ** p_item) +{ + free(*p_item); + *p_item = NULL; +} + +static ib_api_status_t vlarb_update_table_block(osm_sm_t * sm, + osm_physp_t * p, + uint8_t port_num, + unsigned force_update, + const ib_vl_arb_table_t * + table_block, + unsigned block_length, + unsigned block_num, + cl_qlist_t *mad_list) +{ + struct osm_routing_engine *re = sm->p_subn->p_osm->routing_engine_used; + ib_vl_arb_table_t block; + uint32_t attr_mod; + unsigned vl_mask, i; + qos_mad_item_t *p_mad; + vl_mask = (1 << (ib_port_info_get_op_vls(&p->port_info) - 1)) - 1; + + memset(&block, 0, sizeof(block)); + memcpy(&block, table_block, block_length * sizeof(block.vl_entry[0])); + + if (re && re->update_vlarb) + re->update_vlarb(re->context, p, port_num, &block, + block_length, block_num); + + for (i = 0; i < block_length; i++) + block.vl_entry[i].vl &= vl_mask; + + if (!force_update && + !memcmp(&p->vl_arb[block_num], &block, + block_length * sizeof(block.vl_entry[0]))) + return IB_SUCCESS; + + attr_mod = ((block_num + 1) << 16) | port_num; + + p_mad = osm_qos_mad_create(sm,p,sizeof(block),(uint8_t *) & block, + IB_MAD_ATTR_VL_ARBITRATION, attr_mod); + + if (!p_mad) + return IB_INSUFFICIENT_MEMORY; + + /* + * Zero the stored VL Arbitration block, so in 
case the MAD will + * end up with error, we will resend it in the next sweep. + */ + memset(&p->vl_arb[block_num], 0, + block_length * sizeof(block.vl_entry[0])); + + cl_qlist_insert_tail(mad_list, &p_mad->list_item); + + return IB_SUCCESS; +} + +static ib_api_status_t vlarb_update(osm_sm_t * sm, osm_physp_t * p, + uint8_t port_num, unsigned force_update, + const struct qos_config *qcfg, + cl_qlist_t *mad_list) +{ + ib_api_status_t status = IB_SUCCESS; + ib_port_info_t *p_pi = &p->port_info; + unsigned len; + + if (ib_port_info_get_vl_cap(p_pi) == 1) { /* no VLArb if 1 data VL */ + p->vl_high_limit = 0; + return status; + } + if (p_pi->vl_arb_low_cap > 0) { + len = p_pi->vl_arb_low_cap < IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK ? + p_pi->vl_arb_low_cap : IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK; + if ((status = vlarb_update_table_block(sm, p, port_num, + force_update, + &qcfg->vlarb_low[0], + len, 0, + mad_list)) != IB_SUCCESS) + return status; + } + if (p_pi->vl_arb_low_cap > IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK) { + len = p_pi->vl_arb_low_cap % IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK; + if ((status = vlarb_update_table_block(sm, p, port_num, + force_update, + &qcfg->vlarb_low[1], + len, 1, + mad_list)) != IB_SUCCESS) + return status; + } + if (p_pi->vl_arb_high_cap > 0) { + len = p_pi->vl_arb_high_cap < IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK ? 
+ p_pi->vl_arb_high_cap : IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK; + if ((status = vlarb_update_table_block(sm, p, port_num, + force_update, + &qcfg->vlarb_high[0], + len, 2, + mad_list)) != IB_SUCCESS) + return status; + } + if (p_pi->vl_arb_high_cap > IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK) { + len = p_pi->vl_arb_high_cap % IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK; + if ((status = vlarb_update_table_block(sm, p, port_num, + force_update, + &qcfg->vlarb_high[1], + len, 3, + mad_list)) != IB_SUCCESS) + return status; + } + + return status; +} + +static ib_api_status_t sl2vl_update_table(osm_sm_t * sm, osm_physp_t * p, + uint8_t in_port, uint32_t attr_mod, + unsigned force_update, + const ib_slvl_table_t * sl2vl_table, + cl_qlist_t *mad_list) +{ + ib_slvl_table_t tbl, *p_tbl; + unsigned vl_mask; + uint8_t vl1, vl2; + int i; + qos_mad_item_t *p_mad; + + vl_mask = (1 << (ib_port_info_get_op_vls(&p->port_info) - 1)) - 1; + + for (i = 0; i < IB_MAX_NUM_VLS / 2; i++) { + vl1 = sl2vl_table->raw_vl_by_sl[i] >> 4; + vl2 = sl2vl_table->raw_vl_by_sl[i] & 0xf; + if (vl1 != 15) + vl1 &= vl_mask; + if (vl2 != 15) + vl2 &= vl_mask; + tbl.raw_vl_by_sl[i] = (vl1 << 4) | vl2; + } + + p_tbl = osm_physp_get_slvl_tbl(p, in_port); + + if (!force_update && !memcmp(p_tbl, &tbl, sizeof(tbl))) + return IB_SUCCESS; + + p_mad = osm_qos_mad_create(sm, p, sizeof(tbl), (uint8_t *) & tbl, + IB_MAD_ATTR_SLVL_TABLE, attr_mod); + if (!p_mad) + return IB_INSUFFICIENT_MEMORY; + + /* + * Zero the stored SL2VL block, so in case the MAD will + * end up with error, we will resend it in the next sweep. 
+ */ + memset(p_tbl, 0, sizeof(tbl)); + + cl_qlist_insert_tail(mad_list, &p_mad->list_item); + return IB_SUCCESS; +} + +static int qos_extports_setup(osm_sm_t * sm, osm_node_t *node, + const struct qos_config *qcfg, + cl_qlist_t *port_mad_list) + +{ + osm_physp_t *p0, *p; + unsigned force_update; + unsigned num_ports = osm_node_get_num_physp(node); + struct osm_routing_engine *re = sm->p_subn->p_osm->routing_engine_used; + int ret = 0; + unsigned in, out; + uint8_t op_vl, common_op_vl = 0, max_num = 0; + uint8_t op_vl_arr[15]; + + /* + * Do nothing unless the most recent routing attempt was successful. + */ + if (!re) + return ret; + + for (out = 1; out < num_ports; out++) { + p = osm_node_get_physp_ptr(node, out); + if (!p) + continue; + if (ib_port_info_get_port_state(&p->port_info) == IB_LINK_DOWN) + continue; + force_update = p->need_update || sm->p_subn->need_update; + p->vl_high_limit = qcfg->vl_high_limit; + if (vlarb_update(sm, p, p->port_num, force_update, qcfg, + port_mad_list)) + ret = -1; + } + + p0 = osm_node_get_physp_ptr(node, 0); + if (!(p0->port_info.capability_mask & IB_PORT_CAP_HAS_SL_MAP)) + return ret; + + if (ib_switch_info_get_opt_sl2vlmapping(&node->sw->switch_info) && + sm->p_subn->opt.use_optimized_slvl && !re->update_sl2vl) { + + /* we should find the op_vl that is used by majority of ports */ + memset(&op_vl_arr[0], 0, sizeof(op_vl_arr)); + p0 = osm_node_get_physp_ptr(node, 1); + + for (out = 1; out < num_ports; out++) { + p = osm_node_get_physp_ptr(node, out); + if (!p) + continue; + if (ib_port_info_get_port_state(&p->port_info) == + IB_LINK_DOWN) + continue; + op_vl = ib_port_info_get_op_vls(&p->port_info); + op_vl_arr[op_vl]++; + if (op_vl_arr[op_vl] > max_num){ + max_num = op_vl_arr[op_vl]; + common_op_vl = op_vl; + /* remember the port with most common op_vl */ + p0 = p; + } + + } + if (!p0) + return -1; + force_update = node->sw->need_update || sm->p_subn->need_update; + if (sl2vl_update_table(sm, p0, p0->port_num, 0x30000, 
force_update, + &qcfg->sl2vl, port_mad_list)) + ret = -1; + /* + * Overwrite default ALL configuration if port's + * op_vl is different. + */ + for (out = 1; out < num_ports; out++) { + p = osm_node_get_physp_ptr(node, out); + if (!p) + continue; + if (ib_port_info_get_port_state(&p->port_info) == + IB_LINK_DOWN) + continue; + + force_update = p->need_update || force_update; + if (ib_port_info_get_op_vls(&p->port_info) != + common_op_vl && + sl2vl_update_table(sm, p, p->port_num, 0x20000 | out, + force_update, &qcfg->sl2vl, + port_mad_list)) + ret = -1; + } + return ret; + } + + /* non optimized sl2vl configuration */ + out = ib_switch_info_is_enhanced_port0(&node->sw->switch_info) ? 0 : 1; + for (; out < num_ports; out++) { + p = osm_node_get_physp_ptr(node, out); + if (!p) + continue; + if (ib_port_info_get_port_state(&p->port_info) == IB_LINK_DOWN) + continue; + force_update = p->need_update || sm->p_subn->need_update; + /* go over all in ports */ + for (in = 0; in < num_ports; in++) { + const ib_slvl_table_t *port_sl2vl = &qcfg->sl2vl; + ib_slvl_table_t routing_sl2vl; + + if (re->update_sl2vl) { + routing_sl2vl = *port_sl2vl; + re->update_sl2vl(re->context, + p, in, out, &routing_sl2vl); + port_sl2vl = &routing_sl2vl; + } + if (sl2vl_update_table(sm, p, in, in << 8 | out, + force_update, port_sl2vl, + port_mad_list)) + ret = -1; + } + } + + return ret; +} + +static int qos_endport_setup(osm_sm_t * sm, osm_physp_t * p, + const struct qos_config *qcfg, int vlarb_only, + cl_qlist_t *port_mad_list) +{ + unsigned force_update = p->need_update || sm->p_subn->need_update; + struct osm_routing_engine *re = sm->p_subn->p_osm->routing_engine_used; + const ib_slvl_table_t *port_sl2vl = &qcfg->sl2vl; + ib_slvl_table_t routing_sl2vl; + + p->vl_high_limit = qcfg->vl_high_limit; + if (vlarb_update(sm, p, 0, force_update, qcfg, port_mad_list)) + return -1; + if (vlarb_only) + return 0; + + if (!(p->port_info.capability_mask & IB_PORT_CAP_HAS_SL_MAP)) + return 0; + + if (re && 
re->update_sl2vl) { + routing_sl2vl = *port_sl2vl; + re->update_sl2vl(re->context, p, 0, 0, &routing_sl2vl); + port_sl2vl = &routing_sl2vl; + } + if (sl2vl_update_table(sm, p, 0, 0, force_update, port_sl2vl, + port_mad_list)) + return -1; + + return 0; +} + +/* Queue the QoS setup MADs for every port in the subnet, then send them. + * Returns 0 on success or when QoS is disabled, -1 if any step failed. */ +int osm_qos_setup(osm_opensm_t * p_osm) +{ + struct qos_config ca_config, sw0_config, swe_config, rtr_config; + struct qos_config *cfg; + cl_qmap_t *p_tbl; + cl_map_item_t *p_next; + osm_port_t *p_port; + osm_node_t *p_node; + int ret = 0; + int vlarb_only; + qos_mad_list_t *p_list, *p_list_next; + qos_mad_item_t *p_port_mad; + cl_qlist_t qos_mad_list; + + if (!p_osm->subn.opt.qos) + return 0; + + OSM_LOG_ENTER(&p_osm->log); + + qos_build_config(&ca_config, &p_osm->subn.opt.qos_ca_options, + &p_osm->subn.opt.qos_options); + qos_build_config(&sw0_config, &p_osm->subn.opt.qos_sw0_options, + &p_osm->subn.opt.qos_options); + qos_build_config(&swe_config, &p_osm->subn.opt.qos_swe_options, + &p_osm->subn.opt.qos_options); + qos_build_config(&rtr_config, &p_osm->subn.opt.qos_rtr_options, + &p_osm->subn.opt.qos_options); + + cl_qlist_init(&qos_mad_list); + + cl_plock_excl_acquire(&p_osm->lock); + + /* read QoS policy config file */ + osm_qos_parse_policy_file(&p_osm->subn); + p_tbl = &p_osm->subn.port_guid_tbl; + p_next = cl_qmap_head(p_tbl); + while (p_next != cl_qmap_end(p_tbl)) { + vlarb_only = 0; + p_port = (osm_port_t *) p_next; + p_next = cl_qmap_next(p_next); + + p_list = (qos_mad_list_t *) calloc(1, sizeof(*p_list)); + if (!p_list) { + ret = -1; /* lock is still held: released once, after the send loop */ + break; /* send and free the lists queued so far, then exit */ + } + + cl_qlist_init(&p_list->port_mad_list); + + p_node = p_port->p_node; + if (p_node->sw) { + if (qos_extports_setup(&p_osm->sm, p_node, &swe_config, + &p_list->port_mad_list)) + ret = -1; /* keep the lock: the loop goes on to the other ports */ + + /* skip base port 0 */ + if (!ib_switch_info_is_enhanced_port0 + (&p_node->sw->switch_info)) + goto Continue; + + if
(ib_switch_info_get_opt_sl2vlmapping(&p_node->sw->switch_info) && + p_osm->sm.p_subn->opt.use_optimized_slvl && + !memcmp(&swe_config.sl2vl, &sw0_config.sl2vl, + sizeof(swe_config.sl2vl))) + vlarb_only = 1; + + cfg = &sw0_config; + } else if (osm_node_get_type(p_node) == IB_NODE_TYPE_ROUTER) + cfg = &rtr_config; + else + cfg = &ca_config; + + if (qos_endport_setup(&p_osm->sm, p_port->p_physp, cfg, + vlarb_only, &p_list->port_mad_list)) + ret = -1; /* keep the lock: it is released once, after the send loop */ +Continue: + /* if MAD list is not empty, add it to the global MAD list */ + if (cl_qlist_count(&p_list->port_mad_list)) { + cl_qlist_insert_tail(&qos_mad_list, &p_list->list_item); + } else { + free(p_list); + } + } + while (cl_qlist_count(&qos_mad_list)) { + p_list_next = (qos_mad_list_t *) cl_qlist_head(&qos_mad_list); + while (p_list_next != + (qos_mad_list_t *) cl_qlist_end(&qos_mad_list)) { + p_list = p_list_next; + p_list_next = (qos_mad_list_t *) + cl_qlist_next(&p_list->list_item); + /* next MAD to send*/ + p_port_mad = (qos_mad_item_t *) + cl_qlist_remove_head(&p_list->port_mad_list); + osm_send_req_mad(&p_osm->sm, p_port_mad->p_madw); + osm_qos_mad_delete(&p_port_mad); + /* remove the QoS MAD from global MAD list */ + if (cl_qlist_count(&p_list->port_mad_list) == 0) { + cl_qlist_remove_item(&qos_mad_list, &p_list->list_item); + free(p_list); + } + } + } + + cl_plock_release(&p_osm->lock); + OSM_LOG_EXIT(&p_osm->log); + + return ret; +} + +/* + * QoS config stuff + */ +/* Parse a number into *val; returns chars consumed incl. one trailing char. */ +static int parse_one_unsigned(const char *str, char delim, unsigned *val) +{ + char *end; + *val = strtoul(str, &end, 0); + if (*end) /* step over the separator; delim itself is never compared */ + end++; + return (int)(end - str); +} + +/* Parse "vl:weight" into e, vl taken modulo 15; returns chars consumed. */ +static int parse_vlarb_entry(const char *str, ib_vl_arb_element_t * e) +{ + unsigned val; + const char *p = str; + p += parse_one_unsigned(p, ':', &val); + e->vl = val % 15; + p += parse_one_unsigned(p, ',', &val); + e->weight = (uint8_t) val; + return (int)(p - str); +} + +/* Parse two numbers into *raw, first in the high nibble; returns chars consumed. */ +static int parse_sl2vl_entry(const char *str, uint8_t * raw) 
+{ + unsigned val1, val2; + const char *p = str; + p += parse_one_unsigned(p, ',', &val1); + p += parse_one_unsigned(p, ',', &val2); + *raw = (val1 << 4) | (val2 & 0xf); + return (int)(p - str); +} + +/* Fill cfg from opt; a field left unset in opt falls back to dflt, then to + * the matching OSM_DEFAULT_QOS_* value. Tables are parsed from strings. */ +static void qos_build_config(struct qos_config *cfg, osm_qos_options_t * opt, + osm_qos_options_t * dflt) +{ + int i; + const char *p; + + memset(cfg, 0, sizeof(*cfg)); + + if (opt->max_vls > 0) + cfg->max_vls = opt->max_vls; + else { + if (dflt->max_vls > 0) + cfg->max_vls = dflt->max_vls; + else + cfg->max_vls = OSM_DEFAULT_QOS_MAX_VLS; + } + + if (opt->high_limit >= 0) + cfg->vl_high_limit = (uint8_t) opt->high_limit; + else { + if (dflt->high_limit >= 0) + cfg->vl_high_limit = (uint8_t) dflt->high_limit; + else + cfg->vl_high_limit = (uint8_t) OSM_DEFAULT_QOS_HIGH_LIMIT; + } + + if (opt->vlarb_high) + p = opt->vlarb_high; + else { + if (dflt->vlarb_high) + p = dflt->vlarb_high; + else + p = OSM_DEFAULT_QOS_VLARB_HIGH; + } + for (i = 0; i < 2 * IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK; i++) { + p += parse_vlarb_entry(p, + &cfg->vlarb_high[i / + IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK]. + vl_entry[i % + IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK]); + } + + if (opt->vlarb_low) + p = opt->vlarb_low; + else { + if (dflt->vlarb_low) + p = dflt->vlarb_low; + else + p = OSM_DEFAULT_QOS_VLARB_LOW; + } + for (i = 0; i < 2 * IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK; i++) { + p += parse_vlarb_entry(p, + &cfg->vlarb_low[i / + IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK]. + vl_entry[i % + IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK]); + } + + if (opt->sl2vl) + p = opt->sl2vl; + else { + if (dflt->sl2vl) + p = dflt->sl2vl; + else + p = OSM_DEFAULT_QOS_SL2VL; + } + for (i = 0; i < IB_MAX_NUM_VLS / 2; i++) + p += parse_sl2vl_entry(p, &cfg->sl2vl.raw_vl_by_sl[i]); +} diff --git a/opensm/osm_qos_parser_l.l b/opensm/osm_qos_parser_l.l new file mode 100644 index 0000000..6054397 --- /dev/null +++ b/opensm/osm_qos_parser_l.l @@ -0,0 +1,404 @@ +%{ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. 
All rights reserved. + * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Lexer of OSM QoS parser. 
+ * + * Environment: + * Linux User Mode + * + * Author: + * Yevgeny Kliteynik, Mellanox + */ + +#include /* NOTE(review): the <header> operand of this and the next bare #include is missing, apparently stripped as markup; presumably <opensm/osm_file_ids.h> here and <opensm/osm_qos_policy.h> below - confirm against the original file */ +#define FILE_ID OSM_FILE_QOS_PARSER_L_L +#include +#include "osm_qos_parser_y.h" + +#define HANDLE_IF_IN_DESCRIPTION if (in_description) { yylval = strdup(yytext); return TK_TEXT; } /* while in_description is set, hand the matched text back as TK_TEXT */ + +#define SAVE_POS save_pos() +static void save_pos(); + +extern int column_num; +extern int line_num; +extern FILE * yyin; +extern YYSTYPE yylval; + +boolean_t in_description = FALSE; +boolean_t in_list_of_hex_num_ranges = FALSE; +boolean_t in_node_type = FALSE; +boolean_t in_list_of_numbers = FALSE; +boolean_t in_list_of_strings = FALSE; +boolean_t in_list_of_num_pairs = FALSE; +boolean_t in_asterisk_or_list_of_numbers = FALSE; +boolean_t in_list_of_num_ranges = FALSE; +boolean_t in_single_string = FALSE; +boolean_t in_single_number = FALSE; + +static void reset_new_line_flags(); /* sets every in_* flag above back to FALSE */ +#define RESET_NEW_LINE_FLAGS reset_new_line_flags() + +#define START_USE {in_description = TRUE;} /* list of strings including whitespace (description) */ +#define START_PORT_GUID {in_list_of_hex_num_ranges = TRUE;} /* comma-separated list of hex num ranges */ +#define START_PORT_NAME {in_list_of_strings = TRUE;} /* comma-separated list of following strings: ../../.. */ +#define START_PARTITION {in_single_string = TRUE;} /* single string w/o whitespaces (partition name) */ +#define START_NAME {in_single_string = TRUE;} /* single string w/o whitespaces (port group name) */ +#define START_QOS_LEVEL_NAME {in_single_string = TRUE;} /* single string w/o whitespaces (qos level name in match rule) */ + +#define START_NODE_TYPE {in_node_type = TRUE;} /* comma-separated list of node types (ROUTER,CA,...) 
*/ +#define START_SL2VL_TABLE {in_list_of_numbers = TRUE;} /* comma-separated list of hex or dec numbers */ + +#define START_GROUP {in_list_of_strings = TRUE;} /* list of strings w/o whitespaces (group names) */ +#define START_ACROSS {in_list_of_strings = TRUE;} /* list of strings w/o whitespaces (group names) */ +#define START_ACROSS_TO {in_list_of_strings = TRUE;} /* list of strings w/o whitespaces (group names) */ +#define START_ACROSS_FROM {in_list_of_strings = TRUE;} /* list of strings w/o whitespaces (group names) */ +#define START_SOURCE {in_list_of_strings = TRUE;} /* list of strings w/o whitespaces (group names) */ +#define START_DESTINATION {in_list_of_strings = TRUE;} /* list of strings w/o whitespaces (group names) */ + +#define START_VLARB_HIGH {in_list_of_num_pairs = TRUE;} /* comma-separated list of hex or dec num pairs: "num1:num2" */ +#define START_VLARB_LOW {in_list_of_num_pairs = TRUE;} /* comma-separated list of hex or dec num pairs: "num1:num2" */ + +#define START_TO {in_asterisk_or_list_of_numbers = TRUE;} /* (asterisk) or (comma-separated list of hex or dec numbers) */ +#define START_FROM {in_asterisk_or_list_of_numbers = TRUE;} /* (asterisk) or (comma-separated list of hex or dec numbers) */ + +#define START_PATH_BITS {in_list_of_num_ranges = TRUE;} /* comma-separated list of hex or dec num ranges */ +#define START_QOS_CLASS {in_list_of_num_ranges = TRUE;} /* comma-separated list of hex or dec num ranges */ +#define START_SERVICE_ID {in_list_of_num_ranges = TRUE;} /* comma-separated list of hex or dec num ranges */ +#define START_PKEY {in_list_of_num_ranges = TRUE;} /* comma-separated list of hex or dec num ranges */ + +#define START_SL {in_single_number = TRUE;} /* single number */ +#define START_VLARB_HIGH_LIMIT {in_single_number = TRUE;} /* single number */ +#define START_MTU_LIMIT {in_single_number = TRUE;} /* single number */ +#define START_RATE_LIMIT {in_single_number = TRUE;} /* single number */ +#define START_PACKET_LIFE 
{in_single_number = TRUE;} /* single number */ + +#define START_ULP_DEFAULT {in_single_number = TRUE;} /* single number */ +#define START_ULP_ANY {in_list_of_num_ranges = TRUE;} /* comma-separated list of hex or dec num ranges */ +#define START_ULP_SDP_DEFAULT {in_single_number = TRUE;} /* single number */ +#define START_ULP_SDP_PORT {in_list_of_num_ranges = TRUE;} /* comma-separated list of hex or dec num ranges */ +#define START_ULP_RDS_DEFAULT {in_single_number = TRUE;} /* single number */ +#define START_ULP_RDS_PORT {in_list_of_num_ranges = TRUE;} /* comma-separated list of hex or dec num ranges */ +#define START_ULP_ISER_DEFAULT {in_single_number = TRUE;} /* single number */ +#define START_ULP_ISER_PORT {in_list_of_num_ranges = TRUE;} /* comma-separated list of hex or dec num ranges */ +#define START_ULP_SRP_GUID {in_list_of_num_ranges = TRUE;} /* comma-separated list of hex or dec num ranges */ +#define START_ULP_IPOIB_DEFAULT {in_single_number = TRUE;} /* single number */ +#define START_ULP_IPOIB_PKEY {in_list_of_num_ranges = TRUE;} /* comma-separated list of hex or dec num ranges */ + + +%} + +%option nounput noinput + +QOS_ULPS_START qos\-ulps +QOS_ULPS_END end\-qos\-ulps +PORT_GROUPS_START port\-groups +PORT_GROUPS_END end\-port\-groups +PORT_GROUP_START port\-group +PORT_GROUP_END end\-port\-group +PORT_NUM port\-num +NAME name +USE use +PORT_GUID port\-guid +TARGET_PORT_GUID target\-port\-guid +SOURCE_PORT_GUID source\-port\-guid +SOURCE_TARGET_PORT_GUID source\-target\-port\-guid +PORT_NAME port\-name +PARTITION partition +NODE_TYPE node\-type +QOS_SETUP_START qos\-setup +QOS_SETUP_END end\-qos\-setup +VLARB_TABLES_START vlarb\-tables +VLARB_TABLES_END end\-vlarb\-tables +VLARB_SCOPE_START vlarb\-scope +VLARB_SCOPE_END end\-vlarb\-scope +GROUP group +ACROSS across +VLARB_HIGH vlarb\-high +VLARB_LOW vlarb\-low +VLARB_HIGH_LIMIT vl\-high\-limit +SL2VL_TABLES_START sl2vl\-tables +SL2VL_TABLES_END end\-sl2vl\-tables +SL2VL_SCOPE_START sl2vl\-scope 
+SL2VL_SCOPE_END end\-sl2vl\-scope +TO to +FROM from +ACROSS_TO across\-to +ACROSS_FROM across\-from +SL2VL_TABLE sl2vl\-table +QOS_LEVELS_START qos\-levels +QOS_LEVELS_END end\-qos\-levels +QOS_LEVEL_START qos\-level +QOS_LEVEL_END end\-qos\-level +SL sl +MTU_LIMIT mtu\-limit +RATE_LIMIT rate\-limit +PACKET_LIFE packet\-life +PATH_BITS path\-bits +QOS_MATCH_RULES_START qos\-match\-rules +QOS_MATCH_RULES_END end\-qos\-match\-rules +QOS_MATCH_RULE_START qos\-match\-rule +QOS_MATCH_RULE_END end\-qos\-match\-rule +QOS_CLASS qos\-class +SOURCE source +DESTINATION destination +SERVICE_ID service\-id +PKEY pkey +QOS_LEVEL_NAME qos\-level\-name + +ROUTER [Rr][Oo][Uu][Tt][Ee][Rr] +CA [Cc][Aa] +SWITCH [Ss][Ww][Ii][Tt][Cc][Hh] +SELF [Ss][Ee][Ll][Ff] +ALL [Aa][Ll][Ll] + +ULP_SDP [Ss][Dd][Pp] +ULP_SRP [Ss][Rr][Pp] +ULP_RDS [Rr][Dd][Ss] +ULP_IPOIB [Ii][Pp][Oo][Ii][Bb] +ULP_ISER [Ii][Ss][Ee][Rr] +ULP_ANY [Aa][Nn][Yy] +ULP_DEFAULT [Dd][Ee][Ff][Aa][Uu][Ll][Tt] + +WHITE [ \t]+ +NEW_LINE \n +COMMENT \#.*\n +WHITE_DOTDOT_WHITE [ \t]*:[ \t]* +WHITE_COMMA_WHITE [ \t]*,[ \t]* +QUOTED_TEXT \"[^\"]*\" + +%% + + +{COMMENT} { SAVE_POS; RESET_NEW_LINE_FLAGS; } /* swallow comment */ +{WHITE}{NEW_LINE} { SAVE_POS; RESET_NEW_LINE_FLAGS; } /* trailing blanks with new line */ +{WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; } +{NEW_LINE} { SAVE_POS; RESET_NEW_LINE_FLAGS; } + +{QOS_ULPS_START} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_QOS_ULPS_START; } +{QOS_ULPS_END} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_QOS_ULPS_END; } + +{PORT_GROUPS_START} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_PORT_GROUPS_START; } +{PORT_GROUPS_END} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_PORT_GROUPS_END; } +{PORT_GROUP_START} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_PORT_GROUP_START; } +{PORT_GROUP_END} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_PORT_GROUP_END; } + +{QOS_SETUP_START} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_QOS_SETUP_START; } +{QOS_SETUP_END} { SAVE_POS; 
HANDLE_IF_IN_DESCRIPTION; return TK_QOS_SETUP_END; } +{VLARB_TABLES_START} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_VLARB_TABLES_START; } +{VLARB_TABLES_END} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_VLARB_TABLES_END; } +{VLARB_SCOPE_START} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_VLARB_SCOPE_START; } +{VLARB_SCOPE_END} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_VLARB_SCOPE_END; } + +{SL2VL_TABLES_START} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_SL2VL_TABLES_START; } +{SL2VL_TABLES_END} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_SL2VL_TABLES_END; } +{SL2VL_SCOPE_START} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_SL2VL_SCOPE_START; } +{SL2VL_SCOPE_END} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_SL2VL_SCOPE_END; } + +{QOS_LEVELS_START} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_QOS_LEVELS_START; } +{QOS_LEVELS_END} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_QOS_LEVELS_END; } +{QOS_LEVEL_START} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_QOS_LEVEL_START; } +{QOS_LEVEL_END} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_QOS_LEVEL_END; } + +{QOS_MATCH_RULES_START} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_QOS_MATCH_RULES_START; } +{QOS_MATCH_RULES_END} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_QOS_MATCH_RULES_END; } +{QOS_MATCH_RULE_START} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_QOS_MATCH_RULE_START; } +{QOS_MATCH_RULE_END} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_QOS_MATCH_RULE_END; } + +{PORT_GUID}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_PORT_GUID; return TK_PORT_GUID; } +{PORT_NAME}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_PORT_NAME; return TK_PORT_NAME; } +{PARTITION}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_PARTITION; return TK_PARTITION; } +{NODE_TYPE}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_NODE_TYPE; return TK_NODE_TYPE; } +{NAME}{WHITE_DOTDOT_WHITE} { SAVE_POS; 
HANDLE_IF_IN_DESCRIPTION; START_NAME; return TK_NAME; } +{USE}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_USE; return TK_USE; } +{GROUP}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_GROUP; return TK_GROUP; } +{VLARB_HIGH}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_VLARB_HIGH; return TK_VLARB_HIGH; } +{VLARB_LOW}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_VLARB_LOW; return TK_VLARB_LOW; } +{VLARB_HIGH_LIMIT}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_VLARB_HIGH_LIMIT; return TK_VLARB_HIGH_LIMIT;} +{TO}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_TO; return TK_TO; } +{FROM}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_FROM; return TK_FROM; } +{ACROSS_TO}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ACROSS_TO; return TK_ACROSS_TO; } +{ACROSS_FROM}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ACROSS_FROM; return TK_ACROSS_FROM;} +{ACROSS}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ACROSS; return TK_ACROSS; } +{SL2VL_TABLE}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_SL2VL_TABLE; return TK_SL2VL_TABLE;} +{SL}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_SL; return TK_SL; } +{MTU_LIMIT}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_MTU_LIMIT; return TK_MTU_LIMIT; } +{RATE_LIMIT}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_RATE_LIMIT; return TK_RATE_LIMIT; } +{PACKET_LIFE}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_PACKET_LIFE; return TK_PACKET_LIFE;} +{PATH_BITS}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_PATH_BITS; return TK_PATH_BITS; } +{QOS_CLASS}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_QOS_CLASS; return TK_QOS_CLASS; } +{SOURCE}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_SOURCE; return TK_SOURCE; } 
+{DESTINATION}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_DESTINATION; return TK_DESTINATION;} +{SERVICE_ID}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_SERVICE_ID; return TK_SERVICE_ID; } +{PKEY}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_PKEY; return TK_PKEY; } +{QOS_LEVEL_NAME}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_QOS_LEVEL_NAME; return TK_QOS_LEVEL_NAME;} + +{ROUTER} { SAVE_POS; if (in_node_type) return TK_NODE_TYPE_ROUTER; yylval = strdup(yytext); return TK_TEXT; } +{CA} { SAVE_POS; if (in_node_type) return TK_NODE_TYPE_CA; yylval = strdup(yytext); return TK_TEXT; } +{SWITCH} { SAVE_POS; if (in_node_type) return TK_NODE_TYPE_SWITCH; yylval = strdup(yytext); return TK_TEXT; } +{SELF} { SAVE_POS; if (in_node_type) return TK_NODE_TYPE_SELF; yylval = strdup(yytext); return TK_TEXT; } +{ALL} { SAVE_POS; if (in_node_type) return TK_NODE_TYPE_ALL; yylval = strdup(yytext); return TK_TEXT; } + +{ULP_DEFAULT}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_DEFAULT; return TK_ULP_DEFAULT; } +{ULP_ANY}{WHITE_COMMA_WHITE}{SERVICE_ID} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_ANY; return TK_ULP_ANY_SERVICE_ID; } +{ULP_ANY}{WHITE_COMMA_WHITE}{PKEY} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_ANY; return TK_ULP_ANY_PKEY; } +{ULP_ANY}{WHITE_COMMA_WHITE}{TARGET_PORT_GUID} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_ANY; return TK_ULP_ANY_TARGET_PORT_GUID; } +{ULP_ANY}{WHITE_COMMA_WHITE}{SOURCE_PORT_GUID} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_ANY; return TK_ULP_ANY_SOURCE_PORT_GUID; } +{ULP_ANY}{WHITE_COMMA_WHITE}{SOURCE_TARGET_PORT_GUID} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_ANY; return TK_ULP_ANY_SOURCE_TARGET_PORT_GUID; } + +{ULP_SDP}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_SDP_DEFAULT; return TK_ULP_SDP_DEFAULT; } +{ULP_SDP}{WHITE_COMMA_WHITE}{PORT_NUM} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; 
START_ULP_SDP_PORT; return TK_ULP_SDP_PORT; } + +{ULP_RDS}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_RDS_DEFAULT; return TK_ULP_RDS_DEFAULT; } +{ULP_RDS}{WHITE_COMMA_WHITE}{PORT_NUM} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_RDS_PORT; return TK_ULP_RDS_PORT; } + +{ULP_ISER}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_ISER_DEFAULT; return TK_ULP_ISER_DEFAULT; } +{ULP_ISER}{WHITE_COMMA_WHITE}{PORT_NUM} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_ISER_PORT; return TK_ULP_ISER_PORT; } + +{ULP_SRP}{WHITE_COMMA_WHITE}{TARGET_PORT_GUID} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_SRP_GUID; return TK_ULP_SRP_GUID; } + +{ULP_IPOIB}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_IPOIB_DEFAULT; return TK_ULP_IPOIB_DEFAULT; } +{ULP_IPOIB}{WHITE_COMMA_WHITE}{PKEY} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_IPOIB_PKEY; return TK_ULP_IPOIB_PKEY; } + +0[xX][0-9a-fA-F]+ { + SAVE_POS; + yylval = strdup(yytext); + if (in_description || in_list_of_strings || in_single_string) + return TK_TEXT; + return TK_NUMBER; + } + +[0-9]+ { + SAVE_POS; + yylval = strdup(yytext); + if (in_description || in_list_of_strings || in_single_string) + return TK_TEXT; + return TK_NUMBER; + } + + +- { + SAVE_POS; + if (in_description || in_list_of_strings || in_single_string) + { + yylval = strdup(yytext); + return TK_TEXT; + } + return TK_DASH; + } + +: { + SAVE_POS; + if (in_description || in_list_of_strings || in_single_string) + { + yylval = strdup(yytext); + return TK_TEXT; + } + return TK_DOTDOT; + } + +, { + SAVE_POS; + if (in_description) + { + yylval = strdup(yytext); + return TK_TEXT; + } + return TK_COMMA; + } + +\* { + SAVE_POS; + if (in_description || in_list_of_strings || in_single_string) + { + yylval = strdup(yytext); + return TK_TEXT; + } + return TK_ASTERISK; + } + +{QUOTED_TEXT} { + SAVE_POS; + yylval = strdup(&yytext[1]); + yylval[strlen(yylval)-1] = '\0'; + return TK_TEXT; + } + +. 
{ SAVE_POS; yylval = strdup(yytext); return TK_TEXT;} + +<<EOF>> { + YY_NEW_FILE; + yyterminate(); + } +%% + + +/* Advance line_num and column_num over the text just matched (yytext): + * each newline bumps line_num and resets column_num to 1. */ + +static void save_pos() +{ + int i; + for (i = 0; i < yyleng; i++) + { + if (yytext[i] == '\n') + { + line_num ++; + column_num = 1; + } + else + column_num ++; + } +} + +/* Set every in_* context flag back to FALSE; the rules invoke this + * through RESET_NEW_LINE_FLAGS on a comment or a new line. */ + +static void reset_new_line_flags() +{ + in_description = FALSE; + in_list_of_hex_num_ranges = FALSE; + in_node_type = FALSE; + in_list_of_numbers = FALSE; + in_list_of_strings = FALSE; + in_list_of_num_pairs = FALSE; + in_asterisk_or_list_of_numbers = FALSE; + in_list_of_num_ranges = FALSE; + in_single_string = FALSE; + in_single_number = FALSE; +} diff --git a/opensm/osm_qos_parser_y.y b/opensm/osm_qos_parser_y.y new file mode 100644 index 0000000..b860242 --- /dev/null +++ b/opensm/osm_qos_parser_y.y @@ -0,0 +1,3179 @@ +%{ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2008 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Grammar of OSM QoS parser. + * + * Environment: + * Linux User Mode + * + * Author: + * Yevgeny Kliteynik, Mellanox + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_QOS_PARSER_Y_Y +#include +#include + +#define OSM_QOS_POLICY_MAX_LINE_LEN 1024*10 +#define OSM_QOS_POLICY_SL2VL_TABLE_LEN IB_MAX_NUM_VLS +#define OSM_QOS_POLICY_MAX_VL_NUM IB_MAX_NUM_VLS +#define OSM_QOS_POLICY_MAX_RATE IB_MAX_RATE +#define OSM_QOS_POLICY_MIN_RATE IB_MIN_RATE +#define OSM_QOS_POLICY_MAX_MTU IB_MAX_MTU +#define OSM_QOS_POLICY_MIN_MTU IB_MIN_MTU + +typedef struct tmp_parser_struct_t_ { + char str[OSM_QOS_POLICY_MAX_LINE_LEN]; + uint64_t num_pair[2]; + cl_list_t str_list; + cl_list_t num_list; + cl_list_t num_pair_list; +} tmp_parser_struct_t; + +static void __parser_tmp_struct_init(); +static void __parser_tmp_struct_reset(); +static void __parser_tmp_struct_destroy(); + +static char * __parser_strip_white(char * str); + +static void __parser_str2uint64(uint64_t * p_val, char * str); + +static void __parser_port_group_start(); +static int __parser_port_group_end(); + +static void __parser_sl2vl_scope_start(); +static int __parser_sl2vl_scope_end(); + +static void 
__parser_vlarb_scope_start(); +static int __parser_vlarb_scope_end(); + +static void __parser_qos_level_start(); +static int __parser_qos_level_end(); + +static void __parser_match_rule_start(); +static int __parser_match_rule_end(); + +static void __parser_ulp_match_rule_start(); +static int __parser_ulp_match_rule_end(); + +static void __pkey_rangelist2rangearr( + cl_list_t * p_list, + uint64_t ** * p_arr, + unsigned * p_arr_len); + +static void __rangelist2rangearr( + cl_list_t * p_list, + uint64_t ** * p_arr, + unsigned * p_arr_len); + +static void __merge_rangearr( + uint64_t ** range_arr_1, + unsigned range_len_1, + uint64_t ** range_arr_2, + unsigned range_len_2, + uint64_t ** * p_arr, + unsigned * p_arr_len ); + +static void __parser_add_port_to_port_map( + cl_qmap_t * p_map, + osm_physp_t * p_physp); + +static void __parser_add_guid_range_to_port_map( + cl_qmap_t * p_map, + uint64_t ** range_arr, + unsigned range_len); + +static void __parser_add_pkey_range_to_port_map( + cl_qmap_t * p_map, + uint64_t ** range_arr, + unsigned range_len); + +static void __parser_add_partition_list_to_port_map( + cl_qmap_t * p_map, + cl_list_t * p_list); + +static void __parser_add_map_to_port_map( + cl_qmap_t * p_dmap, + cl_map_t * p_smap); + +static int __validate_pkeys( + uint64_t ** range_arr, + unsigned range_len, + boolean_t is_ipoib); + +static void __setup_simple_qos_levels(); +static void __clear_simple_qos_levels(); +static void __setup_ulp_match_rules(); +static void __process_ulp_match_rules(); +static void yyerror(const char *format, ...); + +extern char * yytext; +extern int yylex (void); +extern FILE * yyin; +extern int errno; +extern void yyrestart(FILE *input_file); +int yyparse(); + +#define RESET_BUFFER __parser_tmp_struct_reset() + +tmp_parser_struct_t tmp_parser_struct; + +int column_num; +int line_num; + +osm_qos_policy_t * p_qos_policy = NULL; +osm_qos_port_group_t * p_current_port_group = NULL; +osm_qos_sl2vl_scope_t * p_current_sl2vl_scope = NULL; 
+osm_qos_vlarb_scope_t * p_current_vlarb_scope = NULL; +osm_qos_level_t * p_current_qos_level = NULL; +osm_qos_match_rule_t * p_current_qos_match_rule = NULL; +osm_log_t * p_qos_parser_osm_log; + +/* 16 Simple QoS Levels - one for each SL */ +static osm_qos_level_t osm_qos_policy_simple_qos_levels[16]; + +/* Default Simple QoS Level */ +osm_qos_level_t __default_simple_qos_level; + +/* + * List of match rules that will be generated by the + * qos-ulp section. These rules are concatenated to + * the end of the usual matching rules list at the + * end of parsing. + */ +static cl_list_t __ulp_match_rules; + +/***************************************************/ + +%} + +%token TK_NUMBER +%token TK_DASH +%token TK_DOTDOT +%token TK_COMMA +%token TK_ASTERISK +%token TK_TEXT + +%token TK_QOS_ULPS_START +%token TK_QOS_ULPS_END + +%token TK_PORT_GROUPS_START +%token TK_PORT_GROUPS_END +%token TK_PORT_GROUP_START +%token TK_PORT_GROUP_END + +%token TK_QOS_SETUP_START +%token TK_QOS_SETUP_END +%token TK_VLARB_TABLES_START +%token TK_VLARB_TABLES_END +%token TK_VLARB_SCOPE_START +%token TK_VLARB_SCOPE_END + +%token TK_SL2VL_TABLES_START +%token TK_SL2VL_TABLES_END +%token TK_SL2VL_SCOPE_START +%token TK_SL2VL_SCOPE_END + +%token TK_QOS_LEVELS_START +%token TK_QOS_LEVELS_END +%token TK_QOS_LEVEL_START +%token TK_QOS_LEVEL_END + +%token TK_QOS_MATCH_RULES_START +%token TK_QOS_MATCH_RULES_END +%token TK_QOS_MATCH_RULE_START +%token TK_QOS_MATCH_RULE_END + +%token TK_NAME +%token TK_USE +%token TK_PORT_GUID +%token TK_PORT_NAME +%token TK_PARTITION +%token TK_NODE_TYPE +%token TK_GROUP +%token TK_ACROSS +%token TK_VLARB_HIGH +%token TK_VLARB_LOW +%token TK_VLARB_HIGH_LIMIT +%token TK_TO +%token TK_FROM +%token TK_ACROSS_TO +%token TK_ACROSS_FROM +%token TK_SL2VL_TABLE +%token TK_SL +%token TK_MTU_LIMIT +%token TK_RATE_LIMIT +%token TK_PACKET_LIFE +%token TK_PATH_BITS +%token TK_QOS_CLASS +%token TK_SOURCE +%token TK_DESTINATION +%token TK_SERVICE_ID +%token TK_QOS_LEVEL_NAME 
+%token TK_PKEY + +%token TK_NODE_TYPE_ROUTER +%token TK_NODE_TYPE_CA +%token TK_NODE_TYPE_SWITCH +%token TK_NODE_TYPE_SELF +%token TK_NODE_TYPE_ALL + +%token TK_ULP_DEFAULT +%token TK_ULP_ANY_SERVICE_ID +%token TK_ULP_ANY_PKEY +%token TK_ULP_ANY_TARGET_PORT_GUID +%token TK_ULP_ANY_SOURCE_PORT_GUID +%token TK_ULP_ANY_SOURCE_TARGET_PORT_GUID +%token TK_ULP_SDP_DEFAULT +%token TK_ULP_SDP_PORT +%token TK_ULP_RDS_DEFAULT +%token TK_ULP_RDS_PORT +%token TK_ULP_ISER_DEFAULT +%token TK_ULP_ISER_PORT +%token TK_ULP_SRP_GUID +%token TK_ULP_IPOIB_DEFAULT +%token TK_ULP_IPOIB_PKEY + +%start head + +%% + +head: qos_policy_entries + ; + +qos_policy_entries: /* empty */ + | qos_policy_entries qos_policy_entry + ; + +qos_policy_entry: qos_ulps_section + | port_groups_section + | qos_setup_section + | qos_levels_section + | qos_match_rules_section + ; + + /* + * Parsing qos-ulps: + * ------------------- + * qos-ulps + * default : 0 #default SL + * sdp, port-num 30000 : 1 #SL for SDP when destination port is 30000 + * sdp, port-num 10000-20000 : 2 + * sdp : 0 #default SL for SDP + * srp, target-port-guid 0x1234 : 2 + * rds, port-num 25000 : 2 #SL for RDS when destination port is 25000 + * rds, : 0 #default SL for RDS + * iser, port-num 900 : 5 #SL for iSER where target port is 900 + * iser : 4 #default SL for iSER + * ipoib, pkey 0x0001 : 5 #SL for IPoIB on partition with pkey 0x0001 + * ipoib : 6 #default IPoIB partition - pkey=0x7FFF + * any, service-id 0x6234 : 2 + * any, pkey 0x0ABC : 3 + * any, target-port-guid 0x0ABC-0xFFFFF : 6 + * any, source-port-guid 0x1234 : 7 + * any, source-target-port-guid 0x5678 : 8 + * end-qos-ulps + */ + +qos_ulps_section: TK_QOS_ULPS_START qos_ulps TK_QOS_ULPS_END + ; + +qos_ulps: qos_ulp + | qos_ulps qos_ulp + ; + + /* + * Parsing port groups: + * ------------------- + * port-groups + * port-group + * name: Storage + * use: our SRP storage targets + * port-guid: 0x1000000000000001,0x1000000000000002 + * ... 
+ * port-name: vs1 HCA-1/P1 + * port-name: node_description/P2 + * ... + * pkey: 0x00FF-0x0FFF + * ... + * partition: Part1 + * ... + * node-type: ROUTER,CA,SWITCH,SELF,ALL + * ... + * end-port-group + * port-group + * ... + * end-port-group + * end-port-groups + */ + + +port_groups_section: TK_PORT_GROUPS_START port_groups TK_PORT_GROUPS_END + ; + +port_groups: port_group + | port_groups port_group + ; + +port_group: port_group_start port_group_entries port_group_end + ; + +port_group_start: TK_PORT_GROUP_START { + __parser_port_group_start(); + } + ; + +port_group_end: TK_PORT_GROUP_END { + if ( __parser_port_group_end() ) + return 1; + } + ; + +port_group_entries: /* empty */ + | port_group_entries port_group_entry + ; + +port_group_entry: port_group_name + | port_group_use + | port_group_port_guid + | port_group_port_name + | port_group_pkey + | port_group_partition + | port_group_node_type + ; + + + /* + * Parsing qos setup: + * ----------------- + * qos-setup + * vlarb-tables + * vlarb-scope + * ... + * end-vlarb-scope + * vlarb-scope + * ... + * end-vlarb-scope + * end-vlarb-tables + * sl2vl-tables + * sl2vl-scope + * ... + * end-sl2vl-scope + * sl2vl-scope + * ... 
+ * end-sl2vl-scope + * end-sl2vl-tables + * end-qos-setup + */ + +qos_setup_section: TK_QOS_SETUP_START qos_setup_items TK_QOS_SETUP_END + ; + +qos_setup_items: /* empty */ + | qos_setup_items vlarb_tables + | qos_setup_items sl2vl_tables + ; + + /* Parsing vlarb-tables */ + +vlarb_tables: TK_VLARB_TABLES_START vlarb_scope_items TK_VLARB_TABLES_END + ; + +vlarb_scope_items: /* empty */ + | vlarb_scope_items vlarb_scope + ; + +vlarb_scope: vlarb_scope_start vlarb_scope_entries vlarb_scope_end + ; + +vlarb_scope_start: TK_VLARB_SCOPE_START { + __parser_vlarb_scope_start(); + } + ; + +vlarb_scope_end: TK_VLARB_SCOPE_END { + if ( __parser_vlarb_scope_end() ) + return 1; + } + ; + +vlarb_scope_entries:/* empty */ + | vlarb_scope_entries vlarb_scope_entry + ; + + /* + * vlarb-scope + * group: Storage + * ... + * across: Storage + * ... + * vlarb-high: 0:255,1:127,2:63,3:31,4:15,5:7,6:3,7:1 + * vlarb-low: 8:255,9:127,10:63,11:31,12:15,13:7,14:3 + * vl-high-limit: 10 + * end-vlarb-scope + */ + +vlarb_scope_entry: vlarb_scope_group + | vlarb_scope_across + | vlarb_scope_vlarb_high + | vlarb_scope_vlarb_low + | vlarb_scope_vlarb_high_limit + ; + + /* Parsing sl2vl-tables */ + +sl2vl_tables: TK_SL2VL_TABLES_START sl2vl_scope_items TK_SL2VL_TABLES_END + ; + +sl2vl_scope_items: /* empty */ + | sl2vl_scope_items sl2vl_scope + ; + +sl2vl_scope: sl2vl_scope_start sl2vl_scope_entries sl2vl_scope_end + ; + +sl2vl_scope_start: TK_SL2VL_SCOPE_START { + __parser_sl2vl_scope_start(); + } + ; + +sl2vl_scope_end: TK_SL2VL_SCOPE_END { + if ( __parser_sl2vl_scope_end() ) + return 1; + } + ; + +sl2vl_scope_entries:/* empty */ + | sl2vl_scope_entries sl2vl_scope_entry + ; + + /* + * sl2vl-scope + * group: Part1 + * ... + * from: * + * ... + * to: * + * ... + * across-to: Storage2 + * ... + * across-from: Storage1 + * ... 
+ * sl2vl-table: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 + * end-sl2vl-scope + */ + +sl2vl_scope_entry: sl2vl_scope_group + | sl2vl_scope_across + | sl2vl_scope_across_from + | sl2vl_scope_across_to + | sl2vl_scope_from + | sl2vl_scope_to + | sl2vl_scope_sl2vl_table + ; + + /* + * Parsing qos-levels: + * ------------------ + * qos-levels + * qos-level + * name: qos_level_1 + * use: for the lowest priority communication + * sl: 15 + * mtu-limit: 1 + * rate-limit: 1 + * packet-life: 12 + * path-bits: 2,4,8-32 + * pkey: 0x00FF-0x0FFF + * end-qos-level + * ... + * qos-level + * end-qos-level + * end-qos-levels + */ + + +qos_levels_section: TK_QOS_LEVELS_START qos_levels TK_QOS_LEVELS_END + ; + +qos_levels: /* empty */ + | qos_levels qos_level + ; + +qos_level: qos_level_start qos_level_entries qos_level_end + ; + +qos_level_start: TK_QOS_LEVEL_START { + __parser_qos_level_start(); + } + ; + +qos_level_end: TK_QOS_LEVEL_END { + if ( __parser_qos_level_end() ) + return 1; + } + ; + +qos_level_entries: /* empty */ + | qos_level_entries qos_level_entry + ; + +qos_level_entry: qos_level_name + | qos_level_use + | qos_level_sl + | qos_level_mtu_limit + | qos_level_rate_limit + | qos_level_packet_life + | qos_level_path_bits + | qos_level_pkey + ; + + /* + * Parsing qos-match-rules: + * ----------------------- + * qos-match-rules + * qos-match-rule + * use: low latency by class 7-9 or 11 and bla bla + * qos-class: 7-9,11 + * qos-level-name: default + * source: Storage + * destination: Storage + * service-id: 22,4719-5000 + * pkey: 0x00FF-0x0FFF + * end-qos-match-rule + * qos-match-rule + * ... 
+ * end-qos-match-rule + * end-qos-match-rules + */ + +qos_match_rules_section: TK_QOS_MATCH_RULES_START qos_match_rules TK_QOS_MATCH_RULES_END + ; + +qos_match_rules: /* empty */ + | qos_match_rules qos_match_rule + ; + +qos_match_rule: qos_match_rule_start qos_match_rule_entries qos_match_rule_end + ; + +qos_match_rule_start: TK_QOS_MATCH_RULE_START { + __parser_match_rule_start(); + } + ; + +qos_match_rule_end: TK_QOS_MATCH_RULE_END { + if ( __parser_match_rule_end() ) + return 1; + } + ; + +qos_match_rule_entries: /* empty */ + | qos_match_rule_entries qos_match_rule_entry + ; + +qos_match_rule_entry: qos_match_rule_use + | qos_match_rule_qos_class + | qos_match_rule_qos_level_name + | qos_match_rule_source + | qos_match_rule_destination + | qos_match_rule_service_id + | qos_match_rule_pkey + ; + + + /* + * Parsing qos-ulps: + * ----------------- + * default + * sdp + * sdp with port-num + * rds + * rds with port-num + * srp with target-port-guid + * iser + * iser with port-num + * ipoib + * ipoib with pkey + * any with service-id + * any with pkey + * any with target-port-guid + * any with source-port-guid + * any with source-target-port-guid + */ + +qos_ulp: TK_ULP_DEFAULT single_number { + /* parsing default ulp rule: "default: num" */ + cl_list_iterator_t list_iterator; + uint64_t * p_tmp_num; + + list_iterator = cl_list_head(&tmp_parser_struct.num_list); + p_tmp_num = (uint64_t*)cl_list_obj(list_iterator); + if (*p_tmp_num > 15) + { + yyerror("illegal SL value"); + return 1; + } + __default_simple_qos_level.sl = (uint8_t)(*p_tmp_num); + __default_simple_qos_level.sl_set = TRUE; + free(p_tmp_num); + cl_list_remove_all(&tmp_parser_struct.num_list); + } + + | qos_ulp_type_any_service list_of_ranges TK_DOTDOT { + /* "any, service-id ... 
: sl" - one instance of list of ranges */ + uint64_t ** range_arr; + unsigned range_len; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + yyerror("ULP rule doesn't have service ids"); + return 1; + } + + /* get all the service id ranges */ + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + + p_current_qos_match_rule->service_id_range_arr = range_arr; + p_current_qos_match_rule->service_id_range_len = range_len; + + } qos_ulp_sl + + | qos_ulp_type_any_pkey list_of_ranges TK_DOTDOT { + /* "any, pkey ... : sl" - one instance of list of ranges */ + uint64_t ** range_arr; + unsigned range_len; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + yyerror("ULP rule doesn't have pkeys"); + return 1; + } + + /* get all the pkey ranges */ + __pkey_rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + + p_current_qos_match_rule->pkey_range_arr = range_arr; + p_current_qos_match_rule->pkey_range_len = range_len; + + } qos_ulp_sl + + | qos_ulp_type_any_target_port_guid list_of_ranges TK_DOTDOT { + /* any, target-port-guid ... 
: sl */ + uint64_t ** range_arr; + unsigned range_len; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + yyerror("ULP rule doesn't have port guids"); + return 1; + } + + /* create a new port group with these ports */ + __parser_port_group_start(); + + p_current_port_group->name = strdup("_ULP_Targets_"); + p_current_port_group->use = strdup("Generated from ULP rules"); + + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + + __parser_add_guid_range_to_port_map( + &p_current_port_group->port_map, + range_arr, + range_len); + + /* add this port group to the destination + groups of the current match rule */ + cl_list_insert_tail(&p_current_qos_match_rule->destination_group_list, + p_current_port_group); + + __parser_port_group_end(); + + } qos_ulp_sl + + | qos_ulp_type_any_source_port_guid list_of_ranges TK_DOTDOT { + /* any, source-port-guid ... : sl */ + uint64_t ** range_arr; + unsigned range_len; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + yyerror("ULP rule doesn't have port guids"); + return 1; + } + + /* create a new port group with these ports */ + __parser_port_group_start(); + + p_current_port_group->name = strdup("_ULP_Sources_"); + p_current_port_group->use = strdup("Generated from ULP rules"); + + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + + __parser_add_guid_range_to_port_map( + &p_current_port_group->port_map, + range_arr, + range_len); + + /* add this port group to the source + groups of the current match rule */ + cl_list_insert_tail(&p_current_qos_match_rule->source_group_list, + p_current_port_group); + + __parser_port_group_end(); + + } qos_ulp_sl + + | qos_ulp_type_any_source_target_port_guid list_of_ranges TK_DOTDOT { + /* any, source-target-port-guid ... 
: sl */ + uint64_t ** range_arr; + unsigned range_len; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + yyerror("ULP rule doesn't have port guids"); + return 1; + } + + /* create a new port group with these ports */ + __parser_port_group_start(); + + p_current_port_group->name = strdup("_ULP_Sources_Targets_"); + p_current_port_group->use = strdup("Generated from ULP rules"); + + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + + __parser_add_guid_range_to_port_map( + &p_current_port_group->port_map, + range_arr, + range_len); + + /* add this port group to the source and destination + groups of the current match rule */ + cl_list_insert_tail(&p_current_qos_match_rule->source_group_list, + p_current_port_group); + + cl_list_insert_tail(&p_current_qos_match_rule->destination_group_list, + p_current_port_group); + + __parser_port_group_end(); + + } qos_ulp_sl + + | qos_ulp_type_sdp_default { + /* "sdp : sl" - default SL for SDP */ + uint64_t ** range_arr = + (uint64_t **)malloc(sizeof(uint64_t *)); + range_arr[0] = (uint64_t *)malloc(2*sizeof(uint64_t)); + range_arr[0][0] = OSM_QOS_POLICY_ULP_SDP_SERVICE_ID; + range_arr[0][1] = OSM_QOS_POLICY_ULP_SDP_SERVICE_ID + 0xFFFF; + + p_current_qos_match_rule->service_id_range_arr = range_arr; + p_current_qos_match_rule->service_id_range_len = 1; + + } qos_ulp_sl + + | qos_ulp_type_sdp_port list_of_ranges TK_DOTDOT { + /* sdp with port numbers */ + uint64_t ** range_arr; + unsigned range_len; + unsigned i; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + yyerror("SDP ULP rule doesn't have port numbers"); + return 1; + } + + /* get all the port ranges */ + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + /* now translate these port numbers into service ids */ + for (i = 0; i < range_len; i++) + { + if (range_arr[i][0] > 0xFFFF || range_arr[i][1] > 0xFFFF) + { + yyerror("SDP port number out of range"); + free(range_arr); + 
return 1; + } + range_arr[i][0] += OSM_QOS_POLICY_ULP_SDP_SERVICE_ID; + range_arr[i][1] += OSM_QOS_POLICY_ULP_SDP_SERVICE_ID; + } + + p_current_qos_match_rule->service_id_range_arr = range_arr; + p_current_qos_match_rule->service_id_range_len = range_len; + + } qos_ulp_sl + + | qos_ulp_type_rds_default { + /* "rds : sl" - default SL for RDS */ + uint64_t ** range_arr = + (uint64_t **)malloc(sizeof(uint64_t *)); + range_arr[0] = (uint64_t *)malloc(2*sizeof(uint64_t)); + range_arr[0][0] = range_arr[0][1] = + OSM_QOS_POLICY_ULP_RDS_SERVICE_ID + OSM_QOS_POLICY_ULP_RDS_PORT; + + p_current_qos_match_rule->service_id_range_arr = range_arr; + p_current_qos_match_rule->service_id_range_len = 1; + + } qos_ulp_sl + + | qos_ulp_type_rds_port list_of_ranges TK_DOTDOT { + /* rds with port numbers */ + uint64_t ** range_arr; + unsigned range_len; + unsigned i; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + yyerror("RDS ULP rule doesn't have port numbers"); + return 1; + } + + /* get all the port ranges */ + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + /* now translate these port numbers into service ids */ + for (i = 0; i < range_len; i++) + { + if (range_arr[i][0] > 0xFFFF || range_arr[i][1] > 0xFFFF) + { + yyerror("RDS port number out of range"); /* the message names the ULP being parsed in this alternative */ + free(range_arr); + return 1; + } + range_arr[i][0] += OSM_QOS_POLICY_ULP_RDS_SERVICE_ID; + range_arr[i][1] += OSM_QOS_POLICY_ULP_RDS_SERVICE_ID; + } + + p_current_qos_match_rule->service_id_range_arr = range_arr; + p_current_qos_match_rule->service_id_range_len = range_len; + + } qos_ulp_sl + + | qos_ulp_type_iser_default { + /* "iSER : sl" - default SL for iSER */ + uint64_t ** range_arr = + (uint64_t **)malloc(sizeof(uint64_t *)); + range_arr[0] = (uint64_t *)malloc(2*sizeof(uint64_t)); + range_arr[0][0] = range_arr[0][1] = + OSM_QOS_POLICY_ULP_ISER_SERVICE_ID + OSM_QOS_POLICY_ULP_ISER_PORT; + + p_current_qos_match_rule->service_id_range_arr = range_arr; +
p_current_qos_match_rule->service_id_range_len = 1; + + } qos_ulp_sl + + | qos_ulp_type_iser_port list_of_ranges TK_DOTDOT { + /* iser with port numbers */ + uint64_t ** range_arr; + unsigned range_len; + unsigned i; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + yyerror("iSER ULP rule doesn't have port numbers"); + return 1; + } + + /* get all the port ranges */ + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + /* now translate these port numbers into service ids */ + for (i = 0; i < range_len; i++) + { + if (range_arr[i][0] > 0xFFFF || range_arr[i][1] > 0xFFFF) + { + yyerror("iSER port number out of range"); /* the message names the ULP being parsed in this alternative */ + free(range_arr); + return 1; + } + range_arr[i][0] += OSM_QOS_POLICY_ULP_ISER_SERVICE_ID; + range_arr[i][1] += OSM_QOS_POLICY_ULP_ISER_SERVICE_ID; + } + + p_current_qos_match_rule->service_id_range_arr = range_arr; + p_current_qos_match_rule->service_id_range_len = range_len; + + } qos_ulp_sl + + | qos_ulp_type_srp_guid list_of_ranges TK_DOTDOT { + /* srp with target guids - this rule is similar + to writing 'any' ulp with target port guids */ + uint64_t ** range_arr; + unsigned range_len; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + yyerror("SRP ULP rule doesn't have port guids"); + return 1; + } + + /* create a new port group with these ports */ + __parser_port_group_start(); + + p_current_port_group->name = strdup("_SRP_Targets_"); + p_current_port_group->use = strdup("Generated from ULP rules"); + + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + + __parser_add_guid_range_to_port_map( + &p_current_port_group->port_map, + range_arr, + range_len); + + /* add this port group to the destination + groups of the current match rule */ + cl_list_insert_tail(&p_current_qos_match_rule->destination_group_list, + p_current_port_group); + + __parser_port_group_end(); + + } qos_ulp_sl + + | qos_ulp_type_ipoib_default { + /* ipoib w/o any pkeys (default
pkey) */ + uint64_t ** range_arr = + (uint64_t **)malloc(sizeof(uint64_t *)); + range_arr[0] = (uint64_t *)malloc(2*sizeof(uint64_t)); + range_arr[0][0] = range_arr[0][1] = 0x7fff; + + /* + * Although we know that the default partition exists, + * we still need to validate it by checking that it has + * at least two full members. Otherwise IPoIB won't work. + */ + if (__validate_pkeys(range_arr, 1, TRUE)) { + free(range_arr[0]); + free(range_arr); + return 1; + } + + p_current_qos_match_rule->pkey_range_arr = range_arr; + p_current_qos_match_rule->pkey_range_len = 1; + + } qos_ulp_sl + + | qos_ulp_type_ipoib_pkey list_of_ranges TK_DOTDOT { + /* ipoib with pkeys */ + uint64_t ** range_arr; + unsigned range_len; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + yyerror("IPoIB ULP rule doesn't have pkeys"); + return 1; + } + + /* get all the pkey ranges */ + __pkey_rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + + /* + * Validate pkeys. + * For IPoIB pkeys the validation is strict. + * If some problem would be found, parsing will + * be aborted with a proper error messages. 
+ */ + if (__validate_pkeys(range_arr, range_len, TRUE)) { + free(range_arr); + return 1; + } + + p_current_qos_match_rule->pkey_range_arr = range_arr; + p_current_qos_match_rule->pkey_range_len = range_len; + + } qos_ulp_sl + ; + +qos_ulp_type_any_service: TK_ULP_ANY_SERVICE_ID + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_any_pkey: TK_ULP_ANY_PKEY + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_any_target_port_guid: TK_ULP_ANY_TARGET_PORT_GUID + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_any_source_port_guid: TK_ULP_ANY_SOURCE_PORT_GUID + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_any_source_target_port_guid: TK_ULP_ANY_SOURCE_TARGET_PORT_GUID + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_sdp_default: TK_ULP_SDP_DEFAULT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_sdp_port: TK_ULP_SDP_PORT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_rds_default: TK_ULP_RDS_DEFAULT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_rds_port: TK_ULP_RDS_PORT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_iser_default: TK_ULP_ISER_DEFAULT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_iser_port: TK_ULP_ISER_PORT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_srp_guid: TK_ULP_SRP_GUID + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_ipoib_default: TK_ULP_IPOIB_DEFAULT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_ipoib_pkey: TK_ULP_IPOIB_PKEY + { __parser_ulp_match_rule_start(); }; + + +qos_ulp_sl: single_number { + /* get the SL for ULP rules */ + cl_list_iterator_t list_iterator; + uint64_t * p_tmp_num; + uint8_t sl; + + list_iterator = cl_list_head(&tmp_parser_struct.num_list); + p_tmp_num = (uint64_t*)cl_list_obj(list_iterator); + if (*p_tmp_num > 15) + { + yyerror("illegal SL value"); + return 1; + } + + sl = (uint8_t)(*p_tmp_num); + free(p_tmp_num); + cl_list_remove_all(&tmp_parser_struct.num_list); + + p_current_qos_match_rule->p_qos_level = + 
&osm_qos_policy_simple_qos_levels[sl]; + p_current_qos_match_rule->qos_level_name = + strdup(osm_qos_policy_simple_qos_levels[sl].name); + + if (__parser_ulp_match_rule_end()) + return 1; + } + ; + + /* + * port_group_entry values: + * port_group_name + * port_group_use + * port_group_port_guid + * port_group_port_name + * port_group_pkey + * port_group_partition + * port_group_node_type + */ + +port_group_name: port_group_name_start single_string { + /* 'name' of 'port-group' - one instance */ + cl_list_iterator_t list_iterator; + char * tmp_str; + + if (p_current_port_group->name) + { + yyerror("port-group has multiple 'name' tags"); + cl_list_remove_all(&tmp_parser_struct.str_list); + return 1; + } + + list_iterator = cl_list_head(&tmp_parser_struct.str_list); + if ( list_iterator != cl_list_end(&tmp_parser_struct.str_list) ) + { + tmp_str = (char*)cl_list_obj(list_iterator); + if (tmp_str) + p_current_port_group->name = tmp_str; + } + cl_list_remove_all(&tmp_parser_struct.str_list); + } + ; + +port_group_name_start: TK_NAME { + RESET_BUFFER; + } + ; + +port_group_use: port_group_use_start single_string { + /* 'use' of 'port-group' - one instance */ + cl_list_iterator_t list_iterator; + char * tmp_str; + + if (p_current_port_group->use) + { + yyerror("port-group has multiple 'use' tags"); + cl_list_remove_all(&tmp_parser_struct.str_list); + return 1; + } + + list_iterator = cl_list_head(&tmp_parser_struct.str_list); + if ( list_iterator != cl_list_end(&tmp_parser_struct.str_list) ) + { + tmp_str = (char*)cl_list_obj(list_iterator); + if (tmp_str) + p_current_port_group->use = tmp_str; + } + cl_list_remove_all(&tmp_parser_struct.str_list); + } + ; + +port_group_use_start: TK_USE { + RESET_BUFFER; + } + ; + +port_group_port_name: port_group_port_name_start string_list { + /* 'port-name' in 'port-group' - any num of instances */ + cl_list_iterator_t list_iterator; + osm_node_t * p_node; + osm_physp_t * p_physp; + unsigned port_num; + char * tmp_str; + char * 
port_str; + + /* parsing port name strings */ + for (list_iterator = cl_list_head(&tmp_parser_struct.str_list); + list_iterator != cl_list_end(&tmp_parser_struct.str_list); + list_iterator = cl_list_next(list_iterator)) + { + tmp_str = (char*)cl_list_obj(list_iterator); + if (tmp_str) + { + /* last slash in port name string is a separator + between node name and port number */ + port_str = strrchr(tmp_str, '/'); + if (!port_str || (strlen(port_str) < 3) || + (port_str[1] != 'p' && port_str[1] != 'P')) { + yyerror("'%s' - illegal port name", + tmp_str); + free(tmp_str); + cl_list_remove_all(&tmp_parser_struct.str_list); + return 1; + } + + if (!(port_num = strtoul(&port_str[2],NULL,0))) { + yyerror( + "'%s' - illegal port number in port name", + tmp_str); + free(tmp_str); + cl_list_remove_all(&tmp_parser_struct.str_list); + return 1; + } + + /* separate node name from port number */ + port_str[0] = '\0'; + + if (st_lookup(p_qos_policy->p_node_hash, + (st_data_t)tmp_str, + (void *)&p_node)) + { + /* we found the node, now get the right port */ + p_physp = osm_node_get_physp_ptr(p_node, port_num); + if (!p_physp) { + yyerror( + "'%s' - port number out of range in port name", + tmp_str); + free(tmp_str); + cl_list_remove_all(&tmp_parser_struct.str_list); + return 1; + } + /* we found the port, now add it to guid table */ + __parser_add_port_to_port_map(&p_current_port_group->port_map, + p_physp); + } + free(tmp_str); + } + } + cl_list_remove_all(&tmp_parser_struct.str_list); + } + ; + +port_group_port_name_start: TK_PORT_NAME { + RESET_BUFFER; + } + ; + +port_group_port_guid: port_group_port_guid_start list_of_ranges { + /* 'port-guid' in 'port-group' - any num of instances */ + /* list of guid ranges */ + if (cl_list_count(&tmp_parser_struct.num_pair_list)) + { + uint64_t ** range_arr; + unsigned range_len; + + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + + __parser_add_guid_range_to_port_map( + 
&p_current_port_group->port_map, + range_arr, + range_len); + } + } + ; + +port_group_port_guid_start: TK_PORT_GUID { + RESET_BUFFER; + } + ; + +port_group_pkey: port_group_pkey_start list_of_ranges { + /* 'pkey' in 'port-group' - any num of instances */ + /* list of pkey ranges */ + if (cl_list_count(&tmp_parser_struct.num_pair_list)) + { + uint64_t ** range_arr; + unsigned range_len; + + __pkey_rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + + __parser_add_pkey_range_to_port_map( + &p_current_port_group->port_map, + range_arr, + range_len); + } + } + ; + +port_group_pkey_start: TK_PKEY { + RESET_BUFFER; + } + ; + +port_group_partition: port_group_partition_start string_list { + /* 'partition' in 'port-group' - any num of instances */ + __parser_add_partition_list_to_port_map( + &p_current_port_group->port_map, + &tmp_parser_struct.str_list); + } + ; + +port_group_partition_start: TK_PARTITION { + RESET_BUFFER; + } + ; + +port_group_node_type: port_group_node_type_start port_group_node_type_list { + /* 'node-type' in 'port-group' - any num of instances */ + } + ; + +port_group_node_type_start: TK_NODE_TYPE { + RESET_BUFFER; + } + ; + +port_group_node_type_list: node_type_item + | port_group_node_type_list TK_COMMA node_type_item + ; + +node_type_item: node_type_ca + | node_type_switch + | node_type_router + | node_type_all + | node_type_self + ; + +node_type_ca: TK_NODE_TYPE_CA { + p_current_port_group->node_types |= + OSM_QOS_POLICY_NODE_TYPE_CA; + } + ; + +node_type_switch: TK_NODE_TYPE_SWITCH { + p_current_port_group->node_types |= + OSM_QOS_POLICY_NODE_TYPE_SWITCH; + } + ; + +node_type_router: TK_NODE_TYPE_ROUTER { + p_current_port_group->node_types |= + OSM_QOS_POLICY_NODE_TYPE_ROUTER; + } + ; + +node_type_all: TK_NODE_TYPE_ALL { + p_current_port_group->node_types |= + (OSM_QOS_POLICY_NODE_TYPE_CA | + OSM_QOS_POLICY_NODE_TYPE_SWITCH | + OSM_QOS_POLICY_NODE_TYPE_ROUTER); + } + ; + +node_type_self: TK_NODE_TYPE_SELF { + 
osm_port_t * p_osm_port = + osm_get_port_by_guid(p_qos_policy->p_subn, + p_qos_policy->p_subn->sm_port_guid); + if (p_osm_port) + __parser_add_port_to_port_map( + &p_current_port_group->port_map, + p_osm_port->p_physp); + } + ; + + /* + * vlarb_scope_entry values: + * vlarb_scope_group + * vlarb_scope_across + * vlarb_scope_vlarb_high + * vlarb_scope_vlarb_low + * vlarb_scope_vlarb_high_limit + */ + + + +vlarb_scope_group: vlarb_scope_group_start string_list { + /* 'group' in 'vlarb-scope' - any num of instances */ + cl_list_iterator_t list_iterator; + char * tmp_str; + + list_iterator = cl_list_head(&tmp_parser_struct.str_list); + while( list_iterator != cl_list_end(&tmp_parser_struct.str_list) ) + { + tmp_str = (char*)cl_list_obj(list_iterator); + if (tmp_str) + cl_list_insert_tail(&p_current_vlarb_scope->group_list,tmp_str); + list_iterator = cl_list_next(list_iterator); + } + cl_list_remove_all(&tmp_parser_struct.str_list); + } + ; + +vlarb_scope_group_start: TK_GROUP { + RESET_BUFFER; + } + ; + +vlarb_scope_across: vlarb_scope_across_start string_list { + /* 'across' in 'vlarb-scope' - any num of instances */ + cl_list_iterator_t list_iterator; + char * tmp_str; + + list_iterator = cl_list_head(&tmp_parser_struct.str_list); + while( list_iterator != cl_list_end(&tmp_parser_struct.str_list) ) + { + tmp_str = (char*)cl_list_obj(list_iterator); + if (tmp_str) + cl_list_insert_tail(&p_current_vlarb_scope->across_list,tmp_str); + list_iterator = cl_list_next(list_iterator); + } + cl_list_remove_all(&tmp_parser_struct.str_list); + } + ; + +vlarb_scope_across_start: TK_ACROSS { + RESET_BUFFER; + } + ; + +vlarb_scope_vlarb_high_limit: vlarb_scope_vlarb_high_limit_start single_number { + /* 'vl-high-limit' in 'vlarb-scope' - one instance of one number */ + cl_list_iterator_t list_iterator; + uint64_t * p_tmp_num; + + list_iterator = cl_list_head(&tmp_parser_struct.num_list); + p_tmp_num = (uint64_t*)cl_list_obj(list_iterator); + if (p_tmp_num) + { + 
p_current_vlarb_scope->vl_high_limit = (uint32_t)(*p_tmp_num); + p_current_vlarb_scope->vl_high_limit_set = TRUE; + free(p_tmp_num); + } + + cl_list_remove_all(&tmp_parser_struct.num_list); + } + ; + +vlarb_scope_vlarb_high_limit_start: TK_VLARB_HIGH_LIMIT { + RESET_BUFFER; + } + ; + +vlarb_scope_vlarb_high: vlarb_scope_vlarb_high_start num_list_with_dotdot { + /* 'vlarb-high' in 'vlarb-scope' - list of pairs of numbers with ':' and ',' */ + cl_list_iterator_t list_iterator; + uint64_t * num_pair; + + list_iterator = cl_list_head(&tmp_parser_struct.num_pair_list); + while( list_iterator != cl_list_end(&tmp_parser_struct.num_pair_list) ) + { + num_pair = (uint64_t*)cl_list_obj(list_iterator); + if (num_pair) + cl_list_insert_tail(&p_current_vlarb_scope->vlarb_high_list,num_pair); + list_iterator = cl_list_next(list_iterator); + } + cl_list_remove_all(&tmp_parser_struct.num_pair_list); + } + ; + +vlarb_scope_vlarb_high_start: TK_VLARB_HIGH { + RESET_BUFFER; + } + ; + +vlarb_scope_vlarb_low: vlarb_scope_vlarb_low_start num_list_with_dotdot { + /* 'vlarb-low' in 'vlarb-scope' - list of pairs of numbers with ':' and ',' */ + cl_list_iterator_t list_iterator; + uint64_t * num_pair; + + list_iterator = cl_list_head(&tmp_parser_struct.num_pair_list); + while( list_iterator != cl_list_end(&tmp_parser_struct.num_pair_list) ) + { + num_pair = (uint64_t*)cl_list_obj(list_iterator); + if (num_pair) + cl_list_insert_tail(&p_current_vlarb_scope->vlarb_low_list,num_pair); + list_iterator = cl_list_next(list_iterator); + } + cl_list_remove_all(&tmp_parser_struct.num_pair_list); + } + ; + +vlarb_scope_vlarb_low_start: TK_VLARB_LOW { + RESET_BUFFER; + } + ; + + /* + * sl2vl_scope_entry values: + * sl2vl_scope_group + * sl2vl_scope_across + * sl2vl_scope_across_from + * sl2vl_scope_across_to + * sl2vl_scope_from + * sl2vl_scope_to + * sl2vl_scope_sl2vl_table + */ + +sl2vl_scope_group: sl2vl_scope_group_start string_list { + /* 'group' in 'sl2vl-scope' - any num of instances */ + 
cl_list_iterator_t list_iterator; + char * tmp_str; + + list_iterator = cl_list_head(&tmp_parser_struct.str_list); + while( list_iterator != cl_list_end(&tmp_parser_struct.str_list) ) + { + tmp_str = (char*)cl_list_obj(list_iterator); + if (tmp_str) + cl_list_insert_tail(&p_current_sl2vl_scope->group_list,tmp_str); + list_iterator = cl_list_next(list_iterator); + } + cl_list_remove_all(&tmp_parser_struct.str_list); + } + ; + +sl2vl_scope_group_start: TK_GROUP { + RESET_BUFFER; + } + ; + +sl2vl_scope_across: sl2vl_scope_across_start string_list { + /* 'across' in 'sl2vl-scope' - any num of instances */ + cl_list_iterator_t list_iterator; + char * tmp_str; + + list_iterator = cl_list_head(&tmp_parser_struct.str_list); + while( list_iterator != cl_list_end(&tmp_parser_struct.str_list) ) + { + tmp_str = (char*)cl_list_obj(list_iterator); + if (tmp_str) { + cl_list_insert_tail(&p_current_sl2vl_scope->across_from_list,tmp_str); + cl_list_insert_tail(&p_current_sl2vl_scope->across_to_list,strdup(tmp_str)); + } + list_iterator = cl_list_next(list_iterator); + } + cl_list_remove_all(&tmp_parser_struct.str_list); + } + ; + +sl2vl_scope_across_start: TK_ACROSS { + RESET_BUFFER; + } + ; + +sl2vl_scope_across_from: sl2vl_scope_across_from_start string_list { + /* 'across-from' in 'sl2vl-scope' - any num of instances */ + cl_list_iterator_t list_iterator; + char * tmp_str; + + list_iterator = cl_list_head(&tmp_parser_struct.str_list); + while( list_iterator != cl_list_end(&tmp_parser_struct.str_list) ) + { + tmp_str = (char*)cl_list_obj(list_iterator); + if (tmp_str) + cl_list_insert_tail(&p_current_sl2vl_scope->across_from_list,tmp_str); + list_iterator = cl_list_next(list_iterator); + } + cl_list_remove_all(&tmp_parser_struct.str_list); + } + ; + +sl2vl_scope_across_from_start: TK_ACROSS_FROM { + RESET_BUFFER; + } + ; + +sl2vl_scope_across_to: sl2vl_scope_across_to_start string_list { + /* 'across-to' in 'sl2vl-scope' - any num of instances */ + cl_list_iterator_t 
list_iterator; + char * tmp_str; + + list_iterator = cl_list_head(&tmp_parser_struct.str_list); + while( list_iterator != cl_list_end(&tmp_parser_struct.str_list) ) + { + tmp_str = (char*)cl_list_obj(list_iterator); + if (tmp_str) { + cl_list_insert_tail(&p_current_sl2vl_scope->across_to_list,tmp_str); + } + list_iterator = cl_list_next(list_iterator); + } + cl_list_remove_all(&tmp_parser_struct.str_list); + } + ; + +sl2vl_scope_across_to_start: TK_ACROSS_TO { + RESET_BUFFER; + } + ; + +sl2vl_scope_from: sl2vl_scope_from_start sl2vl_scope_from_list_or_asterisk { + /* 'from' in 'sl2vl-scope' - any num of instances */ + } + ; + +sl2vl_scope_from_start: TK_FROM { + RESET_BUFFER; + } + ; + +sl2vl_scope_to: sl2vl_scope_to_start sl2vl_scope_to_list_or_asterisk { + /* 'to' in 'sl2vl-scope' - any num of instances */ + } + ; + +sl2vl_scope_to_start: TK_TO { + RESET_BUFFER; + } + ; + +sl2vl_scope_from_list_or_asterisk: sl2vl_scope_from_asterisk + | sl2vl_scope_from_list_of_ranges + ; + +sl2vl_scope_from_asterisk: TK_ASTERISK { + int i; + for (i = 0; i < OSM_QOS_POLICY_MAX_PORTS_ON_SWITCH; i++) + p_current_sl2vl_scope->from[i] = TRUE; + } + ; + +sl2vl_scope_to_list_or_asterisk: sl2vl_scope_to_asterisk + | sl2vl_scope_to_list_of_ranges + ; + +sl2vl_scope_to_asterisk: TK_ASTERISK { + int i; + for (i = 0; i < OSM_QOS_POLICY_MAX_PORTS_ON_SWITCH; i++) + p_current_sl2vl_scope->to[i] = TRUE; + } + ; + +sl2vl_scope_from_list_of_ranges: list_of_ranges { + int i; + cl_list_iterator_t list_iterator; + uint64_t * num_pair; + uint8_t num1, num2; + + list_iterator = cl_list_head(&tmp_parser_struct.num_pair_list); + while( list_iterator != cl_list_end(&tmp_parser_struct.num_pair_list) ) + { + num_pair = (uint64_t*)cl_list_obj(list_iterator); + if (num_pair) + { + if ( num_pair[1] >= OSM_QOS_POLICY_MAX_PORTS_ON_SWITCH ) + { + yyerror("port number out of range 'from' list"); + free(num_pair); + cl_list_remove_all(&tmp_parser_struct.num_pair_list); + return 1; + } + num1 = 
(uint8_t)num_pair[0]; + num2 = (uint8_t)num_pair[1]; + free(num_pair); + for (i = num1; i <= num2; i++) + p_current_sl2vl_scope->from[i] = TRUE; + } + list_iterator = cl_list_next(list_iterator); + } + cl_list_remove_all(&tmp_parser_struct.num_pair_list); + } + ; + +sl2vl_scope_to_list_of_ranges: list_of_ranges { + int i; + cl_list_iterator_t list_iterator; + uint64_t * num_pair; + uint8_t num1, num2; + + list_iterator = cl_list_head(&tmp_parser_struct.num_pair_list); + while( list_iterator != cl_list_end(&tmp_parser_struct.num_pair_list) ) + { + num_pair = (uint64_t*)cl_list_obj(list_iterator); + if (num_pair) + { + if ( num_pair[1] >= OSM_QOS_POLICY_MAX_PORTS_ON_SWITCH ) + { + yyerror("port number out of range 'to' list"); + free(num_pair); + cl_list_remove_all(&tmp_parser_struct.num_pair_list); + return 1; + } + num1 = (uint8_t)num_pair[0]; + num2 = (uint8_t)num_pair[1]; + free(num_pair); + for (i = num1; i <= num2; i++) + p_current_sl2vl_scope->to[i] = TRUE; + } + list_iterator = cl_list_next(list_iterator); + } + cl_list_remove_all(&tmp_parser_struct.num_pair_list); + } + ; + + +sl2vl_scope_sl2vl_table: sl2vl_scope_sl2vl_table_start num_list { + /* 'sl2vl-table' - one instance of exactly + OSM_QOS_POLICY_SL2VL_TABLE_LEN numbers */ + cl_list_iterator_t list_iterator; + uint64_t num; + uint64_t * p_num; + int i = 0; + + if (p_current_sl2vl_scope->sl2vl_table_set) + { + yyerror("sl2vl-scope has more than one sl2vl-table"); + cl_list_remove_all(&tmp_parser_struct.num_list); + return 1; + } + + if (cl_list_count(&tmp_parser_struct.num_list) != OSM_QOS_POLICY_SL2VL_TABLE_LEN) + { + yyerror("wrong number of values in 'sl2vl-table' (should be 16)"); + cl_list_remove_all(&tmp_parser_struct.num_list); + return 1; + } + + list_iterator = cl_list_head(&tmp_parser_struct.num_list); + while( list_iterator != cl_list_end(&tmp_parser_struct.num_list) ) + { + p_num = (uint64_t*)cl_list_obj(list_iterator); + num = *p_num; + free(p_num); + if (num >= OSM_QOS_POLICY_MAX_VL_NUM) + 
{ + yyerror("wrong VL value in 'sl2vl-table' (should be 0 to 15)"); + cl_list_remove_all(&tmp_parser_struct.num_list); + return 1; + } + + p_current_sl2vl_scope->sl2vl_table[i++] = (uint8_t)num; + list_iterator = cl_list_next(list_iterator); + } + p_current_sl2vl_scope->sl2vl_table_set = TRUE; + cl_list_remove_all(&tmp_parser_struct.num_list); + } + ; + +sl2vl_scope_sl2vl_table_start: TK_SL2VL_TABLE { + RESET_BUFFER; + } + ; + + /* + * qos_level_entry values: + * qos_level_name + * qos_level_use + * qos_level_sl + * qos_level_mtu_limit + * qos_level_rate_limit + * qos_level_packet_life + * qos_level_path_bits + * qos_level_pkey + */ + +qos_level_name: qos_level_name_start single_string { + /* 'name' of 'qos-level' - one instance */ + cl_list_iterator_t list_iterator; + char * tmp_str; + + if (p_current_qos_level->name) + { + yyerror("qos-level has multiple 'name' tags"); + cl_list_remove_all(&tmp_parser_struct.str_list); + return 1; + } + + list_iterator = cl_list_head(&tmp_parser_struct.str_list); + if ( list_iterator != cl_list_end(&tmp_parser_struct.str_list) ) + { + tmp_str = (char*)cl_list_obj(list_iterator); + if (tmp_str) + p_current_qos_level->name = tmp_str; + } + cl_list_remove_all(&tmp_parser_struct.str_list); + } + ; + +qos_level_name_start: TK_NAME { + RESET_BUFFER; + } + ; + +qos_level_use: qos_level_use_start single_string { + /* 'use' of 'qos-level' - one instance */ + cl_list_iterator_t list_iterator; + char * tmp_str; + + if (p_current_qos_level->use) + { + yyerror("qos-level has multiple 'use' tags"); + cl_list_remove_all(&tmp_parser_struct.str_list); + return 1; + } + + list_iterator = cl_list_head(&tmp_parser_struct.str_list); + if ( list_iterator != cl_list_end(&tmp_parser_struct.str_list) ) + { + tmp_str = (char*)cl_list_obj(list_iterator); + if (tmp_str) + p_current_qos_level->use = tmp_str; + } + cl_list_remove_all(&tmp_parser_struct.str_list); + } + ; + +qos_level_use_start: TK_USE { + RESET_BUFFER; + } + ; + +qos_level_sl: 
qos_level_sl_start single_number { + /* 'sl' in 'qos-level' - one instance */ + cl_list_iterator_t list_iterator; + uint64_t * p_num; + + if (p_current_qos_level->sl_set) + { + yyerror("'qos-level' has multiple 'sl' tags"); + cl_list_remove_all(&tmp_parser_struct.num_list); + return 1; + } + list_iterator = cl_list_head(&tmp_parser_struct.num_list); + p_num = (uint64_t*)cl_list_obj(list_iterator); + p_current_qos_level->sl = (uint8_t)(*p_num); + free(p_num); + p_current_qos_level->sl_set = TRUE; + cl_list_remove_all(&tmp_parser_struct.num_list); + } + ; + +qos_level_sl_start: TK_SL { + RESET_BUFFER; + } + ; + +qos_level_mtu_limit: qos_level_mtu_limit_start single_number { + /* 'mtu-limit' in 'qos-level' - one instance */ + cl_list_iterator_t list_iterator; + uint64_t * p_num; + + if (p_current_qos_level->mtu_limit_set) + { + yyerror("'qos-level' has multiple 'mtu-limit' tags"); + cl_list_remove_all(&tmp_parser_struct.num_list); + return 1; + } + list_iterator = cl_list_head(&tmp_parser_struct.num_list); + p_num = (uint64_t*)cl_list_obj(list_iterator); + if (*p_num > OSM_QOS_POLICY_MAX_MTU || *p_num < OSM_QOS_POLICY_MIN_MTU) + { + yyerror("mtu limit is out of range, value: %llu", (unsigned long long)*p_num); /* *p_num is a uint64_t; cast so the argument matches the %llu conversion */ + free(p_num); + cl_list_remove_all(&tmp_parser_struct.num_list); + return 1; + } + p_current_qos_level->mtu_limit = (uint8_t)(*p_num); + free(p_num); + p_current_qos_level->mtu_limit_set = TRUE; + cl_list_remove_all(&tmp_parser_struct.num_list); + } + ; + +qos_level_mtu_limit_start: TK_MTU_LIMIT { + /* 'mtu-limit' in 'qos-level' - one instance */ + RESET_BUFFER; + } + ; + +qos_level_rate_limit: qos_level_rate_limit_start single_number { + /* 'rate-limit' in 'qos-level' - one instance */ + cl_list_iterator_t list_iterator; + uint64_t * p_num; + + if (p_current_qos_level->rate_limit_set) + { + yyerror("'qos-level' has multiple 'rate-limit' tags"); + cl_list_remove_all(&tmp_parser_struct.num_list); + return 1; + } + list_iterator = cl_list_head(&tmp_parser_struct.num_list); + p_num =
(uint64_t*)cl_list_obj(list_iterator); + if (*p_num > OSM_QOS_POLICY_MAX_RATE || *p_num < OSM_QOS_POLICY_MIN_RATE) + { + yyerror("rate limit is out of range, value: %d", *p_num); + free(p_num); + cl_list_remove_all(&tmp_parser_struct.num_list); + return 1; + } + p_current_qos_level->rate_limit = (uint8_t)(*p_num); + free(p_num); + p_current_qos_level->rate_limit_set = TRUE; + cl_list_remove_all(&tmp_parser_struct.num_list); + } + ; + +qos_level_rate_limit_start: TK_RATE_LIMIT { + /* 'rate-limit' in 'qos-level' - one instance */ + RESET_BUFFER; + } + ; + +qos_level_packet_life: qos_level_packet_life_start single_number { + /* 'packet-life' in 'qos-level' - one instance */ + cl_list_iterator_t list_iterator; + uint64_t * p_num; + + if (p_current_qos_level->pkt_life_set) + { + yyerror("'qos-level' has multiple 'packet-life' tags"); + cl_list_remove_all(&tmp_parser_struct.num_list); + return 1; + } + list_iterator = cl_list_head(&tmp_parser_struct.num_list); + p_num = (uint64_t*)cl_list_obj(list_iterator); + p_current_qos_level->pkt_life = (uint8_t)(*p_num); + free(p_num); + p_current_qos_level->pkt_life_set= TRUE; + cl_list_remove_all(&tmp_parser_struct.num_list); + } + ; + +qos_level_packet_life_start: TK_PACKET_LIFE { + /* 'packet-life' in 'qos-level' - one instance */ + RESET_BUFFER; + } + ; + +qos_level_path_bits: qos_level_path_bits_start list_of_ranges { + /* 'path-bits' in 'qos-level' - any num of instances */ + /* list of path bit ranges */ + + if (cl_list_count(&tmp_parser_struct.num_pair_list)) + { + uint64_t ** range_arr; + unsigned range_len; + + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + + if ( !p_current_qos_level->path_bits_range_len ) + { + p_current_qos_level->path_bits_range_arr = range_arr; + p_current_qos_level->path_bits_range_len = range_len; + } + else + { + uint64_t ** new_range_arr; + unsigned new_range_len; + __merge_rangearr( p_current_qos_level->path_bits_range_arr, + 
qos_level_pkey:         qos_level_pkey_start list_of_ranges {
                            /*
                             * 'pkey' in 'qos-level' - may appear several times.
                             * Each occurrence contributes a list of ranges that is
                             * converted to a range array and, if the level already
                             * has pkey ranges, merged with them.
                             */
                            if (cl_list_count(&tmp_parser_struct.num_pair_list))
                            {
                                uint64_t ** parsed_arr;
                                unsigned parsed_len;

                                __pkey_rangelist2rangearr( &tmp_parser_struct.num_pair_list,
                                                           &parsed_arr,
                                                           &parsed_len );

                                if ( p_current_qos_level->pkey_range_len )
                                {
                                    /* fold the new ranges into the ones collected so far */
                                    uint64_t ** merged_arr;
                                    unsigned merged_len;

                                    __merge_rangearr( p_current_qos_level->pkey_range_arr,
                                                      p_current_qos_level->pkey_range_len,
                                                      parsed_arr,
                                                      parsed_len,
                                                      &merged_arr,
                                                      &merged_len );
                                    parsed_arr = merged_arr;
                                    parsed_len = merged_len;
                                }

                                p_current_qos_level->pkey_range_arr = parsed_arr;
                                p_current_qos_level->pkey_range_len = parsed_len;
                            }
                        }
                        ;
qos_match_rule_source:  qos_match_rule_source_start string_list {
                            /*
                             * 'source' in 'qos-match-rule' - text.
                             * Every non-NULL string collected in the temporary
                             * string list is handed over to the rule's source list.
                             */
                            cl_list_iterator_t it;
                            char * src_name;

                            for ( it = cl_list_head(&tmp_parser_struct.str_list);
                                  it != cl_list_end(&tmp_parser_struct.str_list);
                                  it = cl_list_next(it) )
                            {
                                src_name = (char*)cl_list_obj(it);
                                if (!src_name)
                                    continue;
                                cl_list_insert_tail(&p_current_qos_match_rule->source_list,src_name);
                            }
                            cl_list_remove_all(&tmp_parser_struct.str_list);
                        }
                        ;
qos_match_rule_qos_level_name: qos_match_rule_qos_level_name_start single_string {
                            /*
                             * 'qos-level-name' in 'qos-match-rule' - single string,
                             * one instance only.
                             *
                             * single_string leaves a strdup()'ed copy of the text
                             * in tmp_parser_struct.str_list. The error path must
                             * therefore clean up str_list (the original cleared
                             * num_list, leaving the string list populated) and
                             * free the string nobody is going to own.
                             */
                            cl_list_iterator_t list_iterator;
                            char * tmp_str;

                            list_iterator = cl_list_head(&tmp_parser_struct.str_list);

                            if (p_current_qos_match_rule->qos_level_name)
                            {
                                yyerror("qos-match-rule has multiple 'qos-level-name' tags");
                                while ( list_iterator != cl_list_end(&tmp_parser_struct.str_list) )
                                {
                                    /* free(NULL) is a no-op, so a failed strdup() is fine */
                                    free(cl_list_obj(list_iterator));
                                    list_iterator = cl_list_next(list_iterator);
                                }
                                cl_list_remove_all(&tmp_parser_struct.str_list);
                                return 1;
                            }

                            if ( list_iterator != cl_list_end(&tmp_parser_struct.str_list) )
                            {
                                tmp_str = (char*)cl_list_obj(list_iterator);
                                if (tmp_str)
                                    /* ownership of the string moves to the match rule */
                                    p_current_qos_match_rule->qos_level_name = tmp_str;
                            }
                            cl_list_remove_all(&tmp_parser_struct.str_list);
                        }
                        ;
number:                 TK_NUMBER {
                            /*
                             * Convert one numeric token to a heap-allocated
                             * uint64_t and queue it on the temporary number list.
                             * The consuming rule frees the value.
                             */
                            uint64_t * p_num = (uint64_t*)malloc(sizeof(uint64_t));
                            if (!p_num)
                            {
                                /* abort the parse instead of writing through NULL */
                                yyerror("memory allocation failed while parsing a number");
                                free($1);
                                return 1;
                            }
                            __parser_str2uint64(p_num,$1);
                            free($1);
                            cl_list_insert_tail(&tmp_parser_struct.num_list, p_num);
                        }
                        ;
tmp_parser_struct.num_pair[1]; + } + else { + num_pair[1] = tmp_parser_struct.num_pair[0]; + num_pair[0] = tmp_parser_struct.num_pair[1]; + } + cl_list_insert_tail(&tmp_parser_struct.num_pair_list, num_pair); + } + | num_list_with_dash TK_COMMA single_number_from_range { + uint64_t * num_pair = (uint64_t*)malloc(sizeof(uint64_t)*2); + num_pair[0] = tmp_parser_struct.num_pair[0]; + num_pair[1] = tmp_parser_struct.num_pair[1]; + cl_list_insert_tail(&tmp_parser_struct.num_pair_list, num_pair); + } + ; + +single_number_from_range: TK_NUMBER { + __parser_str2uint64(&tmp_parser_struct.num_pair[0],$1); + __parser_str2uint64(&tmp_parser_struct.num_pair[1],$1); + free($1); + } + ; + +number_from_range_1: TK_NUMBER { + __parser_str2uint64(&tmp_parser_struct.num_pair[0],$1); + free($1); + } + ; + +number_from_range_2: TK_NUMBER { + __parser_str2uint64(&tmp_parser_struct.num_pair[1],$1); + free($1); + } + ; + +%% + +/*************************************************** + ***************************************************/ + +int osm_qos_parse_policy_file(IN osm_subn_t * p_subn) +{ + int res = 0; + static boolean_t first_time = TRUE; + p_qos_parser_osm_log = &p_subn->p_osm->log; + + OSM_LOG_ENTER(p_qos_parser_osm_log); + + osm_qos_policy_destroy(p_subn->p_qos_policy); + p_subn->p_qos_policy = NULL; + + if (!p_subn->opt.qos_policy_file) { + OSM_LOG(p_qos_parser_osm_log, OSM_LOG_ERROR, "ERR AC06: " + "QoS policy file name is empty\n"); + res = 1; + goto Exit; + } + + yyin = fopen (p_subn->opt.qos_policy_file, "r"); + if (!yyin) + { + if (strcmp(p_subn->opt.qos_policy_file,OSM_DEFAULT_QOS_POLICY_FILE)) { + OSM_LOG(p_qos_parser_osm_log, OSM_LOG_ERROR, "ERR AC01: " + "Failed opening QoS policy file %s - %s\n", + p_subn->opt.qos_policy_file, strerror(errno)); + res = 1; + } + else + OSM_LOG(p_qos_parser_osm_log, OSM_LOG_VERBOSE, + "QoS policy file not found (%s)\n", + p_subn->opt.qos_policy_file); + + goto Exit; + } + + if (first_time) + { + first_time = FALSE; + 
__setup_simple_qos_levels(); + __setup_ulp_match_rules(); + OSM_LOG(p_qos_parser_osm_log, OSM_LOG_INFO, + "Loading QoS policy file (%s)\n", + p_subn->opt.qos_policy_file); + } + else + /* + * ULP match rules list was emptied at the end of + * previous parsing iteration. + * What's left is to clear simple QoS levels. + */ + __clear_simple_qos_levels(); + + column_num = 1; + line_num = 1; + + p_subn->p_qos_policy = osm_qos_policy_create(p_subn); + + __parser_tmp_struct_init(); + p_qos_policy = p_subn->p_qos_policy; + + res = yyparse(); + + __parser_tmp_struct_destroy(); + + if (res != 0) + { + OSM_LOG(p_qos_parser_osm_log, OSM_LOG_ERROR, "ERR AC03: " + "Failed parsing QoS policy file (%s)\n", + p_subn->opt.qos_policy_file); + osm_qos_policy_destroy(p_subn->p_qos_policy); + p_subn->p_qos_policy = NULL; + res = 1; + goto Exit; + } + + /* add generated ULP match rules to the usual match rules */ + __process_ulp_match_rules(); + + if (osm_qos_policy_validate(p_subn->p_qos_policy,p_qos_parser_osm_log)) + { + OSM_LOG(p_qos_parser_osm_log, OSM_LOG_ERROR, "ERR AC04: " + "Error(s) in QoS policy file (%s)\n", + p_subn->opt.qos_policy_file); + fprintf(stderr, "Error(s) in QoS policy file (%s)\n", + p_subn->opt.qos_policy_file); + osm_qos_policy_destroy(p_subn->p_qos_policy); + p_subn->p_qos_policy = NULL; + res = 1; + goto Exit; + } + + Exit: + if (yyin) + { + yyrestart(yyin); + fclose(yyin); + } + OSM_LOG_EXIT(p_qos_parser_osm_log); + return res; +} + +/*************************************************** + ***************************************************/ + +int yywrap() +{ + return(1); +} + +/*************************************************** + ***************************************************/ + +static void yyerror(const char *format, ...) 
/*
 * Strip leading and trailing white space from a NUL-terminated string.
 *
 * Leading white space is skipped by advancing the returned pointer;
 * trailing white space is overwritten with NUL bytes, so the buffer
 * behind 'str' is modified in place.
 *
 * Returns a pointer into 'str' at the first non-space character, or at
 * the terminating NUL when the string is empty or all white space.
 */
static char * __parser_strip_white(char * str)
{
    char *p;

    /*
     * isspace() is only defined for EOF and values representable as
     * unsigned char; a plain (possibly signed) char holding a byte
     * >= 0x80 must be converted first.
     */
    while (isspace((unsigned char)*str))
        str++;
    if (!*str)
        return str;

    /* at least one non-space character exists, so p cannot pass 'str' */
    p = str + strlen(str) - 1;
    while (isspace((unsigned char)*p))
        *p-- = '\0';

    return str;
}
!cl_list_count(&p_current_vlarb_scope->group_list) && + !cl_list_count(&p_current_vlarb_scope->across_list) ) + { + yyerror("vlarb-scope validation failed - no port groups specified by 'group' or by 'across'"); + return -1; + } + + cl_list_insert_tail(&p_qos_policy->vlarb_tables, + p_current_vlarb_scope); + p_current_vlarb_scope = NULL; + return 0; +} + +/*************************************************** + ***************************************************/ + +static void __parser_sl2vl_scope_start() +{ + p_current_sl2vl_scope = osm_qos_policy_sl2vl_scope_create(); +} + +/*************************************************** + ***************************************************/ + +static int __parser_sl2vl_scope_end() +{ + if (!p_current_sl2vl_scope->sl2vl_table_set) + { + yyerror("sl2vl-scope validation failed - no sl2vl table specified"); + return -1; + } + if ( !cl_list_count(&p_current_sl2vl_scope->group_list) && + !cl_list_count(&p_current_sl2vl_scope->across_to_list) && + !cl_list_count(&p_current_sl2vl_scope->across_from_list) ) + { + yyerror("sl2vl-scope validation failed - no port groups specified by 'group', 'across-to' or 'across-from'"); + return -1; + } + + cl_list_insert_tail(&p_qos_policy->sl2vl_tables, + p_current_sl2vl_scope); + p_current_sl2vl_scope = NULL; + return 0; +} + +/*************************************************** + ***************************************************/ + +static void __parser_qos_level_start() +{ + p_current_qos_level = osm_qos_policy_qos_level_create(); +} + +/*************************************************** + ***************************************************/ + +static int __parser_qos_level_end() +{ + if (!p_current_qos_level->sl_set) + { + yyerror("qos-level validation failed - no 'sl' specified"); + return -1; + } + if (!p_current_qos_level->name) + { + yyerror("qos-level validation failed - no 'name' specified"); + return -1; + } + + cl_list_insert_tail(&p_qos_policy->qos_levels, + p_current_qos_level); + 
p_current_qos_level = NULL; + return 0; +} + +/*************************************************** + ***************************************************/ + +static void __parser_match_rule_start() +{ + p_current_qos_match_rule = osm_qos_policy_match_rule_create(); +} + +/*************************************************** + ***************************************************/ + +static int __parser_match_rule_end() +{ + if (!p_current_qos_match_rule->qos_level_name) + { + yyerror("match-rule validation failed - no 'qos-level-name' specified"); + return -1; + } + + cl_list_insert_tail(&p_qos_policy->qos_match_rules, + p_current_qos_match_rule); + p_current_qos_match_rule = NULL; + return 0; +} + +/*************************************************** + ***************************************************/ + +static void __parser_ulp_match_rule_start() +{ + p_current_qos_match_rule = osm_qos_policy_match_rule_create(); +} + +/*************************************************** + ***************************************************/ + +static int __parser_ulp_match_rule_end() +{ + CL_ASSERT(p_current_qos_match_rule->p_qos_level); + cl_list_insert_tail(&__ulp_match_rules, + p_current_qos_match_rule); + p_current_qos_match_rule = NULL; + return 0; +} + +/*************************************************** + ***************************************************/ + +static void __parser_tmp_struct_init() +{ + tmp_parser_struct.str[0] = '\0'; + cl_list_construct(&tmp_parser_struct.str_list); + cl_list_init(&tmp_parser_struct.str_list, 10); + cl_list_construct(&tmp_parser_struct.num_list); + cl_list_init(&tmp_parser_struct.num_list, 10); + cl_list_construct(&tmp_parser_struct.num_pair_list); + cl_list_init(&tmp_parser_struct.num_pair_list, 10); +} + +/*************************************************** + ***************************************************/ + +/* + * Do NOT free objects from the temp struct. 
+ * Either they are inserted into the parse tree data + * structure, or they are already freed when copying + * their values to the parse tree data structure. + */ +static void __parser_tmp_struct_reset() +{ + tmp_parser_struct.str[0] = '\0'; + cl_list_remove_all(&tmp_parser_struct.str_list); + cl_list_remove_all(&tmp_parser_struct.num_list); + cl_list_remove_all(&tmp_parser_struct.num_pair_list); +} + +/*************************************************** + ***************************************************/ + +static void __parser_tmp_struct_destroy() +{ + __parser_tmp_struct_reset(); + cl_list_destroy(&tmp_parser_struct.str_list); + cl_list_destroy(&tmp_parser_struct.num_list); + cl_list_destroy(&tmp_parser_struct.num_pair_list); +} + +/*************************************************** + ***************************************************/ + +#define __SIMPLE_QOS_LEVEL_NAME "SimpleQoSLevel_SL" +#define __SIMPLE_QOS_LEVEL_DEFAULT_NAME "SimpleQoSLevel_DEFAULT" + +static void __setup_simple_qos_levels() +{ + uint8_t i; + char tmp_buf[30]; + memset(osm_qos_policy_simple_qos_levels, 0, + sizeof(osm_qos_policy_simple_qos_levels)); + for (i = 0; i < 16; i++) + { + osm_qos_policy_simple_qos_levels[i].sl = i; + osm_qos_policy_simple_qos_levels[i].sl_set = TRUE; + sprintf(tmp_buf, "%s%u", __SIMPLE_QOS_LEVEL_NAME, i); + osm_qos_policy_simple_qos_levels[i].name = strdup(tmp_buf); + } + + memset(&__default_simple_qos_level, 0, + sizeof(__default_simple_qos_level)); + __default_simple_qos_level.name = + strdup(__SIMPLE_QOS_LEVEL_DEFAULT_NAME); +} + +/*************************************************** + ***************************************************/ + +static void __clear_simple_qos_levels() +{ + /* + * Simple QoS levels are static. + * What's left is to invalidate default simple QoS level. 
/*
 * qsort() comparator for an array of pointers to uint64_t[2] ranges.
 * Orders by range start, then by range end.
 */
static int __cmp_num_range(const void * p1, const void * p2)
{
    uint64_t * pair1 = *((uint64_t **)p1);
    uint64_t * pair2 = *((uint64_t **)p2);

    if (pair1[0] < pair2[0])
        return -1;
    if (pair1[0] > pair2[0])
        return 1;

    if (pair1[1] < pair2[1])
        return -1;
    if (pair1[1] > pair2[1])
        return 1;

    return 0;
}

/***************************************************
 ***************************************************/

/*
 * Sort an array of ranges and merge overlapping or adjacent ones.
 *
 * arr           - array of arr_len pointers, each to a heap-allocated
 *                 uint64_t[2] holding {start, end}. Ownership of the array
 *                 and of every range passes to this function.
 * p_res_arr     - receives the reduced array (free() each element and the
 *                 array itself when done), or NULL when there is no input.
 * p_res_arr_len - receives the number of ranges in *p_res_arr.
 *
 * Ranges absorbed into a neighbour are freed. The reduction is done in
 * place, so no allocation can fail while the ranges are being merged.
 */
static void __sort_reduce_rangearr(
    uint64_t  **   arr,
    unsigned       arr_len,
    uint64_t  ** * p_res_arr,
    unsigned     * p_res_arr_len )
{
    unsigned i;
    unsigned valid_cnt;
    uint64_t ** shrunk_arr;

    *p_res_arr = NULL;
    *p_res_arr_len = 0;

    /* nothing to reduce: do not index into an empty array */
    if (!arr || !arr_len)
    {
        free(arr);
        return;
    }

    qsort(arr, arr_len, sizeof(uint64_t*), __cmp_num_range);

    /* arr[0] always opens the first surviving range */
    valid_cnt = 1;
    for (i = 1; i < arr_len; i++)
    {
        uint64_t * last = arr[valid_cnt - 1];

        if (arr[i][0] <= last[1])
        {
            /* overlap: extend the surviving range if needed */
            if (arr[i][1] > last[1])
                last[1] = arr[i][1];
            free(arr[i]);
        }
        else if ((arr[i][0] - 1) == last[1])
        {
            /* directly adjacent: glue the two ranges together */
            last[1] = arr[i][1];
            free(arr[i]);
        }
        else
        {
            /* disjoint: keep it, compacting towards the front */
            arr[valid_cnt++] = arr[i];
        }
    }

    /* give back the unused tail; keeping the larger block is harmless */
    shrunk_arr = (uint64_t **)realloc(arr, valid_cnt * sizeof(uint64_t *));
    if (shrunk_arr)
        arr = shrunk_arr;

    *p_res_arr = arr;
    *p_res_arr_len = valid_cnt;
}
*)cl_list_obj(list_iterator); + list_iterator = cl_list_next(list_iterator); + } + cl_list_remove_all(p_list); + + __sort_reduce_rangearr( tmp_arr, + len, + &res_arr, + &res_arr_len ); + *p_arr = res_arr; + *p_arr_len = res_arr_len; +} + +/*************************************************** + ***************************************************/ + +static void __merge_rangearr( + uint64_t ** range_arr_1, + unsigned range_len_1, + uint64_t ** range_arr_2, + unsigned range_len_2, + uint64_t ** * p_arr, + unsigned * p_arr_len ) +{ + unsigned i = 0; + unsigned j = 0; + unsigned len = range_len_1 + range_len_2; + uint64_t ** tmp_arr; + uint64_t ** res_arr = NULL; + unsigned res_arr_len = 0; + + *p_arr = NULL; + *p_arr_len = 0; + + tmp_arr = (uint64_t **)malloc(len * sizeof(uint64_t *)); + + for (i = 0; i < range_len_1; i++) + tmp_arr[j++] = range_arr_1[i]; + for (i = 0; i < range_len_2; i++) + tmp_arr[j++] = range_arr_2[i]; + free(range_arr_1); + free(range_arr_2); + + __sort_reduce_rangearr( tmp_arr, + len, + &res_arr, + &res_arr_len ); + *p_arr = res_arr; + *p_arr_len = res_arr_len; +} + +/*************************************************** + ***************************************************/ + +static void __parser_add_port_to_port_map( + cl_qmap_t * p_map, + osm_physp_t * p_physp) +{ + if (cl_qmap_get(p_map, cl_ntoh64(osm_physp_get_port_guid(p_physp))) == + cl_qmap_end(p_map)) + { + osm_qos_port_t * p_port = osm_qos_policy_port_create(p_physp); + if (p_port) + cl_qmap_insert(p_map, + cl_ntoh64(osm_physp_get_port_guid(p_physp)), + &p_port->map_item); + } +} + +/*************************************************** + ***************************************************/ + +static void __parser_add_guid_range_to_port_map( + cl_qmap_t * p_map, + uint64_t ** range_arr, + unsigned range_len) +{ + unsigned i; + uint64_t guid_ho; + osm_port_t * p_osm_port; + + if (!range_arr || !range_len) + return; + + for (i = 0; i < range_len; i++) { + for (guid_ho = range_arr[i][0]; 
guid_ho <= range_arr[i][1]; guid_ho++) { + p_osm_port = + osm_get_port_by_guid(p_qos_policy->p_subn, cl_hton64(guid_ho)); + if (p_osm_port) + __parser_add_port_to_port_map(p_map, p_osm_port->p_physp); + } + free(range_arr[i]); + } + free(range_arr); +} + +/*************************************************** + ***************************************************/ + +static void __parser_add_pkey_range_to_port_map( + cl_qmap_t * p_map, + uint64_t ** range_arr, + unsigned range_len) +{ + unsigned i; + uint64_t pkey_64; + ib_net16_t pkey; + osm_prtn_t * p_prtn; + + if (!range_arr || !range_len) + return; + + for (i = 0; i < range_len; i++) { + for (pkey_64 = range_arr[i][0]; pkey_64 <= range_arr[i][1]; pkey_64++) { + pkey = cl_hton16((uint16_t)(pkey_64 & 0x7fff)); + p_prtn = (osm_prtn_t *) + cl_qmap_get(&p_qos_policy->p_subn->prtn_pkey_tbl, pkey); + if (p_prtn != (osm_prtn_t *)cl_qmap_end( + &p_qos_policy->p_subn->prtn_pkey_tbl)) { + __parser_add_map_to_port_map(p_map, &p_prtn->part_guid_tbl); + __parser_add_map_to_port_map(p_map, &p_prtn->full_guid_tbl); + } + } + free(range_arr[i]); + } + free(range_arr); +} + +/*************************************************** + ***************************************************/ + +static void __parser_add_partition_list_to_port_map( + cl_qmap_t * p_map, + cl_list_t * p_list) +{ + cl_list_iterator_t list_iterator; + char * tmp_str; + osm_prtn_t * p_prtn; + + /* extract all the ports from the partition + to the port map of this port group */ + list_iterator = cl_list_head(p_list); + while(list_iterator != cl_list_end(p_list)) { + tmp_str = (char*)cl_list_obj(list_iterator); + if (tmp_str) { + p_prtn = osm_prtn_find_by_name(p_qos_policy->p_subn, tmp_str); + if (p_prtn) { + __parser_add_map_to_port_map(p_map, &p_prtn->part_guid_tbl); + __parser_add_map_to_port_map(p_map, &p_prtn->full_guid_tbl); + } + free(tmp_str); + } + list_iterator = cl_list_next(list_iterator); + } + cl_list_remove_all(p_list); +} + 
+/*************************************************** + ***************************************************/ + +static void __parser_add_map_to_port_map( + cl_qmap_t * p_dmap, + cl_map_t * p_smap) +{ + cl_map_iterator_t map_iterator; + osm_physp_t * p_physp; + + if (!p_dmap || !p_smap) + return; + + map_iterator = cl_map_head(p_smap); + while (map_iterator != cl_map_end(p_smap)) { + p_physp = (osm_physp_t*)cl_map_obj(map_iterator); + __parser_add_port_to_port_map(p_dmap, p_physp); + map_iterator = cl_map_next(map_iterator); + } +} + +/*************************************************** + ***************************************************/ + +static int __validate_pkeys( uint64_t ** range_arr, + unsigned range_len, + boolean_t is_ipoib) +{ + unsigned i; + uint64_t pkey_64; + ib_net16_t pkey; + osm_prtn_t * p_prtn; + + if (!range_arr || !range_len) + return 0; + + for (i = 0; i < range_len; i++) { + for (pkey_64 = range_arr[i][0]; pkey_64 <= range_arr[i][1]; pkey_64++) { + pkey = cl_hton16((uint16_t)(pkey_64 & 0x7fff)); + p_prtn = (osm_prtn_t *) + cl_qmap_get(&p_qos_policy->p_subn->prtn_pkey_tbl, pkey); + + if (p_prtn == (osm_prtn_t *)cl_qmap_end( + &p_qos_policy->p_subn->prtn_pkey_tbl)) + p_prtn = NULL; + + if (is_ipoib) { + /* + * Be very strict for IPoIB partition: + * - the partition for the pkey have to exist + * - it has to have at least 2 full members + */ + if (!p_prtn) { + yyerror("IPoIB partition, pkey 0x%04X - " + "partition doesn't exist", + cl_ntoh16(pkey)); + return 1; + } + else if (cl_map_count(&p_prtn->full_guid_tbl) < 2) { + yyerror("IPoIB partition, pkey 0x%04X - " + "partition has less than two full members", + cl_ntoh16(pkey)); + return 1; + } + } + else if (!p_prtn) { + /* + * For non-IPoIB pkey we just want to check that + * the relevant partition exists. + * And even if it doesn't, don't exit - just print + * error message and continue. 
+ */ + OSM_LOG(p_qos_parser_osm_log, OSM_LOG_ERROR, "ERR AC02: " + "pkey 0x%04X - partition doesn't exist\n", + cl_ntoh16(pkey)); + } + } + } + return 0; +} + +/*************************************************** + ***************************************************/ diff --git a/opensm/osm_qos_policy.c b/opensm/osm_qos_policy.c new file mode 100644 index 0000000..54d351a --- /dev/null +++ b/opensm/osm_qos_policy.c @@ -0,0 +1,1065 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2012 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * OSM QoS Policy functions. + * + * Author: + * Yevgeny Kliteynik, Mellanox + */ + +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_QOS_POLICY_C +#include +#include +#include +#include +#include +#include + +extern osm_qos_level_t __default_simple_qos_level; + +/*************************************************** + ***************************************************/ + +static void +__build_nodebyname_hash(osm_qos_policy_t * p_qos_policy) +{ + osm_node_t * p_node; + cl_qmap_t * p_node_guid_tbl = &p_qos_policy->p_subn->node_guid_tbl; + + p_qos_policy->p_node_hash = st_init_strtable(); + CL_ASSERT(p_qos_policy->p_node_hash); + + if (!p_node_guid_tbl || !cl_qmap_count(p_node_guid_tbl)) + return; + + for (p_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl); + p_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl); + p_node = (osm_node_t *) cl_qmap_next(&p_node->map_item)) { + if (!st_lookup(p_qos_policy->p_node_hash, + (st_data_t)p_node->print_desc, NULL)) + st_insert(p_qos_policy->p_node_hash, + (st_data_t)p_node->print_desc, + (st_data_t)p_node); + } +} + +/*************************************************** + ***************************************************/ + +static boolean_t +__is_num_in_range_arr(uint64_t ** range_arr, + unsigned range_arr_len, uint64_t num) +{ + unsigned ind_1 = 0; + unsigned ind_2 = range_arr_len - 1; + unsigned ind_mid; + + if (!range_arr || !range_arr_len) + return FALSE; + + while (ind_1 <= ind_2) { + if (num < range_arr[ind_1][0] || num > range_arr[ind_2][1]) + return FALSE; + else if (num <= range_arr[ind_1][1] || num >= range_arr[ind_2][0]) + return TRUE; + + ind_mid = ind_1 + (ind_2 - 
ind_1 + 1)/2; + + if (num < range_arr[ind_mid][0]) + ind_2 = ind_mid; + else if (num > range_arr[ind_mid][1]) + ind_1 = ind_mid; + else + return TRUE; + + ind_1++; + ind_2--; + } + + return FALSE; +} + +/*************************************************** + ***************************************************/ + +static void __free_single_element(void *p_element, void *context) +{ + if (p_element) + free(p_element); +} + +/*************************************************** + ***************************************************/ + +osm_qos_port_t *osm_qos_policy_port_create(osm_physp_t *p_physp) +{ + osm_qos_port_t *p = + (osm_qos_port_t *) calloc(1, sizeof(osm_qos_port_t)); + if (p) + p->p_physp = p_physp; + return p; +} + +/*************************************************** + ***************************************************/ + +osm_qos_port_group_t *osm_qos_policy_port_group_create() +{ + osm_qos_port_group_t *p = + (osm_qos_port_group_t *) calloc(1, sizeof(osm_qos_port_group_t)); + if (p) + cl_qmap_init(&p->port_map); + return p; +} + +/*************************************************** + ***************************************************/ + +void osm_qos_policy_port_group_destroy(osm_qos_port_group_t * p) +{ + osm_qos_port_t * p_port; + osm_qos_port_t * p_old_port; + + if (!p) + return; + + if (p->name) + free(p->name); + if (p->use) + free(p->use); + + p_port = (osm_qos_port_t *) cl_qmap_head(&p->port_map); + while (p_port != (osm_qos_port_t *) cl_qmap_end(&p->port_map)) + { + p_old_port = p_port; + p_port = (osm_qos_port_t *) cl_qmap_next(&p_port->map_item); + free(p_old_port); + } + cl_qmap_remove_all(&p->port_map); + + free(p); +} + +/*************************************************** + ***************************************************/ + +osm_qos_vlarb_scope_t *osm_qos_policy_vlarb_scope_create() +{ + osm_qos_vlarb_scope_t *p = + (osm_qos_vlarb_scope_t *) calloc(1, sizeof(osm_qos_vlarb_scope_t)); + if (p) { + cl_list_init(&p->group_list, 10); + 
cl_list_init(&p->across_list, 10); + cl_list_init(&p->vlarb_high_list, 10); + cl_list_init(&p->vlarb_low_list, 10); + } + return p; +} + +/*************************************************** + ***************************************************/ + +void osm_qos_policy_vlarb_scope_destroy(osm_qos_vlarb_scope_t * p) +{ + if (!p) + return; + + cl_list_apply_func(&p->group_list, __free_single_element, NULL); + cl_list_apply_func(&p->across_list, __free_single_element, NULL); + cl_list_apply_func(&p->vlarb_high_list, __free_single_element, NULL); + cl_list_apply_func(&p->vlarb_low_list, __free_single_element, NULL); + + cl_list_remove_all(&p->group_list); + cl_list_remove_all(&p->across_list); + cl_list_remove_all(&p->vlarb_high_list); + cl_list_remove_all(&p->vlarb_low_list); + + cl_list_destroy(&p->group_list); + cl_list_destroy(&p->across_list); + cl_list_destroy(&p->vlarb_high_list); + cl_list_destroy(&p->vlarb_low_list); + + free(p); +} + +/*************************************************** + ***************************************************/ + +osm_qos_sl2vl_scope_t *osm_qos_policy_sl2vl_scope_create() +{ + osm_qos_sl2vl_scope_t *p = + (osm_qos_sl2vl_scope_t *) calloc(1, sizeof(osm_qos_sl2vl_scope_t)); + if (p) { + cl_list_init(&p->group_list, 10); + cl_list_init(&p->across_from_list, 10); + cl_list_init(&p->across_to_list, 10); + } + return p; +} + +/*************************************************** + ***************************************************/ + +void osm_qos_policy_sl2vl_scope_destroy(osm_qos_sl2vl_scope_t * p) +{ + if (!p) + return; + + cl_list_apply_func(&p->group_list, __free_single_element, NULL); + cl_list_apply_func(&p->across_from_list, __free_single_element, NULL); + cl_list_apply_func(&p->across_to_list, __free_single_element, NULL); + + cl_list_remove_all(&p->group_list); + cl_list_remove_all(&p->across_from_list); + cl_list_remove_all(&p->across_to_list); + + cl_list_destroy(&p->group_list); + cl_list_destroy(&p->across_from_list); + 
cl_list_destroy(&p->across_to_list); + + free(p); +} + +/*************************************************** + ***************************************************/ + +osm_qos_level_t *osm_qos_policy_qos_level_create() +{ + osm_qos_level_t *p = + (osm_qos_level_t *) calloc(1, sizeof(osm_qos_level_t)); + return p; +} + +/*************************************************** + ***************************************************/ + +void osm_qos_policy_qos_level_destroy(osm_qos_level_t * p) +{ + unsigned i; + + if (!p) + return; + + free(p->name); + free(p->use); + + for (i = 0; i < p->path_bits_range_len; i++) + free(p->path_bits_range_arr[i]); + free(p->path_bits_range_arr); + + for(i = 0; i < p->pkey_range_len; i++) + free((p->pkey_range_arr[i])); + free(p->pkey_range_arr); + + free(p); +} + +/*************************************************** + ***************************************************/ + +boolean_t osm_qos_level_has_pkey(IN const osm_qos_level_t * p_qos_level, + IN ib_net16_t pkey) +{ + if (!p_qos_level || !p_qos_level->pkey_range_len) + return FALSE; + return __is_num_in_range_arr(p_qos_level->pkey_range_arr, + p_qos_level->pkey_range_len, + cl_ntoh16(ib_pkey_get_base(pkey))); +} + +/*************************************************** + ***************************************************/ + +ib_net16_t osm_qos_level_get_shared_pkey(IN const osm_qos_level_t * p_qos_level, + IN const osm_physp_t * p_src_physp, + IN const osm_physp_t * p_dest_physp, + IN const boolean_t allow_both_pkeys) +{ + unsigned i; + uint16_t pkey_ho = 0; + + if (!p_qos_level || !p_qos_level->pkey_range_len) + return 0; + + /* + * ToDo: This approach is not optimal. + * Think how to find shared pkey that also exists + * in QoS level in less runtime. 
+ */ + + for (i = 0; i < p_qos_level->pkey_range_len; i++) { + for (pkey_ho = p_qos_level->pkey_range_arr[i][0]; + pkey_ho <= p_qos_level->pkey_range_arr[i][1]; pkey_ho++) { + if (osm_physp_share_this_pkey + (p_src_physp, p_dest_physp, cl_hton16(pkey_ho), + allow_both_pkeys)) + return cl_hton16(pkey_ho); + } + } + + return 0; +} + +/*************************************************** + ***************************************************/ + +osm_qos_match_rule_t *osm_qos_policy_match_rule_create() +{ + osm_qos_match_rule_t *p = + (osm_qos_match_rule_t *) calloc(1, sizeof(osm_qos_match_rule_t)); + if (p) { + cl_list_init(&p->source_list, 10); + cl_list_init(&p->source_group_list, 10); + cl_list_init(&p->destination_list, 10); + cl_list_init(&p->destination_group_list, 10); + } + return p; +} + +/*************************************************** + ***************************************************/ + +void osm_qos_policy_match_rule_destroy(osm_qos_match_rule_t * p) +{ + unsigned i; + + if (!p) + return; + + if (p->qos_level_name) + free(p->qos_level_name); + if (p->use) + free(p->use); + + if (p->service_id_range_arr) { + for (i = 0; i < p->service_id_range_len; i++) + free(p->service_id_range_arr[i]); + free(p->service_id_range_arr); + } + + if (p->qos_class_range_arr) { + for (i = 0; i < p->qos_class_range_len; i++) + free(p->qos_class_range_arr[i]); + free(p->qos_class_range_arr); + } + + if (p->pkey_range_arr) { + for (i = 0; i < p->pkey_range_len; i++) + free(p->pkey_range_arr[i]); + free(p->pkey_range_arr); + } + + cl_list_apply_func(&p->source_list, __free_single_element, NULL); + cl_list_remove_all(&p->source_list); + cl_list_destroy(&p->source_list); + + cl_list_remove_all(&p->source_group_list); + cl_list_destroy(&p->source_group_list); + + cl_list_apply_func(&p->destination_list, __free_single_element, NULL); + cl_list_remove_all(&p->destination_list); + cl_list_destroy(&p->destination_list); + + cl_list_remove_all(&p->destination_group_list); + 
cl_list_destroy(&p->destination_group_list); + + free(p); +} + +/*************************************************** + ***************************************************/ + +osm_qos_policy_t * osm_qos_policy_create(osm_subn_t * p_subn) +{ + osm_qos_policy_t * p_qos_policy = (osm_qos_policy_t *)calloc(1, sizeof(osm_qos_policy_t)); + if (!p_qos_policy) + return NULL; + + cl_list_construct(&p_qos_policy->port_groups); + cl_list_init(&p_qos_policy->port_groups, 10); + + cl_list_construct(&p_qos_policy->vlarb_tables); + cl_list_init(&p_qos_policy->vlarb_tables, 10); + + cl_list_construct(&p_qos_policy->sl2vl_tables); + cl_list_init(&p_qos_policy->sl2vl_tables, 10); + + cl_list_construct(&p_qos_policy->qos_levels); + cl_list_init(&p_qos_policy->qos_levels, 10); + + cl_list_construct(&p_qos_policy->qos_match_rules); + cl_list_init(&p_qos_policy->qos_match_rules, 10); + + p_qos_policy->p_subn = p_subn; + __build_nodebyname_hash(p_qos_policy); + + return p_qos_policy; +} + +/*************************************************** + ***************************************************/ + +void osm_qos_policy_destroy(osm_qos_policy_t * p_qos_policy) +{ + cl_list_iterator_t list_iterator; + osm_qos_port_group_t *p_port_group = NULL; + osm_qos_vlarb_scope_t *p_vlarb_scope = NULL; + osm_qos_sl2vl_scope_t *p_sl2vl_scope = NULL; + osm_qos_level_t *p_qos_level = NULL; + osm_qos_match_rule_t *p_qos_match_rule = NULL; + + if (!p_qos_policy) + return; + + list_iterator = cl_list_head(&p_qos_policy->port_groups); + while (list_iterator != cl_list_end(&p_qos_policy->port_groups)) { + p_port_group = + (osm_qos_port_group_t *) cl_list_obj(list_iterator); + if (p_port_group) + osm_qos_policy_port_group_destroy(p_port_group); + list_iterator = cl_list_next(list_iterator); + } + cl_list_remove_all(&p_qos_policy->port_groups); + cl_list_destroy(&p_qos_policy->port_groups); + + list_iterator = cl_list_head(&p_qos_policy->vlarb_tables); + while (list_iterator != 
cl_list_end(&p_qos_policy->vlarb_tables)) { + p_vlarb_scope = + (osm_qos_vlarb_scope_t *) cl_list_obj(list_iterator); + if (p_vlarb_scope) + osm_qos_policy_vlarb_scope_destroy(p_vlarb_scope); + list_iterator = cl_list_next(list_iterator); + } + cl_list_remove_all(&p_qos_policy->vlarb_tables); + cl_list_destroy(&p_qos_policy->vlarb_tables); + + list_iterator = cl_list_head(&p_qos_policy->sl2vl_tables); + while (list_iterator != cl_list_end(&p_qos_policy->sl2vl_tables)) { + p_sl2vl_scope = + (osm_qos_sl2vl_scope_t *) cl_list_obj(list_iterator); + if (p_sl2vl_scope) + osm_qos_policy_sl2vl_scope_destroy(p_sl2vl_scope); + list_iterator = cl_list_next(list_iterator); + } + cl_list_remove_all(&p_qos_policy->sl2vl_tables); + cl_list_destroy(&p_qos_policy->sl2vl_tables); + + list_iterator = cl_list_head(&p_qos_policy->qos_levels); + while (list_iterator != cl_list_end(&p_qos_policy->qos_levels)) { + p_qos_level = (osm_qos_level_t *) cl_list_obj(list_iterator); + if (p_qos_level) + osm_qos_policy_qos_level_destroy(p_qos_level); + list_iterator = cl_list_next(list_iterator); + } + cl_list_remove_all(&p_qos_policy->qos_levels); + cl_list_destroy(&p_qos_policy->qos_levels); + + list_iterator = cl_list_head(&p_qos_policy->qos_match_rules); + while (list_iterator != cl_list_end(&p_qos_policy->qos_match_rules)) { + p_qos_match_rule = + (osm_qos_match_rule_t *) cl_list_obj(list_iterator); + if (p_qos_match_rule) + osm_qos_policy_match_rule_destroy(p_qos_match_rule); + list_iterator = cl_list_next(list_iterator); + } + cl_list_remove_all(&p_qos_policy->qos_match_rules); + cl_list_destroy(&p_qos_policy->qos_match_rules); + + if (p_qos_policy->p_node_hash) + st_free_table(p_qos_policy->p_node_hash); + + free(p_qos_policy); + + p_qos_policy = NULL; +} + +/*************************************************** + ***************************************************/ + +static boolean_t +__qos_policy_is_port_in_group(osm_subn_t * p_subn, + const osm_physp_t * p_physp, + osm_qos_port_group_t * 
p_port_group) +{ + osm_node_t *p_node = osm_physp_get_node_ptr(p_physp); + ib_net64_t port_guid = osm_physp_get_port_guid(p_physp); + uint64_t port_guid_ho = cl_ntoh64(port_guid); + + /* check whether this port's type matches any of group's types */ + + if ( p_port_group->node_types & + (((uint8_t)1)<<osm_node_get_type(p_node)) ) + return TRUE; + + /* check whether this port's guid is in group's port map */ + + if (cl_qmap_get(&p_port_group->port_map, port_guid_ho) != + cl_qmap_end(&p_port_group->port_map)) + return TRUE; + + return FALSE; +} /* __qos_policy_is_port_in_group() */ + +/*************************************************** + ***************************************************/ + +static boolean_t +__qos_policy_is_port_in_group_list(const osm_qos_policy_t * p_qos_policy, + const osm_physp_t * p_physp, + cl_list_t * p_port_group_list) +{ + osm_qos_port_group_t *p_port_group; + cl_list_iterator_t list_iterator; + + list_iterator = cl_list_head(p_port_group_list); + while (list_iterator != cl_list_end(p_port_group_list)) { + p_port_group = + (osm_qos_port_group_t *) cl_list_obj(list_iterator); + if (p_port_group) { + if (__qos_policy_is_port_in_group + (p_qos_policy->p_subn, p_physp, p_port_group)) + return TRUE; + } + list_iterator = cl_list_next(list_iterator); + } + return FALSE; +} + +/*************************************************** + ***************************************************/ + +static osm_qos_match_rule_t *__qos_policy_get_match_rule_by_params( + const osm_qos_policy_t * p_qos_policy, + uint64_t service_id, + uint16_t qos_class, + uint16_t pkey, + const osm_physp_t * p_src_physp, + const osm_physp_t * p_dest_physp, + ib_net64_t comp_mask) +{ + osm_qos_match_rule_t *p_qos_match_rule = NULL; + cl_list_iterator_t list_iterator; + osm_log_t * p_log = &p_qos_policy->p_subn->p_osm->log; + + boolean_t matched_by_sguid = FALSE, + matched_by_dguid = FALSE, + matched_by_sordguid = FALSE, + matched_by_class = FALSE, + matched_by_sid = FALSE, + matched_by_pkey = FALSE; + + if (!cl_list_count(&p_qos_policy->qos_match_rules)) + return NULL; + + OSM_LOG_ENTER(p_log); + + /* Go over all QoS
match rules and find the one that matches the request */ + + list_iterator = cl_list_head(&p_qos_policy->qos_match_rules); + while (list_iterator != cl_list_end(&p_qos_policy->qos_match_rules)) { + /* reset per rule: flags set by a rule rejected later must not leak into the debug log */ + matched_by_sguid = matched_by_dguid = matched_by_sordguid = FALSE; + matched_by_class = matched_by_sid = matched_by_pkey = FALSE; + p_qos_match_rule = (osm_qos_match_rule_t *) cl_list_obj(list_iterator); + if (!p_qos_match_rule) { + list_iterator = cl_list_next(list_iterator); + continue; + } + + /* If a match rule has Source groups and no Destination groups, + * PR request source has to be in this list */ + + if (cl_list_count(&p_qos_match_rule->source_group_list) + && !cl_list_count(&p_qos_match_rule->destination_group_list)) { + if (!__qos_policy_is_port_in_group_list(p_qos_policy, + p_src_physp, + &p_qos_match_rule->source_group_list)) { + list_iterator = cl_list_next(list_iterator); + continue; + } + matched_by_sguid = TRUE; + } + + /* If a match rule has Destination groups and no Source groups, + * PR request dest. has to be in this list */ + + if (cl_list_count(&p_qos_match_rule->destination_group_list) + && !cl_list_count(&p_qos_match_rule->source_group_list)) { + if (!__qos_policy_is_port_in_group_list(p_qos_policy, + p_dest_physp, + &p_qos_match_rule-> + destination_group_list)) + { + list_iterator = cl_list_next(list_iterator); + continue; + } + matched_by_dguid = TRUE; + } + + /* If a match rule has both Source and Destination groups, + * PR request source or dest.
must be in respective list + */ + if (cl_list_count(&p_qos_match_rule->source_group_list) + && cl_list_count(&p_qos_match_rule->destination_group_list)) { + if (__qos_policy_is_port_in_group_list(p_qos_policy, + p_src_physp, + &p_qos_match_rule-> + source_group_list) + && __qos_policy_is_port_in_group_list(p_qos_policy, + p_dest_physp, + &p_qos_match_rule-> + destination_group_list)) + matched_by_sordguid = TRUE; + else { + list_iterator = cl_list_next(list_iterator); + continue; + } + } + + /* If a match rule has QoS classes, PR request HAS + to have a matching QoS class to match the rule */ + + if (p_qos_match_rule->qos_class_range_len) { + if (!(comp_mask & IB_PR_COMPMASK_QOS_CLASS)) { + list_iterator = cl_list_next(list_iterator); + continue; + } + + if (!__is_num_in_range_arr + (p_qos_match_rule->qos_class_range_arr, + p_qos_match_rule->qos_class_range_len, + qos_class)) { + list_iterator = cl_list_next(list_iterator); + continue; + } + matched_by_class = TRUE; + } + + /* If a match rule has Service IDs, PR request HAS + to have a matching Service ID to match the rule */ + + if (p_qos_match_rule->service_id_range_len) { + if (!(comp_mask & IB_PR_COMPMASK_SERVICEID_MSB) || + !(comp_mask & IB_PR_COMPMASK_SERVICEID_LSB)) { + list_iterator = cl_list_next(list_iterator); + continue; + } + + if (!__is_num_in_range_arr + (p_qos_match_rule->service_id_range_arr, + p_qos_match_rule->service_id_range_len, + service_id)) { + list_iterator = cl_list_next(list_iterator); + continue; + } + matched_by_sid = TRUE; + } + + /* If a match rule has PKeys, PR request HAS + to have a matching PKey to match the rule */ + + if (p_qos_match_rule->pkey_range_len) { + if (!(comp_mask & IB_PR_COMPMASK_PKEY)) { + list_iterator = cl_list_next(list_iterator); + continue; + } + + if (!__is_num_in_range_arr + (p_qos_match_rule->pkey_range_arr, + p_qos_match_rule->pkey_range_len, + pkey & 0x7FFF)) { + list_iterator = cl_list_next(list_iterator); + continue; + } + matched_by_pkey = TRUE; + } + 
+ /* if we got here, then this match-rule matched this PR request */ + break; + } + + if (list_iterator == cl_list_end(&p_qos_policy->qos_match_rules)) + p_qos_match_rule = NULL; + + if (p_qos_match_rule) + OSM_LOG(p_log, OSM_LOG_DEBUG, + "request matched rule (%s) by:%s%s%s%s%s%s\n", + (p_qos_match_rule->use) ? + p_qos_match_rule->use : "no description", + (matched_by_sguid) ? " SGUID" : "", + (matched_by_dguid) ? " DGUID" : "", + (matched_by_sordguid) ? " SorDGUID" : "", + (matched_by_class) ? " QoS_Class" : "", + (matched_by_sid) ? " ServiceID" : "", + (matched_by_pkey) ? " PKey" : ""); + else + OSM_LOG(p_log, OSM_LOG_DEBUG, + "request not matched any rule\n"); + + OSM_LOG_EXIT(p_log); + return p_qos_match_rule; +} /* __qos_policy_get_match_rule_by_params() */ + +/*************************************************** + ***************************************************/ + +static osm_qos_level_t *__qos_policy_get_qos_level_by_name( + const osm_qos_policy_t * p_qos_policy, + const char *name) +{ + osm_qos_level_t *p_qos_level = NULL; + cl_list_iterator_t list_iterator; + + list_iterator = cl_list_head(&p_qos_policy->qos_levels); + while (list_iterator != cl_list_end(&p_qos_policy->qos_levels)) { + p_qos_level = (osm_qos_level_t *) cl_list_obj(list_iterator); + /* + * Names are case INsensitive. A NULL entry is just not compared: + * a bare 'continue' would skip the advance below and spin forever. + */ + if (p_qos_level && strcasecmp(name, p_qos_level->name) == 0) + return p_qos_level; + + list_iterator = cl_list_next(list_iterator); + } + + return NULL; +} + +/*************************************************** + ***************************************************/ + +static osm_qos_port_group_t *__qos_policy_get_port_group_by_name( + const osm_qos_policy_t * p_qos_policy, + const char *const name) +{ + osm_qos_port_group_t *p_port_group = NULL; + cl_list_iterator_t list_iterator; + + list_iterator = cl_list_head(&p_qos_policy->port_groups); + while (list_iterator != cl_list_end(&p_qos_policy->port_groups)) { + p_port_group = + (osm_qos_port_group_t *)
cl_list_obj(list_iterator); + /* + * Names are case INsensitive. A NULL entry is just not compared: + * a bare 'continue' would skip the advance below and spin forever. + */ + if (p_port_group && strcasecmp(name, p_port_group->name) == 0) + return p_port_group; + + list_iterator = cl_list_next(list_iterator); + } + + return NULL; +} + +/*************************************************** + ***************************************************/ + +static void __qos_policy_validate_pkey( + osm_qos_policy_t * p_qos_policy, + osm_qos_match_rule_t * p_qos_match_rule, + osm_prtn_t * p_prtn) +{ + if (!p_qos_policy || !p_qos_match_rule || !p_prtn) + return; + + if (!p_qos_match_rule->p_qos_level->sl_set || + p_prtn->sl == p_qos_match_rule->p_qos_level->sl) + return; + + OSM_LOG(&p_qos_policy->p_subn->p_osm->log, OSM_LOG_VERBOSE, + "QoS Level SL (%u) for Pkey 0x%04X in match rule " + "differs from partition SL (%u)\n", + p_qos_match_rule->p_qos_level->sl, + cl_ntoh16(p_prtn->pkey), p_prtn->sl); +} + +/*************************************************** + ***************************************************/ + +int osm_qos_policy_validate(osm_qos_policy_t * p_qos_policy, + osm_log_t *p_log) +{ + cl_list_iterator_t match_rules_list_iterator; + cl_list_iterator_t list_iterator; + osm_qos_port_group_t *p_port_group = NULL; + osm_qos_match_rule_t *p_qos_match_rule = NULL; + char *str; + unsigned i, j; + int res = 0; + uint64_t pkey_64; + ib_net16_t pkey; + osm_prtn_t * p_prtn; + + OSM_LOG_ENTER(p_log); + + /* set default qos level */ + + p_qos_policy->p_default_qos_level = + __qos_policy_get_qos_level_by_name(p_qos_policy, OSM_QOS_POLICY_DEFAULT_LEVEL_NAME); + if (!p_qos_policy->p_default_qos_level) { + /* There's no default QoS level in the usual qos-level section.
+ Check whether the 'simple' default QoS level that can be + defined in the qos-ulp section exists */ + if (__default_simple_qos_level.sl_set) { + p_qos_policy->p_default_qos_level = &__default_simple_qos_level; + } + else { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AC10: " + "Default qos-level (%s) not defined.\n", + OSM_QOS_POLICY_DEFAULT_LEVEL_NAME); + res = 1; + goto Exit; + } + } + + /* scan all the match rules, and fill the lists of pointers to + relevant qos levels and port groups to speed up PR matching */ + + i = 1; + match_rules_list_iterator = + cl_list_head(&p_qos_policy->qos_match_rules); + while (match_rules_list_iterator != + cl_list_end(&p_qos_policy->qos_match_rules)) { + p_qos_match_rule = + (osm_qos_match_rule_t *) + cl_list_obj(match_rules_list_iterator); + CL_ASSERT(p_qos_match_rule); + + /* find the matching qos-level for each match-rule */ + + if (!p_qos_match_rule->p_qos_level) + p_qos_match_rule->p_qos_level = + __qos_policy_get_qos_level_by_name(p_qos_policy, + p_qos_match_rule->qos_level_name); + + if (!p_qos_match_rule->p_qos_level) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AC11: " + "qos-match-rule num %u: qos-level '%s' not found\n", + i, p_qos_match_rule->qos_level_name); + res = 1; + goto Exit; + } + + /* find the matching port-group for element of source_list */ + + if (cl_list_count(&p_qos_match_rule->source_list)) { + list_iterator = + cl_list_head(&p_qos_match_rule->source_list); + while (list_iterator != + cl_list_end(&p_qos_match_rule->source_list)) { + str = (char *)cl_list_obj(list_iterator); + CL_ASSERT(str); + + p_port_group = + __qos_policy_get_port_group_by_name(p_qos_policy, str); + if (!p_port_group) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AC12: " + "qos-match-rule num %u: source port-group '%s' not found\n", + i, str); + res = 1; + goto Exit; + } + + cl_list_insert_tail(&p_qos_match_rule-> + source_group_list, + p_port_group); + + list_iterator = cl_list_next(list_iterator); + } + } + + /* find the matching port-group for 
element of destination_list */ + + if (cl_list_count(&p_qos_match_rule->destination_list)) { + list_iterator = + cl_list_head(&p_qos_match_rule->destination_list); + while (list_iterator != + cl_list_end(&p_qos_match_rule-> + destination_list)) { + str = (char *)cl_list_obj(list_iterator); + CL_ASSERT(str); + + p_port_group = + __qos_policy_get_port_group_by_name(p_qos_policy,str); + if (!p_port_group) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AC13: " + "qos-match-rule num %u: destination port-group '%s' not found\n", + i, str); + res = 1; + goto Exit; + } + + cl_list_insert_tail(&p_qos_match_rule-> + destination_group_list, + p_port_group); + + list_iterator = cl_list_next(list_iterator); + } + } + + /* + * Scan all the pkeys in matching rule, and if the + * partition for these pkeys exists, set the SL + * according to the QoS Level. + * Warn if there's mismatch between QoS level SL + * and Partition SL. + */ + + for (j = 0; j < p_qos_match_rule->pkey_range_len; j++) { + for ( pkey_64 = p_qos_match_rule->pkey_range_arr[j][0]; + pkey_64 <= p_qos_match_rule->pkey_range_arr[j][1]; + pkey_64++) { + pkey = cl_hton16((uint16_t)(pkey_64 & 0x7fff)); + p_prtn = (osm_prtn_t *)cl_qmap_get( + &p_qos_policy->p_subn->prtn_pkey_tbl, pkey); + + if (p_prtn == (osm_prtn_t *)cl_qmap_end( + &p_qos_policy->p_subn->prtn_pkey_tbl)) + /* partition for this pkey not found */ + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AC14: " + "pkey 0x%04X in match rule - " + "partition doesn't exist\n", + cl_ntoh16(pkey)); + else + __qos_policy_validate_pkey(p_qos_policy, + p_qos_match_rule, + p_prtn); + } + } + + /* done with the current match-rule */ + + match_rules_list_iterator = + cl_list_next(match_rules_list_iterator); + i++; + } + +Exit: + OSM_LOG_EXIT(p_log); + return res; +} /* osm_qos_policy_validate() */ + +/*************************************************** + ***************************************************/ + +static osm_qos_level_t * __qos_policy_get_qos_level_by_params( + IN const 
osm_qos_policy_t * p_qos_policy, + IN const osm_physp_t * p_src_physp, + IN const osm_physp_t * p_dest_physp, + IN uint64_t service_id, + IN uint16_t qos_class, + IN uint16_t pkey, + IN ib_net64_t comp_mask) +{ + osm_qos_match_rule_t *p_qos_match_rule = NULL; + + if (!p_qos_policy) + return NULL; + + p_qos_match_rule = __qos_policy_get_match_rule_by_params( + p_qos_policy, service_id, qos_class, pkey, + p_src_physp, p_dest_physp, comp_mask); + + return p_qos_match_rule ? p_qos_match_rule->p_qos_level : + p_qos_policy->p_default_qos_level; +} /* __qos_policy_get_qos_level_by_params() */ + +/*************************************************** + ***************************************************/ + +osm_qos_level_t * osm_qos_policy_get_qos_level_by_pr( + IN const osm_qos_policy_t * p_qos_policy, + IN const ib_path_rec_t * p_pr, + IN const osm_physp_t * p_src_physp, + IN const osm_physp_t * p_dest_physp, + IN ib_net64_t comp_mask) +{ + return __qos_policy_get_qos_level_by_params( + p_qos_policy, p_src_physp, p_dest_physp, + cl_ntoh64(p_pr->service_id), ib_path_rec_qos_class(p_pr), + cl_ntoh16(p_pr->pkey), comp_mask); +} + +/*************************************************** + ***************************************************/ + +osm_qos_level_t * osm_qos_policy_get_qos_level_by_mpr( + IN const osm_qos_policy_t * p_qos_policy, + IN const ib_multipath_rec_t * p_mpr, + IN const osm_physp_t * p_src_physp, + IN const osm_physp_t * p_dest_physp, + IN ib_net64_t comp_mask) +{ + ib_net64_t pr_comp_mask = 0; + + if (!p_qos_policy) + return NULL; + + /* + * Converting MultiPathRecord compmask to the PathRecord + * compmask. Note that only relevant bits are set. + */ + pr_comp_mask = + ((comp_mask & IB_MPR_COMPMASK_QOS_CLASS) ? + IB_PR_COMPMASK_QOS_CLASS : 0) | + ((comp_mask & IB_MPR_COMPMASK_PKEY) ? + IB_PR_COMPMASK_PKEY : 0) | + ((comp_mask & IB_MPR_COMPMASK_SERVICEID_MSB) ? + IB_PR_COMPMASK_SERVICEID_MSB : 0) | + ((comp_mask & IB_MPR_COMPMASK_SERVICEID_LSB) ? 
+ IB_PR_COMPMASK_SERVICEID_LSB : 0); + + return __qos_policy_get_qos_level_by_params( + p_qos_policy, p_src_physp, p_dest_physp, + cl_ntoh64(ib_multipath_rec_service_id(p_mpr)), + ib_multipath_rec_qos_class(p_mpr), + cl_ntoh16(p_mpr->pkey), pr_comp_mask); +} + +/*************************************************** + ***************************************************/ diff --git a/opensm/osm_remote_sm.c b/opensm/osm_remote_sm.c new file mode 100644 index 0000000..e081664 --- /dev/null +++ b/opensm/osm_remote_sm.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_remote_sm_t. + * This object represents the remote SM object. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#define FILE_ID OSM_FILE_REMOTE_SM_C +#include + +void osm_remote_sm_construct(IN osm_remote_sm_t * p_sm) +{ + memset(p_sm, 0, sizeof(*p_sm)); +} + +void osm_remote_sm_destroy(IN osm_remote_sm_t * p_sm) +{ + memset(p_sm, 0, sizeof(*p_sm)); +} + +void osm_remote_sm_init(IN osm_remote_sm_t * p_sm, + IN const ib_sm_info_t * p_smi) +{ + CL_ASSERT(p_sm); + + osm_remote_sm_construct(p_sm); + + p_sm->smi = *p_smi; + return; +} diff --git a/opensm/osm_req.c b/opensm/osm_req.c new file mode 100644 index 0000000..0664f62 --- /dev/null +++ b/opensm/osm_req.c @@ -0,0 +1,432 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_req_t. + * This object represents the generic attribute requester. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#define FILE_ID OSM_FILE_REQ_C +#include +#include +#include +#include +#include +#include +#include +#include + +/********************************************************************** + The plock must be held before calling this function. 
+**********************************************************************/
+/*
+ * Choose the M_Key to use for an SMP sent along directed route p_path.
+ *
+ * Starting from the SM's own port, the route is followed hop by hop
+ * through the p_remote_physp links to find the GUID of the port at the
+ * far end.  When the last link's remote end is not known, the neighbor
+ * DB is consulted instead.  The GUID is then looked up in the
+ * guid2mkey DB.
+ *
+ * Returns the key found for the destination GUID, or the configured
+ * sm->p_subn->opt.m_key when either the GUID or its key is unknown.
+ */
+static ib_net64_t req_determine_mkey(IN osm_sm_t * sm,
+				     IN const osm_dr_path_t * p_path)
+{
+	osm_node_t *p_node;
+	osm_port_t *p_sm_port;
+	osm_physp_t *p_physp;
+	ib_net64_t dest_port_guid = 0, m_key;
+	uint8_t hop;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	p_physp = NULL;
+
+	p_sm_port = osm_get_port_by_guid(sm->p_subn, sm->p_subn->sm_port_guid);
+
+	/* hop_count == 0: destination port guid is SM */
+	if (p_path->hop_count == 0) {
+		dest_port_guid = sm->p_subn->sm_port_guid;
+		goto Remote_Guid;
+	}
+
+	/* first outgoing physp: on a switch it is selected by path[1],
+	   otherwise it is the SM port's own physp */
+	if (p_sm_port) {
+		p_node = p_sm_port->p_node;
+		if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH)
+			p_physp = osm_node_get_physp_ptr(p_node, p_path->path[1]);
+		else
+			p_physp = p_sm_port->p_physp;
+	}
+
+	/* hop_count == 1: outgoing physp is SM physp */
+	/* for every further hop: cross the link, then pick the exit port
+	   named by path[hop] on the node just reached */
+	for (hop = 2; p_physp && hop <= p_path->hop_count; hop++) {
+		p_physp = p_physp->p_remote_physp;
+		if (!p_physp)
+			break;
+		p_node = p_physp->p_node;
+		p_physp = osm_node_get_physp_ptr(p_node, p_path->path[hop]);
+	}
+
+	/* At this point, p_physp points at the outgoing physp on the
+	   last hop, or NULL if we don't know it.
+	 */
+	if (!p_physp) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR,
+			"ERR 1107: Outgoing physp is null on non-hop_0!\n");
+		osm_dump_dr_path_v2(sm->p_log, p_path, FILE_ID, OSM_LOG_ERROR);
+		dest_port_guid = 0;
+		goto Remote_Guid;
+	}
+
+	if (p_physp->p_remote_physp) {
+		dest_port_guid = p_physp->p_remote_physp->port_guid;
+		goto Remote_Guid;
+	}
+
+	OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Target port guid unknown, "
+		"using persistent DB\n");
+	/* a zero return is treated as a hit; the stored value is then
+	   byte-swapped with cl_hton64() */
+	if (!osm_db_neighbor_get(sm->p_subn->p_neighbor,
+				 cl_ntoh64(p_physp->port_guid),
+				 p_physp->port_num,
+				 &dest_port_guid, NULL)) {
+		dest_port_guid = cl_hton64(dest_port_guid);
+	}
+
+Remote_Guid:
+	if (dest_port_guid) {
+		/* a zero return is treated as a hit here as well */
+		if (!osm_db_guid2mkey_get(sm->p_subn->p_g2m,
+					  cl_ntoh64(dest_port_guid), &m_key)) {
+			m_key = cl_hton64(m_key);
+			OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+				"Found mkey for guid 0x%"
+				PRIx64 "\n", cl_ntoh64(dest_port_guid));
+		} else {
+			OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+				"Target port mkey unknown, using default\n");
+			m_key = sm->p_subn->opt.m_key;
+		}
+	} else {
+		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+			"Target port guid unknown, using default\n");
+		m_key = sm->p_subn->opt.m_key;
+	}
+
+	OSM_LOG_EXIT(sm->p_log);
+
+	return m_key;
+}
+
+/**********************************************************************
+ The plock must be held before calling this function.
+**********************************************************************/
+/*
+ * Build a Get-method SMP for attr_id/attr_mod on directed route p_path
+ * and post it with osm_vl15_post().
+ *
+ * M_Key selection: when opt.m_key_lookup is TRUE, the key comes from
+ * req_determine_mkey() if find_mkey is TRUE and from the m_key argument
+ * otherwise; when opt.m_key_lookup is not TRUE, opt.m_key is used.
+ *
+ * timeout and err_msg are stored in the MAD wrapper; p_context, if not
+ * NULL, is copied into it.
+ *
+ * Returns IB_SUCCESS (also when osm_exit_flag is set, in which case
+ * nothing is sent) or IB_INSUFFICIENT_RESOURCES when no MAD wrapper
+ * could be obtained from the pool.
+ */
+ib_api_status_t osm_req_get(IN osm_sm_t * sm, IN const osm_dr_path_t * p_path,
+			    IN ib_net16_t attr_id, IN ib_net32_t attr_mod,
+			    IN boolean_t find_mkey, IN ib_net64_t m_key,
+			    IN uint32_t timeout, IN cl_disp_msgid_t err_msg,
+			    IN const osm_madw_context_t * p_context)
+{
+	osm_madw_t *p_madw;
+	ib_api_status_t status = IB_SUCCESS;
+	ib_net64_t m_key_calc;
+	ib_net64_t tid;
+
+	CL_ASSERT(sm);
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	CL_ASSERT(p_path);
+	CL_ASSERT(attr_id);
+
+	/* do nothing if we are exiting ... */
+	if (osm_exit_flag)
+		goto Exit;
+
+	/* p_context may be NULL. */
+
+	p_madw = osm_mad_pool_get(sm->p_mad_pool, sm->mad_ctrl.h_bind,
+				  MAD_BLOCK_SIZE, NULL);
+	if (p_madw == NULL) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR,
+			"ERR 1101: Unable to acquire MAD\n");
+		status = IB_INSUFFICIENT_RESOURCES;
+		goto Exit;
+	}
+
+	/* TID is the low 32 bits of the incremented counter; a zero
+	   result is replaced by incrementing once more */
+	tid = cl_hton64((uint64_t) cl_atomic_inc(&sm->sm_trans_id)
+			& (uint64_t)(0xFFFFFFFF));
+	if (tid == 0)
+		tid = cl_hton64((uint64_t) cl_atomic_inc(&sm->sm_trans_id)
+				& (uint64_t)(0xFFFFFFFF));
+
+	if (sm->p_subn->opt.m_key_lookup == TRUE) {
+		if (find_mkey == TRUE)
+			m_key_calc = req_determine_mkey(sm, p_path);
+		else
+			m_key_calc = m_key;
+	} else
+		m_key_calc = sm->p_subn->opt.m_key;
+
+	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+		"Getting %s (0x%X), modifier 0x%X, TID 0x%" PRIx64
+		", MKey 0x%016" PRIx64 "\n",
+		ib_get_sm_attr_str(attr_id), cl_ntoh16(attr_id),
+		cl_ntoh32(attr_mod), cl_ntoh64(tid), cl_ntoh64(m_key_calc));
+
+	ib_smp_init_new(osm_madw_get_smp_ptr(p_madw), IB_MAD_METHOD_GET,
+			tid, attr_id, attr_mod, p_path->hop_count,
+			m_key_calc, p_path->path,
+			IB_LID_PERMISSIVE, IB_LID_PERMISSIVE);
+
+	p_madw->mad_addr.dest_lid = IB_LID_PERMISSIVE;
+	p_madw->mad_addr.addr_type.smi.source_lid = IB_LID_PERMISSIVE;
+	p_madw->resp_expected = TRUE;
+	p_madw->timeout = timeout;
+	p_madw->fail_msg = err_msg;
+
+	/*
+	   Fill in the mad wrapper context for the recipient.
+	   In this case, the only thing the recipient needs is the
+	   guid value.
+	 */
+
+	if (p_context)
+		p_madw->context = *p_context;
+
+	osm_vl15_post(sm->p_vl15, p_madw);
+
+Exit:
+	OSM_LOG_EXIT(sm->p_log);
+	return status;
+}
+
+/**********************************************************************
+ The plock must be held before calling this function.
+**********************************************************************/ +osm_madw_t *osm_prepare_req_set(IN osm_sm_t * sm, IN const osm_dr_path_t * p_path, + IN const uint8_t * p_payload, + IN size_t payload_size, + IN ib_net16_t attr_id, IN ib_net32_t attr_mod, + IN boolean_t find_mkey, IN ib_net64_t m_key, + IN uint32_t timeout, + IN cl_disp_msgid_t err_msg, + IN const osm_madw_context_t * p_context) +{ + osm_madw_t *p_madw = NULL; + ib_net64_t m_key_calc; + ib_net64_t tid; + + CL_ASSERT(sm); + + OSM_LOG_ENTER(sm->p_log); + + CL_ASSERT(p_path); + CL_ASSERT(attr_id); + CL_ASSERT(p_payload); + + /* do nothing if we are exiting ... */ + if (osm_exit_flag) + goto Exit; + + /* p_context may be NULL. */ + + p_madw = osm_mad_pool_get(sm->p_mad_pool, sm->mad_ctrl.h_bind, + MAD_BLOCK_SIZE, NULL); + if (p_madw == NULL) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 1102: Unable to acquire MAD\n"); + goto Exit; + } + + tid = cl_hton64((uint64_t) cl_atomic_inc(&sm->sm_trans_id) + & (uint64_t)(0xFFFFFFFF)); + if (tid == 0) + tid = cl_hton64((uint64_t) cl_atomic_inc(&sm->sm_trans_id) + & (uint64_t)(0xFFFFFFFF)); + + if (sm->p_subn->opt.m_key_lookup == TRUE) { + if (find_mkey == TRUE) + m_key_calc = req_determine_mkey(sm, p_path); + else + m_key_calc = m_key; + } else + m_key_calc = sm->p_subn->opt.m_key; + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Setting %s (0x%X), modifier 0x%X, TID 0x%" PRIx64 + ", MKey 0x%016" PRIx64 "\n", + ib_get_sm_attr_str(attr_id), cl_ntoh16(attr_id), + cl_ntoh32(attr_mod), cl_ntoh64(tid), cl_ntoh64(m_key_calc)); + + ib_smp_init_new(osm_madw_get_smp_ptr(p_madw), IB_MAD_METHOD_SET, + tid, attr_id, attr_mod, p_path->hop_count, + m_key_calc, p_path->path, + IB_LID_PERMISSIVE, IB_LID_PERMISSIVE); + + p_madw->mad_addr.dest_lid = IB_LID_PERMISSIVE; + p_madw->mad_addr.addr_type.smi.source_lid = IB_LID_PERMISSIVE; + p_madw->resp_expected = TRUE; + p_madw->timeout = timeout; + p_madw->fail_msg = err_msg; + + /* + Fill in the mad wrapper context for the recipient. 
+ In this case, the only thing the recipient needs is the + guid value. + */ + + if (p_context) + p_madw->context = *p_context; + + memcpy(osm_madw_get_smp_ptr(p_madw)->data, p_payload, payload_size); + +Exit: + OSM_LOG_EXIT(sm->p_log); + return p_madw; +} + +void osm_send_req_mad(IN osm_sm_t * sm, IN osm_madw_t *p_madw) +{ + CL_ASSERT(p_madw); + CL_ASSERT(sm); + + osm_vl15_post(sm->p_vl15, p_madw); +} + +/********************************************************************** + The plock MAY or MAY NOT be held before calling this function. +**********************************************************************/ +ib_api_status_t osm_req_set(IN osm_sm_t * sm, IN const osm_dr_path_t * p_path, + IN const uint8_t * p_payload, + IN size_t payload_size, + IN ib_net16_t attr_id, IN ib_net32_t attr_mod, + IN boolean_t find_mkey, IN ib_net64_t m_key, + IN uint32_t timeout, + IN cl_disp_msgid_t err_msg, + IN const osm_madw_context_t * p_context) +{ + osm_madw_t *p_madw; + ib_api_status_t status = IB_SUCCESS; + + p_madw = osm_prepare_req_set(sm, p_path, p_payload, payload_size, attr_id, + attr_mod, find_mkey, m_key, timeout, err_msg, p_context); + if (p_madw == NULL) + status = IB_INSUFFICIENT_RESOURCES; + else + osm_send_req_mad(sm, p_madw); + + return status; +} + +int osm_send_trap144(osm_sm_t * sm, ib_net16_t local) +{ + osm_madw_t *madw; + ib_smp_t *smp; + ib_mad_notice_attr_t *ntc; + osm_port_t *port, *smport; + ib_port_info_t *pi; + + port = osm_get_port_by_guid(sm->p_subn, sm->p_subn->sm_port_guid); + if (!port) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 1104: cannot find SM port by guid 0x%" PRIx64 "\n", + cl_ntoh64(sm->p_subn->sm_port_guid)); + return -1; + } + + pi = &port->p_physp->port_info; + + /* don't bother with sending trap when SMA supports this */ + if (!local && + pi->capability_mask&(IB_PORT_CAP_HAS_TRAP|IB_PORT_CAP_HAS_CAP_NTC)) + return 0; + + smport = osm_get_port_by_guid(sm->p_subn, sm->master_sm_guid); + if (!smport) { + OSM_LOG(sm->p_log, 
OSM_LOG_ERROR, + "ERR 1106: cannot find master SM port by guid 0x%" PRIx64 "\n", + cl_ntoh64(sm->master_sm_guid)); + return -1; + } + + madw = osm_mad_pool_get(sm->p_mad_pool, + osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl), + MAD_BLOCK_SIZE, NULL); + if (madw == NULL) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 1105: Unable to acquire MAD\n"); + return -1; + } + + madw->mad_addr.dest_lid = smport->p_physp->port_info.base_lid; + madw->mad_addr.addr_type.smi.source_lid = pi->base_lid; + madw->resp_expected = TRUE; + madw->fail_msg = CL_DISP_MSGID_NONE; + + smp = osm_madw_get_smp_ptr(madw); + memset(smp, 0, sizeof(*smp)); + + smp->base_ver = 1; + smp->mgmt_class = IB_MCLASS_SUBN_LID; + smp->class_ver = 1; + smp->method = IB_MAD_METHOD_TRAP; + smp->trans_id = cl_hton64((uint64_t) cl_atomic_inc(&sm->sm_trans_id) + & (uint64_t)(0xFFFFFFFF)); + if (smp->trans_id == 0) + smp->trans_id = cl_hton64((uint64_t) cl_atomic_inc(&sm->sm_trans_id) + & (uint64_t)(0xFFFFFFFF)); + + smp->attr_id = IB_MAD_ATTR_NOTICE; + + ntc = (ib_mad_notice_attr_t *) smp->data; + + ntc->generic_type = 0x80 | IB_NOTICE_TYPE_INFO; + ib_notice_set_prod_type_ho(ntc, osm_node_get_type(port->p_node)); + ntc->g_or_v.generic.trap_num = cl_hton16(SM_LOCAL_CHANGES_TRAP); /* 144 */ + ntc->issuer_lid = pi->base_lid; + ntc->data_details.ntc_144.lid = pi->base_lid; + ntc->data_details.ntc_144.local_changes = local ? + TRAP_144_MASK_OTHER_LOCAL_CHANGES : 0; + ntc->data_details.ntc_144.new_cap_mask = pi->capability_mask; + ntc->data_details.ntc_144.change_flgs = local; + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Sending Trap 144, TID 0x%" PRIx64 " to SM lid %u\n", + cl_ntoh64(smp->trans_id), cl_ntoh16(madw->mad_addr.dest_lid)); + + osm_vl15_post(sm->p_vl15, madw); + + return 0; +} diff --git a/opensm/osm_resp.c b/opensm/osm_resp.c new file mode 100644 index 0000000..9a98df9 --- /dev/null +++ b/opensm/osm_resp.c @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. 
+ * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_resp_t. + * This object represents the generic attribute responder. + * This object is part of the opensm family of objects. 
+ */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#define FILE_ID OSM_FILE_RESP_C +#include +#include +#include +#include +#include +#include +#include + +static void resp_make_resp_smp(IN osm_sm_t * sm, IN const ib_smp_t * p_src_smp, + IN ib_net16_t status, + IN const uint8_t * p_payload, + OUT ib_smp_t * p_dest_smp) +{ + OSM_LOG_ENTER(sm->p_log); + + CL_ASSERT(p_dest_smp); + CL_ASSERT(p_src_smp); + CL_ASSERT(!ib_smp_is_response(p_src_smp)); + + *p_dest_smp = *p_src_smp; + if (p_src_smp->method == IB_MAD_METHOD_GET || + p_src_smp->method == IB_MAD_METHOD_SET) { + p_dest_smp->method = IB_MAD_METHOD_GET_RESP; + p_dest_smp->status = status; + } else if (p_src_smp->method == IB_MAD_METHOD_TRAP) { + p_dest_smp->method = IB_MAD_METHOD_TRAP_REPRESS; + p_dest_smp->status = 0; + } else { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 1302: " + "src smp method unsupported 0x%X\n", p_src_smp->method); + goto Exit; + } + + if (p_src_smp->mgmt_class == IB_MCLASS_SUBN_DIR) + p_dest_smp->status |= IB_SMP_DIRECTION; + + memcpy(&p_dest_smp->data, p_payload, IB_SMP_DATA_SIZE); + +Exit: + OSM_LOG_EXIT(sm->p_log); +} + +ib_api_status_t osm_resp_send(IN osm_sm_t * sm, + IN const osm_madw_t * p_req_madw, + IN ib_net16_t mad_status, + IN const uint8_t * p_payload) +{ + const ib_smp_t *p_req_smp; + ib_smp_t *p_smp; + osm_madw_t *p_madw; + ib_api_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(sm->p_log); + + CL_ASSERT(p_req_madw); + CL_ASSERT(p_payload); + + /* do nothing if we are exiting ... */ + if (osm_exit_flag) + goto Exit; + + p_madw = osm_mad_pool_get(sm->p_mad_pool, + osm_madw_get_bind_handle(p_req_madw), + MAD_BLOCK_SIZE, NULL); + + if (p_madw == NULL) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 1301: Unable to acquire MAD\n"); + status = IB_INSUFFICIENT_RESOURCES; + goto Exit; + } + + /* + Copy the request smp to the response smp, then just + update the necessary fields. 
+ */ + p_smp = osm_madw_get_smp_ptr(p_madw); + p_req_smp = osm_madw_get_smp_ptr(p_req_madw); + resp_make_resp_smp(sm, p_req_smp, mad_status, p_payload, p_smp); + p_madw->mad_addr.dest_lid = + p_req_madw->mad_addr.addr_type.smi.source_lid; + p_madw->mad_addr.addr_type.smi.source_lid = + p_req_madw->mad_addr.dest_lid; + + p_madw->resp_expected = FALSE; + p_madw->fail_msg = CL_DISP_MSGID_NONE; + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Responding to %s (0x%X)" + "\n\t\t\t\tattribute modifier 0x%X, TID 0x%" PRIx64 "\n", + ib_get_sm_attr_str(p_smp->attr_id), cl_ntoh16(p_smp->attr_id), + cl_ntoh32(p_smp->attr_mod), cl_ntoh64(p_smp->trans_id)); + + osm_vl15_post(sm->p_vl15, p_madw); + +Exit: + OSM_LOG_EXIT(sm->p_log); + return status; +} diff --git a/opensm/osm_router.c b/opensm/osm_router.c new file mode 100644 index 0000000..af5fff5 --- /dev/null +++ b/opensm/osm_router.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_router_t. + * This object represents an Infiniband router. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#define FILE_ID OSM_FILE_ROUTER_C +#include + +void osm_router_delete(IN OUT osm_router_t ** pp_rtr) +{ + free(*pp_rtr); + *pp_rtr = NULL; +} + +osm_router_t *osm_router_new(IN osm_port_t * p_port) +{ + osm_router_t *p_rtr; + + CL_ASSERT(p_port); + + p_rtr = malloc(sizeof(*p_rtr)); + if (p_rtr) { + memset(p_rtr, 0, sizeof(*p_rtr)); + p_rtr->p_port = p_port; + } + + return p_rtr; +} diff --git a/opensm/osm_sa.c b/opensm/osm_sa.c new file mode 100644 index 0000000..ca793aa --- /dev/null +++ b/opensm/osm_sa.c @@ -0,0 +1,1377 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2014 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_sa_t. + * This object represents the Subnet Administration object. + * This object is part of the opensm family of objects. 
+ */
+
+#if HAVE_CONFIG_H
+# include
+#endif /* HAVE_CONFIG_H */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#define FILE_ID OSM_FILE_SA_C
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define OSM_SA_INITIAL_TID_VALUE 0xabc
+
+extern void osm_cpi_rcv_process(IN void *context, IN void *data);
+extern void osm_gir_rcv_process(IN void *context, IN void *data);
+extern void osm_infr_rcv_process(IN void *context, IN void *data);
+extern void osm_infir_rcv_process(IN void *context, IN void *data);
+extern void osm_lftr_rcv_process(IN void *context, IN void *data);
+extern void osm_lr_rcv_process(IN void *context, IN void *data);
+extern void osm_mcmr_rcv_process(IN void *context, IN void *data);
+extern void osm_mftr_rcv_process(IN void *context, IN void *data);
+extern void osm_mpr_rcv_process(IN void *context, IN void *data);
+extern void osm_nr_rcv_process(IN void *context, IN void *data);
+extern void osm_pr_rcv_process(IN void *context, IN void *data);
+extern void osm_pkey_rec_rcv_process(IN void *context, IN void *data);
+extern void osm_pir_rcv_process(IN void *context, IN void *data);
+extern void osm_sr_rcv_process(IN void *context, IN void *data);
+extern void osm_slvl_rec_rcv_process(IN void *context, IN void *data);
+extern void osm_smir_rcv_process(IN void *context, IN void *data);
+extern void osm_sir_rcv_process(IN void *context, IN void *data);
+extern void osm_vlarb_rec_rcv_process(IN void *context, IN void *data);
+extern void osm_sr_rcv_lease_cb(IN void *context);
+
+/*
+ * Put an SA object into its pre-init state: zero-filled, state
+ * OSM_SA_STATE_INIT, transaction id seeded with
+ * OSM_SA_INITIAL_TID_VALUE, service-record timer constructed.
+ */
+void osm_sa_construct(IN osm_sa_t * p_sa)
+{
+	memset(p_sa, 0, sizeof(*p_sa));
+	p_sa->state = OSM_SA_STATE_INIT;
+	p_sa->sa_trans_id = OSM_SA_INITIAL_TID_VALUE;
+
+	cl_timer_construct(&p_sa->sr_timer);
+}
+
+/*
+ * Stop SA activity: stop the service-record timer, unbind the MAD
+ * controller, unregister every dispatcher handler registered by
+ * osm_sa_init() and destroy the MAD controller.
+ *
+ * The ClassPortInfo handler (cpi_disp_h) is unregistered here as well;
+ * osm_sa_init() registers it, but the original shutdown path left it
+ * registered.
+ */
+void osm_sa_shutdown(IN osm_sa_t * p_sa)
+{
+	OSM_LOG_ENTER(p_sa->p_log);
+
+	cl_timer_stop(&p_sa->sr_timer);
+
+	/* unbind from the mad service */
+	osm_sa_mad_ctrl_unbind(&p_sa->mad_ctrl);
+
+	/* remove any registered dispatcher message */
+	cl_disp_unregister(p_sa->cpi_disp_h);
+	cl_disp_unregister(p_sa->nr_disp_h);
+	cl_disp_unregister(p_sa->pir_disp_h);
+	cl_disp_unregister(p_sa->gir_disp_h);
+	cl_disp_unregister(p_sa->lr_disp_h);
+	cl_disp_unregister(p_sa->pr_disp_h);
+#if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP)
+	cl_disp_unregister(p_sa->mpr_disp_h);
+#endif
+	cl_disp_unregister(p_sa->smir_disp_h);
+	cl_disp_unregister(p_sa->mcmr_disp_h);
+	cl_disp_unregister(p_sa->sr_disp_h);
+	cl_disp_unregister(p_sa->infr_disp_h);
+	cl_disp_unregister(p_sa->infir_disp_h);
+	cl_disp_unregister(p_sa->vlarb_disp_h);
+	cl_disp_unregister(p_sa->slvl_disp_h);
+	cl_disp_unregister(p_sa->pkey_disp_h);
+	cl_disp_unregister(p_sa->lft_disp_h);
+	cl_disp_unregister(p_sa->sir_disp_h);
+	cl_disp_unregister(p_sa->mft_disp_h);
+
+	/* handlers on the separate Set dispatcher exist only when one
+	   was supplied to osm_sa_init() */
+	if (p_sa->p_set_disp) {
+		cl_disp_unregister(p_sa->mcmr_set_disp_h);
+		cl_disp_unregister(p_sa->infr_set_disp_h);
+		cl_disp_unregister(p_sa->sr_set_disp_h);
+		cl_disp_unregister(p_sa->gir_set_disp_h);
+	}
+
+	osm_sa_mad_ctrl_destroy(&p_sa->mad_ctrl);
+
+	OSM_LOG_EXIT(p_sa->p_log);
+}
+
+/*
+ * Final teardown: reset the state to OSM_SA_STATE_INIT and destroy the
+ * service-record timer.
+ */
+void osm_sa_destroy(IN osm_sa_t * p_sa)
+{
+	OSM_LOG_ENTER(p_sa->p_log);
+
+	p_sa->state = OSM_SA_STATE_INIT;
+
+	cl_timer_destroy(&p_sa->sr_timer);
+
+	OSM_LOG_EXIT(p_sa->p_log);
+}
+
+/*
+ * Initialize the SA object: store the supplied collaborators, mark the
+ * SA ready, initialize the MAD controller and the service-record timer,
+ * and register one dispatcher handler per SA attribute on p_disp.
+ *
+ * p_set_disp may be NULL.  When it is not, handlers for GuidInfoRecord,
+ * MCMemberRecord, ServiceRecord and InformInfo are additionally
+ * registered on it.
+ *
+ * Returns IB_SUCCESS, the failing status of the MAD controller or timer
+ * initialization, or IB_INSUFFICIENT_RESOURCES when any dispatcher
+ * registration fails.  Handlers registered before a failure are left in
+ * place.
+ */
+ib_api_status_t osm_sa_init(IN osm_sm_t * p_sm, IN osm_sa_t * p_sa,
+			    IN osm_subn_t * p_subn, IN osm_vendor_t * p_vendor,
+			    IN osm_mad_pool_t * p_mad_pool,
+			    IN osm_log_t * p_log, IN osm_stats_t * p_stats,
+			    IN cl_dispatcher_t * p_disp,
+			    IN cl_dispatcher_t * p_set_disp,
+			    IN cl_plock_t * p_lock)
+{
+	ib_api_status_t status;
+
+	OSM_LOG_ENTER(p_log);
+
+	p_sa->sm = p_sm;
+	p_sa->p_subn = p_subn;
+	p_sa->p_vendor = p_vendor;
+	p_sa->p_mad_pool = p_mad_pool;
+	p_sa->p_log = p_log;
+	p_sa->p_disp = p_disp;
+	p_sa->p_set_disp = p_set_disp;
+	p_sa->p_lock = p_lock;
+
+	p_sa->state = OSM_SA_STATE_READY;
+
+	status = osm_sa_mad_ctrl_init(&p_sa->mad_ctrl, p_sa, p_sa->p_mad_pool,
+				      p_sa->p_vendor, p_subn, p_log, p_stats,
+				      p_disp, p_set_disp);
+	if (status != IB_SUCCESS)
+		goto Exit;
+
+	status = cl_timer_init(&p_sa->sr_timer, osm_sr_rcv_lease_cb, p_sa);
+	if (status != IB_SUCCESS)
+		goto Exit;
+
+	/* any registration failure below exits with this status */
+	status = IB_INSUFFICIENT_RESOURCES;
+	p_sa->cpi_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_CLASS_PORT_INFO,
+					    osm_cpi_rcv_process, p_sa);
+	if (p_sa->cpi_disp_h == CL_DISP_INVALID_HANDLE)
+		goto Exit;
+
+	p_sa->nr_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_NODE_RECORD,
+					   osm_nr_rcv_process, p_sa);
+	if (p_sa->nr_disp_h == CL_DISP_INVALID_HANDLE)
+		goto Exit;
+
+	p_sa->pir_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_PORTINFO_RECORD,
+					    osm_pir_rcv_process, p_sa);
+	if (p_sa->pir_disp_h == CL_DISP_INVALID_HANDLE)
+		goto Exit;
+
+	p_sa->gir_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_GUIDINFO_RECORD,
+					    osm_gir_rcv_process, p_sa);
+	if (p_sa->gir_disp_h == CL_DISP_INVALID_HANDLE)
+		goto Exit;
+
+	p_sa->lr_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_LINK_RECORD,
+					   osm_lr_rcv_process, p_sa);
+	if (p_sa->lr_disp_h == CL_DISP_INVALID_HANDLE)
+		goto Exit;
+
+	p_sa->pr_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_PATH_RECORD,
+					   osm_pr_rcv_process, p_sa);
+	if (p_sa->pr_disp_h == CL_DISP_INVALID_HANDLE)
+		goto Exit;
+
+#if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP)
+	p_sa->mpr_disp_h =
+	    cl_disp_register(p_disp, OSM_MSG_MAD_MULTIPATH_RECORD,
+			     osm_mpr_rcv_process, p_sa);
+	if (p_sa->mpr_disp_h == CL_DISP_INVALID_HANDLE)
+		goto Exit;
+#endif
+
+	p_sa->smir_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_SMINFO_RECORD,
+					     osm_smir_rcv_process, p_sa);
+	if (p_sa->smir_disp_h == CL_DISP_INVALID_HANDLE)
+		goto Exit;
+
+	p_sa->mcmr_disp_h =
+	    cl_disp_register(p_disp, OSM_MSG_MAD_MCMEMBER_RECORD,
+			     osm_mcmr_rcv_process, p_sa);
+	if (p_sa->mcmr_disp_h == CL_DISP_INVALID_HANDLE)
+		goto Exit;
+
+	p_sa->sr_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_SERVICE_RECORD,
+					   osm_sr_rcv_process, p_sa);
+	if (p_sa->sr_disp_h == CL_DISP_INVALID_HANDLE)
+		goto Exit;
+
+	p_sa->infr_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_INFORM_INFO,
+					     osm_infr_rcv_process, p_sa);
+	if (p_sa->infr_disp_h == CL_DISP_INVALID_HANDLE)
+		goto Exit;
+
+	p_sa->infir_disp_h =
+	    cl_disp_register(p_disp, OSM_MSG_MAD_INFORM_INFO_RECORD,
+			     osm_infir_rcv_process, p_sa);
+	if (p_sa->infir_disp_h == CL_DISP_INVALID_HANDLE)
+		goto Exit;
+
+	p_sa->vlarb_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_VL_ARB_RECORD,
+					      osm_vlarb_rec_rcv_process, p_sa);
+	if (p_sa->vlarb_disp_h == CL_DISP_INVALID_HANDLE)
+		goto Exit;
+
+	p_sa->slvl_disp_h =
+	    cl_disp_register(p_disp, OSM_MSG_MAD_SLVL_TBL_RECORD,
+			     osm_slvl_rec_rcv_process, p_sa);
+	if (p_sa->slvl_disp_h == CL_DISP_INVALID_HANDLE)
+		goto Exit;
+
+	p_sa->pkey_disp_h =
+	    cl_disp_register(p_disp, OSM_MSG_MAD_PKEY_TBL_RECORD,
+			     osm_pkey_rec_rcv_process, p_sa);
+	if (p_sa->pkey_disp_h == CL_DISP_INVALID_HANDLE)
+		goto Exit;
+
+	p_sa->lft_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_LFT_RECORD,
+					    osm_lftr_rcv_process, p_sa);
+	if (p_sa->lft_disp_h == CL_DISP_INVALID_HANDLE)
+		goto Exit;
+
+	p_sa->sir_disp_h =
+	    cl_disp_register(p_disp, OSM_MSG_MAD_SWITCH_INFO_RECORD,
+			     osm_sir_rcv_process, p_sa);
+	if (p_sa->sir_disp_h == CL_DISP_INVALID_HANDLE)
+		goto Exit;
+
+	p_sa->mft_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_MFT_RECORD,
+					    osm_mftr_rcv_process, p_sa);
+	if (p_sa->mft_disp_h == CL_DISP_INVALID_HANDLE)
+		goto Exit;
+
+	/*
+	 * When p_set_disp is defined, it means that we use different dispatcher
+	 * for SA Set requests, and we need to register handlers for it.
+	 */
+	if (p_set_disp) {
+		p_sa->gir_set_disp_h =
+		    cl_disp_register(p_set_disp, OSM_MSG_MAD_GUIDINFO_RECORD,
+				     osm_gir_rcv_process, p_sa);
+		if (p_sa->gir_set_disp_h == CL_DISP_INVALID_HANDLE)
+			goto Exit;
+
+		p_sa->mcmr_set_disp_h =
+		    cl_disp_register(p_set_disp, OSM_MSG_MAD_MCMEMBER_RECORD,
+				     osm_mcmr_rcv_process, p_sa);
+		if (p_sa->mcmr_set_disp_h == CL_DISP_INVALID_HANDLE)
+			goto Exit;
+
+		p_sa->sr_set_disp_h =
+		    cl_disp_register(p_set_disp, OSM_MSG_MAD_SERVICE_RECORD,
+				     osm_sr_rcv_process, p_sa);
+		if (p_sa->sr_set_disp_h == CL_DISP_INVALID_HANDLE)
+			goto Exit;
+
+		p_sa->infr_set_disp_h =
+		    cl_disp_register(p_set_disp, OSM_MSG_MAD_INFORM_INFO,
+				     osm_infr_rcv_process, p_sa);
+		if (p_sa->infr_set_disp_h == CL_DISP_INVALID_HANDLE)
+			goto Exit;
+	}
+
+	status = IB_SUCCESS;
+Exit:
+	OSM_LOG_EXIT(p_log);
+	return status;
+}
+
+/*
+ * Bind the SA MAD controller to port_guid.  A failure is logged as
+ * ERR 4C03 and its status is returned to the caller.
+ */
+ib_api_status_t osm_sa_bind(IN osm_sa_t * p_sa, IN ib_net64_t port_guid)
+{
+	ib_api_status_t status;
+
+	OSM_LOG_ENTER(p_sa->p_log);
+
+	status = osm_sa_mad_ctrl_bind(&p_sa->mad_ctrl, port_guid);
+
+	if (status != IB_SUCCESS) {
+		OSM_LOG(p_sa->p_log, OSM_LOG_ERROR, "ERR 4C03: "
+			"SA MAD Controller bind failed (%s)\n",
+			ib_get_err_str(status));
+	}
+
+	OSM_LOG_EXIT(p_sa->p_log);
+	return status;
+}
+
+/*
+ * Send p_madw through the vendor layer and keep the sa_mads_sent
+ * statistic in step: it is incremented before the send and decremented
+ * again if the send fails (logged as ERR 4C04).
+ *
+ * Returns the status of osm_vendor_send().
+ */
+ib_api_status_t osm_sa_send(osm_sa_t *sa, IN osm_madw_t * p_madw,
+			    IN boolean_t resp_expected)
+{
+	ib_api_status_t status;
+
+	cl_atomic_inc(&sa->p_subn->p_osm->stats.sa_mads_sent);
+	status = osm_vendor_send(p_madw->h_bind, p_madw, resp_expected);
+	if (status != IB_SUCCESS) {
+		cl_atomic_dec(&sa->p_subn->p_osm->stats.sa_mads_sent);
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4C04: "
+			"osm_vendor_send failed, status = %s\n",
+			ib_get_err_str(status));
+	}
+	return status;
+}
+
+/*
+ * Answer the SA request in p_madw with an error response carrying
+ * sa_status.
+ *
+ * The request MAD is copied into a freshly acquired response MAD, the
+ * method is turned into the corresponding response method, SM_Key is
+ * cleared, and a MultiPathRecord attribute id is replaced by the
+ * PathRecord attribute id.  Nothing is sent when osm_exit_flag is set
+ * or no response MAD can be acquired (logged as ERR 4C07).
+ */
+void osm_sa_send_error(IN osm_sa_t * sa, IN const osm_madw_t * p_madw,
+		       IN ib_net16_t sa_status)
+{
+	osm_madw_t *p_resp_madw;
+	ib_sa_mad_t *p_resp_sa_mad;
+	ib_sa_mad_t *p_sa_mad;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	/* avoid races - if we are exiting - exit */
+	if (osm_exit_flag) {
+		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+			"Ignoring requested send after exit\n");
+		goto Exit;
+	}
+
+	p_resp_madw = osm_mad_pool_get(sa->p_mad_pool,
+				       p_madw->h_bind, MAD_BLOCK_SIZE,
+				       &p_madw->mad_addr);
+
+	if (p_resp_madw == NULL) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4C07: "
+			"Unable to acquire response MAD\n");
+		goto Exit;
+	}
+
+	p_resp_sa_mad = osm_madw_get_sa_mad_ptr(p_resp_madw);
+	p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
+
+	/* Copy the MAD header back into the response mad */
+	*p_resp_sa_mad = *p_sa_mad;
+	p_resp_sa_mad->status = sa_status;
+
+	if (p_resp_sa_mad->method == IB_MAD_METHOD_SET)
+		p_resp_sa_mad->method = IB_MAD_METHOD_GET;
+	else if (p_resp_sa_mad->method == IB_MAD_METHOD_GETTABLE)
+		p_resp_sa_mad->attr_offset = 0;
+
+	p_resp_sa_mad->method |= IB_MAD_METHOD_RESP_MASK;
+
+	/*
+	 * C15-0.1.5 - always return SM_Key = 0 (table 185 p 884)
+	 */
+	p_resp_sa_mad->sm_key = 0;
+
+	/*
+	 * o15-0.2.7 - The PathRecord Attribute ID shall be used in
+	 * the response (to a SubnAdmGetMulti(MultiPathRecord)
+	 */
+	if (p_resp_sa_mad->attr_id == IB_MAD_ATTR_MULTIPATH_RECORD)
+		p_resp_sa_mad->attr_id = IB_MAD_ATTR_PATH_RECORD;
+
+	if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_FRAMES))
+		osm_dump_sa_mad_v2(sa->p_log, p_resp_sa_mad, FILE_ID, OSM_LOG_FRAMES);
+
+	osm_sa_send(sa, p_resp_madw, FALSE);
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+}
+
+void osm_sa_respond(osm_sa_t *sa, osm_madw_t *madw, size_t attr_size,
+		    cl_qlist_t *list)
+{
+	cl_list_item_t *item;
+	osm_madw_t *resp_madw;
+	ib_sa_mad_t *sa_mad, *resp_sa_mad;
+	unsigned num_rec, i;
+#ifndef VENDOR_RMPP_SUPPORT
+	unsigned trim_num_rec;
+#endif
+	unsigned char *p;
+
+	sa_mad = osm_madw_get_sa_mad_ptr(madw);
+	num_rec = cl_qlist_count(list);
+
+	/*
+	 * C15-0.1.30:
+	 * If we do a SubnAdmGet and got more than one record it is an error!
+	 */
+	if (sa_mad->method == IB_MAD_METHOD_GET && num_rec > 1) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4C05: "
+			"Got %u records for SubnAdmGet(%s) comp_mask 0x%016" PRIx64
+			" from requester LID %u\n",
+			num_rec, ib_get_sa_attr_str(sa_mad->attr_id),
+			cl_ntoh64(sa_mad->comp_mask),
+			cl_ntoh16(madw->mad_addr.dest_lid));
+		osm_sa_send_error(sa, madw, IB_SA_MAD_STATUS_TOO_MANY_RECORDS);
+		goto Exit;
+	}
+
+#ifndef VENDOR_RMPP_SUPPORT
+	trim_num_rec = (MAD_BLOCK_SIZE - IB_SA_MAD_HDR_SIZE) / attr_size;
+	if (trim_num_rec < num_rec) {
+		OSM_LOG(sa->p_log, OSM_LOG_VERBOSE,
+			"Number of records:%u trimmed to:%u to fit in one MAD\n",
+			num_rec, trim_num_rec);
+		num_rec = trim_num_rec;
+	}
+#endif
+
+	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Returning %u records\n", num_rec);
+
+	if (sa_mad->method == IB_MAD_METHOD_GET && num_rec == 0) {
+		osm_sa_send_error(sa, madw, IB_SA_MAD_STATUS_NO_RECORDS);
+		goto Exit;
+	}
+
+	/*
+	 * Get a MAD to reply. Address of Mad is in the received mad_wrapper
+	 */
+	resp_madw = osm_mad_pool_get(sa->p_mad_pool, madw->h_bind,
+				     num_rec * attr_size + IB_SA_MAD_HDR_SIZE,
+				     &madw->mad_addr);
+	if (!resp_madw) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4C06: "
+			"osm_mad_pool_get failed\n");
+		osm_sa_send_error(sa, madw, IB_SA_MAD_STATUS_NO_RESOURCES);
+		goto Exit;
+	}
+
+	resp_sa_mad = osm_madw_get_sa_mad_ptr(resp_madw);
+
+	/*
+	   Copy the MAD header back into the response mad.
+	   Set the 'R' bit and the payload length,
+	   Then copy all records from the list into the response payload.
+	 */
+
+	memcpy(resp_sa_mad, sa_mad, IB_SA_MAD_HDR_SIZE);
+	if (resp_sa_mad->method == IB_MAD_METHOD_SET)
+		resp_sa_mad->method = IB_MAD_METHOD_GET;
+	resp_sa_mad->method |= IB_MAD_METHOD_RESP_MASK;
+	/* C15-0.1.5 - always return SM_Key = 0 (table 185 p 884) */
+	resp_sa_mad->sm_key = 0;
+
+	/* Fill in the offset (paylen will be done by the rmpp SAR) */
+	resp_sa_mad->attr_offset = num_rec ?
ib_get_attr_offset(attr_size) : 0; + + p = ib_sa_mad_get_payload_ptr(resp_sa_mad); + +#ifndef VENDOR_RMPP_SUPPORT + /* we support only one packet RMPP - so we will set the first and + last flags for gettable */ + if (resp_sa_mad->method == IB_MAD_METHOD_GETTABLE_RESP) { + resp_sa_mad->rmpp_type = IB_RMPP_TYPE_DATA; + resp_sa_mad->rmpp_flags = + IB_RMPP_FLAG_FIRST | IB_RMPP_FLAG_LAST | + IB_RMPP_FLAG_ACTIVE; + } +#else + /* forcefully define the packet as RMPP one */ + if (resp_sa_mad->method == IB_MAD_METHOD_GETTABLE_RESP) + resp_sa_mad->rmpp_flags = IB_RMPP_FLAG_ACTIVE; +#endif + + for (i = 0; i < num_rec; i++) { + item = cl_qlist_remove_head(list); + memcpy(p, ((osm_sa_item_t *)item)->resp.data, attr_size); + p += attr_size; + free(item); + } + + osm_dump_sa_mad_v2(sa->p_log, resp_sa_mad, FILE_ID, OSM_LOG_FRAMES); + osm_sa_send(sa, resp_madw, FALSE); + +Exit: + /* need to set the mem free ... */ + item = cl_qlist_remove_head(list); + while (item != cl_qlist_end(list)) { + free(item); + item = cl_qlist_remove_head(list); + } +} + +/* + * SA DB Dumper + * + */ + +struct opensm_dump_context { + osm_opensm_t *p_osm; + FILE *file; +}; + +static int +opensm_dump_to_file(osm_opensm_t * p_osm, const char *file_name, + void (*dump_func) (osm_opensm_t * p_osm, FILE * file)) +{ + char path[1024]; + char path_tmp[1032]; + FILE *file; + int fd, status = 0; + + snprintf(path, sizeof(path), "%s/%s", + p_osm->subn.opt.dump_files_dir, file_name); + + snprintf(path_tmp, sizeof(path_tmp), "%s.tmp", path); + + file = fopen(path_tmp, "w"); + if (!file) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "ERR 4C01: " + "cannot open file \'%s\': %s\n", + path_tmp, strerror(errno)); + return -1; + } + + if (chmod(path_tmp, S_IRUSR | S_IWUSR)) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "ERR 4C0C: " + "cannot change access permissions of file " + "\'%s\' : %s\n", + path_tmp, strerror(errno)); + fclose(file); + return -1; + } + + dump_func(p_osm, file); + + if (p_osm->subn.opt.fsync_high_avail_files) { + 
if (fflush(file) == 0) { + fd = fileno(file); + if (fd != -1) { + if (fsync(fd) == -1) + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, + "ERR 4C08: fsync() failed (%s) for %s\n", + strerror(errno), path_tmp); + } else + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "ERR 4C09: " + "fileno() failed for %s\n", path_tmp); + } else + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "ERR 4C0A: " + "fflush() failed (%s) for %s\n", + strerror(errno), path_tmp); + } + + fclose(file); + + status = rename(path_tmp, path); + if (status) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "ERR 4C0B: " + "Failed to rename file:%s (err:%s)\n", + path_tmp, strerror(errno)); + } + + return status; +} + +static void mcast_mgr_dump_one_port(cl_map_item_t * p_map_item, void *cxt) +{ + FILE *file = ((struct opensm_dump_context *)cxt)->file; + osm_mcm_alias_guid_t *p_mcm_alias_guid = (osm_mcm_alias_guid_t *) p_map_item; + + fprintf(file, "mcm_port: " + "port_gid=0x%016" PRIx64 ":0x%016" PRIx64 " " + "scope_state=0x%02x proxy_join=0x%x" "\n\n", + cl_ntoh64(p_mcm_alias_guid->port_gid.unicast.prefix), + cl_ntoh64(p_mcm_alias_guid->port_gid.unicast.interface_id), + p_mcm_alias_guid->scope_state, p_mcm_alias_guid->proxy_join); +} + +static void sa_dump_one_mgrp(osm_mgrp_t *p_mgrp, void *cxt) +{ + struct opensm_dump_context dump_context; + osm_opensm_t *p_osm = ((struct opensm_dump_context *)cxt)->p_osm; + FILE *file = ((struct opensm_dump_context *)cxt)->file; + + fprintf(file, "MC Group 0x%04x %s:" + " mgid=0x%016" PRIx64 ":0x%016" PRIx64 + " port_gid=0x%016" PRIx64 ":0x%016" PRIx64 + " qkey=0x%08x mlid=0x%04x mtu=0x%02x tclass=0x%02x" + " pkey=0x%04x rate=0x%02x pkt_life=0x%02x sl_flow_hop=0x%08x" + " scope_state=0x%02x proxy_join=0x%x" "\n\n", + cl_ntoh16(p_mgrp->mlid), + p_mgrp->well_known ? 
" (well known)" : "", + cl_ntoh64(p_mgrp->mcmember_rec.mgid.unicast.prefix), + cl_ntoh64(p_mgrp->mcmember_rec.mgid.unicast.interface_id), + cl_ntoh64(p_mgrp->mcmember_rec.port_gid.unicast.prefix), + cl_ntoh64(p_mgrp->mcmember_rec.port_gid.unicast.interface_id), + cl_ntoh32(p_mgrp->mcmember_rec.qkey), + cl_ntoh16(p_mgrp->mcmember_rec.mlid), + p_mgrp->mcmember_rec.mtu, + p_mgrp->mcmember_rec.tclass, + cl_ntoh16(p_mgrp->mcmember_rec.pkey), + p_mgrp->mcmember_rec.rate, + p_mgrp->mcmember_rec.pkt_life, + cl_ntoh32(p_mgrp->mcmember_rec.sl_flow_hop), + p_mgrp->mcmember_rec.scope_state, + p_mgrp->mcmember_rec.proxy_join); + + dump_context.p_osm = p_osm; + dump_context.file = file; + + cl_qmap_apply_func(&p_mgrp->mcm_alias_port_tbl, + mcast_mgr_dump_one_port, &dump_context); +} + +static void sa_dump_one_inform(cl_list_item_t * p_list_item, void *cxt) +{ + FILE *file = ((struct opensm_dump_context *)cxt)->file; + osm_infr_t *p_infr = (osm_infr_t *) p_list_item; + ib_inform_info_record_t *p_iir = &p_infr->inform_record; + + fprintf(file, "InformInfo Record:" + " subscriber_gid=0x%016" PRIx64 ":0x%016" PRIx64 + " subscriber_enum=0x%x" + " InformInfo:" + " gid=0x%016" PRIx64 ":0x%016" PRIx64 + " lid_range_begin=0x%x" + " lid_range_end=0x%x" + " is_generic=0x%x" + " subscribe=0x%x" + " trap_type=0x%x" + " trap_num=0x%x" + " qpn_resp_time_val=0x%x" + " node_type=0x%06x" + " rep_addr: lid=0x%04x path_bits=0x%02x static_rate=0x%02x" + " remote_qp=0x%08x remote_qkey=0x%08x pkey_ix=0x%04x sl=0x%02x" + "\n\n", + cl_ntoh64(p_iir->subscriber_gid.unicast.prefix), + cl_ntoh64(p_iir->subscriber_gid.unicast.interface_id), + cl_ntoh16(p_iir->subscriber_enum), + cl_ntoh64(p_iir->inform_info.gid.unicast.prefix), + cl_ntoh64(p_iir->inform_info.gid.unicast.interface_id), + cl_ntoh16(p_iir->inform_info.lid_range_begin), + cl_ntoh16(p_iir->inform_info.lid_range_end), + p_iir->inform_info.is_generic, + p_iir->inform_info.subscribe, + cl_ntoh16(p_iir->inform_info.trap_type), + 
cl_ntoh16(p_iir->inform_info.g_or_v.generic.trap_num), + cl_ntoh32(p_iir->inform_info.g_or_v.generic.qpn_resp_time_val), + cl_ntoh32(ib_inform_info_get_prod_type(&p_iir->inform_info)), + cl_ntoh16(p_infr->report_addr.dest_lid), + p_infr->report_addr.path_bits, + p_infr->report_addr.static_rate, + cl_ntoh32(p_infr->report_addr.addr_type.gsi.remote_qp), + cl_ntoh32(p_infr->report_addr.addr_type.gsi.remote_qkey), + p_infr->report_addr.addr_type.gsi.pkey_ix, + p_infr->report_addr.addr_type.gsi.service_level); +} + +static void sa_dump_one_service(cl_list_item_t * p_list_item, void *cxt) +{ + FILE *file = ((struct opensm_dump_context *)cxt)->file; + osm_svcr_t *p_svcr = (osm_svcr_t *) p_list_item; + ib_service_record_t *p_sr = &p_svcr->service_record; + + fprintf(file, "Service Record: id=0x%016" PRIx64 + " gid=0x%016" PRIx64 ":0x%016" PRIx64 + " pkey=0x%x" + " lease=0x%x" + " key=0x%02x%02x%02x%02x%02x%02x%02x%02x" + ":0x%02x%02x%02x%02x%02x%02x%02x%02x" + " name=\'%s\'" + " data8=0x%02x%02x%02x%02x%02x%02x%02x%02x" + ":0x%02x%02x%02x%02x%02x%02x%02x%02x" + " data16=0x%04x%04x%04x%04x:0x%04x%04x%04x%04x" + " data32=0x%08x%08x:0x%08x%08x" + " data64=0x%016" PRIx64 ":0x%016" PRIx64 + " modified_time=0x%x lease_period=0x%x\n\n", + cl_ntoh64(p_sr->service_id), + cl_ntoh64(p_sr->service_gid.unicast.prefix), + cl_ntoh64(p_sr->service_gid.unicast.interface_id), + cl_ntoh16(p_sr->service_pkey), + cl_ntoh32(p_sr->service_lease), + p_sr->service_key[0], p_sr->service_key[1], + p_sr->service_key[2], p_sr->service_key[3], + p_sr->service_key[4], p_sr->service_key[5], + p_sr->service_key[6], p_sr->service_key[7], + p_sr->service_key[8], p_sr->service_key[9], + p_sr->service_key[10], p_sr->service_key[11], + p_sr->service_key[12], p_sr->service_key[13], + p_sr->service_key[14], p_sr->service_key[15], + p_sr->service_name, + p_sr->service_data8[0], p_sr->service_data8[1], + p_sr->service_data8[2], p_sr->service_data8[3], + p_sr->service_data8[4], p_sr->service_data8[5], + 
p_sr->service_data8[6], p_sr->service_data8[7], + p_sr->service_data8[8], p_sr->service_data8[9], + p_sr->service_data8[10], p_sr->service_data8[11], + p_sr->service_data8[12], p_sr->service_data8[13], + p_sr->service_data8[14], p_sr->service_data8[15], + cl_ntoh16(p_sr->service_data16[0]), + cl_ntoh16(p_sr->service_data16[1]), + cl_ntoh16(p_sr->service_data16[2]), + cl_ntoh16(p_sr->service_data16[3]), + cl_ntoh16(p_sr->service_data16[4]), + cl_ntoh16(p_sr->service_data16[5]), + cl_ntoh16(p_sr->service_data16[6]), + cl_ntoh16(p_sr->service_data16[7]), + cl_ntoh32(p_sr->service_data32[0]), + cl_ntoh32(p_sr->service_data32[1]), + cl_ntoh32(p_sr->service_data32[2]), + cl_ntoh32(p_sr->service_data32[3]), + cl_ntoh64(p_sr->service_data64[0]), + cl_ntoh64(p_sr->service_data64[1]), + p_svcr->modified_time, p_svcr->lease_period); +} + +static void sa_dump_one_port_guidinfo(cl_map_item_t * p_map_item, void *cxt) +{ + FILE *file = ((struct opensm_dump_context *)cxt)->file; + osm_port_t *p_port = (osm_port_t *) p_map_item; + uint32_t max_block; + int block_num; + + if (!p_port->p_physp->p_guids) + return; + + max_block = (p_port->p_physp->port_info.guid_cap + GUID_TABLE_MAX_ENTRIES - 1) / + GUID_TABLE_MAX_ENTRIES; + + for (block_num = 0; block_num < max_block; block_num++) { + fprintf(file, "GUIDInfo Record:" + " base_guid=0x%016" PRIx64 " lid=0x%04x block_num=0x%x" + " guid0=0x%016" PRIx64 " guid1=0x%016" PRIx64 + " guid2=0x%016" PRIx64 " guid3=0x%016" PRIx64 + " guid4=0x%016" PRIx64 " guid5=0x%016" PRIx64 + " guid6=0x%016" PRIx64 " guid7=0x%016" PRIx64 + "\n\n", + cl_ntoh64((*p_port->p_physp->p_guids)[0]), + cl_ntoh16(osm_port_get_base_lid(p_port)), block_num, + cl_ntoh64((*p_port->p_physp->p_guids)[block_num * GUID_TABLE_MAX_ENTRIES]), + cl_ntoh64((*p_port->p_physp->p_guids)[block_num * GUID_TABLE_MAX_ENTRIES + 1]), + cl_ntoh64((*p_port->p_physp->p_guids)[block_num * GUID_TABLE_MAX_ENTRIES + 2]), + cl_ntoh64((*p_port->p_physp->p_guids)[block_num * GUID_TABLE_MAX_ENTRIES + 
3]),
+			cl_ntoh64((*p_port->p_physp->p_guids)[block_num * GUID_TABLE_MAX_ENTRIES + 4]),
+			cl_ntoh64((*p_port->p_physp->p_guids)[block_num * GUID_TABLE_MAX_ENTRIES + 5]),
+			cl_ntoh64((*p_port->p_physp->p_guids)[block_num * GUID_TABLE_MAX_ENTRIES + 6]),
+			cl_ntoh64((*p_port->p_physp->p_guids)[block_num * GUID_TABLE_MAX_ENTRIES + 7]));
+	}
+}
+
+/*
+ * Dump callback handed to opensm_dump_to_file(): writes the whole SA
+ * database to 'file' in the order GUIDInfo records, multicast groups
+ * (each followed by its member ports), InformInfo records, service records.
+ */
+static void sa_dump_all_sa(osm_opensm_t * p_osm, FILE * file)
+{
+	struct opensm_dump_context dump_context;
+	osm_mgrp_t *p_mgrp;
+
+	dump_context.p_osm = p_osm;
+	dump_context.file = file;
+	OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "Dump guidinfo\n");
+	cl_qmap_apply_func(&p_osm->subn.port_guid_tbl,
+			   sa_dump_one_port_guidinfo, &dump_context);
+	OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "Dump multicast\n");
+	for (p_mgrp = (osm_mgrp_t *) cl_fmap_head(&p_osm->subn.mgrp_mgid_tbl);
+	     p_mgrp != (osm_mgrp_t *) cl_fmap_end(&p_osm->subn.mgrp_mgid_tbl);
+	     p_mgrp = (osm_mgrp_t *) cl_fmap_next(&p_mgrp->map_item))
+		sa_dump_one_mgrp(p_mgrp, &dump_context);
+	OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "Dump inform\n");
+	cl_qlist_apply_func(&p_osm->subn.sa_infr_list,
+			    sa_dump_one_inform, &dump_context);
+	OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "Dump services\n");
+	cl_qlist_apply_func(&p_osm->subn.sa_sr_list,
+			    sa_dump_one_service, &dump_context);
+}
+
+/*
+ * Write the SA database to "opensm-sa.dump" if it changed since the last
+ * successful dump (p_osm->sa.dirty).
+ *
+ * Returns 1 when the database is not dirty and nothing was written, 0 when
+ * the dump succeeded (the dirty flag is then cleared), otherwise the
+ * non-zero result of opensm_dump_to_file().
+ *
+ * NOTE(review): the dirty flag is cleared while holding the lock taken
+ * with cl_plock_acquire(); if that is the shared (reader) lock, confirm
+ * that no other thread can set the flag concurrently.
+ */
+int osm_sa_db_file_dump(osm_opensm_t * p_osm)
+{
+	int res = 1;
+
+	cl_plock_acquire(&p_osm->lock);
+	if (p_osm->sa.dirty) {
+		res = opensm_dump_to_file(
+			p_osm, "opensm-sa.dump", sa_dump_all_sa);
+		if (!res)
+			p_osm->sa.dirty = FALSE;
+	}
+	cl_plock_release(&p_osm->lock);
+
+	return res;
+}
+
+/*
+ * SA DB Loader
+ */
+
+/*
+ * Restore one multicast group from a parsed "MC Group" record.
+ *
+ * p_osm     - OpenSM object; its lock is taken exclusively for the call
+ * mlid      - MLID of the group as stored in the dump (network order)
+ * p_mcm_rec - MCMemberRecord parsed from the dump
+ *
+ * Returns the group when a group with this MGID already exists under the
+ * same MLID, or when it was created with the requested MLID. Returns NULL
+ * when the MGID already exists under a different MLID, when creation
+ * failed, or when the created group ended up with a different MLID.
+ */
+static osm_mgrp_t *load_mcgroup(osm_opensm_t * p_osm, ib_net16_t mlid,
+				ib_member_rec_t * p_mcm_rec)
+{
+	ib_net64_t comp_mask;
+	osm_mgrp_t *p_mgrp;
+
+	cl_plock_excl_acquire(&p_osm->lock);
+
+	p_mgrp = osm_get_mgrp_by_mgid(&p_osm->subn, &p_mcm_rec->mgid);
+	if (p_mgrp) {
+		if (p_mgrp->mlid == mlid) {
+			/* Message now ends with a newline like every other
+			 * log format in this file, so it no longer runs into
+			 * the following log entry. */
+			OSM_LOG(&p_osm->log, OSM_LOG_DEBUG,
+				"mgrp %04x is already here.\n", cl_ntoh16(mlid));
+			goto _out;
+		}
+		OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
+			"mlid %04x is already used by another MC group. Will "
+			"request clients reregistration.\n", cl_ntoh16(mlid));
+		p_mgrp = NULL;
+		goto _out;
+	}
+
+	comp_mask = IB_MCR_COMPMASK_MTU | IB_MCR_COMPMASK_MTU_SEL
+	    | IB_MCR_COMPMASK_RATE | IB_MCR_COMPMASK_RATE_SEL;
+	if (!(p_mgrp = osm_mcmr_rcv_find_or_create_new_mgrp(&p_osm->sa,
+							    comp_mask,
+							    p_mcm_rec)) ||
+	    p_mgrp->mlid != mlid) {
+		OSM_LOG(&p_osm->log, OSM_LOG_ERROR,
+			"cannot create MC group with mlid 0x%04x and mgid "
+			"0x%016" PRIx64 ":0x%016" PRIx64 "\n", cl_ntoh16(mlid),
+			cl_ntoh64(p_mcm_rec->mgid.unicast.prefix),
+			cl_ntoh64(p_mcm_rec->mgid.unicast.interface_id));
+		p_mgrp = NULL;
+	}
+
+_out:
+	cl_plock_release(&p_osm->lock);
+
+	return p_mgrp;
+}
+
+/*
+ * Restore one ServiceRecord from the dump.
+ *
+ * Returns 0 when the record already exists or was inserted, -1 when the
+ * new service object could not be allocated. When the lease is not
+ * 0xffffffff the service-record timer is trimmed to 1000 so the lease
+ * gets re-evaluated (units are whatever cl_timer_trim() takes -
+ * presumably milliseconds, TODO confirm).
+ */
+static int load_svcr(osm_opensm_t * p_osm, ib_service_record_t * sr,
+		     uint32_t modified_time, uint32_t lease_period)
+{
+	osm_svcr_t *p_svcr;
+	int ret = 0;
+
+	cl_plock_excl_acquire(&p_osm->lock);
+
+	if (osm_svcr_get_by_rid(&p_osm->subn, &p_osm->log, sr)) {
+		OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
+			"ServiceRecord already exists\n");
+		goto _out;
+	}
+
+	if (!(p_svcr = osm_svcr_new(sr))) {
+		OSM_LOG(&p_osm->log, OSM_LOG_ERROR,
+			"cannot allocate new service struct\n");
+		ret = -1;
+		goto _out;
+	}
+
+	p_svcr->modified_time = modified_time;
+	p_svcr->lease_period = lease_period;
+
+	OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "adding ServiceRecord...\n");
+
+	osm_svcr_insert_to_db(&p_osm->subn, &p_osm->log, p_svcr);
+
+	if (lease_period != 0xffffffff)
+		cl_timer_trim(&p_osm->sa.sr_timer, 1000);
+
+_out:
+	cl_plock_release(&p_osm->lock);
+
+	return ret;
+}
+
+/*
+ * Restore one InformInfo record from the dump, using 'addr' as the
+ * address reports will be sent to.
+ *
+ * Returns 0 when the record already exists or was inserted, -1 when the
+ * new object could not be allocated.
+ */
+static int load_infr(osm_opensm_t * p_osm, ib_inform_info_record_t * iir,
+		     osm_mad_addr_t * addr)
+{
+	osm_infr_t infr, *p_infr;
+	int ret = 0;
+
+	infr.h_bind = p_osm->sa.mad_ctrl.h_bind;
+	infr.sa = &p_osm->sa;
+	/* other possible way to restore mad_addr partially is
+	   to extract qpn from InformInfo and to find lid by gid */
+	infr.report_addr = *addr;
+
infr.inform_record = *iir; + + cl_plock_excl_acquire(&p_osm->lock); + if (osm_infr_get_by_rec(&p_osm->subn, &p_osm->log, &infr)) { + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "InformInfo Record already exists\n"); + goto _out; + } + + if (!(p_infr = osm_infr_new(&infr))) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, + "cannot allocate new infr struct\n"); + ret = -1; + goto _out; + } + + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "adding InformInfo Record...\n"); + + osm_infr_insert_to_db(&p_osm->subn, &p_osm->log, p_infr); + +_out: + cl_plock_release(&p_osm->lock); + + return ret; +} + +static int load_guidinfo(osm_opensm_t * p_osm, ib_net64_t base_guid, + ib_guidinfo_record_t *gir) +{ + osm_port_t *p_port; + uint32_t max_block; + int i, ret = 0; + osm_alias_guid_t *p_alias_guid, *p_alias_guid_check; + + cl_plock_excl_acquire(&p_osm->lock); + + p_port = osm_get_port_by_guid(&p_osm->subn, base_guid); + if (!p_port) + goto _out; + + if (!p_port->p_physp->p_guids) { + max_block = (p_port->p_physp->port_info.guid_cap + GUID_TABLE_MAX_ENTRIES - 1) / + GUID_TABLE_MAX_ENTRIES; + p_port->p_physp->p_guids = calloc(max_block * GUID_TABLE_MAX_ENTRIES, + sizeof(ib_net64_t)); + if (!p_port->p_physp->p_guids) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, + "cannot allocate GUID table for port " + "GUID 0x%" PRIx64 "\n", + cl_ntoh64(p_port->p_physp->port_guid)); + goto _out; + } + } + + for (i = 0; i < GUID_TABLE_MAX_ENTRIES; i++) { + if (!gir->guid_info.guid[i]) + continue; + /* skip block 0 index 0 */ + if (gir->block_num == 0 && i == 0) + continue; + if (gir->block_num * GUID_TABLE_MAX_ENTRIES + i > + p_port->p_physp->port_info.guid_cap) + break; + + p_alias_guid = osm_alias_guid_new(gir->guid_info.guid[i], + p_port); + if (!p_alias_guid) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, + "Alias guid %d memory allocation failed" + " for port GUID 0x%" PRIx64 "\n", + gir->block_num * GUID_TABLE_MAX_ENTRIES + i, + cl_ntoh64(p_port->p_physp->port_guid)); + goto _out; + } + + p_alias_guid_check = + 
(osm_alias_guid_t *) cl_qmap_insert(&p_osm->subn.alias_port_guid_tbl, + p_alias_guid->alias_guid, + &p_alias_guid->map_item); + if (p_alias_guid_check != p_alias_guid) { + /* alias GUID is a duplicate */ + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, + "Duplicate alias port GUID 0x%" PRIx64 + " index %d base port GUID 0x%" PRIx64 "\n", + cl_ntoh64(p_alias_guid->alias_guid), + gir->block_num * GUID_TABLE_MAX_ENTRIES + i, + cl_ntoh64(p_alias_guid->p_base_port->guid)); + osm_alias_guid_delete(&p_alias_guid); + goto _out; + } + } + + memcpy(&(*p_port->p_physp->p_guids)[gir->block_num * GUID_TABLE_MAX_ENTRIES], + &gir->guid_info, sizeof(ib_guid_info_t)); + + osm_queue_guidinfo(&p_osm->sa, p_port, gir->block_num); + +_out: + cl_plock_release(&p_osm->lock); + + return ret; +} + +#define UNPACK_FUNC(name,x) \ +static int unpack_##name##x(char *p, uint##x##_t *val_ptr) \ +{ \ + char *q; \ + unsigned long long num; \ + num = strtoull(p, &q, 16); \ + if (num > ~((uint##x##_t)0x0) \ + || q == p || (!isspace(*q) && *q != ':')) { \ + *val_ptr = 0; \ + return -1; \ + } \ + *val_ptr = cl_hton##x((uint##x##_t)num); \ + return (int)(q - p); \ +} + +#define cl_hton8(x) (x) + +UNPACK_FUNC(net, 8); +UNPACK_FUNC(net, 16); +UNPACK_FUNC(net, 32); +UNPACK_FUNC(net, 64); + +static int unpack_string(char *p, uint8_t * buf, unsigned len) +{ + char *q = p; + char delim = ' '; + + if (*q == '\'' || *q == '\"') + delim = *q++; + while (--len && *q && *q != delim) + *buf++ = *q++; + *buf = '\0'; + if (*q == delim && delim != ' ') + q++; + return (int)(q - p); +} + +static int unpack_string64(char *p, uint8_t * buf) +{ + return unpack_string(p, buf, 64); +} + +#define PARSE_AHEAD(p, x, name, val_ptr) { int _ret; \ + p = strstr(p, name); \ + if (!p) { \ + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, \ + "PARSE ERROR: %s:%u: cannot find \"%s\" string\n", \ + file_name, lineno, (name)); \ + ret = -2; \ + goto _error; \ + } \ + p += strlen(name); \ + _ret = unpack_##x(p, (val_ptr)); \ + if (_ret < 0) { \ + 
OSM_LOG(&p_osm->log, OSM_LOG_ERROR, \ + "PARSE ERROR: %s:%u: cannot parse "#x" value " \ + "after \"%s\"\n", file_name, lineno, (name)); \ + ret = _ret; \ + goto _error; \ + } \ + p += _ret; \ +} + +static void sa_db_file_load_handle_mgrp(osm_opensm_t * p_osm, + osm_mgrp_t * p_mgrp) +{ + /* decide whether to delete the mgrp object or not */ + if (p_mgrp->full_members == 0 && !p_mgrp->well_known) { + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "Closing MC group 0x%016" PRIx64 ":0x%016" PRIx64 + " - no full members were added to not well known " + "group\n", + cl_ntoh64(p_mgrp->mcmember_rec.mgid.unicast.prefix), + cl_ntoh64(p_mgrp->mcmember_rec.mgid.unicast.interface_id)); + osm_mgrp_cleanup(&p_osm->subn, p_mgrp); + } +} + +int osm_sa_db_file_load(osm_opensm_t * p_osm) +{ + char line[1024]; + char *file_name; + FILE *file; + int ret = 0; + osm_mgrp_t *p_next_mgrp = NULL; + osm_mgrp_t *p_prev_mgrp = NULL; + unsigned rereg_clients = 0; + unsigned lineno; + + if (!p_osm->subn.first_time_master_sweep) { + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "Not first sweep - skip SA DB restore\n"); + return 0; + } + + file_name = p_osm->subn.opt.sa_db_file; + if (!file_name) { + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "sa db file name is not specified. Skip restore\n"); + return 0; + } + + file = fopen(file_name, "r"); + if (!file) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR | OSM_LOG_SYS, "ERR 4C02: " + "Can't open sa db file \'%s\'. 
Skip restoring\n", + file_name); + return -1; + } + + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "Restoring SA DB from file \'%s\'\n", + file_name); + + lineno = 0; + + while (fgets(line, sizeof(line) - 1, file) != NULL) { + char *p; + uint8_t val; + + lineno++; + + p = line; + while (isspace(*p)) + p++; + + if (*p == '#') + continue; + + if (!strncmp(p, "MC Group", 8)) { + ib_member_rec_t mcm_rec; + ib_net16_t mlid; + + p_next_mgrp = NULL; + memset(&mcm_rec, 0, sizeof(mcm_rec)); + + PARSE_AHEAD(p, net16, " 0x", &mlid); + PARSE_AHEAD(p, net64, " mgid=0x", + &mcm_rec.mgid.unicast.prefix); + PARSE_AHEAD(p, net64, ":0x", + &mcm_rec.mgid.unicast.interface_id); + PARSE_AHEAD(p, net64, " port_gid=0x", + &mcm_rec.port_gid.unicast.prefix); + PARSE_AHEAD(p, net64, ":0x", + &mcm_rec.port_gid.unicast.interface_id); + PARSE_AHEAD(p, net32, " qkey=0x", &mcm_rec.qkey); + PARSE_AHEAD(p, net16, " mlid=0x", &mcm_rec.mlid); + PARSE_AHEAD(p, net8, " mtu=0x", &mcm_rec.mtu); + PARSE_AHEAD(p, net8, " tclass=0x", &mcm_rec.tclass); + PARSE_AHEAD(p, net16, " pkey=0x", &mcm_rec.pkey); + PARSE_AHEAD(p, net8, " rate=0x", &mcm_rec.rate); + PARSE_AHEAD(p, net8, " pkt_life=0x", &mcm_rec.pkt_life); + PARSE_AHEAD(p, net32, " sl_flow_hop=0x", + &mcm_rec.sl_flow_hop); + PARSE_AHEAD(p, net8, " scope_state=0x", + &mcm_rec.scope_state); + PARSE_AHEAD(p, net8, " proxy_join=0x", &val); + mcm_rec.proxy_join = val; + + p_next_mgrp = load_mcgroup(p_osm, mlid, &mcm_rec); + if (!p_next_mgrp) + rereg_clients = 1; + if (cl_ntoh16(mlid) > p_osm->sm.mlids_init_max) + p_osm->sm.mlids_init_max = cl_ntoh16(mlid); + } else if (p_next_mgrp && !strncmp(p, "mcm_port", 8)) { + ib_member_rec_t mcmr; + ib_net64_t guid; + osm_port_t *port; + boolean_t proxy; + + PARSE_AHEAD(p, net64, " port_gid=0x", + &mcmr.port_gid.unicast.prefix); + PARSE_AHEAD(p, net64, ":0x", + &mcmr.port_gid.unicast.interface_id); + PARSE_AHEAD(p, net8, " scope_state=0x", &mcmr.scope_state); + PARSE_AHEAD(p, net8, " proxy_join=0x", &val); + proxy = val; + 
+ guid = mcmr.port_gid.unicast.interface_id; + port = osm_get_port_by_alias_guid(&p_osm->subn, guid); + if (port && + cl_qmap_get(&p_next_mgrp->mcm_port_tbl, guid) == + cl_qmap_end(&p_next_mgrp->mcm_port_tbl) && + !osm_mgrp_add_port(&p_osm->subn, &p_osm->log, + p_next_mgrp, port, &mcmr, proxy)) + rereg_clients = 1; + } else if (!strncmp(p, "Service Record:", 15)) { + ib_service_record_t s_rec; + uint32_t modified_time, lease_period; + + p_next_mgrp = NULL; + memset(&s_rec, 0, sizeof(s_rec)); + + PARSE_AHEAD(p, net64, " id=0x", &s_rec.service_id); + PARSE_AHEAD(p, net64, " gid=0x", + &s_rec.service_gid.unicast.prefix); + PARSE_AHEAD(p, net64, ":0x", + &s_rec.service_gid.unicast.interface_id); + PARSE_AHEAD(p, net16, " pkey=0x", &s_rec.service_pkey); + PARSE_AHEAD(p, net32, " lease=0x", + &s_rec.service_lease); + PARSE_AHEAD(p, net64, " key=0x", + (ib_net64_t *) (&s_rec.service_key[0])); + PARSE_AHEAD(p, net64, ":0x", + (ib_net64_t *) (&s_rec.service_key[8])); + PARSE_AHEAD(p, string64, " name=", s_rec.service_name); + PARSE_AHEAD(p, net64, " data8=0x", + (ib_net64_t *) (&s_rec.service_data8[0])); + PARSE_AHEAD(p, net64, ":0x", + (ib_net64_t *) (&s_rec.service_data8[8])); + PARSE_AHEAD(p, net64, " data16=0x", + (ib_net64_t *) (&s_rec.service_data16[0])); + PARSE_AHEAD(p, net64, ":0x", + (ib_net64_t *) (&s_rec.service_data16[4])); + PARSE_AHEAD(p, net64, " data32=0x", + (ib_net64_t *) (&s_rec.service_data32[0])); + PARSE_AHEAD(p, net64, ":0x", + (ib_net64_t *) (&s_rec.service_data32[2])); + PARSE_AHEAD(p, net64, " data64=0x", + &s_rec.service_data64[0]); + PARSE_AHEAD(p, net64, ":0x", &s_rec.service_data64[1]); + PARSE_AHEAD(p, net32, " modified_time=0x", + &modified_time); + PARSE_AHEAD(p, net32, " lease_period=0x", + &lease_period); + + if (load_svcr(p_osm, &s_rec, cl_ntoh32(modified_time), + cl_ntoh32(lease_period))) + rereg_clients = 1; + } else if (!strncmp(p, "InformInfo Record:", 18)) { + ib_inform_info_record_t i_rec; + osm_mad_addr_t rep_addr; + ib_net16_t 
val16; + + p_next_mgrp = NULL; + memset(&i_rec, 0, sizeof(i_rec)); + memset(&rep_addr, 0, sizeof(rep_addr)); + + PARSE_AHEAD(p, net64, " subscriber_gid=0x", + &i_rec.subscriber_gid.unicast.prefix); + PARSE_AHEAD(p, net64, ":0x", + &i_rec.subscriber_gid.unicast.interface_id); + PARSE_AHEAD(p, net16, " subscriber_enum=0x", + &i_rec.subscriber_enum); + PARSE_AHEAD(p, net64, " gid=0x", + &i_rec.inform_info.gid.unicast.prefix); + PARSE_AHEAD(p, net64, ":0x", + &i_rec.inform_info.gid.unicast. + interface_id); + PARSE_AHEAD(p, net16, " lid_range_begin=0x", + &i_rec.inform_info.lid_range_begin); + PARSE_AHEAD(p, net16, " lid_range_end=0x", + &i_rec.inform_info.lid_range_end); + PARSE_AHEAD(p, net8, " is_generic=0x", + &i_rec.inform_info.is_generic); + PARSE_AHEAD(p, net8, " subscribe=0x", + &i_rec.inform_info.subscribe); + PARSE_AHEAD(p, net16, " trap_type=0x", + &i_rec.inform_info.trap_type); + PARSE_AHEAD(p, net16, " trap_num=0x", + &i_rec.inform_info.g_or_v.generic.trap_num); + PARSE_AHEAD(p, net32, " qpn_resp_time_val=0x", + &i_rec.inform_info.g_or_v.generic. + qpn_resp_time_val); + PARSE_AHEAD(p, net32, " node_type=0x", + (uint32_t *) & i_rec.inform_info.g_or_v. 
+ generic.reserved2); + + PARSE_AHEAD(p, net16, " rep_addr: lid=0x", + &rep_addr.dest_lid); + PARSE_AHEAD(p, net8, " path_bits=0x", + &rep_addr.path_bits); + PARSE_AHEAD(p, net8, " static_rate=0x", + &rep_addr.static_rate); + PARSE_AHEAD(p, net32, " remote_qp=0x", + &rep_addr.addr_type.gsi.remote_qp); + PARSE_AHEAD(p, net32, " remote_qkey=0x", + &rep_addr.addr_type.gsi.remote_qkey); + PARSE_AHEAD(p, net16, " pkey_ix=0x", &val16); + rep_addr.addr_type.gsi.pkey_ix = cl_ntoh16(val16); + PARSE_AHEAD(p, net8, " sl=0x", + &rep_addr.addr_type.gsi.service_level); + + if (load_infr(p_osm, &i_rec, &rep_addr)) + rereg_clients = 1; + } else if (!strncmp(p, "GUIDInfo Record:", 16)) { + ib_guidinfo_record_t gi_rec; + ib_net64_t base_guid; + + p_next_mgrp = NULL; + memset(&gi_rec, 0, sizeof(gi_rec)); + + PARSE_AHEAD(p, net64, " base_guid=0x", &base_guid); + PARSE_AHEAD(p, net16, " lid=0x", &gi_rec.lid); + PARSE_AHEAD(p, net8, " block_num=0x", + &gi_rec.block_num); + PARSE_AHEAD(p, net64, " guid0=0x", + &gi_rec.guid_info.guid[0]); + PARSE_AHEAD(p, net64, " guid1=0x", + &gi_rec.guid_info.guid[1]); + PARSE_AHEAD(p, net64, " guid2=0x", + &gi_rec.guid_info.guid[2]); + PARSE_AHEAD(p, net64, " guid3=0x", + &gi_rec.guid_info.guid[3]); + PARSE_AHEAD(p, net64, " guid4=0x", + &gi_rec.guid_info.guid[4]); + PARSE_AHEAD(p, net64, " guid5=0x", + &gi_rec.guid_info.guid[5]); + PARSE_AHEAD(p, net64, " guid6=0x", + &gi_rec.guid_info.guid[6]); + PARSE_AHEAD(p, net64, " guid7=0x", + &gi_rec.guid_info.guid[7]); + + if (load_guidinfo(p_osm, base_guid, &gi_rec)) + rereg_clients = 1; + } + + /* + * p_next_mgrp points to the multicast group now being parsed. + * p_prev_mgrp points to the last multicast group we parsed. + * We decide whether to keep or delete each multicast group + * only when we finish parsing it's member records. if the + * group has full members, or it is a "well known group" we + * keep it. 
+ */ + if (p_prev_mgrp != p_next_mgrp) { + if (p_prev_mgrp) + sa_db_file_load_handle_mgrp(p_osm, p_prev_mgrp); + p_prev_mgrp = p_next_mgrp; + } + } + + if (p_next_mgrp) + sa_db_file_load_handle_mgrp(p_osm, p_prev_mgrp); + + /* + * If loading succeeded, do whatever 'no_clients_rereg' says. + * If loading failed at some point, turn off the 'no_clients_rereg' + * option (turn on re-registration requests). + */ + if (rereg_clients) + p_osm->subn.opt.no_clients_rereg = FALSE; + + /* We've just finished loading SA DB file - clear the "dirty" flag */ + p_osm->sa.dirty = FALSE; + +_error: + fclose(file); + return ret; +} diff --git a/opensm/osm_sa_class_port_info.c b/opensm/osm_sa_class_port_info.c new file mode 100644 index 0000000..8147ed0 --- /dev/null +++ b/opensm/osm_sa_class_port_info.c @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_cpi_rcv_t. + * This object represents the ClassPortInfo Receiver object. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_SA_CLASS_PORT_INFO_C +#include +#include +#include + +#define MAX_MSECS_TO_RTV 24 +/* Precalculated table in msec (index is related to encoded value) */ +/* 4.096 usec * 2 ** n (where n = 8 - 31) */ +const static uint32_t msecs_to_rtv_table[MAX_MSECS_TO_RTV] = { + 1, 2, 4, 8, + 16, 33, 67, 134, + 268, 536, 1073, 2147, + 4294, 8589, 17179, 34359, + 68719, 137438, 274877, 549755, + 1099511, 2199023, 4398046, 8796093 +}; + +static void cpi_rcv_respond(IN osm_sa_t * sa, IN const osm_madw_t * p_madw) +{ + osm_madw_t *p_resp_madw; + const ib_sa_mad_t *p_sa_mad; + ib_sa_mad_t *p_resp_sa_mad; + ib_class_port_info_t *p_resp_cpi; + ib_gid_t zero_gid; + uint32_t cap_mask2; + uint8_t rtv; + + OSM_LOG_ENTER(sa->p_log); + + memset(&zero_gid, 0, sizeof(ib_gid_t)); + + /* + Get a MAD to reply. 
Address of Mad is in the received mad_wrapper + */ + p_resp_madw = osm_mad_pool_get(sa->p_mad_pool, p_madw->h_bind, + MAD_BLOCK_SIZE, &p_madw->mad_addr); + if (!p_resp_madw) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1408: " + "Unable to allocate MAD\n"); + goto Exit; + } + + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); + p_resp_sa_mad = osm_madw_get_sa_mad_ptr(p_resp_madw); + + memcpy(p_resp_sa_mad, p_sa_mad, IB_SA_MAD_HDR_SIZE); + p_resp_sa_mad->method |= IB_MAD_METHOD_RESP_MASK; + /* C15-0.1.5 - always return SM_Key = 0 (table 185 p 884) */ + p_resp_sa_mad->sm_key = 0; + + p_resp_cpi = + (ib_class_port_info_t *) ib_sa_mad_get_payload_ptr(p_resp_sa_mad); + + /* finally do it (the job) man ! */ + p_resp_cpi->base_ver = 1; + p_resp_cpi->class_ver = 2; + /* Calculate encoded response time value */ + /* transaction timeout is in msec */ + if (sa->p_subn->opt.transaction_timeout > + msecs_to_rtv_table[MAX_MSECS_TO_RTV - 1]) + rtv = MAX_MSECS_TO_RTV - 1; + else { + for (rtv = 0; rtv < MAX_MSECS_TO_RTV; rtv++) { + if (sa->p_subn->opt.transaction_timeout <= + msecs_to_rtv_table[rtv]) + break; + } + } + rtv += 8; + ib_class_set_resp_time_val(p_resp_cpi, rtv); + p_resp_cpi->redir_gid = zero_gid; + p_resp_cpi->redir_tc_sl_fl = 0; + p_resp_cpi->redir_lid = 0; + p_resp_cpi->redir_pkey = 0; + p_resp_cpi->redir_qp = CL_NTOH32(1); + p_resp_cpi->redir_qkey = IB_QP1_WELL_KNOWN_Q_KEY; + p_resp_cpi->trap_gid = zero_gid; + p_resp_cpi->trap_tc_sl_fl = 0; + p_resp_cpi->trap_lid = 0; + p_resp_cpi->trap_pkey = 0; + p_resp_cpi->trap_hop_qp = 0; + p_resp_cpi->trap_qkey = IB_QP1_WELL_KNOWN_Q_KEY; + + /* set specific capability mask bits */ + /* we do not support the following options/optional records: + OSM_CAP_IS_SUBN_OPT_RECS_SUP : + RandomForwardingTableRecord, + ServiceAssociationRecord + other optional records supported "under the table" + + OSM_CAP_IS_MULTIPATH_SUP: + TraceRecord + + OSM_CAP_IS_REINIT_SUP: + For reinitialization functionality. 
+ + So not sending traps, but supporting Get(Notice) and Set(Notice). + */ + + /* Note host notation replaced later */ +#if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) + p_resp_cpi->cap_mask = OSM_CAP_IS_SUBN_GET_SET_NOTICE_SUP | + OSM_CAP_IS_PORT_INFO_CAPMASK_MATCH_SUPPORTED | + OSM_CAP_IS_MULTIPATH_SUP; +#else + p_resp_cpi->cap_mask = OSM_CAP_IS_SUBN_GET_SET_NOTICE_SUP | + OSM_CAP_IS_PORT_INFO_CAPMASK_MATCH_SUPPORTED; +#endif + cap_mask2 = OSM_CAP2_IS_FULL_PORTINFO_REC_SUPPORTED | + OSM_CAP2_IS_EXTENDED_SPEEDS_SUPPORTED | + OSM_CAP2_IS_ALIAS_GUIDS_SUPPORTED | + OSM_CAP2_IS_MULTICAST_SERVICE_RECS_SUPPORTED | + OSM_CAP2_IS_PORT_INFO_CAPMASK2_MATCH_SUPPORTED | + OSM_CAP2_IS_SEND_ONLY_FULL_MEMBER_SUPPORTED | + OSM_CAP2_IS_LINK_WIDTH_2X_SUPPORTED | + OSM_CAP2_IS_LINK_SPEED_HDR_SUPPORTED; + if (sa->p_subn->opt.use_mfttop) + cap_mask2 |= OSM_CAP2_IS_MCAST_TOP_SUPPORTED; + if (sa->p_subn->opt.qos) + cap_mask2 |= OSM_CAP2_IS_QOS_SUPPORTED; + ib_class_set_cap_mask2(p_resp_cpi, cap_mask2); + + if (!sa->p_subn->opt.disable_multicast) + p_resp_cpi->cap_mask |= OSM_CAP_IS_UD_MCAST_SUP; + p_resp_cpi->cap_mask = cl_hton16(p_resp_cpi->cap_mask); + + if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_FRAMES)) + osm_dump_sa_mad_v2(sa->p_log, p_resp_sa_mad, FILE_ID, OSM_LOG_FRAMES); + + osm_sa_send(sa, p_resp_madw, FALSE); + +Exit: + OSM_LOG_EXIT(sa->p_log); +} + +/********************************************************************** + * This code actually handles the call + **********************************************************************/ +void osm_cpi_rcv_process(IN void *context, IN void *data) +{ + osm_sa_t *sa = context; + osm_madw_t *p_madw = data; + const ib_sa_mad_t *p_sa_mad; + + OSM_LOG_ENTER(sa->p_log); + + CL_ASSERT(p_madw); + + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); + + /* we only support GET */ + if (p_sa_mad->method != IB_MAD_METHOD_GET) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1403: " + "Unsupported Method (%s) for ClassPortInfo request\n", + 
ib_get_sa_method_str(p_sa_mad->method)); + osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID); + goto Exit; + } + + CL_ASSERT(p_sa_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO); + + /* CLASS PORT INFO does not really look at the SMDB - no lock required. */ + + cpi_rcv_respond(sa, p_madw); + +Exit: + OSM_LOG_EXIT(sa->p_log); +} diff --git a/opensm/osm_sa_guidinfo_record.c b/opensm/osm_sa_guidinfo_record.c new file mode 100644 index 0000000..e548924 --- /dev/null +++ b/opensm/osm_sa_guidinfo_record.c @@ -0,0 +1,844 @@ +/* + * Copyright (c) 2006-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2012 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_gir_rcv_t. + * This object represents the GUIDInfoRecord Receiver object. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_SA_GUIDINFO_RECORD_C +#include +#include +#include +#include +#include +#include +#include + +#define SA_GIR_RESP_SIZE SA_ITEM_RESP_SIZE(guid_rec) + +#define MOD_GIR_COMP_MASK (IB_GIR_COMPMASK_LID | IB_GIR_COMPMASK_BLOCKNUM) + +typedef struct osm_gir_item { + cl_list_item_t list_item; + ib_guidinfo_record_t rec; +} osm_gir_item_t; + +typedef struct osm_gir_search_ctxt { + const ib_guidinfo_record_t *p_rcvd_rec; + ib_net64_t comp_mask; + cl_qlist_t *p_list; + osm_sa_t *sa; + const osm_physp_t *p_req_physp; +} osm_gir_search_ctxt_t; + +static ib_api_status_t gir_rcv_new_gir(IN osm_sa_t * sa, + IN const osm_node_t * p_node, + IN cl_qlist_t * p_list, + IN ib_net64_t const match_port_guid, + IN ib_net16_t const match_lid, + IN const osm_physp_t * p_physp, + IN uint8_t const block_num) +{ + osm_sa_item_t *p_rec_item; + ib_api_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(sa->p_log); + + p_rec_item = malloc(SA_GIR_RESP_SIZE); + if (p_rec_item == NULL) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5102: " + "rec_item alloc failed\n"); + status = IB_INSUFFICIENT_RESOURCES; + goto Exit; + } + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "New GUIDInfoRecord: lid %u, block num %d\n", + cl_ntoh16(match_lid), block_num); + + memset(p_rec_item, 0, SA_GIR_RESP_SIZE); + + p_rec_item->resp.guid_rec.lid = match_lid; + p_rec_item->resp.guid_rec.block_num = block_num; + if 
(p_physp->p_guids) + memcpy(&p_rec_item->resp.guid_rec.guid_info, + *p_physp->p_guids + block_num * GUID_TABLE_MAX_ENTRIES, + sizeof(ib_guid_info_t)); + else if (!block_num) + p_rec_item->resp.guid_rec.guid_info.guid[0] = osm_physp_get_port_guid(p_physp); + + cl_qlist_insert_tail(p_list, &p_rec_item->list_item); + +Exit: + OSM_LOG_EXIT(sa->p_log); + return status; +} + +static void sa_gir_create_gir(IN osm_sa_t * sa, IN osm_node_t * p_node, + IN cl_qlist_t * p_list, + IN ib_net64_t const match_port_guid, + IN ib_net16_t const match_lid, + IN const osm_physp_t * p_req_physp, + IN uint8_t const match_block_num) +{ + const osm_physp_t *p_physp; + uint8_t port_num; + uint8_t num_ports; + uint16_t match_lid_ho; + ib_net16_t base_lid_ho; + ib_net16_t max_lid_ho; + uint8_t lmc; + ib_net64_t port_guid; + uint8_t block_num, start_block_num, end_block_num, num_blocks; + + OSM_LOG_ENTER(sa->p_log); + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Looking for GUIDRecord with LID: %u GUID:0x%016" + PRIx64 "\n", cl_ntoh16(match_lid), cl_ntoh64(match_port_guid)); + + /* + For switches, do not return the GUIDInfo record(s) + for each port on the switch, just for port 0. + */ + if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) + num_ports = 1; + else + num_ports = osm_node_get_num_physp(p_node); + + for (port_num = 0; port_num < num_ports; port_num++) { + p_physp = osm_node_get_physp_ptr(p_node, port_num); + if (!p_physp) + continue; + + /* Check to see if the found p_physp and the requester physp + share a pkey. If not, continue */ + if (!osm_physp_share_pkey(sa->p_log, p_physp, p_req_physp, + sa->p_subn->opt.allow_both_pkeys)) + continue; + + port_guid = osm_physp_get_port_guid(p_physp); + + if (match_port_guid && (port_guid != match_port_guid)) + continue; + + /* + Note: the following check is a temporary workaround + Since 1. GUIDCap should never be 0 on ports where this applies + and 2. 
GUIDCap should not be used on ports where it doesn't apply + So this should really be a check for whether the port is a + switch external port or not! + */ + if (p_physp->port_info.guid_cap == 0) + continue; + + num_blocks = p_physp->port_info.guid_cap / 8; + if (p_physp->port_info.guid_cap % 8) + num_blocks++; + if (match_block_num == 255) { + start_block_num = 0; + end_block_num = num_blocks - 1; + } else { + if (match_block_num >= num_blocks) + continue; + end_block_num = start_block_num = match_block_num; + } + + base_lid_ho = cl_ntoh16(osm_physp_get_base_lid(p_physp)); + match_lid_ho = cl_ntoh16(match_lid); + if (match_lid_ho) { + lmc = osm_physp_get_lmc(p_physp); + max_lid_ho = (uint16_t) (base_lid_ho + (1 << lmc) - 1); + + /* + We validate that the lid belongs to this node. + */ + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Comparing LID: %u <= %u <= %u\n", + base_lid_ho, match_lid_ho, max_lid_ho); + + if (match_lid_ho < base_lid_ho + || match_lid_ho > max_lid_ho) + continue; + } + + for (block_num = start_block_num; block_num <= end_block_num; + block_num++) + gir_rcv_new_gir(sa, p_node, p_list, port_guid, + cl_ntoh16(base_lid_ho), p_physp, + block_num); + } + + OSM_LOG_EXIT(sa->p_log); +} + +static void sa_gir_by_comp_mask_cb(IN cl_map_item_t * p_map_item, IN void *cxt) +{ + const osm_gir_search_ctxt_t *p_ctxt = cxt; + osm_node_t *const p_node = (osm_node_t *) p_map_item; + const ib_guidinfo_record_t *const p_rcvd_rec = p_ctxt->p_rcvd_rec; + const osm_physp_t *const p_req_physp = p_ctxt->p_req_physp; + osm_sa_t *sa = p_ctxt->sa; + const ib_guid_info_t *p_comp_gi; + ib_net64_t const comp_mask = p_ctxt->comp_mask; + ib_net64_t match_port_guid = 0; + ib_net16_t match_lid = 0; + uint8_t match_block_num = 255; + + OSM_LOG_ENTER(p_ctxt->sa->p_log); + + if (comp_mask & IB_GIR_COMPMASK_LID) + match_lid = p_rcvd_rec->lid; + + if (comp_mask & IB_GIR_COMPMASK_BLOCKNUM) + match_block_num = p_rcvd_rec->block_num; + + p_comp_gi = &p_rcvd_rec->guid_info; + /* Different rule for 
block 0 v. other blocks */ + if (comp_mask & IB_GIR_COMPMASK_GID0) { + if (!p_rcvd_rec->block_num) + match_port_guid = osm_physp_get_port_guid(p_req_physp); + if (p_comp_gi->guid[0] != match_port_guid) + goto Exit; + } + + if (comp_mask & IB_GIR_COMPMASK_GID1) { + if (p_comp_gi->guid[1] != 0) + goto Exit; + } + + if (comp_mask & IB_GIR_COMPMASK_GID2) { + if (p_comp_gi->guid[2] != 0) + goto Exit; + } + + if (comp_mask & IB_GIR_COMPMASK_GID3) { + if (p_comp_gi->guid[3] != 0) + goto Exit; + } + + if (comp_mask & IB_GIR_COMPMASK_GID4) { + if (p_comp_gi->guid[4] != 0) + goto Exit; + } + + if (comp_mask & IB_GIR_COMPMASK_GID5) { + if (p_comp_gi->guid[5] != 0) + goto Exit; + } + + if (comp_mask & IB_GIR_COMPMASK_GID6) { + if (p_comp_gi->guid[6] != 0) + goto Exit; + } + + if (comp_mask & IB_GIR_COMPMASK_GID7) { + if (p_comp_gi->guid[7] != 0) + goto Exit; + } + + sa_gir_create_gir(sa, p_node, p_ctxt->p_list, match_port_guid, + match_lid, p_req_physp, match_block_num); + +Exit: + OSM_LOG_EXIT(p_ctxt->sa->p_log); +} + +static inline boolean_t check_mod_comp_mask(ib_net64_t comp_mask) +{ + return ((comp_mask & MOD_GIR_COMP_MASK) == MOD_GIR_COMP_MASK); +} + +static uint8_t coalesce_comp_mask(IN osm_madw_t *p_madw) +{ + uint8_t comp_mask = 0; + ib_sa_mad_t *p_sa_mad; + + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); + if (p_sa_mad->comp_mask & IB_GIR_COMPMASK_GID0) + comp_mask |= 1<<0; + if (p_sa_mad->comp_mask & IB_GIR_COMPMASK_GID1) + comp_mask |= 1<<1; + if (p_sa_mad->comp_mask & IB_GIR_COMPMASK_GID2) + comp_mask |= 1<<2; + if (p_sa_mad->comp_mask & IB_GIR_COMPMASK_GID3) + comp_mask |= 1<<3; + if (p_sa_mad->comp_mask & IB_GIR_COMPMASK_GID4) + comp_mask |= 1<<4; + if (p_sa_mad->comp_mask & IB_GIR_COMPMASK_GID5) + comp_mask |= 1<<5; + if (p_sa_mad->comp_mask & IB_GIR_COMPMASK_GID6) + comp_mask |= 1<<6; + if (p_sa_mad->comp_mask & IB_GIR_COMPMASK_GID7) + comp_mask |= 1<<7; + return comp_mask; +} + +static void guidinfo_respond(IN osm_sa_t *sa, IN osm_madw_t *p_madw, + IN 
ib_guidinfo_record_t * p_guidinfo_rec) +{ + cl_qlist_t rec_list; + osm_sa_item_t *item; + + OSM_LOG_ENTER(sa->p_log); + + item = malloc(SA_GIR_RESP_SIZE); + if (!item) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5101: " + "rec_item alloc failed\n"); + goto Exit; + } + + item->resp.guid_rec = *p_guidinfo_rec; + + cl_qlist_init(&rec_list); + cl_qlist_insert_tail(&rec_list, &item->list_item); + + osm_sa_respond(sa, p_madw, sizeof(ib_guidinfo_record_t), &rec_list); + +Exit: + OSM_LOG_EXIT(sa->p_log); +} + +static void gir_respond(IN osm_sa_t *sa, IN osm_madw_t *p_madw) +{ + ib_sa_mad_t *p_sa_mad; + ib_guidinfo_record_t *p_rcvd_rec; + ib_guidinfo_record_t guidinfo_rec; + + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); + p_rcvd_rec = (ib_guidinfo_record_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); + if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_DEBUG)) + osm_dump_guidinfo_record_v2(sa->p_log, p_rcvd_rec, FILE_ID, OSM_LOG_DEBUG); + + guidinfo_rec = *p_rcvd_rec; + guidinfo_respond(sa, p_madw, &guidinfo_rec); +} + +static ib_net64_t sm_assigned_guid(uint8_t assigned_byte) +{ + static uint32_t uniq_count; + + if (++uniq_count == 0) { + uniq_count--; + return 0; + } + return cl_hton64(((uint64_t) uniq_count) | + (((uint64_t) assigned_byte) << 32) | + (((uint64_t) OSM_VENDOR_ID_OPENIB) << 40)); +} + +static void del_guidinfo(IN osm_sa_t *sa, IN osm_madw_t *p_madw, + IN osm_port_t *p_port, IN uint8_t block_num) +{ + int i; + uint32_t max_block; + ib_sa_mad_t *p_sa_mad; + ib_guidinfo_record_t *p_rcvd_rec; + ib_net64_t del_alias_guid; + osm_alias_guid_t *p_alias_guid; + cl_list_item_t *p_list_item; + osm_mcm_port_t *p_mcm_port; + osm_mcm_alias_guid_t *p_mcm_alias_guid; + uint8_t del_mask; + int dirty = 0; + + if (!p_port->p_physp->p_guids) + goto Exit; + + max_block = (p_port->p_physp->port_info.guid_cap + GUID_TABLE_MAX_ENTRIES - 1) / + GUID_TABLE_MAX_ENTRIES; + + if (block_num >= max_block) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5116: " + "block_num %d is higher than Max GUID Cap 
block %d " + "for port GUID 0x%" PRIx64 "\n", + block_num, max_block, cl_ntoh64(p_port->p_physp->port_guid)); + CL_PLOCK_RELEASE(sa->p_lock); + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_NO_RECORDS); + return; + } + + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); + p_rcvd_rec = + (ib_guidinfo_record_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); + + del_mask = coalesce_comp_mask(p_madw); + + for (i = block_num * GUID_TABLE_MAX_ENTRIES; + (block_num + 1) * GUID_TABLE_MAX_ENTRIES < p_port->p_physp->port_info.guid_cap ? i < (block_num + 1) * GUID_TABLE_MAX_ENTRIES : i < p_port->p_physp->port_info.guid_cap; + i++) { + /* can't delete block 0 index 0 (base guid is RO) for alias guid table */ + if (i == 0 && p_sa_mad->comp_mask & IB_GIR_COMPMASK_GID0) { + CL_PLOCK_RELEASE(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Not allowed to delete RO GID 0\n"); + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_REQ_INVALID); + return; + } + if (!(del_mask & 1<<(i % 8))) + continue; + + del_alias_guid = (*p_port->p_physp->p_guids)[i]; + if (del_alias_guid) { + /* Search all of port's multicast groups for alias */ + p_list_item = cl_qlist_head(&p_port->mcm_list); + while (p_list_item != cl_qlist_end(&p_port->mcm_list)) { + p_mcm_port = cl_item_obj(p_list_item, + p_mcm_port, list_item); + p_list_item = cl_qlist_next(p_list_item); + p_mcm_alias_guid = osm_mgrp_get_mcm_alias_guid(p_mcm_port->mgrp, del_alias_guid); + if (p_mcm_alias_guid) { + CL_PLOCK_RELEASE(sa->p_lock); + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_DENIED); + return; + } + } + } + } + + for (i = block_num * GUID_TABLE_MAX_ENTRIES; + (block_num + 1) * GUID_TABLE_MAX_ENTRIES < p_port->p_physp->port_info.guid_cap ? 
i < (block_num + 1) * GUID_TABLE_MAX_ENTRIES : i < p_port->p_physp->port_info.guid_cap; + i++) { + if (!(del_mask & 1<<(i % 8))) + continue; + + del_alias_guid = (*p_port->p_physp->p_guids)[i]; + if (del_alias_guid) { + /* remove original from alias guid table */ + p_alias_guid = (osm_alias_guid_t *) + cl_qmap_remove(&sa->p_subn->alias_port_guid_tbl, + del_alias_guid); + if (p_alias_guid != (osm_alias_guid_t *) + cl_qmap_end(&sa->p_subn->alias_port_guid_tbl)) + osm_alias_guid_delete(&p_alias_guid); + else + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 510B: " + "Original alias GUID 0x%" PRIx64 + " at index %u not found\n", + cl_ntoh64(del_alias_guid), i); + /* clear guid at index */ + (*p_port->p_physp->p_guids)[i] = 0; + dirty = 1; + } + } + + if (dirty) { + if (osm_queue_guidinfo(sa, p_port, block_num)) + osm_sm_signal(sa->sm, OSM_SIGNAL_GUID_PROCESS_REQUEST); + sa->dirty = TRUE; + } + + memcpy(&p_rcvd_rec->guid_info, + &((*p_port->p_physp->p_guids)[block_num * GUID_TABLE_MAX_ENTRIES]), + sizeof(ib_guid_info_t)); + +Exit: + CL_PLOCK_RELEASE(sa->p_lock); + gir_respond(sa, p_madw); +} + +static void set_guidinfo(IN osm_sa_t *sa, IN osm_madw_t *p_madw, + IN osm_port_t *p_port, IN uint8_t block_num) +{ + uint32_t max_block; + int i, j, dirty = 0; + ib_sa_mad_t *p_sa_mad; + ib_guidinfo_record_t *p_rcvd_rec; + osm_assigned_guids_t *p_assigned_guids = 0; + osm_alias_guid_t *p_alias_guid, *p_alias_guid_check; + cl_map_item_t *p_item; + ib_net64_t set_alias_guid, del_alias_guid, assigned_guid; + uint8_t set_mask; + + max_block = (p_port->p_physp->port_info.guid_cap + GUID_TABLE_MAX_ENTRIES - 1) / + GUID_TABLE_MAX_ENTRIES; + if (block_num >= max_block) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5118: " + "block_num %d is higher than Max GUID Cap block %d " + "for port GUID 0x%" PRIx64 "\n", + block_num, max_block, cl_ntoh64(p_port->p_physp->port_guid)); + CL_PLOCK_RELEASE(sa->p_lock); + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_NO_RECORDS); + return; + } + if 
(!p_port->p_physp->p_guids) { + p_port->p_physp->p_guids = calloc(max_block * GUID_TABLE_MAX_ENTRIES, + sizeof(ib_net64_t)); + if (!p_port->p_physp->p_guids) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5103: " + "GUID table memory allocation failed for port " + "GUID 0x%" PRIx64 "\n", + cl_ntoh64(p_port->p_physp->port_guid)); + CL_PLOCK_RELEASE(sa->p_lock); + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_NO_RESOURCES); + return; + } + /* setup base port guid in index 0 */ + (*p_port->p_physp->p_guids)[0] = p_port->p_physp->port_guid; + } + + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); + p_rcvd_rec = (ib_guidinfo_record_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); + + if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_DEBUG)) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Dump of incoming record\n"); + osm_dump_guidinfo_record_v2(sa->p_log, p_rcvd_rec, FILE_ID, OSM_LOG_DEBUG); + } + + set_mask = coalesce_comp_mask(p_madw); + + for (i = block_num * GUID_TABLE_MAX_ENTRIES; + (block_num + 1) * GUID_TABLE_MAX_ENTRIES < p_port->p_physp->port_info.guid_cap ? i < (block_num + 1) * GUID_TABLE_MAX_ENTRIES : i < p_port->p_physp->port_info.guid_cap; + i++) { + /* can't set block 0 index 0 (base guid is RO) for alias guid table */ + if (i == 0 && p_sa_mad->comp_mask & IB_GIR_COMPMASK_GID0) { + CL_PLOCK_RELEASE(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Not allowed to set RO GID 0\n"); + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_REQ_INVALID); + return; + } + + if (!(set_mask & 1<<(i % 8))) + continue; + + set_alias_guid = p_rcvd_rec->guid_info.guid[i % 8]; + if (!set_alias_guid) { + /* was a GUID previously assigned for this index ? */ + set_alias_guid = (*p_port->p_physp->p_guids)[i]; + if (set_alias_guid) { + p_rcvd_rec->guid_info.guid[i % 8] = set_alias_guid; + continue; + } + /* Is there a persistent SA assigned guid for this index ? 
*/ + if (!p_assigned_guids) + p_assigned_guids = + osm_get_assigned_guids_by_guid(sa->p_subn, + p_port->p_physp->port_guid); + if (p_assigned_guids) { + set_alias_guid = p_assigned_guids->assigned_guid[i]; + if (set_alias_guid) { + p_rcvd_rec->guid_info.guid[i % 8] = set_alias_guid; + p_item = cl_qmap_get(&sa->sm->p_subn->alias_port_guid_tbl, + set_alias_guid); + if (p_item == cl_qmap_end(&sa->sm->p_subn->alias_port_guid_tbl)) + goto add_alias_guid; + else { + p_alias_guid = (osm_alias_guid_t *) p_item; + if (p_alias_guid->p_base_port != p_port) { + OSM_LOG(sa->p_log, + OSM_LOG_ERROR, + "ERR 5110: " + " Assigned alias port GUID 0x%" PRIx64 + " index %d base port GUID 0x%" PRIx64 + " now attempted on port GUID 0x%" PRIx64 + "\n", + cl_ntoh64(p_alias_guid->alias_guid), i, + cl_ntoh64(p_alias_guid->p_base_port->guid), + cl_ntoh64(p_port->guid)); + /* clear response guid at index to indicate duplicate */ + p_rcvd_rec->guid_info.guid[i % 8] = 0; + } + continue; + } + } + } + } + if (!set_alias_guid) { + for (j = 0; j < 1000; j++) { + assigned_guid = sm_assigned_guid(sa->p_subn->opt.sm_assigned_guid); + if (!assigned_guid) { + CL_PLOCK_RELEASE(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, + "ERR 510E: No more assigned guids available\n"); + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_NO_RESOURCES); + return; + } + p_item = cl_qmap_get(&sa->sm->p_subn->alias_port_guid_tbl, + assigned_guid); + if (p_item == cl_qmap_end(&sa->sm->p_subn->alias_port_guid_tbl)) { + set_alias_guid = assigned_guid; + p_rcvd_rec->guid_info.guid[i % 8] = assigned_guid; + if (!p_assigned_guids) { + p_assigned_guids = osm_assigned_guids_new(p_port->p_physp->port_guid, + max_block * GUID_TABLE_MAX_ENTRIES); + if (p_assigned_guids) { + cl_qmap_insert(&(sa->p_subn->assigned_guids_tbl), + p_assigned_guids->port_guid, + &p_assigned_guids->map_item); + } else { + OSM_LOG(sa->p_log, + OSM_LOG_ERROR, + "ERR 510D: osm_assigned_guids_new failed port GUID 0x%" PRIx64 " index %d\n", + 
cl_ntoh64(p_port->p_physp->port_guid), i); + CL_PLOCK_RELEASE(sa->p_lock); + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_NO_RESOURCES); + return; + } + } + if (p_assigned_guids) + p_assigned_guids->assigned_guid[i] = assigned_guid; + break; + } + } + if (!set_alias_guid) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 510A: " + "SA assigned GUID %d failed for " + "port GUID 0x%" PRIx64 "\n", i, + cl_ntoh64(p_port->p_physp->port_guid)); + continue; + } + } + +add_alias_guid: + /* allocate alias guid and add to alias guid table */ + p_alias_guid = osm_alias_guid_new(set_alias_guid, p_port); + if (!p_alias_guid) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5107: " + "Alias guid %d memory allocation failed" + " for port GUID 0x%" PRIx64 "\n", + i, cl_ntoh64(p_port->p_physp->port_guid)); + CL_PLOCK_RELEASE(sa->p_lock); + return; + } + + p_alias_guid_check = + (osm_alias_guid_t *) cl_qmap_insert(&sa->sm->p_subn->alias_port_guid_tbl, + p_alias_guid->alias_guid, + &p_alias_guid->map_item); + if (p_alias_guid_check != p_alias_guid) { + /* alias GUID is a duplicate if it exists on another port or on the same port but at another index */ + if (p_alias_guid_check->p_base_port != p_port || + (*p_port->p_physp->p_guids)[i] != set_alias_guid) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5108: " + "Duplicate alias port GUID 0x%" PRIx64 + " index %d base port GUID 0x%" PRIx64 + ", alias GUID already assigned to " + "base port GUID 0x%" PRIx64 "\n", + cl_ntoh64(p_alias_guid->alias_guid), i, + cl_ntoh64(p_alias_guid->p_base_port->guid), + cl_ntoh64(p_alias_guid_check->p_base_port->guid)); + /* clear response guid at index to indicate duplicate */ + p_rcvd_rec->guid_info.guid[i % 8] = 0; + } + osm_alias_guid_delete(&p_alias_guid); + } else { + del_alias_guid = (*p_port->p_physp->p_guids)[i]; + if (del_alias_guid) { + /* remove original from alias guid table */ + p_alias_guid_check = (osm_alias_guid_t *) + cl_qmap_remove(&sa->p_subn->alias_port_guid_tbl, + del_alias_guid); + if 
(p_alias_guid_check) + osm_alias_guid_delete(&p_alias_guid_check); + else + OSM_LOG(sa->p_log, OSM_LOG_ERROR, + "ERR 510C: Original alias GUID " + "0x%" PRIx64 "at index %u " + "not found\n", + cl_ntoh64(del_alias_guid), + i); + } + + /* insert or replace guid at index */ + (*p_port->p_physp->p_guids)[i] = set_alias_guid; + dirty = 1; + } + } + + if (dirty) { + if (osm_queue_guidinfo(sa, p_port, block_num)) + osm_sm_signal(sa->sm, OSM_SIGNAL_GUID_PROCESS_REQUEST); + sa->dirty = TRUE; + } + + memcpy(&p_rcvd_rec->guid_info, + &((*p_port->p_physp->p_guids)[block_num * GUID_TABLE_MAX_ENTRIES]), + sizeof(ib_guid_info_t)); + + CL_PLOCK_RELEASE(sa->p_lock); + gir_respond(sa, p_madw); +} + +static void get_guidinfo(IN osm_sa_t *sa, IN osm_madw_t *p_madw, + IN osm_physp_t *p_req_physp) +{ + const ib_sa_mad_t *p_rcvd_mad; + const ib_guidinfo_record_t *p_rcvd_rec; + cl_qlist_t rec_list; + osm_gir_search_ctxt_t context; + + p_rcvd_mad = osm_madw_get_sa_mad_ptr(p_madw); + p_rcvd_rec = + (ib_guidinfo_record_t *) ib_sa_mad_get_payload_ptr(p_rcvd_mad); + + cl_qlist_init(&rec_list); + + context.p_rcvd_rec = p_rcvd_rec; + context.p_list = &rec_list; + context.comp_mask = p_rcvd_mad->comp_mask; + context.sa = sa; + context.p_req_physp = p_req_physp; + + + cl_qmap_apply_func(&sa->p_subn->node_guid_tbl, sa_gir_by_comp_mask_cb, + &context); + + CL_PLOCK_RELEASE(sa->p_lock); + + osm_sa_respond(sa, p_madw, sizeof(ib_guidinfo_record_t), &rec_list); +} + +void osm_gir_rcv_process(IN void *ctx, IN void *data) +{ + osm_sa_t *sa = ctx; + osm_madw_t *p_madw = data; + const ib_sa_mad_t *p_rcvd_mad; + osm_physp_t *p_req_physp; + osm_port_t *p_port; + const ib_guidinfo_record_t *p_rcvd_rec; + + CL_ASSERT(sa); + + OSM_LOG_ENTER(sa->p_log); + + CL_ASSERT(p_madw); + + p_rcvd_mad = osm_madw_get_sa_mad_ptr(p_madw); + + CL_ASSERT(p_rcvd_mad->attr_id == IB_MAD_ATTR_GUIDINFO_RECORD); + + switch(p_rcvd_mad->method) { + case IB_MAD_METHOD_GET: + case IB_MAD_METHOD_GETTABLE: + /* update the requester 
physical port */ + CL_PLOCK_ACQUIRE(sa->p_lock); + p_req_physp = osm_get_physp_by_mad_addr(sa->p_log, sa->p_subn, + osm_madw_get_mad_addr_ptr(p_madw)); + if (p_req_physp == NULL) { + CL_PLOCK_RELEASE(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5104: " + "Cannot find requester physical port\n"); + goto Exit; + } + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Requester port GUID 0x%" PRIx64 "\n", + cl_ntoh64(osm_physp_get_port_guid(p_req_physp))); + + get_guidinfo(sa, p_madw, p_req_physp); + goto Exit; + case IB_MAD_METHOD_SET: + case IB_MAD_METHOD_DELETE: + if (!check_mod_comp_mask(p_rcvd_mad->comp_mask)) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5106: " + "component mask = 0x%016" PRIx64 ", " + "expected comp mask = 0x%016" PRIx64 "\n", + cl_ntoh64(p_rcvd_mad->comp_mask), + CL_NTOH64(MOD_GIR_COMP_MASK)); + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_INSUF_COMPS); + goto Exit; + } + p_rcvd_rec = (ib_guidinfo_record_t *) ib_sa_mad_get_payload_ptr(p_rcvd_mad); + + /* update the requester physical port */ + CL_PLOCK_EXCL_ACQUIRE(sa->p_lock); + p_req_physp = osm_get_physp_by_mad_addr(sa->p_log, sa->p_subn, + osm_madw_get_mad_addr_ptr(p_madw)); + if (p_req_physp == NULL) { + CL_PLOCK_RELEASE(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5104: " + "Cannot find requester physical port\n"); + goto Exit; + } + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Requester port GUID 0x%" PRIx64 "\n", + cl_ntoh64(osm_physp_get_port_guid(p_req_physp))); + + p_port = osm_get_port_by_lid(sa->p_subn, p_rcvd_rec->lid); + if (!p_port) { + CL_PLOCK_RELEASE(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5117: " + "Port with LID %u not found\n", + cl_ntoh16(p_rcvd_rec->lid)); + osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_NO_RECORDS); + goto Exit; + } + if (!osm_physp_share_pkey(sa->p_log, p_req_physp, p_port->p_physp, + sa->p_subn->opt.allow_both_pkeys)) { + CL_PLOCK_RELEASE(sa->p_lock); + goto Exit; + } + + if (p_rcvd_mad->method == IB_MAD_METHOD_SET) + set_guidinfo(sa, 
p_madw, p_port, p_rcvd_rec->block_num); + else + del_guidinfo(sa, p_madw, p_port, p_rcvd_rec->block_num); + break; + default: + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5105: " + "Unsupported Method (%s) for GUIDInfoRecord request\n", + ib_get_sa_method_str(p_rcvd_mad->method)); + osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR); + break; + } + +Exit: + OSM_LOG_EXIT(sa->p_log); +} diff --git a/opensm/osm_sa_informinfo.c b/opensm/osm_sa_informinfo.c new file mode 100644 index 0000000..5f042c0 --- /dev/null +++ b/opensm/osm_sa_informinfo.c @@ -0,0 +1,642 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_infr_rcv_t. + * This object represents the InformInfo Receiver object. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_SA_INFORMINFO_C +#include +#include +#include +#include +#include +#include +#include +#include + +#define SA_IIR_RESP_SIZE SA_ITEM_RESP_SIZE(inform_rec) +#define SA_II_RESP_SIZE SA_ITEM_RESP_SIZE(inform) + +typedef struct osm_iir_search_ctxt { + const ib_inform_info_record_t *p_rcvd_rec; + ib_net64_t comp_mask; + cl_qlist_t *p_list; + ib_gid_t subscriber_gid; + ib_net16_t subscriber_enum; + osm_sa_t *sa; + osm_physp_t *p_req_physp; + ib_net64_t sm_key; +} osm_iir_search_ctxt_t; + +/********************************************************************** +o13-14.1.1: Except for Set(InformInfo) requests with Inform- +Info:LIDRangeBegin=0xFFFF, managers that support event forwarding +shall, upon receiving a Set(InformInfo), verify that the requester +originating the Set(InformInfo) and a Trap() source identified by Inform- +can access each other - can use path record to verify that. 
+**********************************************************************/ +static boolean_t validate_ports_access_rights(IN osm_sa_t * sa, + IN osm_infr_t * p_infr_rec) +{ + boolean_t valid = TRUE; + osm_physp_t *p_requester_physp; + osm_port_t *p_port; + ib_net64_t portguid; + uint16_t lid_range_begin, lid_range_end, lid; + + OSM_LOG_ENTER(sa->p_log); + + /* get the requester physp from the request address */ + p_requester_physp = osm_get_physp_by_mad_addr(sa->p_log, sa->p_subn, + &p_infr_rec->report_addr); + + if (ib_gid_is_notzero(&p_infr_rec->inform_record.inform_info.gid)) { + /* a gid is defined */ + portguid = + p_infr_rec->inform_record.inform_info.gid.unicast. + interface_id; + + p_port = osm_get_port_by_guid(sa->p_subn, portguid); + if (p_port == NULL) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4301: " + "Invalid port guid: 0x%016" PRIx64 "\n", + cl_ntoh64(portguid)); + valid = FALSE; + goto Exit; + } + + /* make sure that the requester and destination port can access + each other according to the current partitioning. */ + if (!osm_physp_share_pkey + (sa->p_log, p_port->p_physp, p_requester_physp, + sa->p_subn->opt.allow_both_pkeys)) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "port and requester don't share pkey\n"); + valid = FALSE; + goto Exit; + } + } else { + size_t lids_size; + + /* gid is zero - check if LID range is defined */ + lid_range_begin = + cl_ntoh16(p_infr_rec->inform_record.inform_info. + lid_range_begin); + /* if lid is 0xFFFF - meaning all endports managed by the manager */ + if (lid_range_begin == 0xFFFF) + goto Exit; + + lid_range_end = + cl_ntoh16(p_infr_rec->inform_record.inform_info. + lid_range_end); + + lids_size = cl_ptr_vector_get_size(&sa->p_subn->port_lid_tbl); + + /* lid_range_end is set to zero if no range desired. In this + case - just make it equal to the lid_range_begin. 
*/ + if (lid_range_end == 0) + lid_range_end = lid_range_begin; + else if (lid_range_end >= lids_size) + lid_range_end = lids_size - 1; + + if (lid_range_begin >= lids_size) { + /* requested lids are out of range */ + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4302: " + "Given LIDs (%u-%u) are out of range (%zu)\n", + lid_range_begin, lid_range_end, lids_size); + valid = FALSE; + goto Exit; + } + + /* go over all defined lids within the range and make sure that the + requester port can access them according to current partitioning. */ + for (lid = lid_range_begin; lid <= lid_range_end; lid++) { + p_port = osm_get_port_by_lid_ho(sa->p_subn, lid); + if (p_port == NULL) + continue; + + /* make sure that the requester and destination port can access + each other according to the current partitioning. */ + if (!osm_physp_share_pkey + (sa->p_log, p_port->p_physp, p_requester_physp, + sa->p_subn->opt.allow_both_pkeys)) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "port and requester don't share pkey\n"); + valid = FALSE; + goto Exit; + } + } + } + +Exit: + OSM_LOG_EXIT(sa->p_log); + return valid; +} + +static boolean_t validate_infr(IN osm_sa_t * sa, IN osm_infr_t * p_infr_rec) +{ + boolean_t valid = TRUE; + + OSM_LOG_ENTER(sa->p_log); + + valid = validate_ports_access_rights(sa, p_infr_rec); + if (!valid) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Invalid Access for InformInfo\n"); + valid = FALSE; + } + + OSM_LOG_EXIT(sa->p_log); + return valid; +} + +/********************************************************************** +o13-12.1.1: Confirm a valid request for event subscription by responding +with an InformInfo attribute that is a copy of the data in the +Set(InformInfo) request. 
+**********************************************************************/
+static void infr_rcv_respond(IN osm_sa_t * sa, IN osm_madw_t * p_madw)
+{
+	cl_qlist_t rec_list;	/* one-item list handed to osm_sa_respond() */
+	osm_sa_item_t *item;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+		"Generating successful InformInfo response\n");
+
+	item = malloc(SA_II_RESP_SIZE);
+	if (!item) {	/* allocation failed: log and return without replying */
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4303: "
+			"rec_item alloc failed\n");
+		goto Exit;
+	}
+	/* echo the InformInfo payload of the request back to the sender */
+	memcpy(&item->resp.inform,
+	       ib_sa_mad_get_payload_ptr(osm_madw_get_sa_mad_ptr(p_madw)),
+	       sizeof(ib_inform_info_t));
+
+	cl_qlist_init(&rec_list);
+	cl_qlist_insert_tail(&rec_list, &item->list_item);
+
+	osm_sa_respond(sa, p_madw, sizeof(ib_inform_info_t), &rec_list);
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+}
+/*
+ * Examine one stored subscription (p_infr) for a Get/GetTable
+ * (InformInfoRecord) query described by p_ctxt.  The record is skipped
+ * when a component selected in comp_mask differs, when its subscriber
+ * port is unknown, or when requester and subscriber share no pkey.
+ * Otherwise a copy of the record is appended to p_ctxt->p_list.
+ */
+static void sa_inform_info_rec_by_comp_mask(IN osm_sa_t * sa,
+					    IN const osm_infr_t * p_infr,
+					    osm_iir_search_ctxt_t * p_ctxt)
+{
+	ib_net64_t comp_mask;
+	ib_net64_t portguid;
+	osm_port_t *p_subscriber_port;
+	osm_physp_t *p_subscriber_physp;
+	const osm_physp_t *p_req_physp;
+	osm_sa_item_t *p_rec_item;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	comp_mask = p_ctxt->comp_mask;
+	p_req_physp = p_ctxt->p_req_physp;
+
+	if (comp_mask & IB_IIR_COMPMASK_SUBSCRIBERGID &&
+	    memcmp(&p_infr->inform_record.subscriber_gid,
+		   &p_ctxt->subscriber_gid,
+		   sizeof(p_infr->inform_record.subscriber_gid)))
+		goto Exit;
+
+	if (comp_mask & IB_IIR_COMPMASK_ENUM &&
+	    p_infr->inform_record.subscriber_enum != p_ctxt->subscriber_enum)
+		goto Exit;
+
+	/* Implement any other needed search cases */
+
+	/* Ensure pkey is shared before returning any records */
+	portguid = p_infr->inform_record.subscriber_gid.unicast.interface_id;
+	p_subscriber_port = osm_get_port_by_guid(sa->p_subn, portguid);
+	if (p_subscriber_port == NULL) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 430D: "
+			"Invalid subscriber port guid: 0x%016" PRIx64 "\n",
+			cl_ntoh64(portguid));
+		goto Exit;
+	}
+
+	/* get the subscriber InformInfo physical port */
+	p_subscriber_physp = p_subscriber_port->p_physp;
+	/* make sure that the requester and subscriber port can access each
+	   other according to the current partitioning. */
+	if (!osm_physp_share_pkey(sa->p_log, p_req_physp, p_subscriber_physp,
+				  sa->p_subn->opt.allow_both_pkeys)) {
+		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+			"requester and subscriber ports don't share pkey\n");
+		goto Exit;
+	}
+
+	p_rec_item = malloc(SA_IIR_RESP_SIZE);
+	if (p_rec_item == NULL) {	/* this record is dropped from the reply */
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 430E: "
+			"rec_item alloc failed\n");
+		goto Exit;
+	}
+
+	memcpy(&p_rec_item->resp.inform_rec, &p_infr->inform_record,
+	       sizeof(ib_inform_info_record_t));
+
+	/*
+	 * Per C15-0.2-1.16, InformInfoRecords shall always be
+	 * provided with the QPN set to 0, except for the case
+	 * of a trusted request, in which case the actual
+	 * subscriber QPN shall be returned.
+	 */
+	if (p_ctxt->sm_key == 0)
+		ib_inform_info_set_qpn(&p_rec_item->resp.inform_rec.inform_info, 0);
+
+	cl_qlist_insert_tail(p_ctxt->p_list, &p_rec_item->list_item);
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+}
+/*
+ * cl_qlist_apply_func() adapter: the list item is the stored
+ * subscription and context is the osm_iir_search_ctxt_t of the query.
+ */
+static void sa_inform_info_rec_by_comp_mask_cb(IN cl_list_item_t * p_list_item,
+					       IN void *context)
+{
+	const osm_infr_t *p_infr = (osm_infr_t *) p_list_item;
+	osm_iir_search_ctxt_t *p_ctxt = context;
+
+	sa_inform_info_rec_by_comp_mask(p_ctxt->sa, p_infr, p_ctxt);
+}
+
+/**********************************************************************
+Received a Get(InformInfoRecord) or GetTable(InformInfoRecord) MAD
+
+Collects, under the SA lock, the stored subscriptions that match the
+query and that the requester may see, then returns them through
+osm_sa_respond().  If the requester port cannot be resolved the lock is
+released and the function returns without sending a reply.
+**********************************************************************/
+static void infr_rcv_process_get_method(osm_sa_t * sa, IN osm_madw_t * p_madw)
+{
+	char gid_str[INET6_ADDRSTRLEN];
+	ib_sa_mad_t *p_rcvd_mad;
+	const ib_inform_info_record_t *p_rcvd_rec;
+	cl_qlist_t rec_list;
+	osm_iir_search_ctxt_t context;
+	osm_physp_t *p_req_physp;
+	osm_sa_item_t *item;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	CL_ASSERT(p_madw);
+	p_rcvd_mad = osm_madw_get_sa_mad_ptr(p_madw);
+	p_rcvd_rec =
+	    (ib_inform_info_record_t *) ib_sa_mad_get_payload_ptr(p_rcvd_mad);
+
+	cl_plock_acquire(sa->p_lock);
+
+	/* update the requester physical port */
+	p_req_physp = osm_get_physp_by_mad_addr(sa->p_log, sa->p_subn,
+						osm_madw_get_mad_addr_ptr
+						(p_madw));
+	if (p_req_physp == NULL) {
+		cl_plock_release(sa->p_lock);
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4309: "
+			"Cannot find requester physical port\n");
+		goto Exit;
+	}
+
+	if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_DEBUG)) {
+		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+			"Requester port GUID 0x%" PRIx64 "\n",
+			cl_ntoh64(osm_physp_get_port_guid(p_req_physp)));
+		osm_dump_inform_info_record_v2(sa->p_log, p_rcvd_rec,
+					       FILE_ID, OSM_LOG_DEBUG);
+	}
+
+	cl_qlist_init(&rec_list);
+	/* describe the query for the per-subscription callback */
+	context.p_rcvd_rec = p_rcvd_rec;
+	context.p_list = &rec_list;
+	context.comp_mask = p_rcvd_mad->comp_mask;
+	context.subscriber_gid = p_rcvd_rec->subscriber_gid;
+	context.subscriber_enum = p_rcvd_rec->subscriber_enum;
+	context.sa = sa;
+	context.p_req_physp = p_req_physp;
+	context.sm_key = p_rcvd_mad->sm_key;
+
+	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+		"Query Subscriber GID:%s(%02X) Enum:0x%X(%02X)\n",
+		inet_ntop(AF_INET6, p_rcvd_rec->subscriber_gid.raw,
+			  gid_str, sizeof gid_str),
+		(p_rcvd_mad->comp_mask & IB_IIR_COMPMASK_SUBSCRIBERGID) != 0,
+		cl_ntoh16(p_rcvd_rec->subscriber_enum),
+		(p_rcvd_mad->comp_mask & IB_IIR_COMPMASK_ENUM) != 0);
+
+	cl_qlist_apply_func(&sa->p_subn->sa_infr_list,
+			    sa_inform_info_rec_by_comp_mask_cb, &context);
+
+	/* clear reserved and pad fields in InformInfoRecord */
+	for (item = (osm_sa_item_t *) cl_qlist_head(&rec_list);
+	     item != (osm_sa_item_t *) cl_qlist_end(&rec_list);
+	     item = (osm_sa_item_t *) cl_qlist_next(&item->list_item)) {
+		memset(item->resp.inform_rec.reserved, 0, sizeof(item->resp.inform_rec.reserved));
+		memset(item->resp.inform_rec.pad, 0, sizeof(item->resp.inform_rec.pad));
+	}
+
+	cl_plock_release(sa->p_lock);
+	/* the reply is built after the lock has been released */
+	osm_sa_respond(sa, p_madw, sizeof(ib_inform_info_record_t), &rec_list);
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+}
+
+/*********************************************************************
+Received a Set(InformInfo) MAD
+
+Subscribes or unsubscribes the requester.  The subscription database is
+read and updated while the SA lock taken with cl_plock_excl_acquire() is
+held.  Every error path releases the lock first and then logs and
+answers with osm_sa_send_error(); the success response is sent after
+the lock has been released.
+**********************************************************************/
+static void infr_rcv_process_set_method(osm_sa_t * sa, IN osm_madw_t * p_madw)
+{
+	ib_sa_mad_t *p_sa_mad;
+	ib_inform_info_t *p_recvd_inform_info;
+	osm_infr_t inform_info_rec;	/* actual inform record to be stored for reports */
+	osm_infr_t *p_infr;	/* existing subscription matching the request, or NULL */
+	ib_net32_t qpn;		/* only used for the UnSubscribe debug message */
+	uint8_t resp_time_val;
+	ib_api_status_t res;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	CL_ASSERT(p_madw);
+
+	p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
+	p_recvd_inform_info =
+	    (ib_inform_info_t *) ib_sa_mad_get_payload_ptr(p_sa_mad);
+
+#if 0
+	if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_DEBUG))
+		osm_dump_inform_info_v2(sa->p_log, p_recvd_inform_info,
+					FILE_ID, OSM_LOG_DEBUG);
+#endif
+
+	/* Grab the lock */
+	cl_plock_excl_acquire(sa->p_lock);
+
+	/* define the inform record */
+	inform_info_rec.inform_record.inform_info = *p_recvd_inform_info;
+
+	/* following C13-32.1.2 Tbl 120: we only copy the source address vector */
+	inform_info_rec.report_addr = p_madw->mad_addr;
+
+	/* we will need to know the mad srvc to send back through */
+	inform_info_rec.h_bind = p_madw->h_bind;
+	inform_info_rec.sa = sa;
+
+	/* update the subscriber GID according to mad address */
+	res = osm_get_gid_by_mad_addr(sa->p_log, sa->p_subn, &p_madw->mad_addr,
+				      &inform_info_rec.inform_record.
+				      subscriber_gid);
+	if (res != IB_SUCCESS) {
+		cl_plock_release(sa->p_lock);
+
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4308: "
+			"Subscribe Request from unknown LID: %u\n",
+			cl_ntoh16(p_madw->mad_addr.dest_lid));
+		osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID);
+		goto Exit;
+	}
+
+	/* HACK: enum is always 0 (currently) */
+	inform_info_rec.inform_record.subscriber_enum = 0;
+
+	/* Subscribe values above 1 are undefined */
+	if (p_recvd_inform_info->subscribe > 1) {
+		cl_plock_release(sa->p_lock);
+
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 430A: "
+			"Invalid subscribe: %d\n",
+			p_recvd_inform_info->subscribe);
+		osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID);
+		goto Exit;
+	}
+
+	/*
+	 * Per C15-0.2-1.16, SubnAdmSet(InformInfo) subscriptions for
+	 * SM security traps shall be provided only if they come from a
+	 * trusted source.
+	 */
+	if ((p_sa_mad->sm_key == 0) && p_recvd_inform_info->is_generic &&
+	    ((cl_ntoh16(p_recvd_inform_info->g_or_v.generic.trap_num) >= SM_BAD_MKEY_TRAP) &&
+	     (cl_ntoh16(p_recvd_inform_info->g_or_v.generic.trap_num) <= SM_BAD_SWITCH_PKEY_TRAP))) {
+		cl_plock_release(sa->p_lock);
+		/* NOTE(review): the ERR 430B prefix below has no colon, unlike the other ERR prefixes */
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 430B "
+			"Request for security trap from non-trusted requester: "
+			"Given SM_Key:0x%016" PRIx64 "\n",
+			cl_ntoh64(p_sa_mad->sm_key));
+		osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID);
+		goto Exit;
+	}
+
+	/*
+	 * MODIFICATIONS DONE ON INCOMING REQUEST:
+	 *
+	 * QPN:
+	 * Internally we keep the QPN field of the InformInfo updated
+	 * so we can simply compare it in the record - when finding such.
+	 */
+	if (p_recvd_inform_info->subscribe) {
+		ib_inform_info_set_qpn(&inform_info_rec.inform_record.
+				       inform_info,
+				       inform_info_rec.report_addr.addr_type.
+				       gsi.remote_qp);
+
+		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+			"Subscribe Request with QPN: 0x%06X\n",
+			cl_ntoh32(inform_info_rec.report_addr.addr_type.gsi.
+				  remote_qp));
+	} else {
+		ib_inform_info_get_qpn_resp_time(p_recvd_inform_info->g_or_v.
+						 generic.qpn_resp_time_val,
+						 &qpn, &resp_time_val);
+
+		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+			"UnSubscribe Request with QPN: 0x%06X\n",
+			cl_ntoh32(qpn));
+	}
+
+	/* If record exists with matching InformInfo */
+	p_infr = osm_infr_get_by_rec(sa->p_subn, sa->p_log, &inform_info_rec);
+
+	/* check to see if the request was for subscribe */
+	if (p_recvd_inform_info->subscribe) {
+		/* validate the request for a new or update InformInfo */
+		if (validate_infr(sa, &inform_info_rec) != TRUE) {
+			cl_plock_release(sa->p_lock);
+
+			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4305: "
+				"Failed to validate a new inform object\n");
+
+			/* o13-13.1.1: we need to set the subscribe bit to 0 */
+			p_recvd_inform_info->subscribe = 0;
+			osm_sa_send_error(sa, p_madw,
+					  IB_SA_MAD_STATUS_REQ_INVALID);
+			goto Exit;
+		}
+
+		/* ok - we can try and create a new entry */
+		if (p_infr == NULL) {
+			/* Create the instance of the osm_infr_t object */
+			p_infr = osm_infr_new(&inform_info_rec);
+			if (p_infr == NULL) {
+				cl_plock_release(sa->p_lock);
+
+				OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4306: "
+					"Failed to create a new inform object\n");
+
+				/* o13-13.1.1: we need to set the subscribe bit to 0 */
+				p_recvd_inform_info->subscribe = 0;
+				osm_sa_send_error(sa, p_madw,
+						  IB_SA_MAD_STATUS_NO_RESOURCES);
+				goto Exit;
+			}
+
+			OSM_LOG(sa->p_log, OSM_LOG_VERBOSE,
+				"Adding event subscription for port 0x%" PRIx64 "\n",
+				cl_ntoh64(inform_info_rec.inform_record.subscriber_gid.unicast.interface_id));
+
+			/* Add this new osm_infr_t object to subnet object */
+			osm_infr_insert_to_db(sa->p_subn, sa->p_log, p_infr);
+		} else
+			/* Update the old instance of the osm_infr_t object */
+			p_infr->inform_record = inform_info_rec.inform_record;
+		/* We got an UnSubscribe request */
+	} else if (p_infr == NULL) {
+		cl_plock_release(sa->p_lock);
+
+		/* No Such Item - So Error */
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4307: "
+			"Failed to UnSubscribe to non existing inform object\n");
+
+		/* o13-13.1.1: we need to set the subscribe bit to 0 */
+		p_recvd_inform_info->subscribe = 0;
+		osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID);
+		goto Exit;
+	} else {
+		/* Delete this object from the subnet list of informs */
+		OSM_LOG(sa->p_log, OSM_LOG_VERBOSE,
+			"Removing event subscription for port 0x%" PRIx64 "\n",
+			cl_ntoh64(inform_info_rec.inform_record.subscriber_gid.unicast.interface_id));
+		osm_infr_remove_from_db(sa->p_subn, sa->p_log, p_infr);
+	}
+
+	cl_plock_release(sa->p_lock);
+
+	/* send the success response */
+	infr_rcv_respond(sa, p_madw);
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+}
+/*
+ * Handler for InformInfo MADs.  context is the osm_sa_t and data the
+ * received osm_madw_t.  Only the Set method is processed; any other
+ * method is answered with IB_MAD_STATUS_UNSUP_METHOD_ATTR.
+ */
+void osm_infr_rcv_process(IN void *context, IN void *data)
+{
+	osm_sa_t *sa = context;
+	osm_madw_t *p_madw = data;
+	ib_sa_mad_t *p_sa_mad;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	CL_ASSERT(p_madw);
+
+	p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
+
+	CL_ASSERT(p_sa_mad->attr_id == IB_MAD_ATTR_INFORM_INFO);
+
+	if (p_sa_mad->method != IB_MAD_METHOD_SET) {
+		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+			"Unsupported Method (%s) for InformInfo\n",
+			ib_get_sa_method_str(p_sa_mad->method));
+		osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR);
+		goto Exit;
+	}
+
+	infr_rcv_process_set_method(sa, p_madw);
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+}
+/*
+ * Handler for InformInfoRecord MADs.  context is the osm_sa_t and data
+ * the received osm_madw_t.  Get and GetTable are processed; any other
+ * method is answered with IB_MAD_STATUS_UNSUP_METHOD_ATTR.
+ */
+void osm_infir_rcv_process(IN void *context, IN void *data)
+{
+	osm_sa_t *sa = context;
+	osm_madw_t *p_madw = data;
+	ib_sa_mad_t *p_sa_mad;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	CL_ASSERT(p_madw);
+
+	p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
+
+	CL_ASSERT(p_sa_mad->attr_id == IB_MAD_ATTR_INFORM_INFO_RECORD);
+
+	if (p_sa_mad->method != IB_MAD_METHOD_GET &&
+	    p_sa_mad->method != IB_MAD_METHOD_GETTABLE) {
+		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+			"Unsupported Method (%s) for InformInfoRecord\n",
+			ib_get_sa_method_str(p_sa_mad->method));
+		osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR);
+		goto Exit;
+	}
+
+	infr_rcv_process_get_method(sa, p_madw);
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+}
diff --git a/opensm/osm_sa_lft_record.c b/opensm/osm_sa_lft_record.c
new file mode 100644
index 0000000..8f4749e
--- /dev/null
+++ b/opensm/osm_sa_lft_record.c
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2005,2008 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * Abstract:
+ * Implementation of osm_lftr_rcv_t.
+ * This object represents the LinearForwardingTable Receiver object.
+ * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_SA_LFT_RECORD_C +#include +#include +#include +#include +#include + +#define SA_LFTR_RESP_SIZE SA_ITEM_RESP_SIZE(lft_rec) + +typedef struct osm_lftr_search_ctxt { + const ib_lft_record_t *p_rcvd_rec; + ib_net64_t comp_mask; + cl_qlist_t *p_list; + osm_sa_t *sa; + const osm_physp_t *p_req_physp; +} osm_lftr_search_ctxt_t; + +static ib_api_status_t lftr_rcv_new_lftr(IN osm_sa_t * sa, + IN const osm_switch_t * p_sw, + IN cl_qlist_t * p_list, + IN ib_net16_t lid, IN uint16_t block) +{ + osm_sa_item_t *p_rec_item; + ib_api_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(sa->p_log); + + p_rec_item = malloc(SA_LFTR_RESP_SIZE); + if (p_rec_item == NULL) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4402: " + "rec_item alloc failed\n"); + status = IB_INSUFFICIENT_RESOURCES; + goto Exit; + } + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "New LinearForwardingTable: sw 0x%016" PRIx64 + "\n\t\t\t\tblock 0x%02X lid %u\n", + cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)), + block, cl_ntoh16(lid)); + + memset(p_rec_item, 0, SA_LFTR_RESP_SIZE); + + p_rec_item->resp.lft_rec.lid = lid; + p_rec_item->resp.lft_rec.block_num = cl_hton16(block); + + /* copy the lft block */ + osm_switch_get_lft_block(p_sw, block, p_rec_item->resp.lft_rec.lft); + + cl_qlist_insert_tail(p_list, &p_rec_item->list_item); + +Exit: + OSM_LOG_EXIT(sa->p_log); + return status; +} + +static void lftr_rcv_by_comp_mask(IN cl_map_item_t * p_map_item, IN void *cxt) +{ + const osm_lftr_search_ctxt_t *p_ctxt = cxt; + const osm_switch_t *p_sw = (osm_switch_t *) p_map_item; + const ib_lft_record_t *const p_rcvd_rec = p_ctxt->p_rcvd_rec; + osm_sa_t *sa = p_ctxt->sa; + ib_net64_t const comp_mask = p_ctxt->comp_mask; + const osm_physp_t *const p_req_physp = p_ctxt->p_req_physp; + osm_port_t *p_port; + uint16_t min_lid_ho, max_lid_ho; + 
uint16_t min_block, max_block, block; + const osm_physp_t *p_physp; + + /* In switches, the port guid is the node guid. */ + p_port = osm_get_port_by_guid(sa->p_subn, + p_sw->p_node->node_info.port_guid); + if (!p_port) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4405: " + "Failed to find Port by Node Guid:0x%016" PRIx64 + "\n", cl_ntoh64(p_sw->p_node->node_info.node_guid)); + return; + } + + /* check that the requester physp and the current physp are under + the same partition. */ + p_physp = p_port->p_physp; + if (!p_physp) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4406: " + "Failed to find default physical Port by Node Guid:0x%016" + PRIx64 "\n", + cl_ntoh64(p_sw->p_node->node_info.node_guid)); + return; + } + if (!osm_physp_share_pkey(sa->p_log, p_req_physp, + p_physp, sa->p_subn->opt.allow_both_pkeys)) + return; + + /* get the port 0 of the switch */ + osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho); + + /* compare the lids - if required */ + if (comp_mask & IB_LFTR_COMPMASK_LID) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Comparing lid:%u to port lid range: %u .. %u\n", + cl_ntoh16(p_rcvd_rec->lid), min_lid_ho, max_lid_ho); + /* ok we are ready for range check */ + if (min_lid_ho > cl_ntoh16(p_rcvd_rec->lid) || + max_lid_ho < cl_ntoh16(p_rcvd_rec->lid)) + return; + } + + /* now we need to decide which blocks to output */ + max_block = osm_switch_get_max_block_id_in_use(p_sw); + if (comp_mask & IB_LFTR_COMPMASK_BLOCK) { + min_block = cl_ntoh16(p_rcvd_rec->block_num); + if (min_block > max_block) + return; + max_block = min_block; + } else /* use as many blocks as "in use" */ + min_block = 0; + + /* so we can add these blocks one by one ... 
*/ + for (block = min_block; block <= max_block; block++) + lftr_rcv_new_lftr(sa, p_sw, p_ctxt->p_list, + osm_port_get_base_lid(p_port), block); +} + +void osm_lftr_rcv_process(IN void *ctx, IN void *data) +{ + osm_sa_t *sa = ctx; + osm_madw_t *p_madw = data; + const ib_sa_mad_t *p_rcvd_mad; + const ib_lft_record_t *p_rcvd_rec; + cl_qlist_t rec_list; + osm_lftr_search_ctxt_t context; + osm_physp_t *p_req_physp; + + CL_ASSERT(sa); + + OSM_LOG_ENTER(sa->p_log); + + CL_ASSERT(p_madw); + + p_rcvd_mad = osm_madw_get_sa_mad_ptr(p_madw); + p_rcvd_rec = (ib_lft_record_t *) ib_sa_mad_get_payload_ptr(p_rcvd_mad); + + CL_ASSERT(p_rcvd_mad->attr_id == IB_MAD_ATTR_LFT_RECORD); + + /* we only support SubnAdmGet and SubnAdmGetTable methods */ + if (p_rcvd_mad->method != IB_MAD_METHOD_GET && + p_rcvd_mad->method != IB_MAD_METHOD_GETTABLE) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4408: " + "Unsupported Method (%s) for LFTRecord request\n", + ib_get_sa_method_str(p_rcvd_mad->method)); + osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR); + goto Exit; + } + + cl_plock_acquire(sa->p_lock); + + /* update the requester physical port */ + p_req_physp = osm_get_physp_by_mad_addr(sa->p_log, sa->p_subn, + osm_madw_get_mad_addr_ptr + (p_madw)); + if (p_req_physp == NULL) { + cl_plock_release(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4407: " + "Cannot find requester physical port\n"); + goto Exit; + } + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Requester port GUID 0x%" PRIx64 "\n", + cl_ntoh64(osm_physp_get_port_guid(p_req_physp))); + + cl_qlist_init(&rec_list); + + context.p_rcvd_rec = p_rcvd_rec; + context.p_list = &rec_list; + context.comp_mask = p_rcvd_mad->comp_mask; + context.sa = sa; + context.p_req_physp = p_req_physp; + + /* Go over all switches */ + cl_qmap_apply_func(&sa->p_subn->sw_guid_tbl, lftr_rcv_by_comp_mask, + &context); + + cl_plock_release(sa->p_lock); + + osm_sa_respond(sa, p_madw, sizeof(ib_lft_record_t), &rec_list); + +Exit: + 
OSM_LOG_EXIT(sa->p_log); +} diff --git a/opensm/osm_sa_link_record.c b/opensm/osm_sa_link_record.c new file mode 100644 index 0000000..5ab3d74 --- /dev/null +++ b/opensm/osm_sa_link_record.c @@ -0,0 +1,485 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2007 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_lr_rcv_t. + * This object represents the LinkRecord Receiver object. 
+ * This object is part of the opensm family of objects.
+ */
+
+#if HAVE_CONFIG_H
+# include
+#endif /* HAVE_CONFIG_H */
+
+#include
+#include
+#include
+#include
+#include
+#define FILE_ID OSM_FILE_SA_LINK_RECORD_C
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define SA_LR_RESP_SIZE SA_ITEM_RESP_SIZE(link_rec)
+/*
+ * Allocate a zeroed link record for the given LIDs and port numbers and
+ * append it to p_list.  On allocation failure the error is logged and
+ * nothing is appended.
+ */
+static void lr_rcv_build_physp_link(IN osm_sa_t * sa, IN ib_net16_t from_lid,
+				    IN ib_net16_t to_lid, IN uint8_t from_port,
+				    IN uint8_t to_port, IN cl_qlist_t * p_list)
+{
+	osm_sa_item_t *p_lr_item;
+
+	p_lr_item = malloc(SA_LR_RESP_SIZE);
+	if (p_lr_item == NULL) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1801: "
+			"Unable to acquire link record\n"
+			"\t\t\t\tFrom port %u\n" "\t\t\t\tTo port %u\n"
+			"\t\t\t\tFrom lid %u\n" "\t\t\t\tTo lid %u\n",
+			from_port, to_port,
+			cl_ntoh16(from_lid), cl_ntoh16(to_lid));
+		return;
+	}
+	memset(p_lr_item, 0, SA_LR_RESP_SIZE);
+
+	p_lr_item->resp.link_rec.from_port_num = from_port;
+	p_lr_item->resp.link_rec.to_port_num = to_port;
+	p_lr_item->resp.link_rec.to_lid = to_lid;
+	p_lr_item->resp.link_rec.from_lid = from_lid;
+
+	cl_qlist_insert_tail(p_list, &p_lr_item->list_item);
+}
+/*
+ * Base LID of p_physp.  For a port on a switch node the value is read
+ * from physical port 0 of that node instead of from p_physp itself.
+ */
+static ib_net16_t get_base_lid(IN const osm_physp_t * p_physp)
+{
+	if (p_physp->p_node->node_info.node_type == IB_NODE_TYPE_SWITCH)
+		p_physp = osm_node_get_physp_ptr(p_physp->p_node, 0);
+	return osm_physp_get_base_lid(p_physp);
+}
+/*
+ * Emit a link record for the link between p_src_physp and p_dest_physp
+ * if it passes every filter.  Either end may be NULL, in which case it
+ * is taken from the remote side of the other end.  Nothing is emitted
+ * when the two ends are not connected, when source, destination and
+ * requester do not pairwise share a pkey, or when a port number or LID
+ * selected in comp_mask does not match p_lr.
+ */
+static void lr_rcv_get_physp_link(IN osm_sa_t * sa,
+				  IN const ib_link_record_t * p_lr,
+				  IN const osm_physp_t * p_src_physp,
+				  IN const osm_physp_t * p_dest_physp,
+				  IN const ib_net64_t comp_mask,
+				  IN cl_qlist_t * p_list,
+				  IN const osm_physp_t * p_req_physp)
+{
+	uint8_t src_port_num;
+	uint8_t dest_port_num;
+	ib_net16_t from_base_lid;
+	ib_net16_t to_base_lid;
+	ib_net16_t lmc_mask;	/* network-order mask clearing the low lmc bits of a LID */
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	/*
+	   If only one end of the link is specified, determine
+	   the other side.
+	 */
+	if (p_src_physp) {
+		if (p_dest_physp) {
+			/*
+			   Ensure the two physp's are actually connected.
+			   If not, bail out.
+			 */
+			if (osm_physp_get_remote(p_src_physp) != p_dest_physp)
+				goto Exit;
+		} else {
+			p_dest_physp = osm_physp_get_remote(p_src_physp);
+			if (p_dest_physp == NULL)
+				goto Exit;
+		}
+	} else {
+		if (p_dest_physp) {
+			p_src_physp = osm_physp_get_remote(p_dest_physp);
+			if (p_src_physp == NULL)
+				goto Exit;
+		} else
+			goto Exit;	/* no physp's, so nothing to do */
+	}
+
+	/* Check that the p_src_physp, p_dest_physp and p_req_physp
+	   all share a pkey (doesn't have to be the same p_key). */
+	if (!osm_physp_share_pkey(sa->p_log, p_src_physp, p_dest_physp,
+				  sa->p_subn->opt.allow_both_pkeys)) {
+		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+			"Source and Dest PhysPorts do not share PKey\n");
+		goto Exit;
+	}
+	if (!osm_physp_share_pkey(sa->p_log, p_src_physp, p_req_physp,
+				  sa->p_subn->opt.allow_both_pkeys)) {
+		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+			"Source and Requester PhysPorts do not share PKey\n");
+		goto Exit;
+	}
+	if (!osm_physp_share_pkey(sa->p_log, p_req_physp, p_dest_physp,
+				  sa->p_subn->opt.allow_both_pkeys)) {
+		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+			"Requester and Dest PhysPorts do not share PKey\n");
+		goto Exit;
+	}
+
+	src_port_num = osm_physp_get_port_num(p_src_physp);
+	dest_port_num = osm_physp_get_port_num(p_dest_physp);
+	/* port-number filters, applied only when selected in comp_mask */
+	if (comp_mask & IB_LR_COMPMASK_FROM_PORT)
+		if (src_port_num != p_lr->from_port_num)
+			goto Exit;
+
+	if (comp_mask & IB_LR_COMPMASK_TO_PORT)
+		if (dest_port_num != p_lr->to_port_num)
+			goto Exit;
+
+	from_base_lid = get_base_lid(p_src_physp);
+	to_base_lid = get_base_lid(p_dest_physp);
+
+	lmc_mask = ~((1 << sa->p_subn->opt.lmc) - 1);
+	lmc_mask = cl_hton16(lmc_mask);
+	/* LID filters compare base LIDs: the low lmc bits of the query are masked off */
+	if (comp_mask & IB_LR_COMPMASK_FROM_LID)
+		if (from_base_lid != (p_lr->from_lid & lmc_mask))
+			goto Exit;
+
+	if (comp_mask & IB_LR_COMPMASK_TO_LID)
+		if (to_base_lid != (p_lr->to_lid & lmc_mask))
+			goto Exit;
+
+	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Acquiring link record\n"
+		"\t\t\t\tsrc port 0x%" PRIx64 " (port %u)"
+		", dest port 0x%" PRIx64 " (port %u)\n",
+		cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), src_port_num,
+		cl_ntoh64(osm_physp_get_port_guid(p_dest_physp)),
+		dest_port_num);
+
+	lr_rcv_build_physp_link(sa, from_base_lid, to_base_lid, src_port_num,
+				dest_port_num, p_list);
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+}
+/*
+ * Enumerate candidate physical-port pairs and pass each one to
+ * lr_rcv_get_physp_link(), which applies the filters.  Four cases:
+ * both ports given, only the source, only the destination, or neither
+ * (every node in the subnet is walked).  The enumerating loops start at
+ * port number 1; a port number taken from p_lr is used only if it is
+ * below the physp table size of the node.
+ */
+static void lr_rcv_get_port_links(IN osm_sa_t * sa,
+				  IN const ib_link_record_t * p_lr,
+				  IN const osm_port_t * p_src_port,
+				  IN const osm_port_t * p_dest_port,
+				  IN const ib_net64_t comp_mask,
+				  IN cl_qlist_t * p_list,
+				  IN const osm_physp_t * p_req_physp)
+{
+	const osm_physp_t *p_src_physp;
+	const osm_physp_t *p_dest_physp;
+	const cl_qmap_t *p_node_tbl;
+	osm_node_t *p_node;
+	uint8_t port_num;
+	uint8_t num_ports;
+	uint8_t dest_num_ports;
+	uint8_t dest_port_num;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	if (p_src_port) {
+		if (p_dest_port) {
+			/*
+			   Build an LR for every link connected between both ports.
+			   The inner function will discard physp combinations
+			   that do not actually connect. Don't bother screening
+			   for that here.
+			 */
+			num_ports = osm_node_get_num_physp(p_src_port->p_node);
+			dest_num_ports =
+			    osm_node_get_num_physp(p_dest_port->p_node);
+			for (port_num = 1; port_num < num_ports; port_num++) {
+				p_src_physp =
+				    osm_node_get_physp_ptr(p_src_port->p_node,
+							   port_num);
+				for (dest_port_num = 1;
+				     dest_port_num < dest_num_ports;
+				     dest_port_num++) {
+					p_dest_physp =
+					    osm_node_get_physp_ptr(p_dest_port->
+								   p_node,
+								   dest_port_num);
+					/* both physical ports should be with data */
+					if (p_src_physp && p_dest_physp)
+						lr_rcv_get_physp_link
+						    (sa, p_lr, p_src_physp,
+						     p_dest_physp, comp_mask,
+						     p_list, p_req_physp);
+				}
+			}
+		} else {
+			/*
+			   Build an LR for every link connected from the source port.
+			 */
+			if (comp_mask & IB_LR_COMPMASK_FROM_PORT) {
+				port_num = p_lr->from_port_num;
+				/* If the port number is out of the range of the p_src_port, then
+				   this couldn't be a relevant record. */
+				if (port_num <
+				    p_src_port->p_node->physp_tbl_size) {
+					p_src_physp =
+					    osm_node_get_physp_ptr(p_src_port->
+								   p_node,
+								   port_num);
+					if (p_src_physp)
+						lr_rcv_get_physp_link
+						    (sa, p_lr, p_src_physp,
+						     NULL, comp_mask, p_list,
+						     p_req_physp);
+				}
+			} else {
+				num_ports =
+				    osm_node_get_num_physp(p_src_port->p_node);
+				for (port_num = 1; port_num < num_ports;
+				     port_num++) {
+					p_src_physp =
+					    osm_node_get_physp_ptr(p_src_port->
+								   p_node,
+								   port_num);
+					if (p_src_physp)
+						lr_rcv_get_physp_link
+						    (sa, p_lr, p_src_physp,
+						     NULL, comp_mask, p_list,
+						     p_req_physp);
+				}
+			}
+		}
+	} else {
+		if (p_dest_port) {
+			/*
+			   Build an LR for every link connected to the dest port.
+			 */
+			if (comp_mask & IB_LR_COMPMASK_TO_PORT) {
+				port_num = p_lr->to_port_num;
+				/* If the port number is out of the range of the p_dest_port, then
+				   this couldn't be a relevant record. */
+				if (port_num <
+				    p_dest_port->p_node->physp_tbl_size) {
+					p_dest_physp =
+					    osm_node_get_physp_ptr(p_dest_port->
+								   p_node,
+								   port_num);
+					if (p_dest_physp)
+						lr_rcv_get_physp_link
+						    (sa, p_lr, NULL,
+						     p_dest_physp, comp_mask,
+						     p_list, p_req_physp);
+				}
+			} else {
+				num_ports =
+				    osm_node_get_num_physp(p_dest_port->p_node);
+				for (port_num = 1; port_num < num_ports;
+				     port_num++) {
+					p_dest_physp =
+					    osm_node_get_physp_ptr(p_dest_port->
+								   p_node,
+								   port_num);
+					if (p_dest_physp)
+						lr_rcv_get_physp_link
+						    (sa, p_lr, NULL,
+						     p_dest_physp, comp_mask,
+						     p_list, p_req_physp);
+				}
+			}
+		} else {
+			/*
+			   Process the world (recurse once back into this function).
+			 */
+			p_node_tbl = &sa->p_subn->node_guid_tbl;
+			p_node = (osm_node_t *) cl_qmap_head(p_node_tbl);
+
+			while (p_node != (osm_node_t *) cl_qmap_end(p_node_tbl)) {
+				num_ports = osm_node_get_num_physp(p_node);
+				for (port_num = 1; port_num < num_ports;
+				     port_num++) {
+					p_src_physp =
+					    osm_node_get_physp_ptr(p_node,
+								   port_num);
+					if (p_src_physp)
+						lr_rcv_get_physp_link
+						    (sa, p_lr, p_src_physp,
+						     NULL, comp_mask, p_list,
+						     p_req_physp);
+				}
+				p_node = (osm_node_t *) cl_qmap_next(&p_node->
+								     map_item);
+			}
+		}
+	}
+
+	OSM_LOG_EXIT(sa->p_log);
+}
+
+/**********************************************************************
+ Returns the SA status to return to the client.
+ **********************************************************************/
+static ib_net16_t lr_rcv_get_end_points(IN osm_sa_t * sa,
+					IN const osm_madw_t * p_madw,
+					OUT const osm_port_t ** pp_src_port,
+					OUT const osm_port_t ** pp_dest_port)
+{
+	const ib_link_record_t *p_lr;
+	const ib_sa_mad_t *p_sa_mad;
+	ib_net64_t comp_mask;
+	ib_net16_t sa_status = IB_SA_MAD_STATUS_SUCCESS;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	/*
+	   Determine what fields are valid and then get a pointer
+	   to the source and destination port objects, if possible.
+	 */
+	p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
+	p_lr = (ib_link_record_t *) ib_sa_mad_get_payload_ptr(p_sa_mad);
+
+	comp_mask = p_sa_mad->comp_mask;
+	*pp_src_port = NULL;
+	*pp_dest_port = NULL;
+
+	if (comp_mask & IB_LR_COMPMASK_FROM_LID) {
+		*pp_src_port = osm_get_port_by_lid(sa->p_subn, p_lr->from_lid);
+		if (!*pp_src_port) {
+			/*
+			   This 'error' is the client's fault (bad lid) so
+			   don't enter it as an error in our own log.
+			   Return an error response to the client.
+ */ + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, + "No source port with LID %u\n", + cl_ntoh16(p_lr->from_lid)); + + sa_status = IB_SA_MAD_STATUS_NO_RECORDS; + goto Exit; + } + } + + if (comp_mask & IB_LR_COMPMASK_TO_LID) { + *pp_dest_port = osm_get_port_by_lid(sa->p_subn, p_lr->to_lid); + if (!*pp_dest_port) { + /* + This 'error' is the client's fault (bad lid) so + don't enter it as an error in our own log. + Return an error response to the client. + */ + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, + "No dest port with LID %u\n", + cl_ntoh16(p_lr->to_lid)); + + sa_status = IB_SA_MAD_STATUS_NO_RECORDS; + goto Exit; + } + } + +Exit: + OSM_LOG_EXIT(sa->p_log); + return sa_status; +} + +void osm_lr_rcv_process(IN void *context, IN void *data) +{ + osm_sa_t *sa = context; + osm_madw_t *p_madw = data; + const ib_link_record_t *p_lr; + const ib_sa_mad_t *p_sa_mad; + const osm_port_t *p_src_port; + const osm_port_t *p_dest_port; + cl_qlist_t lr_list; + ib_net16_t status; + osm_physp_t *p_req_physp; + + OSM_LOG_ENTER(sa->p_log); + + CL_ASSERT(p_madw); + + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); + p_lr = ib_sa_mad_get_payload_ptr(p_sa_mad); + + CL_ASSERT(p_sa_mad->attr_id == IB_MAD_ATTR_LINK_RECORD); + + /* we only support SubnAdmGet and SubnAdmGetTable methods */ + if (p_sa_mad->method != IB_MAD_METHOD_GET && + p_sa_mad->method != IB_MAD_METHOD_GETTABLE) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1804: " + "Unsupported Method (%s) for LinkRecord request\n", + ib_get_sa_method_str(p_sa_mad->method)); + osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR); + goto Exit; + } + + cl_plock_acquire(sa->p_lock); + + /* update the requester physical port */ + p_req_physp = osm_get_physp_by_mad_addr(sa->p_log, sa->p_subn, + osm_madw_get_mad_addr_ptr + (p_madw)); + if (p_req_physp == NULL) { + cl_plock_release(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1805: " + "Cannot find requester physical port\n"); + goto Exit; + } + + if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, 
OSM_LOG_DEBUG)) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Requester port GUID 0x%" PRIx64 "\n", + cl_ntoh64(osm_physp_get_port_guid(p_req_physp))); + osm_dump_link_record_v2(sa->p_log, p_lr, FILE_ID, OSM_LOG_DEBUG); + } + + cl_qlist_init(&lr_list); + + /* + Most SA functions (including this one) are read-only on the + subnet object, so we grab the lock non-exclusively. + */ + status = lr_rcv_get_end_points(sa, p_madw, &p_src_port, &p_dest_port); + + if (status == IB_SA_MAD_STATUS_SUCCESS) + lr_rcv_get_port_links(sa, p_lr, p_src_port, p_dest_port, + p_sa_mad->comp_mask, &lr_list, + p_req_physp); + + cl_plock_release(sa->p_lock); + + osm_sa_respond(sa, p_madw, sizeof(ib_link_record_t), &lr_list); + +Exit: + OSM_LOG_EXIT(sa->p_log); +} diff --git a/opensm/osm_sa_mad_ctrl.c b/opensm/osm_sa_mad_ctrl.c new file mode 100644 index 0000000..dbab4a9 --- /dev/null +++ b/opensm/osm_sa_mad_ctrl.c @@ -0,0 +1,622 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_sa_mad_ctrl_t. + * This object is part of the SA object. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#define FILE_ID OSM_FILE_SA_MAD_CTRL_C +#include +#include +#include +#include +#include +#include + +/****f* opensm: SA/sa_mad_ctrl_disp_done_callback + * NAME + * sa_mad_ctrl_disp_done_callback + * + * DESCRIPTION + * This function is the Dispatcher callback that indicates + * a received MAD has been processed by the recipient. + * + * SYNOPSIS + */ +static void sa_mad_ctrl_disp_done_callback(IN void *context, IN void *p_data) +{ + osm_sa_mad_ctrl_t *p_ctrl = context; + osm_madw_t *p_madw = p_data; + + OSM_LOG_ENTER(p_ctrl->p_log); + + CL_ASSERT(p_madw); + /* + Return the MAD & wrapper to the pool. + */ + osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); + OSM_LOG_EXIT(p_ctrl->p_log); +} + +/************/ + +/****f* opensm: SA/sa_mad_ctrl_process + * NAME + * sa_mad_ctrl_process + * + * DESCRIPTION + * This function handles known methods for received MADs. 
+ * + * SYNOPSIS + */ +static void sa_mad_ctrl_process(IN osm_sa_mad_ctrl_t * p_ctrl, + IN osm_madw_t * p_madw, + IN boolean_t is_get_request) +{ + ib_sa_mad_t *p_sa_mad; + cl_disp_reg_handle_t h_disp; + cl_status_t status; + cl_disp_msgid_t msg_id = CL_DISP_MSGID_NONE; + uint64_t last_dispatched_msg_queue_time_msec; + uint32_t num_messages; + + OSM_LOG_ENTER(p_ctrl->p_log); + + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); + + /* + If the dispatcher is showing us that it is overloaded + there is no point in placing the request in. We should instead + provide immediate response - IB_RESOURCE_BUSY + But how do we know? + The dispatcher reports back the number of outstanding messages and + the time the last message stayed in the queue. + HACK: Actually, we cannot send a mad from within the receive callback; + thus - we will just drop it. + */ + + if (!is_get_request && p_ctrl->p_set_disp) { + h_disp = p_ctrl->h_set_disp; + goto SKIP_QUEUE_CHECK; + } + + h_disp = p_ctrl->h_disp; + cl_disp_get_queue_status(h_disp, &num_messages, + &last_dispatched_msg_queue_time_msec); + + if (num_messages > 1 && p_ctrl->p_subn->opt.max_msg_fifo_timeout && + last_dispatched_msg_queue_time_msec > + p_ctrl->p_subn->opt.max_msg_fifo_timeout) { + OSM_LOG(p_ctrl->p_log, OSM_LOG_INFO, + /* "Responding BUSY status since the dispatcher is already" */ + "Dropping MAD since the dispatcher is already" + " overloaded with %u messages and queue time of:" + "%" PRIu64 "[msec]\n", + num_messages, last_dispatched_msg_queue_time_msec); + + /* send a busy response */ + /* osm_sa_send_error(p_ctrl->p_resp, p_madw, IB_RESOURCE_BUSY); */ + + /* return the request to the pool */ + osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); + + goto Exit; + } + +SKIP_QUEUE_CHECK: + /* + Note that attr_id (like the rest of the MAD) is in + network byte order. 
+ */ + switch (p_sa_mad->attr_id) { + case IB_MAD_ATTR_CLASS_PORT_INFO: + msg_id = OSM_MSG_MAD_CLASS_PORT_INFO; + break; + + case IB_MAD_ATTR_NODE_RECORD: + msg_id = OSM_MSG_MAD_NODE_RECORD; + break; + + case IB_MAD_ATTR_PORTINFO_RECORD: + msg_id = OSM_MSG_MAD_PORTINFO_RECORD; + break; + + case IB_MAD_ATTR_LINK_RECORD: + msg_id = OSM_MSG_MAD_LINK_RECORD; + break; + + case IB_MAD_ATTR_SMINFO_RECORD: + msg_id = OSM_MSG_MAD_SMINFO_RECORD; + break; + + case IB_MAD_ATTR_SERVICE_RECORD: + msg_id = OSM_MSG_MAD_SERVICE_RECORD; + break; + + case IB_MAD_ATTR_PATH_RECORD: + msg_id = OSM_MSG_MAD_PATH_RECORD; + break; + + case IB_MAD_ATTR_MCMEMBER_RECORD: + msg_id = OSM_MSG_MAD_MCMEMBER_RECORD; + break; + + case IB_MAD_ATTR_INFORM_INFO: + msg_id = OSM_MSG_MAD_INFORM_INFO; + break; + + case IB_MAD_ATTR_VLARB_RECORD: + msg_id = OSM_MSG_MAD_VL_ARB_RECORD; + break; + + case IB_MAD_ATTR_SLVL_RECORD: + msg_id = OSM_MSG_MAD_SLVL_TBL_RECORD; + break; + + case IB_MAD_ATTR_PKEY_TBL_RECORD: + msg_id = OSM_MSG_MAD_PKEY_TBL_RECORD; + break; + + case IB_MAD_ATTR_LFT_RECORD: + msg_id = OSM_MSG_MAD_LFT_RECORD; + break; + + case IB_MAD_ATTR_GUIDINFO_RECORD: + msg_id = OSM_MSG_MAD_GUIDINFO_RECORD; + break; + + case IB_MAD_ATTR_INFORM_INFO_RECORD: + msg_id = OSM_MSG_MAD_INFORM_INFO_RECORD; + break; + + case IB_MAD_ATTR_SWITCH_INFO_RECORD: + msg_id = OSM_MSG_MAD_SWITCH_INFO_RECORD; + break; + + case IB_MAD_ATTR_MFT_RECORD: + msg_id = OSM_MSG_MAD_MFT_RECORD; + break; + +#if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) + case IB_MAD_ATTR_MULTIPATH_RECORD: + msg_id = OSM_MSG_MAD_MULTIPATH_RECORD; + break; +#endif + + default: + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A01: " + "Unsupported attribute 0x%X (%s)\n", + cl_ntoh16(p_sa_mad->attr_id), + ib_get_sa_attr_str(p_sa_mad->attr_id)); + osm_dump_sa_mad_v2(p_ctrl->p_log, p_sa_mad, FILE_ID, OSM_LOG_ERROR); + } + + if (msg_id != CL_DISP_MSGID_NONE) { + /* + Post this MAD to the dispatcher for asynchronous + processing by the 
appropriate controller. + */ + + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, + "Posting Dispatcher message %s\n", + osm_get_disp_msg_str(msg_id)); + + status = cl_disp_post(h_disp, msg_id, p_madw, + sa_mad_ctrl_disp_done_callback, p_ctrl); + + if (status != CL_SUCCESS) { + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A02: " + "Dispatcher post message failed (%s) for attribute 0x%X (%s)\n", + CL_STATUS_MSG(status), + cl_ntoh16(p_sa_mad->attr_id), + ib_get_sa_attr_str(p_sa_mad->attr_id)); + + osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); + goto Exit; + } + } else { + /* + There is an unknown MAD attribute type for which there is + no recipient. Simply retire the MAD here. + */ + cl_atomic_inc(&p_ctrl->p_stats->sa_mads_rcvd_unknown); + osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); + } + +Exit: + OSM_LOG_EXIT(p_ctrl->p_log); +} + +/* + * PARAMETERS + * + * RETURN VALUES + * + * NOTES + * + * SEE ALSO + *********/ + +/****f* opensm: SA/sa_mad_ctrl_rcv_callback + * NAME + * sa_mad_ctrl_rcv_callback + * + * DESCRIPTION + * This is the callback from the transport layer for received MADs. + * + * SYNOPSIS + */ +static void sa_mad_ctrl_rcv_callback(IN osm_madw_t * p_madw, IN void *context, + IN osm_madw_t * p_req_madw) +{ + osm_sa_mad_ctrl_t *p_ctrl = context; + ib_sa_mad_t *p_sa_mad; + boolean_t is_get_request = FALSE; + + OSM_LOG_ENTER(p_ctrl->p_log); + + CL_ASSERT(p_madw); + + /* + A MAD was received from the wire, possibly in response to a request. + */ + cl_atomic_inc(&p_ctrl->p_stats->sa_mads_rcvd); + + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, + "%u SA MADs received\n", p_ctrl->p_stats->sa_mads_rcvd); + + /* + * C15-0.1.3 requires not responding to any MAD if the SM is + * not in active state! + * We will not respond if the sm_state is not MASTER, or if the + * first_time_master_sweep flag (of the subnet) is TRUE - this + * flag indicates that the master still didn't finish its first + * sweep, so the subnet is not up and stable yet. 
+ */ + if (p_ctrl->p_subn->sm_state != IB_SMINFO_STATE_MASTER) { + cl_atomic_inc(&p_ctrl->p_stats->sa_mads_ignored); + OSM_LOG(p_ctrl->p_log, OSM_LOG_VERBOSE, + "Received SA MAD while SM not MASTER. MAD ignored\n"); + osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); + goto Exit; + } + if (p_ctrl->p_subn->first_time_master_sweep == TRUE) { + cl_atomic_inc(&p_ctrl->p_stats->sa_mads_ignored); + OSM_LOG(p_ctrl->p_log, OSM_LOG_VERBOSE, + "Received SA MAD while SM in first sweep. MAD ignored\n"); + osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); + goto Exit; + } + + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); + + if (OSM_LOG_IS_ACTIVE_V2(p_ctrl->p_log, OSM_LOG_FRAMES)) + osm_dump_sa_mad_v2(p_ctrl->p_log, p_sa_mad, FILE_ID, OSM_LOG_FRAMES); + + /* + * C15-0.1.5 - Table 185: SA Header - p884 + * SM_key should be either 0 or match the current SM_Key + * otherwise discard the MAD. + */ + if (p_sa_mad->sm_key != 0 && + p_sa_mad->sm_key != p_ctrl->p_subn->opt.sa_key) { + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A04: " + "Non-Zero MAD SM_Key: 0x%" PRIx64 " != SM_Key: 0x%" + PRIx64 "; SA MAD ignored for method 0x%X attribute 0x%X (%s)\n", + cl_ntoh64(p_sa_mad->sm_key), + cl_ntoh64(p_ctrl->p_subn->opt.sa_key), + p_sa_mad->method, cl_ntoh16(p_sa_mad->attr_id), + ib_get_sa_attr_str(p_sa_mad->attr_id)); + osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); + goto Exit; + } + + switch (p_sa_mad->method) { + case IB_MAD_METHOD_REPORT_RESP: + /* we do not really do anything with report responses - + just retire the transaction */ + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, + "Received Report Response. 
Retiring the transaction\n"); + + if (p_req_madw) + osm_mad_pool_put(p_ctrl->p_mad_pool, p_req_madw); + osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); + + break; + + case IB_MAD_METHOD_GET: + case IB_MAD_METHOD_GETTABLE: +#if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) + case IB_MAD_METHOD_GETMULTI: +#endif + is_get_request = TRUE; + case IB_MAD_METHOD_SET: + case IB_MAD_METHOD_DELETE: + /* if we are closing down simply do nothing */ + if (osm_exit_flag) + osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); + else + sa_mad_ctrl_process(p_ctrl, p_madw, is_get_request); + break; + + default: + cl_atomic_inc(&p_ctrl->p_stats->sa_mads_rcvd_unknown); + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A05: " + "Unsupported method = 0x%X\n", p_sa_mad->method); + osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); + goto Exit; + } + +Exit: + OSM_LOG_EXIT(p_ctrl->p_log); +} + +/* + * PARAMETERS + * + * RETURN VALUES + * + * NOTES + * + * SEE ALSO + *********/ + +/****f* opensm: SA/sa_mad_ctrl_send_err_callback + * NAME + * sa_mad_ctrl_send_err_callback + * + * DESCRIPTION + * This is the callback from the transport layer for send errors + * on MADs that were expecting a response. + * + * SYNOPSIS + */ +static void sa_mad_ctrl_send_err_callback(IN void *context, + IN osm_madw_t * p_madw) +{ + osm_sa_mad_ctrl_t *p_ctrl = context; + cl_status_t status; + + OSM_LOG_ENTER(p_ctrl->p_log); + + /* + We should never be here since the SA never originates a request. 
+ Unless we generated a Report(Notice) + */ + + CL_ASSERT(p_madw); + + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A06: " + "MAD completed in error (%s): " + "%s(%s), attr_mod 0x%x, LID %u, TID 0x%" PRIx64 "\n", + ib_get_err_str(p_madw->status), + ib_get_sa_method_str(p_madw->p_mad->method), + ib_get_sa_attr_str(p_madw->p_mad->attr_id), + cl_ntoh32(p_madw->p_mad->attr_mod), + cl_ntoh16(p_madw->mad_addr.dest_lid), + cl_ntoh64(p_madw->p_mad->trans_id)); + + osm_dump_sa_mad_v2(p_ctrl->p_log, osm_madw_get_sa_mad_ptr(p_madw), + FILE_ID, OSM_LOG_ERROR); + + /* + An error occurred. No response was received to a request MAD. + Retire the original request MAD. + */ + + if (osm_madw_get_err_msg(p_madw) != CL_DISP_MSGID_NONE) { + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, + "Posting Dispatcher message %s\n", + osm_get_disp_msg_str(osm_madw_get_err_msg(p_madw))); + + if (p_ctrl->p_set_disp && + (p_madw->p_mad->method == IB_MAD_METHOD_SET || + p_madw->p_mad->method == IB_MAD_METHOD_DELETE)) + status = cl_disp_post(p_ctrl->h_set_disp, + osm_madw_get_err_msg(p_madw), + p_madw, + sa_mad_ctrl_disp_done_callback, + p_ctrl); + else + status = cl_disp_post(p_ctrl->h_disp, + osm_madw_get_err_msg(p_madw), + p_madw, + sa_mad_ctrl_disp_done_callback, + p_ctrl); + if (status != CL_SUCCESS) { + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A07: " + "Dispatcher post message failed (%s)\n", + CL_STATUS_MSG(status)); + } + } else /* No error message was provided, just retire the MAD. 
*/ + osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); + + OSM_LOG_EXIT(p_ctrl->p_log); +} + +/* + * PARAMETERS + * + * RETURN VALUES + * + * NOTES + * + * SEE ALSO + *********/ + +void osm_sa_mad_ctrl_construct(IN osm_sa_mad_ctrl_t * p_ctrl) +{ + CL_ASSERT(p_ctrl); + memset(p_ctrl, 0, sizeof(*p_ctrl)); + p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; + p_ctrl->h_set_disp = CL_DISP_INVALID_HANDLE; +} + +void osm_sa_mad_ctrl_destroy(IN osm_sa_mad_ctrl_t * p_ctrl) +{ + CL_ASSERT(p_ctrl); + cl_disp_unregister(p_ctrl->h_disp); + cl_disp_unregister(p_ctrl->h_set_disp); +} + +ib_api_status_t osm_sa_mad_ctrl_init(IN osm_sa_mad_ctrl_t * p_ctrl, + IN osm_sa_t * sa, + IN osm_mad_pool_t * p_mad_pool, + IN osm_vendor_t * p_vendor, + IN osm_subn_t * p_subn, + IN osm_log_t * p_log, + IN osm_stats_t * p_stats, + IN cl_dispatcher_t * p_disp, + IN cl_dispatcher_t * p_set_disp) +{ + ib_api_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(p_log); + + osm_sa_mad_ctrl_construct(p_ctrl); + + p_ctrl->sa = sa; + p_ctrl->p_log = p_log; + p_ctrl->p_disp = p_disp; + p_ctrl->p_set_disp = p_set_disp; + p_ctrl->p_mad_pool = p_mad_pool; + p_ctrl->p_vendor = p_vendor; + p_ctrl->p_stats = p_stats; + p_ctrl->p_subn = p_subn; + + p_ctrl->h_disp = cl_disp_register(p_disp, CL_DISP_MSGID_NONE, NULL, + p_ctrl); + + if (p_ctrl->h_disp == CL_DISP_INVALID_HANDLE) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 1A08: " + "Dispatcher registration failed\n"); + status = IB_INSUFFICIENT_RESOURCES; + goto Exit; + } + + if (p_set_disp) { + p_ctrl->h_set_disp = + cl_disp_register(p_set_disp, CL_DISP_MSGID_NONE, NULL, + p_ctrl); + + if (p_ctrl->h_set_disp == CL_DISP_INVALID_HANDLE) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 1A0A: " + "SA set dispatcher registration failed\n"); + status = IB_INSUFFICIENT_RESOURCES; + goto Exit; + } + } + +Exit: + OSM_LOG_EXIT(p_log); + return status; +} + +ib_api_status_t osm_sa_mad_ctrl_bind(IN osm_sa_mad_ctrl_t * p_ctrl, + IN ib_net64_t port_guid) +{ + osm_bind_info_t bind_info; + ib_api_status_t status 
= IB_SUCCESS; + + OSM_LOG_ENTER(p_ctrl->p_log); + + if (p_ctrl->h_bind != OSM_BIND_INVALID_HANDLE) { + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A09: " + "Multiple binds not allowed\n"); + status = IB_ERROR; + goto Exit; + } + + bind_info.class_version = 2; + bind_info.is_responder = TRUE; + bind_info.is_report_processor = FALSE; + bind_info.is_trap_processor = FALSE; + bind_info.mad_class = IB_MCLASS_SUBN_ADM; + bind_info.port_guid = port_guid; + bind_info.recv_q_size = OSM_SM_DEFAULT_QP1_RCV_SIZE; + bind_info.send_q_size = OSM_SM_DEFAULT_QP1_SEND_SIZE; + bind_info.timeout = p_ctrl->sa->p_subn->opt.transaction_timeout; + bind_info.retries = p_ctrl->sa->p_subn->opt.transaction_retries; + + OSM_LOG(p_ctrl->p_log, OSM_LOG_VERBOSE, + "Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); + + p_ctrl->h_bind = osm_vendor_bind(p_ctrl->p_vendor, &bind_info, + p_ctrl->p_mad_pool, + sa_mad_ctrl_rcv_callback, + sa_mad_ctrl_send_err_callback, p_ctrl); + + if (p_ctrl->h_bind == OSM_BIND_INVALID_HANDLE) { + status = IB_ERROR; + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A10: " + "Vendor specific bind failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + +Exit: + OSM_LOG_EXIT(p_ctrl->p_log); + return status; +} + +ib_api_status_t osm_sa_mad_ctrl_unbind(IN osm_sa_mad_ctrl_t * p_ctrl) +{ + ib_api_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(p_ctrl->p_log); + + if (p_ctrl->h_bind == OSM_BIND_INVALID_HANDLE) { + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A11: " + "No previous bind\n"); + status = IB_ERROR; + goto Exit; + } + + osm_vendor_unbind(p_ctrl->h_bind); +Exit: + OSM_LOG_EXIT(p_ctrl->p_log); + return status; +} diff --git a/opensm/osm_sa_mcmember_record.c b/opensm/osm_sa_mcmember_record.c new file mode 100644 index 0000000..3a75a97 --- /dev/null +++ b/opensm/osm_sa_mcmember_record.c @@ -0,0 +1,1806 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved. 
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_mcmr_recv_t. + * This object represents the MCMemberRecord Receiver object. + * This object is part of the opensm family of objects. 
+ */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_SA_MCMEMBER_RECORD_C +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SA_MCM_RESP_SIZE SA_ITEM_RESP_SIZE(mc_rec) + +#define JOIN_MC_COMP_MASK (IB_MCR_COMPMASK_MGID | \ + IB_MCR_COMPMASK_PORT_GID | \ + IB_MCR_COMPMASK_JOIN_STATE) + +#define REQUIRED_MC_CREATE_COMP_MASK (IB_MCR_COMPMASK_MGID | \ + IB_MCR_COMPMASK_PORT_GID | \ + IB_MCR_COMPMASK_JOIN_STATE | \ + IB_MCR_COMPMASK_QKEY | \ + IB_MCR_COMPMASK_TCLASS | \ + IB_MCR_COMPMASK_PKEY | \ + IB_MCR_COMPMASK_FLOW | \ + IB_MCR_COMPMASK_SL) + +#define IPV4_BCAST_MGID_PREFIX CL_HTON64(0xff10401b00000000ULL) +#define IPV4_BCAST_MGID_INT_ID CL_HTON64(0x00000000ffffffffULL) + +static int validate_other_comp_fields(osm_log_t * p_log, ib_net64_t comp_mask, + const ib_member_rec_t * p_mcmr, + osm_mgrp_t * p_mgrp, + osm_log_level_t log_level); + +/********************************************************************* + Copy certain fields between two mcmember records + used during the process of join request to copy data from the mgrp + to the port record. +**********************************************************************/ +static void copy_from_create_mc_rec(IN ib_member_rec_t * dest, + IN const ib_member_rec_t * src) +{ + dest->qkey = src->qkey; + dest->mlid = src->mlid; + dest->tclass = src->tclass; + dest->pkey = src->pkey; + dest->sl_flow_hop = src->sl_flow_hop; + dest->scope_state = ib_member_set_scope_state(src->scope_state >> 4, + dest->scope_state & 0x0F); + dest->mtu = src->mtu; + dest->rate = src->rate; + dest->pkt_life = src->pkt_life; +} + +/********************************************************************* + Return mlid to the pool of free mlids. + But this implementation is not a pool - it simply scans through + the MGRP database for unused mlids... 
+*********************************************************************/ +static void free_mlid(IN osm_sa_t * sa, IN uint16_t mlid) +{ + UNUSED_PARAM(sa); + UNUSED_PARAM(mlid); +} + +/********************************************************************* + Get a new unused mlid by scanning all the used ones in the subnet. +**********************************************************************/ +/* Special Case IPv6 Solicited Node Multicast (SNM) addresses */ +/* 0xff1Z601bXXXX0000 : 0x00000001ffYYYYYY */ +/* Where Z is the scope, XXXX is the P_Key, and + * YYYYYY is the last 24 bits of the port guid */ +#define PREFIX_MASK CL_HTON64(0xff10ffff0000ffffULL) +#define PREFIX_SIGNATURE CL_HTON64(0xff10601b00000000ULL) +#define INT_ID_MASK CL_HTON64(0xfffffff1ff000000ULL) +#define INT_ID_SIGNATURE CL_HTON64(0x00000001ff000000ULL) + +static int compare_ipv6_snm_mgids(const void *m1, const void *m2) +{ + return memcmp(m1, m2, sizeof(ib_gid_t) - 3); +} + +static ib_net16_t find_ipv6_snm_mlid(osm_subn_t *subn, ib_gid_t *mgid) +{ + osm_mgrp_t *m = (osm_mgrp_t *)cl_fmap_match(&subn->mgrp_mgid_tbl, mgid, + compare_ipv6_snm_mgids); + if (m != (osm_mgrp_t *)cl_fmap_end(&subn->mgrp_mgid_tbl)) + return m->mlid; + return 0; +} + +static unsigned match_ipv6_snm_mgid(ib_gid_t * mgid) +{ + return ((mgid->unicast.prefix & PREFIX_MASK) == PREFIX_SIGNATURE && + (mgid->unicast.interface_id & INT_ID_MASK) == INT_ID_SIGNATURE); +} + +static ib_net16_t get_new_mlid(osm_sa_t * sa, ib_member_rec_t * mcmr) +{ + osm_subn_t *p_subn = sa->p_subn; + ib_net16_t requested_mlid = mcmr->mlid; + unsigned i, max; + + if (requested_mlid && cl_ntoh16(requested_mlid) >= IB_LID_MCAST_START_HO + && cl_ntoh16(requested_mlid) <= p_subn->max_mcast_lid_ho + && !osm_get_mbox_by_mlid(p_subn, requested_mlid)) + return requested_mlid; + + if (sa->p_subn->opt.consolidate_ipv6_snm_req + && match_ipv6_snm_mgid(&mcmr->mgid) + && (requested_mlid = find_ipv6_snm_mlid(sa->p_subn, &mcmr->mgid))) { + char str[INET6_ADDRSTRLEN]; 
+ OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Special Case Solicited Node Mcast Join for MGID %s\n", + inet_ntop(AF_INET6, mcmr->mgid.raw, str, sizeof(str))); + return requested_mlid; + } + + max = p_subn->max_mcast_lid_ho - IB_LID_MCAST_START_HO + 1; + for (i = 0; i < max; i++) + if (!sa->p_subn->mboxes[i]) + return cl_hton16(i + IB_LID_MCAST_START_HO); + + return 0; +} + +static inline boolean_t check_join_comp_mask(ib_net64_t comp_mask) +{ + return ((comp_mask & JOIN_MC_COMP_MASK) == JOIN_MC_COMP_MASK); +} + +static boolean_t check_create_comp_mask(ib_net64_t comp_mask, + ib_member_rec_t * p_recvd_mcmember_rec) +{ + return ((comp_mask & REQUIRED_MC_CREATE_COMP_MASK) == + REQUIRED_MC_CREATE_COMP_MASK); +} + +/********************************************************************** + Generate the response MAD +**********************************************************************/ +static void mcmr_rcv_respond(IN osm_sa_t * sa, IN osm_madw_t * p_madw, + IN ib_member_rec_t * p_mcmember_rec) +{ + cl_qlist_t rec_list; + osm_sa_item_t *item; + + OSM_LOG_ENTER(sa->p_log); + + item = malloc(SA_MCM_RESP_SIZE); + if (!item) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B16: " + "rec_item alloc failed\n"); + goto Exit; + } + + item->resp.mc_rec = *p_mcmember_rec; + + /* Fill in the mtu, rate, and packet lifetime selectors */ + item->resp.mc_rec.mtu &= 0x3f; + item->resp.mc_rec.mtu |= IB_PATH_SELECTOR_EXACTLY << 6; + item->resp.mc_rec.rate &= 0x3f; + item->resp.mc_rec.rate |= IB_PATH_SELECTOR_EXACTLY << 6; + item->resp.mc_rec.pkt_life &= 0x3f; + item->resp.mc_rec.pkt_life |= IB_PATH_SELECTOR_EXACTLY << 6; + + cl_qlist_init(&rec_list); + cl_qlist_insert_tail(&rec_list, &item->list_item); + + osm_sa_respond(sa, p_madw, sizeof(ib_member_rec_t), &rec_list); + +Exit: + OSM_LOG_EXIT(sa->p_log); +} + +/********************************************************************* + In joining an existing group, or when querying the mc groups, + we make sure the following components provided match: 
MTU and RATE + HACK: Currently we ignore the PKT_LIFETIME field. +**********************************************************************/ +static boolean_t validate_more_comp_fields(osm_log_t * p_log, + const osm_mgrp_t * p_mgrp, + const ib_member_rec_t * + p_recvd_mcmember_rec, + ib_net64_t comp_mask) +{ + uint8_t mtu_sel; + uint8_t mtu_required; + uint8_t mtu_mgrp; + uint8_t rate_sel; + uint8_t rate_required; + uint8_t rate_mgrp; + + if (comp_mask & IB_MCR_COMPMASK_MTU_SEL) { + mtu_sel = (uint8_t) (p_recvd_mcmember_rec->mtu >> 6); + /* Clearing last 2 bits */ + mtu_required = (uint8_t) (p_recvd_mcmember_rec->mtu & 0x3F); + mtu_mgrp = (uint8_t) (p_mgrp->mcmember_rec.mtu & 0x3F); + switch (mtu_sel) { + case 0: /* Greater than MTU specified */ + if (mtu_mgrp <= mtu_required) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Requested mcast group has MTU %x, " + "which is not greater than %x\n", + mtu_mgrp, mtu_required); + return FALSE; + } + break; + case 1: /* Less than MTU specified */ + if (mtu_mgrp >= mtu_required) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Requested mcast group has MTU %x, " + "which is not less than %x\n", + mtu_mgrp, mtu_required); + return FALSE; + } + break; + case 2: /* Exactly MTU specified */ + if (mtu_mgrp != mtu_required) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Requested mcast group has MTU %x, " + "which is not equal to %x\n", + mtu_mgrp, mtu_required); + return FALSE; + } + break; + default: + break; + } + } + + /* what about rate ? 
*/ + if (comp_mask & IB_MCR_COMPMASK_RATE_SEL) { + rate_sel = (uint8_t) (p_recvd_mcmember_rec->rate >> 6); + /* Clearing last 2 bits */ + rate_required = (uint8_t) (p_recvd_mcmember_rec->rate & 0x3F); + rate_mgrp = (uint8_t) (p_mgrp->mcmember_rec.rate & 0x3F); + switch (rate_sel) { + case 0: /* Greater than RATE specified */ + if (ib_path_compare_rates(rate_mgrp, rate_required) <= 0) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Requested mcast group has RATE %x, " + "which is not greater than %x\n", + rate_mgrp, rate_required); + return FALSE; + } + break; + case 1: /* Less than RATE specified */ + if (ib_path_compare_rates(rate_mgrp, rate_required) >= 0) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Requested mcast group has RATE %x, " + "which is not less than %x\n", + rate_mgrp, rate_required); + return FALSE; + } + break; + case 2: /* Exactly RATE specified */ + if (ib_path_compare_rates(rate_mgrp, rate_required)) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Requested mcast group has RATE %x, " + "which is not equal to %x\n", + rate_mgrp, rate_required); + return FALSE; + } + break; + default: + break; + } + } + + return TRUE; +} + +/********************************************************************* + In joining an existing group, we make sure the following components + are physically realizable: MTU and RATE +**********************************************************************/ +static boolean_t validate_port_caps(osm_log_t * p_log, + const osm_mgrp_t * p_mgrp, + const osm_physp_t * p_physp) +{ + const ib_port_info_t *p_pi; + uint8_t mtu_required; + uint8_t mtu_mgrp; + uint8_t rate_required; + uint8_t rate_mgrp; + int extended; + + mtu_required = ib_port_info_get_neighbor_mtu(&p_physp->port_info); + mtu_mgrp = (uint8_t) (p_mgrp->mcmember_rec.mtu & 0x3F); + if (mtu_required < mtu_mgrp) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Port's MTU %x is less than %x\n", + mtu_required, mtu_mgrp); + return FALSE; + } + + p_pi = &p_physp->port_info; + extended = p_pi->capability_mask & 
IB_PORT_CAP_HAS_EXT_SPEEDS; + rate_required = ib_port_info_compute_rate(p_pi, extended); + rate_mgrp = (uint8_t) (p_mgrp->mcmember_rec.rate & 0x3F); + if (ib_path_compare_rates(rate_required, rate_mgrp) < 0) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Port's RATE %x is less than %x\n", + rate_required, rate_mgrp); + return FALSE; + } + + return TRUE; +} + +/********************************************************************** + * o15-0.2.1: If SA supports UD multicast, then if SA receives a SubnAdmSet() + * or SubnAdmDelete() method that would modify an existing + * MCMemberRecord, SA shall not modify that MCMemberRecord and shall + * return an error status of ERR_REQ_INVALID in response in the + * following cases: + * 1. Saved MCMemberRecord.ProxyJoin is not set and the request is + * issued by a requester with a GID other than the Port-GID. + * 2. Saved MCMemberRecord.ProxyJoin is set and the requester is not + * part of the partition for that MCMemberRecord. + **********************************************************************/ +static boolean_t validate_modify(IN osm_sa_t * sa, IN osm_mgrp_t * p_mgrp, + IN osm_mad_addr_t * p_mad_addr, + IN ib_member_rec_t * p_recvd_mcmember_rec, + OUT osm_mcm_alias_guid_t ** pp_mcm_alias_guid) +{ + ib_net64_t portguid; + ib_gid_t request_gid; + osm_physp_t *p_request_physp; + ib_api_status_t res; + + portguid = p_recvd_mcmember_rec->port_gid.unicast.interface_id; + + *pp_mcm_alias_guid = osm_mgrp_get_mcm_alias_guid(p_mgrp, portguid); + + /* o15-0.2.1: If this is a new port being added - nothing to check */ + if (!*pp_mcm_alias_guid) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "This is a new port in the MC group\n"); + return TRUE; + } + + /* We validate the request according the the proxy_join. + Check if the proxy_join is set or not */ + if ((*pp_mcm_alias_guid)->proxy_join == FALSE) { + /* The proxy_join is not set. 
Modifying can by done only + if the requester GID == PortGID */ + res = osm_get_gid_by_mad_addr(sa->p_log, sa->p_subn, p_mad_addr, + &request_gid); + if (res != IB_SUCCESS) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Could not find port for requested address\n"); + return FALSE; + } + + if ((*pp_mcm_alias_guid)->p_base_mcm_port->port->guid != + request_gid.unicast.interface_id || + (*pp_mcm_alias_guid)->port_gid.unicast.prefix != + request_gid.unicast.prefix) { + ib_gid_t base_port_gid; + char gid_str[INET6_ADDRSTRLEN]; + char gid_str2[INET6_ADDRSTRLEN]; + + base_port_gid.unicast.prefix = (*pp_mcm_alias_guid)->port_gid.unicast.prefix; + base_port_gid.unicast.interface_id = (*pp_mcm_alias_guid)->p_base_mcm_port->port->guid; + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "No ProxyJoin but different ports: stored:" + "%s request:%s\n", + inet_ntop(AF_INET6, base_port_gid.raw, gid_str, + sizeof gid_str), + inet_ntop(AF_INET6, request_gid.raw, gid_str2, + sizeof gid_str2)); + return FALSE; + } + } else { + /* The proxy_join is set. 
Modification allowed only if the + requester is part of the partition for this MCMemberRecord */ + p_request_physp = osm_get_physp_by_mad_addr(sa->p_log, + sa->p_subn, + p_mad_addr); + if (p_request_physp == NULL) + return FALSE; + + if (!osm_physp_has_pkey(sa->p_log, p_mgrp->mcmember_rec.pkey, + p_request_physp)) { + /* the request port is not part of the partition for this mgrp */ + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Requesting port 0x%016" PRIx64 " has no PKey 0x%04x\n", + cl_ntoh64(p_request_physp->port_guid), + cl_ntoh16(p_mgrp->mcmember_rec.pkey)); + return FALSE; + } + } + return TRUE; +} + +/* + * Check legality of the requested MGID DELETE + * o15-0.1.14 = VALID DELETE: + * To be a valid delete MAD needs to: + * 1 the MADs PortGID and MGID components match the PortGID and + * MGID of a stored MCMemberRecord; + * 2 the MADs JoinState component contains at least one bit set to 1 + * in the same position as that stored MCMemberRecords JoinState + * has a bit set to 1, + * i.e., the logical AND of the two JoinState components + * is not all zeros; + * 3 the MADs JoinState component does not have some bits set + * which are not set in the stored MCMemberRecords JoinState component; + * 4 either the stored MCMemberRecord:ProxyJoin is reset (0), and the + * MADs source is the stored PortGID; + * OR + * the stored MCMemberRecord:ProxyJoin is set (1), (see o15- + * 0.1.2:); and the MADs source is a member of the partition indicated + * by the stored MCMemberRecord:P_Key. 
+ */ +static boolean_t validate_delete(IN osm_sa_t * sa, IN osm_mgrp_t * p_mgrp, + IN osm_mad_addr_t * p_mad_addr, + IN ib_member_rec_t * p_recvd_mcmember_rec, + OUT osm_mcm_alias_guid_t ** pp_mcm_alias_guid) +{ + ib_net64_t portguid; + + portguid = p_recvd_mcmember_rec->port_gid.unicast.interface_id; + + *pp_mcm_alias_guid = osm_mgrp_get_mcm_alias_guid(p_mgrp, portguid); + + /* 1 */ + if (!*pp_mcm_alias_guid) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Failed to find the port in the MC group\n"); + return FALSE; + } + + /* 2 */ + if (!(p_recvd_mcmember_rec->scope_state & 0x0F & + (*pp_mcm_alias_guid)->scope_state)) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Could not find any matching bits in the stored " + "and requested JoinStates\n"); + return FALSE; + } + + /* 3 */ + if (((p_recvd_mcmember_rec->scope_state & 0x0F) | + (0x0F & (*pp_mcm_alias_guid)->scope_state)) != + (0x0F & (*pp_mcm_alias_guid)->scope_state)) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Some bits in the request JoinState (0x%X) are not " + "set in the stored port (0x%X)\n", + (p_recvd_mcmember_rec->scope_state & 0x0F), + (0x0F & (*pp_mcm_alias_guid)->scope_state)); + return FALSE; + } + + /* 4 */ + /* Validate according the the proxy_join (o15-0.1.2) */ + if (validate_modify(sa, p_mgrp, p_mad_addr, p_recvd_mcmember_rec, + pp_mcm_alias_guid) == FALSE) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "proxy_join validation failure\n"); + return FALSE; + } + return TRUE; +} + +/* + * Check legality of the requested MGID (note this does not hold for SA + * created MGIDs) + * + * Implementing o15-0.1.5: + * A multicast GID is considered to be invalid if: + * 1. It does not comply with the rules as specified in 4.1.1 "GID Usage and + * Properties" on page 145: + * + * 14) The multicast GID format is (bytes are comma sep): + * 0xff,,,,

,

,

,

,

,

,

,

,,,, + * Fl 4bit = Flags (b) + * Sc 4bit = Scope (c) + * Si 16bit = Signature (2) + * P 64bit = GID Prefix (should be a subnet unique ID - normally Subnet Prefix) + * Id 32bit = Unique ID in the Subnet (might be MLID or P_Key ?) + * + * a) 8-bits of 11111111 at the start of the GID identifies this as being a + * multicast GID. + * b) Flags is a set of four 1-bit flags: 000T with three flags reserved + * and defined as zero (0). The T flag is defined as follows: + * i) T = 0 indicates this is a permanently assigned (i.e. wellknown) + * multicast GID. See RFC 2373 and RFC 2375 as reference + * for these permanently assigned GIDs. + * ii) T = 1 indicates this is a non-permanently assigned (i.e. transient) + * multicast GID. + * c) Scope is a 4-bit multicast scope value used to limit the scope of + * the multicast group. The following table defines scope value and + * interpretation. + * + * Multicast Address Scope Values: + * 0x2 Link-local + * 0x5 Site-local + * 0x8 Organization-local + * 0xE Global + * + * 2. It contains the SA-specific signature of 0xA01B and has the link-local + * scope bits set. 
(EZ: the idea here is that SA created MGIDs are the + * only source for this signature with link-local scope) + */ +static boolean_t validate_requested_mgid(IN osm_sa_t * sa, + IN const ib_member_rec_t * p_mcm_rec) +{ + uint16_t signature; + boolean_t valid = TRUE; + + OSM_LOG_ENTER(sa->p_log); + + /* 14-a: mcast GID must start with 0xFF */ + if (p_mcm_rec->mgid.multicast.header[0] != 0xFF) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B01: " + "Invalid prefix 0x%02X in requested MGID, " + "must be 0xFF\n", + cl_ntoh16(p_mcm_rec->mgid.multicast.header[0])); + valid = FALSE; + goto Exit; + } + + /* the MGID signature can mark IPoIB or SA assigned MGIDs */ + memcpy(&signature, &(p_mcm_rec->mgid.multicast.raw_group_id), + sizeof(signature)); + signature = cl_ntoh16(signature); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "MGID Signed as 0x%04X\n", signature); + + /* + * We skip any checks for MGIDs that follow IPoIB + * GID structure as defined by the IETF ipoib-link-multicast. + * + * For IPv4 over IB, the signature will be "0x401B". + * + * | 8 | 4 | 4 | 16 bits | 16 bits | 48 bits | 32 bits | + * +--------+----+----+-----------------+---------+----------+---------+ + * |11111111|0001|scop||< P_Key >|00.......0|| + * +--------+----+----+-----------------+---------+----------+---------+ + * + * For IPv6 over IB, the signature will be "0x601B". 
+ * + * | 8 | 4 | 4 | 16 bits | 16 bits | 80 bits | + * +--------+----+----+-----------------+---------+--------------------+ + * |11111111|0001|scop||< P_Key >|000.............0001| + * +--------+----+----+-----------------+---------+--------------------+ + * + */ + if (signature == 0x401B || signature == 0x601B) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Skipping MGID Validation for IPoIB Signed (0x%04X) MGIDs\n", + signature); + goto Exit; + } + + /* 14-b: the 3 upper bits in the "flags" should be zero: */ + if (p_mcm_rec->mgid.multicast.header[1] & 0xE0) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B28: " + "Requested MGID invalid, uses Reserved Flags: flags=0x%X\n", + (p_mcm_rec->mgid.multicast.header[1] & 0xE0) >> 4); + valid = FALSE; + goto Exit; + } + + /* 2 - now what if the link local format 0xA01B is used - + the scope should not be link local */ + if (signature == 0xA01B && + (p_mcm_rec->mgid.multicast.header[1] & 0x0F) == + IB_MC_SCOPE_LINK_LOCAL) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B24: " + "Requested MGID invalid, " + "uses 0xA01B signature but with link-local scope\n"); + valid = FALSE; + goto Exit; + } + + /* + * For SA assigned MGIDs (signature 0xA01B): + * There is no real way to make sure the GID Prefix is really unique. + * If we could enforce using the Subnet Prefix for that purpose it would + * have been nice. But the spec does not require it. + */ + +Exit: + OSM_LOG_EXIT(sa->p_log); + return valid; +} + +/********************************************************************** + Check if the requested new MC group parameters are realizable. + Also set the default MTU and Rate if not provided by the user. 
+**********************************************************************/ +static boolean_t mgrp_request_is_realizable(IN osm_sa_t * sa, + IN ib_net64_t comp_mask, + IN ib_member_rec_t * p_mcm_rec, + IN const osm_physp_t * p_physp) +{ + uint8_t mtu_sel = 2; /* exactly */ + uint8_t mtu_required, mtu, port_mtu; + uint8_t rate_sel = 2; /* exactly */ + uint8_t rate_required, rate, port_rate, new_rate; + const ib_port_info_t *p_pi; + osm_log_t *p_log = sa->p_log; + int extended; + + OSM_LOG_ENTER(sa->p_log); + + /* + * End of o15-0.2.3 specifies: + * .... + * The entity may also supply the other components such as HopLimit, + * MTU, etc. during group creation time. If these components are not + * provided during group creation time, SA will provide them for the + * group. The values chosen are vendor-dependent and beyond the scope + * of the specification. + * + * so we might also need to assign RATE/MTU if they are not comp + * masked in. + */ + + p_pi = &p_physp->port_info; + port_mtu = p_physp ? ib_port_info_get_neighbor_mtu(p_pi) : 0; + if (!(comp_mask & IB_MCR_COMPMASK_MTU) || + !(comp_mask & IB_MCR_COMPMASK_MTU_SEL) || + (mtu_sel = (p_mcm_rec->mtu >> 6)) == 3) + mtu = port_mtu ? port_mtu : sa->p_subn->min_ca_mtu; + else { + mtu_required = (uint8_t) (p_mcm_rec->mtu & 0x3F); + mtu = mtu_required; + switch (mtu_sel) { + case 0: /* Greater than MTU specified */ + if (port_mtu && mtu_required >= port_mtu) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Requested MTU %x >= the port\'s mtu:%x\n", + mtu_required, port_mtu); + return FALSE; + } + /* we provide the largest MTU possible if we can */ + if (port_mtu) + mtu = port_mtu; + else if (mtu_required < sa->p_subn->min_ca_mtu) + mtu = sa->p_subn->min_ca_mtu; + else + mtu++; + break; + case 1: /* Less than MTU specified */ + /* use the smaller of the two: + a. one lower then the required + b. 
the mtu of the requesting port (if exists) */ + if (port_mtu && mtu_required > port_mtu) + mtu = port_mtu; + else + mtu--; + break; + case 2: /* Exactly MTU specified */ + default: + break; + } + /* make sure it still is in the range */ + if (mtu < IB_MIN_MTU || mtu > IB_MAX_MTU) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Calculated MTU %x is out of range\n", mtu); + return FALSE; + } + } + p_mcm_rec->mtu = (mtu_sel << 6) | mtu; + + if (p_physp) { + extended = p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS; + port_rate = ib_port_info_compute_rate(p_pi, extended); + } else + port_rate = 0; + + if (!(comp_mask & IB_MCR_COMPMASK_RATE) + || !(comp_mask & IB_MCR_COMPMASK_RATE_SEL) + || (rate_sel = (p_mcm_rec->rate >> 6)) == 3) + rate = port_rate ? port_rate : sa->p_subn->min_ca_rate; + else { + rate_required = (uint8_t) (p_mcm_rec->rate & 0x3F); + rate = rate_required; + switch (rate_sel) { + case 0: /* Greater than RATE specified */ + if (ib_path_compare_rates(rate_required, port_rate) >= 0) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Requested RATE %x >= the port\'s rate:%x\n", + rate_required, port_rate); + return FALSE; + } + /* we provide the largest RATE possible if we can */ + if (port_rate) + rate = port_rate; + else if (ib_path_compare_rates(rate_required, + sa->p_subn->min_ca_rate) < 0) + rate = sa->p_subn->min_ca_rate; + else + rate = ib_path_rate_get_next(rate); + break; + case 1: /* Less than RATE specified */ + /* use the smaller of the two: + a. one lower then the required + b. 
the rate of the requesting port (if exists) */ + if (ib_path_compare_rates(rate_required, port_rate) > 0) + rate = port_rate; + else + rate = ib_path_rate_get_prev(rate); + break; + case 2: /* Exactly RATE specified */ + default: + break; + } + /* make sure it still is in the range */ + if (rate < IB_MIN_RATE || rate > IB_MAX_RATE) { + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Calculated RATE %x is out of range\n", rate); + return FALSE; + } + } + if (sa->p_subn->opt.use_original_extended_sa_rates_only) { + new_rate = ib_path_rate_max_12xedr(rate); + if (new_rate != rate) { + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, + "Rate decreased from %u to %u\n", + rate, new_rate); + rate = new_rate; + } + } + p_mcm_rec->rate = (rate_sel << 6) | rate; + + OSM_LOG_EXIT(sa->p_log); + return TRUE; +} + +static unsigned build_new_mgid(osm_sa_t * sa, ib_net64_t comp_mask, + ib_member_rec_t * mcmr) +{ + static uint32_t uniq_count; + ib_gid_t *mgid = &mcmr->mgid; + uint8_t scope; + unsigned i; + + /* use the given scope state only if requested! */ + if (comp_mask & IB_MCR_COMPMASK_SCOPE) + ib_member_get_scope_state(mcmr->scope_state, &scope, NULL); + else + /* to guarantee no collision with other subnets use local scope! */ + scope = IB_MC_SCOPE_LINK_LOCAL; + + mgid->raw[0] = 0xff; + mgid->raw[1] = 0x10 | scope; + mgid->raw[2] = 0xa0; + mgid->raw[3] = 0x1b; + + memcpy(&mgid->raw[4], &sa->p_subn->opt.subnet_prefix, sizeof(uint64_t)); + + for (i = 0; i < 1000; i++) { + memcpy(&mgid->raw[10], &uniq_count, 4); + uniq_count++; + if (!osm_get_mgrp_by_mgid(sa->p_subn, mgid)) + return 1; + } + + return 0; +} + +/********************************************************************** + Call this function to create a new mgrp. 
**********************************************************************/
/*
 * sa                   - SA object (log, subnet, options).
 * comp_mask            - component mask of the request.
 * p_recvd_mcmember_rec - requested record; it is copied locally and the
 *                        copy is what gets modified and stored.
 * p_physp              - requesting port; may be NULL (see
 *                        osm_mcmr_rcv_find_or_create_new_mgrp()).
 * pp_mgrp              - receives the new group on success.
 *
 * Steps: pick or validate the MGID, check that MTU/rate are realizable,
 * obtain an MLID, create the group and force its MTU, rate and packet
 * lifetime selectors to IB_PATH_SELECTOR_EXACTLY.
 *
 * Returns IB_SUCCESS, or IB_SA_MAD_STATUS_NO_RESOURCES /
 * IB_SA_MAD_STATUS_REQ_INVALID on failure.
 */
static ib_api_status_t mcmr_rcv_create_new_mgrp(IN osm_sa_t * sa,
						IN ib_net64_t comp_mask,
						IN const ib_member_rec_t * p_recvd_mcmember_rec,
						IN const osm_physp_t * p_physp,
						OUT osm_mgrp_t ** pp_mgrp)
{
	ib_net16_t mlid;
	uint16_t signature;
	ib_api_status_t status = IB_SUCCESS;
	osm_mgrp_t *bcast_mgrp;
	ib_gid_t bcast_mgid;
	ib_member_rec_t mcm_rec = *p_recvd_mcmember_rec;	/* copy for modifications */
	char gid_str[INET6_ADDRSTRLEN];

	OSM_LOG_ENTER(sa->p_log);

	/* we need to create the new MGID if it was not defined */
	if (!ib_gid_is_notzero(&p_recvd_mcmember_rec->mgid)) {
		/* create a new MGID */
		if (!build_new_mgid(sa, comp_mask, &mcm_rec)) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B23: "
				"cannot allocate unique MGID value\n");
			status = IB_SA_MAD_STATUS_NO_RESOURCES;
			goto Exit;
		}
		OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Allocated new MGID:%s\n",
			inet_ntop(AF_INET6, mcm_rec.mgid.raw, gid_str,
				  sizeof gid_str));
	} else if (sa->p_subn->opt.ipoib_mcgroup_creation_validation) {
		/*
		 * NOTE(review): validate_requested_mgid() is only reached
		 * when this option is enabled; with the option off a
		 * caller-supplied MGID is not validated here at all -
		 * confirm that is intended.
		 */
		/* a specific MGID was requested so validate the resulting MGID */
		if (validate_requested_mgid(sa, &mcm_rec)) {
			memcpy(&signature, &(mcm_rec.mgid.multicast.raw_group_id),
			       sizeof(signature));
			signature = cl_ntoh16(signature);
			/* Check for IPoIB signature in MGID */
			if (signature == 0x401B || signature == 0x601B) {
				/* Derive IPoIB broadcast MGID */
				bcast_mgid.unicast.prefix = IPV4_BCAST_MGID_PREFIX;
				bcast_mgid.unicast.interface_id = IPV4_BCAST_MGID_INT_ID;
				/* Set scope in IPoIB broadcast MGID */
				bcast_mgid.multicast.header[1] =
				    (bcast_mgid.multicast.header[1] & 0xF0) |
				    (mcm_rec.mgid.multicast.header[1] & 0x0F);
				/* Set P_Key in IPoIB broadcast MGID */
				bcast_mgid.multicast.raw_group_id[2] =
				    mcm_rec.mgid.multicast.raw_group_id[2];
				bcast_mgid.multicast.raw_group_id[3] =
				    mcm_rec.mgid.multicast.raw_group_id[3];
				/* Check MC group for the IPoIB broadcast group */
				/* skipped only when the request is itself
				   the IPv4 broadcast MGID derived above */
				if (signature != 0x401B ||
				    memcmp(&bcast_mgid, &(mcm_rec.mgid), sizeof(ib_gid_t))) {
					bcast_mgrp = osm_get_mgrp_by_mgid(sa->p_subn,
									  &bcast_mgid);
					if (!bcast_mgrp) {
						OSM_LOG(sa->p_log, OSM_LOG_ERROR,
							"ERR 1B1B: Broadcast group %s not found, sending IB_SA_MAD_STATUS_REQ_INVALID\n",
							inet_ntop(AF_INET6, bcast_mgid.raw, gid_str, sizeof gid_str));
						status = IB_SA_MAD_STATUS_REQ_INVALID;
						goto Exit;
					}
					/* the masked components of the request
					   must match the broadcast group */
					if (!validate_other_comp_fields(sa->p_log, comp_mask, p_recvd_mcmember_rec, bcast_mgrp, OSM_LOG_ERROR)) {
						OSM_LOG(sa->p_log, OSM_LOG_ERROR,
							"ERR 1B1C: validate_other_comp_fields failed for MGID: %s, sending IB_SA_MAD_STATUS_REQ_INVALID\n",
							inet_ntop(AF_INET6, &p_recvd_mcmember_rec->mgid, gid_str, sizeof gid_str));
						status = IB_SA_MAD_STATUS_REQ_INVALID;
						goto Exit;
					}
				}
			}
		} else {
			status = IB_SA_MAD_STATUS_REQ_INVALID;
			goto Exit;
		}
	}

	/* check the requested parameters are realizable */
	if (mgrp_request_is_realizable(sa, comp_mask, &mcm_rec, p_physp) ==
	    FALSE) {
		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B26: "
			"Requested MGRP parameters are not realizable\n");
		status = IB_SA_MAD_STATUS_REQ_INVALID;
		goto Exit;
	}

	mlid = get_new_mlid(sa, &mcm_rec);
	if (mlid == 0) {
		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B19: "
			"get_new_mlid failed request mlid 0x%04x\n",
			cl_ntoh16(mcm_rec.mlid));
		status = IB_SA_MAD_STATUS_NO_RESOURCES;
		goto Exit;
	}

	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Obtained new mlid 0x%X\n",
		cl_ntoh16(mlid));

	mcm_rec.mlid = mlid;
	/* create a new MC Group */
	*pp_mgrp = osm_mgrp_new(sa->p_subn, mlid, &mcm_rec);
	if (*pp_mgrp == NULL) {
		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B08: "
			"osm_mgrp_new failed\n");
		/* give the MLID obtained above back */
		free_mlid(sa, mlid);
		status = IB_SA_MAD_STATUS_NO_RESOURCES;
		goto Exit;
	}

	/* the mcmember_record should have mtu_sel, rate_sel, and pkt_lifetime_sel = 2 */
	(*pp_mgrp)->mcmember_rec.mtu &= 0x3f;
	(*pp_mgrp)->mcmember_rec.mtu |= IB_PATH_SELECTOR_EXACTLY << 6;
	(*pp_mgrp)->mcmember_rec.rate &= 0x3f;
	(*pp_mgrp)->mcmember_rec.rate |= IB_PATH_SELECTOR_EXACTLY << 6;
	(*pp_mgrp)->mcmember_rec.pkt_life &= 0x3f;
	(*pp_mgrp)->mcmember_rec.pkt_life |= IB_PATH_SELECTOR_EXACTLY << 6;

Exit:
	OSM_LOG_EXIT(sa->p_log);
	return status;
}

/**********************************************************************
 Call this function to find or create a new mgrp.

 Returns the existing group whose MGID matches the record, otherwise a
 group newly created from the record (with no requesting port, i.e.
 p_physp == NULL), or NULL when creation fails.
**********************************************************************/
osm_mgrp_t *osm_mcmr_rcv_find_or_create_new_mgrp(IN osm_sa_t * sa,
						 IN ib_net64_t comp_mask,
						 IN ib_member_rec_t *
						 p_recvd_mcmember_rec)
{
	osm_mgrp_t *mgrp;

	if ((mgrp = osm_get_mgrp_by_mgid(sa->p_subn,
					 &p_recvd_mcmember_rec->mgid)))
		return mgrp;
	/* mgrp is only read after a successful create */
	if (mcmr_rcv_create_new_mgrp(sa, comp_mask, p_recvd_mcmember_rec, NULL,
				     &mgrp) == IB_SUCCESS)
		return mgrp;
	return NULL;
}

/*********************************************************************
Process a request for leaving the group

 Flow: reject a PortGID whose prefix differs from the configured subnet
 prefix, then - under the exclusive SA lock - look up the group,
 validate the delete (o15-0.1.14) and remove the port / update its join
 state.  On each failure the lock is released before the error is
 logged and the error MAD is sent; on success the response is sent
 after the lock has been released.
**********************************************************************/
static void mcmr_rcv_leave_mgrp(IN osm_sa_t * sa, IN osm_madw_t * p_madw)
{
	osm_mgrp_t *p_mgrp;
	ib_sa_mad_t *p_sa_mad;
	ib_member_rec_t *p_recvd_mcmember_rec;
	ib_member_rec_t mcmember_rec;
	osm_mcm_alias_guid_t *p_mcm_alias_guid;

	OSM_LOG_ENTER(sa->p_log);

	p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
	p_recvd_mcmember_rec =
	    (ib_member_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad);

	/* local copy: passed to osm_mgrp_remove_port() and used as the
	   response record */
	mcmember_rec = *p_recvd_mcmember_rec;

	/* Validate the subnet prefix in the PortGID */
	if (p_recvd_mcmember_rec->port_gid.unicast.prefix !=
	    sa->p_subn->opt.subnet_prefix) {
		OSM_LOG(sa->p_log, OSM_LOG_ERROR,
			"ERR 1B41: PortGID subnet prefix 0x%" PRIx64
			" does not match configured prefix 0x%" PRIx64 "\n",
			cl_ntoh64(p_recvd_mcmember_rec->port_gid.unicast.prefix),
			cl_ntoh64(sa->p_subn->opt.subnet_prefix));
		osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_INVALID_GID);
		goto Exit;
	}

	CL_PLOCK_EXCL_ACQUIRE(sa->p_lock);

	/* debug-only diagnostics: a missing requester port is logged but
	   does not fail the request here */
	if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_DEBUG)) {
		osm_physp_t *p_req_physp;

		p_req_physp = osm_get_physp_by_mad_addr(sa->p_log, sa->p_subn,
							osm_madw_get_mad_addr_ptr(p_madw));
		if (p_req_physp == NULL) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B02: "
				"Cannot find requester physical port\n");
		} else {
			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
				"Requester port GUID 0x%" PRIx64 "\n",
				cl_ntoh64(osm_physp_get_port_guid(p_req_physp)));
		}
		OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Dump of record\n");
		osm_dump_mc_record_v2(sa->p_log, &mcmember_rec, FILE_ID, OSM_LOG_DEBUG);
	}

	p_mgrp = osm_get_mgrp_by_mgid(sa->p_subn, &p_recvd_mcmember_rec->mgid);
	if (!p_mgrp) {
		char gid_str[INET6_ADDRSTRLEN];
		CL_PLOCK_RELEASE(sa->p_lock);
		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
			"Failed since multicast group %s not present\n",
			inet_ntop(AF_INET6, p_recvd_mcmember_rec->mgid.raw,
				  gid_str, sizeof gid_str));
		osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID);
		goto Exit;
	}

	/* check validity of the delete request o15-0.1.14 */
	if (!validate_delete(sa, p_mgrp, osm_madw_get_mad_addr_ptr(p_madw),
			     p_recvd_mcmember_rec, &p_mcm_alias_guid)) {
		char gid_str[INET6_ADDRSTRLEN];
		char gid_str2[INET6_ADDRSTRLEN];
		CL_PLOCK_RELEASE(sa->p_lock);
		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B25: "
			"Received an invalid delete request for "
			"MGID: %s for PortGID: %s\n",
			inet_ntop(AF_INET6, p_recvd_mcmember_rec->mgid.raw,
				  gid_str, sizeof gid_str),
			inet_ntop(AF_INET6, p_recvd_mcmember_rec->port_gid.raw,
				  gid_str2, sizeof gid_str2));
		osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID);
		goto Exit;
	}

	/* remove port and/or update join state */
	osm_mgrp_remove_port(sa->p_subn, sa->p_log, p_mgrp, p_mcm_alias_guid,
			     &mcmember_rec);
	CL_PLOCK_RELEASE(sa->p_lock);

	mcmr_rcv_respond(sa, p_madw, &mcmember_rec);

Exit:
	OSM_LOG_EXIT(sa->p_log);
}

/*
 * Compare the components of p_mcmr that are selected by comp_mask against
 * the stored record of p_mgrp: Q_Key, P_Key, TClass, SL, FlowLabel and
 * HopLimit.
 *
 * For P_Key the check fails when neither key is a full-member key, or when
 * the two keys differ in their base value.
 *
 * Every mismatch is logged at log_level.
 *
 * Returns 1 when all selected components match, 0 on the first mismatch.
 */
static int validate_other_comp_fields(osm_log_t * p_log, ib_net64_t comp_mask,
				      const ib_member_rec_t * p_mcmr,
				      osm_mgrp_t * p_mgrp,
				      osm_log_level_t log_level)
{
	int ret = 0;

	if ((IB_MCR_COMPMASK_QKEY & comp_mask) &&
	    p_mcmr->qkey != p_mgrp->mcmember_rec.qkey) {
		OSM_LOG(p_log, log_level, "ERR 1B30: "
			"Q_Key mismatch: query 0x%x group 0x%x\n",
			cl_ntoh32(p_mcmr->qkey),
			cl_ntoh32(p_mgrp->mcmember_rec.qkey));
		goto Exit;
	}

	if (IB_MCR_COMPMASK_PKEY & comp_mask) {
		if (!(ib_pkey_is_full_member(p_mcmr->pkey) ||
		      ib_pkey_is_full_member(p_mgrp->mcmember_rec.pkey))) {
			OSM_LOG(p_log, log_level, "ERR 1B31: "
				"Both limited P_Keys: query 0x%x group 0x%x\n",
				cl_ntoh16(p_mcmr->pkey),
				cl_ntoh16(p_mgrp->mcmember_rec.pkey));
			goto Exit;
		}
		if (ib_pkey_get_base(p_mcmr->pkey) !=
		    ib_pkey_get_base(p_mgrp->mcmember_rec.pkey)) {
			OSM_LOG(p_log, log_level, "ERR 1B32: "
				"P_Key base mismatch: query 0x%x group 0x%x\n",
				cl_ntoh16(p_mcmr->pkey),
				cl_ntoh16(p_mgrp->mcmember_rec.pkey));
			goto Exit;
		}
	}

	if ((IB_MCR_COMPMASK_TCLASS & comp_mask) &&
	    p_mcmr->tclass != p_mgrp->mcmember_rec.tclass) {
		OSM_LOG(p_log, log_level, "ERR 1B33: "
			"TClass mismatch: query %d group %d\n",
			p_mcmr->tclass, p_mgrp->mcmember_rec.tclass);
		goto Exit;
	}

	/* check SL, Flow, and Hop limit */
	{
		uint32_t mgrp_flow, query_flow;
		uint8_t mgrp_sl, query_sl;
		uint8_t mgrp_hop, query_hop;

		/* the three values share the sl_flow_hop field; unpack
		   both records before comparing */
		ib_member_get_sl_flow_hop(p_mcmr->sl_flow_hop,
					  &query_sl, &query_flow, &query_hop);

		ib_member_get_sl_flow_hop(p_mgrp->mcmember_rec.sl_flow_hop,
					  &mgrp_sl, &mgrp_flow, &mgrp_hop);

		if ((IB_MCR_COMPMASK_SL & comp_mask) && query_sl != mgrp_sl) {
			OSM_LOG(p_log, log_level, "ERR 1B34: "
				"SL mismatch: query %d group %d\n",
				query_sl, mgrp_sl);
			goto Exit;
		}

		if ((IB_MCR_COMPMASK_FLOW & comp_mask) &&
		    query_flow != mgrp_flow) {
			OSM_LOG(p_log, log_level, "ERR 1B35: "
				"FlowLabel mismatch: query 0x%x group 0x%x\n",
				query_flow, mgrp_flow);
			goto Exit;
		}

		if ((IB_MCR_COMPMASK_HOP & comp_mask) && query_hop != mgrp_hop) {
			OSM_LOG(p_log, log_level, "ERR 1B36: "
				"Hop mismatch: query %d group %d\n",
				query_hop, mgrp_hop);
			goto Exit;
		}
	}

	ret = 1;
Exit:
	return ret;
}

/**********************************************************************
 Handle a join (or create) request
**********************************************************************/
static void mcmr_rcv_join_mgrp(IN osm_sa_t * sa, IN osm_madw_t * p_madw)
{
	osm_mgrp_t *p_mgrp = NULL;
	ib_api_status_t status;
	ib_sa_mad_t *p_sa_mad;
	ib_member_rec_t *p_recvd_mcmember_rec;
	ib_member_rec_t mcmember_rec;
	osm_mcm_port_t *p_mcmr_port;
	osm_mcm_alias_guid_t *p_mcm_alias_guid;
	ib_net64_t portguid;
	osm_port_t *p_port;
	osm_physp_t *p_physp;
	osm_physp_t *p_request_physp;
	uint8_t is_new_group;	/* TRUE = there is a need to create a group */
	uint8_t join_state;
	boolean_t proxy;

	OSM_LOG_ENTER(sa->p_log);

	p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
	p_recvd_mcmember_rec = ib_sa_mad_get_payload_ptr(p_sa_mad);

	portguid = p_recvd_mcmember_rec->port_gid.unicast.interface_id;

	mcmember_rec = *p_recvd_mcmember_rec;

	/* Validate the subnet prefix in the PortGID */
	if (p_recvd_mcmember_rec->port_gid.unicast.prefix !=
	    sa->p_subn->opt.subnet_prefix) {
		OSM_LOG(sa->p_log, OSM_LOG_ERROR,
			"ERR 1B40: PortGID subnet prefix 0x%" PRIx64
			" does not match configured prefix 0x%" PRIx64 "\n",
			cl_ntoh64(p_recvd_mcmember_rec->port_gid.unicast.prefix),
			cl_ntoh64(sa->p_subn->opt.subnet_prefix));
		osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_INVALID_GID);
		goto Exit;
	}

	CL_PLOCK_EXCL_ACQUIRE(sa->p_lock);

	if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_DEBUG)) {
		osm_physp_t *p_req_physp;

		p_req_physp = osm_get_physp_by_mad_addr(sa->p_log, sa->p_subn,
							osm_madw_get_mad_addr_ptr(p_madw));
		if (p_req_physp == NULL) {
			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B03: "
				"Cannot find requester physical port\n");
		} else {
			OSM_LOG(sa->p_log,
OSM_LOG_DEBUG, + "Requester port GUID 0x%" PRIx64 "\n", + cl_ntoh64(osm_physp_get_port_guid(p_req_physp))); + } + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Dump of incoming record\n"); + osm_dump_mc_record_v2(sa->p_log, &mcmember_rec, FILE_ID, OSM_LOG_DEBUG); + } + + /* make sure the requested port guid is known to the SM */ + p_port = osm_get_port_by_alias_guid(sa->p_subn, portguid); + if (!p_port) { + CL_PLOCK_RELEASE(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Unknown port GUID 0x%016" PRIx64 "\n", + cl_ntoh64(portguid)); + osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID); + goto Exit; + } + + p_physp = p_port->p_physp; + /* Check that the p_physp and the requester physp are in the same + partition. */ + p_request_physp = + osm_get_physp_by_mad_addr(sa->p_log, sa->p_subn, + osm_madw_get_mad_addr_ptr(p_madw)); + if (p_request_physp == NULL) { + CL_PLOCK_RELEASE(sa->p_lock); + goto Exit; + } + + proxy = (p_physp != p_request_physp); + + if (proxy && !osm_physp_share_pkey(sa->p_log, p_physp, p_request_physp, + sa->p_subn->opt.allow_both_pkeys)) { + CL_PLOCK_RELEASE(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, + "Port and requester don't share PKey\n"); + osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID); + goto Exit; + } + + if ((p_sa_mad->comp_mask & IB_MCR_COMPMASK_PKEY) && + ib_pkey_is_invalid(p_recvd_mcmember_rec->pkey)) { + CL_PLOCK_RELEASE(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, + "Invalid PKey supplied in request\n"); + osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID); + goto Exit; + } + + ib_member_get_scope_state(p_recvd_mcmember_rec->scope_state, NULL, + &join_state); + + /* do we need to create a new group? 
*/ + p_mgrp = osm_get_mgrp_by_mgid(sa->p_subn, &p_recvd_mcmember_rec->mgid); + if (!p_mgrp) { + /* check for JoinState.FullMember = 1 o15.0.1.9 */ + if (!(join_state & (IB_JOIN_STATE_FULL | IB_JOIN_STATE_SEND_ONLY_FULL))) { + char gid_str[INET6_ADDRSTRLEN]; + CL_PLOCK_RELEASE(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B10: " + "Failed to create multicast group " + "because Join State != FullMember | SendOnlyFullMember" + " - required for create, " + "MGID: %s from port 0x%016" PRIx64 " (%s)\n", + inet_ntop(AF_INET6, + p_recvd_mcmember_rec->mgid.raw, + gid_str, sizeof gid_str), + cl_ntoh64(portguid), + p_port->p_node->print_desc); + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_REQ_INVALID); + goto Exit; + } + + /* check the comp_mask */ + if (!check_create_comp_mask(p_sa_mad->comp_mask, + p_recvd_mcmember_rec)) { + char gid_str[INET6_ADDRSTRLEN]; + CL_PLOCK_RELEASE(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B11: " + "Port 0x%016" PRIx64 " (%s) failed to join " + "non-existing multicast group with MGID %s, " + "insufficient components specified for " + "implicit create (comp_mask 0x%" PRIx64 ")\n", + cl_ntoh64(portguid), p_port->p_node->print_desc, + inet_ntop(AF_INET6, + p_recvd_mcmember_rec->mgid.raw, + gid_str, sizeof gid_str), + cl_ntoh64(p_sa_mad->comp_mask)); + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_INSUF_COMPS); + goto Exit; + } + + status = mcmr_rcv_create_new_mgrp(sa, p_sa_mad->comp_mask, + p_recvd_mcmember_rec, + p_physp, &p_mgrp); + if (status != IB_SUCCESS) { + CL_PLOCK_RELEASE(sa->p_lock); + osm_sa_send_error(sa, p_madw, status); + goto Exit; + } + /* copy the MGID to the result */ + mcmember_rec.mgid = p_mgrp->mcmember_rec.mgid; + is_new_group = 1; + } else { + /* no need for a new group */ + is_new_group = 0; + if (sa->p_subn->opt.mcgroup_join_validation && + !validate_other_comp_fields(sa->p_log, p_sa_mad->comp_mask, + p_recvd_mcmember_rec, p_mgrp, + OSM_LOG_ERROR)) { + char gid_str[INET6_ADDRSTRLEN]; + 
CL_PLOCK_RELEASE(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B1A: " + "validate_other_comp_fields failed for " + "MGID: %s port 0x%016" PRIx64 + " (%s), sending IB_SA_MAD_STATUS_REQ_INVALID\n", + inet_ntop(AF_INET6, + p_mgrp->mcmember_rec.mgid.raw, + gid_str, sizeof gid_str), + cl_ntoh64(portguid), + p_port->p_node->print_desc); + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_REQ_INVALID); + goto Exit; + } + } + + CL_ASSERT(p_mgrp); + + /* + * o15-0.2.4: If SA supports UD multicast, then SA shall cause an + * endport to join an existing multicast group if: + * 1. It receives a SubnAdmSet() method for a MCMemberRecord, and + * - WE KNOW THAT ALREADY + * 2. The MGID is specified and matches an existing multicast + * group, and + * - WE KNOW THAT ALREADY + * 3. The MCMemberRecord:JoinState is not all 0s, and + * 4. PortGID is specified and + * - WE KNOW THAT ALREADY (as it matched a real one) + * 5. All other components match that existing group, either by + * being wildcarded or by having values identical to those specified + * by the component mask and in use by the group with the exception + * of components such as ProxyJoin and Reserved, which are ignored + * by SA. 
+ * + * We need to check #3 and #5 here: + */ + if (!validate_more_comp_fields(sa->p_log, p_mgrp, p_recvd_mcmember_rec, + p_sa_mad->comp_mask) + || !validate_port_caps(sa->p_log, p_mgrp, p_physp) + || !(join_state != 0)) { + char gid_str[INET6_ADDRSTRLEN]; + memset(gid_str, 0, sizeof(gid_str)); + + /* get the gid_str before the cleanup, the cleanup can free the pointer */ + inet_ntop(AF_INET6, p_mgrp->mcmember_rec.mgid.raw, gid_str, + sizeof gid_str); + + /* since we might have created the new group we need to cleanup */ + if (is_new_group) + osm_mgrp_cleanup(sa->p_subn, p_mgrp); + CL_PLOCK_RELEASE(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B12: " + "validate_more_comp_fields, validate_port_caps, " + "or JoinState = 0 failed for MGID: %s port 0x%016" PRIx64 + " (%s), sending IB_SA_MAD_STATUS_REQ_INVALID\n", + gid_str, cl_ntoh64(portguid), p_port->p_node->print_desc); + osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID); + goto Exit; + } + + /* verify that the joining port is in the partition of the group */ + if (!osm_physp_has_pkey(sa->p_log, p_mgrp->mcmember_rec.pkey, p_physp)) { + char gid_str[INET6_ADDRSTRLEN]; + memset(gid_str, 0, sizeof(gid_str)); + inet_ntop(AF_INET6, p_mgrp->mcmember_rec.mgid.raw, gid_str, + sizeof(gid_str)); + + if (is_new_group) + osm_mgrp_cleanup(sa->p_subn, p_mgrp); + CL_PLOCK_RELEASE(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B14: " + "Cannot join port 0x%016" PRIx64 " to MGID %s - " + "Port is not in partition of this MC group\n", + cl_ntoh64(portguid), gid_str); + osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID); + goto Exit; + } + + /* + * o15-0.2.1 requires validation of the requesting port + * in the case of modification: + */ + if (!is_new_group && + !validate_modify(sa, p_mgrp, osm_madw_get_mad_addr_ptr(p_madw), + p_recvd_mcmember_rec, &p_mcm_alias_guid)) { + char gid_str[INET6_ADDRSTRLEN]; + CL_PLOCK_RELEASE(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B13: " + 
"validate_modify failed from port 0x%016" PRIx64 + " (%s) for MGID: %s, sending IB_SA_MAD_STATUS_REQ_INVALID\n", + cl_ntoh64(portguid), p_port->p_node->print_desc, + inet_ntop(AF_INET6, + p_mgrp->mcmember_rec.mgid.raw, + gid_str, sizeof(gid_str))); + osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID); + goto Exit; + } + + /* copy qkey mlid tclass pkey sl_flow_hop mtu rate pkt_life */ + copy_from_create_mc_rec(&mcmember_rec, &p_mgrp->mcmember_rec); + + /* create or update existing port (join-state will be updated) */ + p_mcmr_port = osm_mgrp_add_port(sa->p_subn, sa->p_log, p_mgrp, p_port, + &mcmember_rec, proxy); + if (!p_mcmr_port) { + /* we fail to add the port so we might need to delete the group */ + if (is_new_group) + osm_mgrp_cleanup(sa->p_subn, p_mgrp); + CL_PLOCK_RELEASE(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B06: " + "osm_mgrp_add_port failed\n"); + osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_NO_RESOURCES); + goto Exit; + } + + /* Release the lock as we don't need it. 
*/ + CL_PLOCK_RELEASE(sa->p_lock); + + if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_DEBUG)) + osm_dump_mc_record_v2(sa->p_log, &mcmember_rec, FILE_ID, OSM_LOG_DEBUG); + + mcmr_rcv_respond(sa, p_madw, &mcmember_rec); + +Exit: + OSM_LOG_EXIT(sa->p_log); +} + +/********************************************************************** + Add a patched multicast group to the results list +**********************************************************************/ +static ib_api_status_t mcmr_rcv_new_mcmr(IN osm_sa_t * sa, + IN const ib_member_rec_t * p_rcvd_rec, + IN cl_qlist_t * p_list) +{ + osm_sa_item_t *p_rec_item; + ib_api_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(sa->p_log); + + p_rec_item = malloc(SA_MCM_RESP_SIZE); + if (p_rec_item == NULL) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B15: " + "rec_item alloc failed\n"); + status = IB_INSUFFICIENT_RESOURCES; + goto Exit; + } + + memset(p_rec_item, 0, sizeof(cl_list_item_t)); + + /* HACK: Untrusted requesters should result with 0 Join + State, Port Guid, and Proxy */ + p_rec_item->resp.mc_rec = *p_rcvd_rec; + cl_qlist_insert_tail(p_list, &p_rec_item->list_item); + +Exit: + OSM_LOG_EXIT(sa->p_log); + return status; +} + +/********************************************************************** + Match the given mgrp to the requested mcmr +**********************************************************************/ +static void mcmr_by_comp_mask(osm_sa_t * sa, const ib_member_rec_t * p_rcvd_rec, + ib_net64_t comp_mask, osm_mgrp_t * p_mgrp, + const osm_physp_t * p_req_physp, + boolean_t trusted_req, cl_qlist_t * list) +{ + /* since we might change scope_state */ + ib_member_rec_t match_rec; + osm_mcm_alias_guid_t *p_mcm_alias_guid; + ib_net64_t portguid = p_rcvd_rec->port_gid.unicast.interface_id; + /* will be used for group or port info */ + uint8_t scope_state; + uint8_t scope_state_mask = 0; + cl_map_item_t *p_item; + ib_gid_t port_gid; + boolean_t proxy_join = FALSE; + + OSM_LOG_ENTER(sa->p_log); + + OSM_LOG(sa->p_log, 
OSM_LOG_DEBUG, + "Checking mlid:0x%X\n", cl_ntoh16(p_mgrp->mlid)); + + memset(&port_gid, 0, sizeof(port_gid)); + + /* first try to eliminate the group by MGID, MLID, or P_Key */ + if ((IB_MCR_COMPMASK_MGID & comp_mask) && + memcmp(&p_rcvd_rec->mgid, &p_mgrp->mcmember_rec.mgid, + sizeof(ib_gid_t))) + goto Exit; + + if ((IB_MCR_COMPMASK_MLID & comp_mask) && + memcmp(&p_rcvd_rec->mlid, &p_mgrp->mcmember_rec.mlid, + sizeof(uint16_t))) + goto Exit; + + /* if the requester physical port doesn't have the pkey that is defined + for the group - exit. */ + if (!osm_physp_has_pkey(sa->p_log, p_mgrp->mcmember_rec.pkey, + p_req_physp)) + goto Exit; + + /* now do the rest of the match */ + if (!validate_other_comp_fields(sa->p_log, comp_mask, p_rcvd_rec, p_mgrp, + OSM_LOG_NONE)) + goto Exit; + + if ((IB_MCR_COMPMASK_PROXY & comp_mask) && + p_rcvd_rec->proxy_join != p_mgrp->mcmember_rec.proxy_join) + goto Exit; + + /* need to validate mtu, rate, and pkt_lifetime fields */ + if (validate_more_comp_fields(sa->p_log, p_mgrp, p_rcvd_rec, + comp_mask) == FALSE) + goto Exit; + + /* Port specific fields */ + /* so did we get the PortGUID mask */ + if (IB_MCR_COMPMASK_PORT_GID & comp_mask) { + /* try to find this port */ + p_mcm_alias_guid = osm_mgrp_get_mcm_alias_guid(p_mgrp, portguid); + if (!p_mcm_alias_guid) /* port not in group */ + goto Exit; + scope_state = p_mcm_alias_guid->scope_state; + memcpy(&port_gid, &(p_mcm_alias_guid->port_gid), + sizeof(ib_gid_t)); + proxy_join = p_mcm_alias_guid->proxy_join; + } else /* point to the group information */ + scope_state = p_mgrp->mcmember_rec.scope_state; + + if (IB_MCR_COMPMASK_SCOPE & comp_mask) + scope_state_mask = 0xF0; + + if (IB_MCR_COMPMASK_JOIN_STATE & comp_mask) + scope_state_mask = scope_state_mask | 0x0F; + + /* Many MC records returned */ + if (trusted_req == TRUE && !(IB_MCR_COMPMASK_PORT_GID & comp_mask)) { + char gid_str[INET6_ADDRSTRLEN]; + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Trusted req is TRUE and no specific port 
defined\n"); + + /* return all the ports that match in this MC group */ + p_item = cl_qmap_head(&(p_mgrp->mcm_alias_port_tbl)); + while (p_item != cl_qmap_end(&(p_mgrp->mcm_alias_port_tbl))) { + p_mcm_alias_guid = (osm_mcm_alias_guid_t *) p_item; + + if ((scope_state_mask & p_rcvd_rec->scope_state) == + (scope_state_mask & p_mcm_alias_guid->scope_state)) { + /* add to the list */ + match_rec = p_mgrp->mcmember_rec; + match_rec.scope_state = p_mcm_alias_guid->scope_state; + memcpy(&match_rec.port_gid, + &p_mcm_alias_guid->port_gid, + sizeof(ib_gid_t)); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Record of port_gid: %s" + " in multicast_lid: 0x%X is returned\n", + inet_ntop(AF_INET6, + match_rec.port_gid.raw, + gid_str, sizeof gid_str), + cl_ntoh16(p_mgrp->mlid)); + + match_rec.proxy_join = + (uint8_t) (p_mcm_alias_guid->proxy_join); + + mcmr_rcv_new_mcmr(sa, &match_rec, list); + } + p_item = cl_qmap_next(p_item); + } + } else { /* One MC record returned */ + if ((scope_state_mask & p_rcvd_rec->scope_state) != + (scope_state_mask & scope_state)) + goto Exit; + + /* add to the list */ + match_rec = p_mgrp->mcmember_rec; + match_rec.scope_state = scope_state; + memcpy(&(match_rec.port_gid), &port_gid, sizeof(ib_gid_t)); + match_rec.proxy_join = (uint8_t) proxy_join; + + mcmr_rcv_new_mcmr(sa, &match_rec, list); + } + +Exit: + OSM_LOG_EXIT(sa->p_log); +} + +/********************************************************************** + Handle a query request +**********************************************************************/ +static void mcmr_query_mgrp(IN osm_sa_t * sa, IN osm_madw_t * p_madw) +{ + const ib_sa_mad_t *p_rcvd_mad; + const ib_member_rec_t *p_rcvd_rec; + cl_qlist_t rec_list; + ib_net64_t comp_mask; + osm_physp_t *p_req_physp; + boolean_t trusted_req; + osm_mgrp_t *p_mgrp; + + OSM_LOG_ENTER(sa->p_log); + + p_rcvd_mad = osm_madw_get_sa_mad_ptr(p_madw); + p_rcvd_rec = (ib_member_rec_t *) ib_sa_mad_get_payload_ptr(p_rcvd_mad); + comp_mask = p_rcvd_mad->comp_mask; + + 
/* + if sm_key is not zero and does not match we never get here + see main SA receiver + */ + trusted_req = (p_rcvd_mad->sm_key != 0); + + CL_PLOCK_ACQUIRE(sa->p_lock); + + /* update the requester physical port */ + p_req_physp = osm_get_physp_by_mad_addr(sa->p_log, sa->p_subn, + osm_madw_get_mad_addr_ptr + (p_madw)); + if (p_req_physp == NULL) { + CL_PLOCK_RELEASE(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B04: " + "Cannot find requester physical port\n"); + goto Exit; + } + + if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_DEBUG)) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Requester port GUID 0x%" PRIx64 "\n", + cl_ntoh64(osm_physp_get_port_guid(p_req_physp))); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Dump of record\n"); + osm_dump_mc_record(sa->p_log, p_rcvd_rec, OSM_LOG_DEBUG); + } + + cl_qlist_init(&rec_list); + + /* simply go over all MCGs and match */ + for (p_mgrp = (osm_mgrp_t *) cl_fmap_head(&sa->p_subn->mgrp_mgid_tbl); + p_mgrp != (osm_mgrp_t *) cl_fmap_end(&sa->p_subn->mgrp_mgid_tbl); + p_mgrp = (osm_mgrp_t *) cl_fmap_next(&p_mgrp->map_item)) + mcmr_by_comp_mask(sa, p_rcvd_rec, comp_mask, p_mgrp, + p_req_physp, trusted_req, &rec_list); + + CL_PLOCK_RELEASE(sa->p_lock); + + /* + p923 - The PortGID, JoinState and ProxyJoin shall be zero, + except in the case of a trusted request. + Note: In the mad controller we check that the SM_Key received on + the mad is valid. Meaning - is either zero or equal to the local + sm_key. 
+ */ + + if (!p_rcvd_mad->sm_key) { + osm_sa_item_t *item; + for (item = (osm_sa_item_t *) cl_qlist_head(&rec_list); + item != (osm_sa_item_t *) cl_qlist_end(&rec_list); + item = + (osm_sa_item_t *) cl_qlist_next(&item->list_item)) { + memset(&item->resp.mc_rec.port_gid, 0, sizeof(ib_gid_t)); + ib_member_set_join_state(&item->resp.mc_rec, 0); + item->resp.mc_rec.proxy_join = 0; + } + } + + osm_sa_respond(sa, p_madw, sizeof(ib_member_rec_t), &rec_list); + +Exit: + OSM_LOG_EXIT(sa->p_log); +} + +static uint8_t rate_is_valid(IN const ib_sa_mad_t *p_sa_mad, + IN const ib_member_rec_t *p_recvd_mcmember_rec) +{ + uint8_t rate; + + /* Validate rate if supplied */ + if ((p_sa_mad->comp_mask & IB_MCR_COMPMASK_RATE_SEL) && + (p_sa_mad->comp_mask & IB_MCR_COMPMASK_RATE)) { + rate = (uint8_t) (p_recvd_mcmember_rec->rate & 0x3F); + return ib_rate_is_valid(rate); + } + return 1; +} + +static int mtu_is_valid(IN const ib_sa_mad_t *p_sa_mad, + IN const ib_member_rec_t *p_recvd_mcmember_rec) +{ + uint8_t mtu; + + /* Validate MTU if supplied */ + if ((p_sa_mad->comp_mask & IB_MCR_COMPMASK_MTU_SEL) && + (p_sa_mad->comp_mask & IB_MCR_COMPMASK_MTU)) { + mtu = (uint8_t) (p_recvd_mcmember_rec->mtu & 0x3F); + return ib_mtu_is_valid(mtu); + } + return 1; +} + +void osm_mcmr_rcv_process(IN void *context, IN void *data) +{ + osm_sa_t *sa = context; + osm_madw_t *p_madw = data; + ib_sa_mad_t *p_sa_mad; + ib_member_rec_t *p_recvd_mcmember_rec; + + CL_ASSERT(sa); + + OSM_LOG_ENTER(sa->p_log); + + CL_ASSERT(p_madw); + + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); + p_recvd_mcmember_rec = + (ib_member_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); + + CL_ASSERT(p_sa_mad->attr_id == IB_MAD_ATTR_MCMEMBER_RECORD); + + switch (p_sa_mad->method) { + case IB_MAD_METHOD_SET: + if (!check_join_comp_mask(p_sa_mad->comp_mask)) { + char gid_str[INET6_ADDRSTRLEN]; + char gid_str2[INET6_ADDRSTRLEN]; + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B18: " + "component mask = 0x%016" PRIx64 ", " + "expected comp mask = 
0x%016" PRIx64 ", " + "MGID: %s for PortGID: %s\n", + cl_ntoh64(p_sa_mad->comp_mask), + CL_NTOH64(JOIN_MC_COMP_MASK), + inet_ntop(AF_INET6, + p_recvd_mcmember_rec->mgid.raw, + gid_str, sizeof gid_str), + inet_ntop(AF_INET6, + p_recvd_mcmember_rec->port_gid.raw, + gid_str2, sizeof gid_str2)); + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_INSUF_COMPS); + goto Exit; + } + if (!rate_is_valid(p_sa_mad, p_recvd_mcmember_rec) || + !mtu_is_valid(p_sa_mad, p_recvd_mcmember_rec)) { + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_REQ_INVALID); + goto Exit; + } + + /* + * Join or Create Multicast Group + */ + mcmr_rcv_join_mgrp(sa, p_madw); + break; + case IB_MAD_METHOD_DELETE: + if (!check_join_comp_mask(p_sa_mad->comp_mask)) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B20: " + "component mask = 0x%016" PRIx64 ", " + "expected comp mask = 0x%016" PRIx64 "\n", + cl_ntoh64(p_sa_mad->comp_mask), + CL_NTOH64(JOIN_MC_COMP_MASK)); + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_INSUF_COMPS); + goto Exit; + } + if (!rate_is_valid(p_sa_mad, p_recvd_mcmember_rec) || + !mtu_is_valid(p_sa_mad, p_recvd_mcmember_rec)) { + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_REQ_INVALID); + goto Exit; + } + + /* + * Leave Multicast Group + */ + mcmr_rcv_leave_mgrp(sa, p_madw); + break; + case IB_MAD_METHOD_GET: + case IB_MAD_METHOD_GETTABLE: + if (!rate_is_valid(p_sa_mad, p_recvd_mcmember_rec) || + !mtu_is_valid(p_sa_mad, p_recvd_mcmember_rec)) { + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_REQ_INVALID); + goto Exit; + } + + /* + * Querying a Multicast Group + */ + mcmr_query_mgrp(sa, p_madw); + break; + default: + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B21: " + "Unsupported Method (%s) for MCMemberRecord request\n", + ib_get_sa_method_str(p_sa_mad->method)); + osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR); + break; + } + +Exit: + OSM_LOG_EXIT(sa->p_log); + return; +} diff --git a/opensm/osm_sa_mft_record.c b/opensm/osm_sa_mft_record.c new file mode 100644 
index 0000000..3f9b14a --- /dev/null +++ b/opensm/osm_sa_mft_record.c @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_mftr_rcv_t. + * This object represents the MulticastForwardingTable Receiver object. + * This object is part of the opensm family of objects. 
+ */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_SA_MFT_RECORD_C +#include +#include +#include +#include +#include + +#define SA_MFTR_RESP_SIZE SA_ITEM_RESP_SIZE(mft_rec) + +typedef struct osm_mftr_search_ctxt { + const ib_mft_record_t *p_rcvd_rec; + ib_net64_t comp_mask; + cl_qlist_t *p_list; + osm_sa_t *sa; + const osm_physp_t *p_req_physp; +} osm_mftr_search_ctxt_t; + +static ib_api_status_t mftr_rcv_new_mftr(IN osm_sa_t * sa, + IN osm_switch_t * p_sw, + IN cl_qlist_t * p_list, + IN ib_net16_t lid, IN uint16_t block, + IN uint8_t position) +{ + osm_sa_item_t *p_rec_item; + ib_api_status_t status = IB_SUCCESS; + uint16_t position_block_num; + + OSM_LOG_ENTER(sa->p_log); + + p_rec_item = malloc(SA_MFTR_RESP_SIZE); + if (p_rec_item == NULL) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4A02: " + "rec_item alloc failed\n"); + status = IB_INSUFFICIENT_RESOURCES; + goto Exit; + } + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "New MulticastForwardingTable: sw 0x%016" PRIx64 + "\n\t\t\t\tblock %u position %u lid %u\n", + cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)), + block, position, cl_ntoh16(lid)); + + position_block_num = ((uint16_t) position << 12) | + (block & IB_MCAST_BLOCK_ID_MASK_HO); + + memset(p_rec_item, 0, SA_MFTR_RESP_SIZE); + + p_rec_item->resp.mft_rec.lid = lid; + p_rec_item->resp.mft_rec.position_block_num = cl_hton16(position_block_num); + + /* copy the mft block */ + osm_switch_get_mft_block(p_sw, block, position, p_rec_item->resp.mft_rec.mft); + + cl_qlist_insert_tail(p_list, &p_rec_item->list_item); + +Exit: + OSM_LOG_EXIT(sa->p_log); + return status; +} + +static void mftr_rcv_by_comp_mask(IN cl_map_item_t * p_map_item, IN void *cxt) +{ + const osm_mftr_search_ctxt_t *p_ctxt = cxt; + osm_switch_t *p_sw = (osm_switch_t *) p_map_item; + const ib_mft_record_t *const p_rcvd_rec = p_ctxt->p_rcvd_rec; + osm_sa_t *sa = p_ctxt->sa; + ib_net64_t const comp_mask = 
p_ctxt->comp_mask; + const osm_physp_t *const p_req_physp = p_ctxt->p_req_physp; + osm_port_t *p_port; + uint16_t min_lid_ho, max_lid_ho; + uint16_t position_block_num_ho; + uint16_t min_block, max_block, block; + const osm_physp_t *p_physp; + uint8_t min_position, max_position, position; + + /* In switches, the port guid is the node guid. */ + p_port = + osm_get_port_by_guid(sa->p_subn, p_sw->p_node->node_info.port_guid); + if (!p_port) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4A05: " + "Failed to find Port by Node Guid:0x%016" PRIx64 + "\n", cl_ntoh64(p_sw->p_node->node_info.node_guid)); + return; + } + + /* check that the requester physp and the current physp are under + the same partition. */ + p_physp = p_port->p_physp; + if (!p_physp) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4A06: " + "Failed to find default physical Port by Node Guid:0x%016" + PRIx64 "\n", + cl_ntoh64(p_sw->p_node->node_info.node_guid)); + return; + } + if (!osm_physp_share_pkey(sa->p_log, p_req_physp, p_physp, + sa->p_subn->opt.allow_both_pkeys)) + return; + + /* get the port 0 of the switch */ + osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho); + + /* compare the lids - if required */ + if (comp_mask & IB_MFTR_COMPMASK_LID) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Comparing lid:%u to port lid range: %u .. %u\n", + cl_ntoh16(p_rcvd_rec->lid), min_lid_ho, max_lid_ho); + /* ok we are ready for range check */ + if (min_lid_ho > cl_ntoh16(p_rcvd_rec->lid) || + max_lid_ho < cl_ntoh16(p_rcvd_rec->lid)) + return; + } + + if (!osm_switch_supports_mcast(p_sw)) + return; + + /* Are there any blocks in use ? 
*/ + if (osm_switch_get_mft_max_block_in_use(p_sw) == -1) + return; + + position_block_num_ho = cl_ntoh16(p_rcvd_rec->position_block_num); + + /* now we need to decide which blocks to output */ + if (comp_mask & IB_MFTR_COMPMASK_BLOCK) { + max_block = min_block = + position_block_num_ho & IB_MCAST_BLOCK_ID_MASK_HO; + if (max_block > osm_switch_get_mft_max_block_in_use(p_sw)) + return; + } else { + /* use as many blocks as needed */ + min_block = 0; + max_block = osm_switch_get_mft_max_block_in_use(p_sw); + } + + /* need to decide which positions to output */ + if (comp_mask & IB_MFTR_COMPMASK_POSITION) { + min_position = max_position = + (position_block_num_ho & 0xF000) >> 12; + if (max_position > osm_switch_get_mft_max_position(p_sw)) + return; + } else { + /* use as many positions as needed */ + min_position = 0; + max_position = osm_switch_get_mft_max_position(p_sw); + } + + /* so we can add these one by one ... */ + for (block = min_block; block <= max_block; block++) + for (position = min_position; position <= max_position; + position++) + mftr_rcv_new_mftr(sa, p_sw, p_ctxt->p_list, + osm_port_get_base_lid(p_port), block, + position); +} + +void osm_mftr_rcv_process(IN void *ctx, IN void *data) +{ + osm_sa_t *sa = ctx; + osm_madw_t *p_madw = data; + const ib_sa_mad_t *p_rcvd_mad; + const ib_mft_record_t *p_rcvd_rec; + cl_qlist_t rec_list; + osm_mftr_search_ctxt_t context; + osm_physp_t *p_req_physp; + + CL_ASSERT(sa); + + OSM_LOG_ENTER(sa->p_log); + + CL_ASSERT(p_madw); + + p_rcvd_mad = osm_madw_get_sa_mad_ptr(p_madw); + p_rcvd_rec = (ib_mft_record_t *) ib_sa_mad_get_payload_ptr(p_rcvd_mad); + + CL_ASSERT(p_rcvd_mad->attr_id == IB_MAD_ATTR_MFT_RECORD); + + /* we only support SubnAdmGet and SubnAdmGetTable methods */ + if (p_rcvd_mad->method != IB_MAD_METHOD_GET && + p_rcvd_mad->method != IB_MAD_METHOD_GETTABLE) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4A08: " + "Unsupported Method (%s) for MFTRecord request\n", + ib_get_sa_method_str(p_rcvd_mad->method)); + 
osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR); + goto Exit; + } + + cl_plock_acquire(sa->p_lock); + + /* update the requester physical port */ + p_req_physp = osm_get_physp_by_mad_addr(sa->p_log, sa->p_subn, + osm_madw_get_mad_addr_ptr + (p_madw)); + if (p_req_physp == NULL) { + cl_plock_release(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4A07: " + "Cannot find requester physical port\n"); + goto Exit; + } + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Requester port GUID 0x%" PRIx64 "\n", + cl_ntoh64(osm_physp_get_port_guid(p_req_physp))); + + cl_qlist_init(&rec_list); + + context.p_rcvd_rec = p_rcvd_rec; + context.p_list = &rec_list; + context.comp_mask = p_rcvd_mad->comp_mask; + context.sa = sa; + context.p_req_physp = p_req_physp; + + /* Go over all switches */ + cl_qmap_apply_func(&sa->p_subn->sw_guid_tbl, mftr_rcv_by_comp_mask, + &context); + + cl_plock_release(sa->p_lock); + + osm_sa_respond(sa, p_madw, sizeof(ib_mft_record_t), &rec_list); + +Exit: + OSM_LOG_EXIT(sa->p_log); +} diff --git a/opensm/osm_sa_multipath_record.c b/opensm/osm_sa_multipath_record.c new file mode 100644 index 0000000..0c4d94e --- /dev/null +++ b/opensm/osm_sa_multipath_record.c @@ -0,0 +1,1689 @@ +/* + * Copyright (c) 2006-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_mpr_rcv_t. + * This object represents the MultiPath Record Receiver object. + * This object is part of the opensm family of objects. 
+ */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) + +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_SA_MULTIPATH_RECORD_C +#include +#include +#include +#include +#include +#include +#include +#include + +#define OSM_SA_MPR_MAX_NUM_PATH 127 +#define MAX_HOPS 64 + +#define SA_MPR_RESP_SIZE SA_ITEM_RESP_SIZE(mpr_rec) + +static boolean_t sa_multipath_rec_is_tavor_port(IN const osm_port_t * p_port) +{ + osm_node_t const *p_node; + ib_net32_t vend_id; + + p_node = p_port->p_node; + vend_id = ib_node_info_get_vendor_id(&p_node->node_info); + + return ((p_node->node_info.device_id == CL_HTON16(23108)) && + ((vend_id == CL_HTON32(OSM_VENDOR_ID_MELLANOX)) || + (vend_id == CL_HTON32(OSM_VENDOR_ID_TOPSPIN)) || + (vend_id == CL_HTON32(OSM_VENDOR_ID_SILVERSTORM)) || + (vend_id == CL_HTON32(OSM_VENDOR_ID_VOLTAIRE)))); +} + +static boolean_t +sa_multipath_rec_apply_tavor_mtu_limit(IN const ib_multipath_rec_t * p_mpr, + IN const osm_port_t * p_src_port, + IN const osm_port_t * p_dest_port, + IN const ib_net64_t comp_mask) +{ + uint8_t required_mtu; + + /* only if at least one of the ports is a Tavor device */ + if (!sa_multipath_rec_is_tavor_port(p_src_port) && + !sa_multipath_rec_is_tavor_port(p_dest_port)) + return FALSE; + + /* + we can apply the patch if either: + 1. No MTU required + 2. Required MTU < + 3. Required MTU = 1K or 512 or 256 + 4. 
Required MTU > 256 or 512 + */ + required_mtu = ib_multipath_rec_mtu(p_mpr); + if ((comp_mask & IB_MPR_COMPMASK_MTUSELEC) && + (comp_mask & IB_MPR_COMPMASK_MTU)) { + switch (ib_multipath_rec_mtu_sel(p_mpr)) { + case 0: /* must be greater than */ + case 2: /* exact match */ + if (IB_MTU_LEN_1024 < required_mtu) + return FALSE; + break; + + case 1: /* must be less than */ + /* can't be disqualified by this one */ + break; + + case 3: /* largest available */ + /* the ULP intentionally requested */ + /* the largest MTU possible */ + return FALSE; + break; + + default: + /* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */ + CL_ASSERT(FALSE); + break; + } + } + + return TRUE; +} + +static ib_api_status_t mpr_rcv_get_path_parms(IN osm_sa_t * sa, + IN const ib_multipath_rec_t * + p_mpr, + IN const osm_alias_guid_t * p_src_alias_guid, + IN const osm_alias_guid_t * p_dest_alias_guid, + IN const uint16_t src_lid_ho, + IN const uint16_t dest_lid_ho, + IN const ib_net64_t comp_mask, + OUT osm_path_parms_t * p_parms) +{ + const osm_node_t *p_node; + const osm_physp_t *p_physp, *p_physp0; + const osm_physp_t *p_src_physp; + const osm_physp_t *p_dest_physp; + const osm_prtn_t *p_prtn = NULL; + const ib_port_info_t *p_pi, *p_pi0; + ib_slvl_table_t *p_slvl_tbl; + ib_api_status_t status = IB_SUCCESS; + uint8_t mtu; + uint8_t rate, p0_extended_rate, dest_rate; + uint8_t pkt_life; + uint8_t required_mtu; + uint8_t required_rate; + ib_net16_t required_pkey; + uint8_t required_sl; + uint8_t required_pkt_life; + ib_net16_t dest_lid; + int hops = 0; + int in_port_num = 0; + uint8_t i; + osm_qos_level_t *p_qos_level = NULL; + uint16_t valid_sl_mask = 0xffff; + int extended, p0_extended; + + OSM_LOG_ENTER(sa->p_log); + + dest_lid = cl_hton16(dest_lid_ho); + + p_dest_physp = p_dest_alias_guid->p_base_port->p_physp; + p_physp = p_src_alias_guid->p_base_port->p_physp; + p_src_physp = p_physp; + p_pi = &p_physp->port_info; + + mtu = ib_port_info_get_mtu_cap(p_pi); + extended = 
p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS; + rate = ib_port_info_compute_rate(p_pi, extended); + + /* + Mellanox Tavor device performance is better using 1K MTU. + If required MTU and MTU selector are such that 1K is OK + and at least one end of the path is Tavor we override the + port MTU with 1K. + */ + if (sa->p_subn->opt.enable_quirks && + sa_multipath_rec_apply_tavor_mtu_limit(p_mpr, + p_src_alias_guid->p_base_port, + p_dest_alias_guid->p_base_port, + comp_mask)) + if (mtu > IB_MTU_LEN_1024) { + mtu = IB_MTU_LEN_1024; + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Optimized Path MTU to 1K for Mellanox Tavor device\n"); + } + + /* + Walk the subnet object from source to destination, + tracking the most restrictive rate and mtu values along the way... + + If source port node is a switch, then p_physp should + point to the port that routes the destination lid + */ + + p_node = osm_physp_get_node_ptr(p_physp); + + if (p_node->sw) { + /* + * Source node is a switch. + * Make sure that p_physp points to the out port of the + * switch that routes to the destination lid (dest_lid_ho) + */ + p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid); + if (p_physp == 0) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4514: " + "Can't find routing from LID %u to LID %u on " + "switch %s (GUID 0x%016" PRIx64 ")\n", + src_lid_ho, dest_lid_ho, p_node->print_desc, + cl_ntoh64(osm_node_get_node_guid(p_node))); + status = IB_NOT_FOUND; + goto Exit; + } + } + + if (sa->p_subn->opt.qos) { + + /* + * Whether this node is switch or CA, the IN port for + * the sl2vl table is 0, because this is a source node. 
+ */ + p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, 0); + + /* update valid SLs that still exist on this route */ + for (i = 0; i < IB_MAX_NUM_VLS; i++) { + if (valid_sl_mask & (1 << i) && + ib_slvl_table_get(p_slvl_tbl, i) == IB_DROP_VL) + valid_sl_mask &= ~(1 << i); + } + if (!valid_sl_mask) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "All the SLs lead to VL15 on this path\n"); + status = IB_NOT_FOUND; + goto Exit; + } + } + + /* + * Same as above + */ + p_node = osm_physp_get_node_ptr(p_dest_physp); + + if (p_node->sw) { + /* + * if destination is switch, we want p_dest_physp to point to port 0 + */ + p_dest_physp = + osm_switch_get_route_by_lid(p_node->sw, dest_lid); + + if (p_dest_physp == 0) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4515: " + "Can't find routing from LID %u to LID %u on " + "switch %s (GUID 0x%016" PRIx64 ")\n", + src_lid_ho, dest_lid_ho, p_node->print_desc, + cl_ntoh64(osm_node_get_node_guid(p_node))); + status = IB_NOT_FOUND; + goto Exit; + } + + } + + /* + * Now go through the path step by step + */ + + while (p_physp != p_dest_physp) { + + int tmp_pnum = p_physp->port_num; + p_node = osm_physp_get_node_ptr(p_physp); + p_physp = osm_physp_get_remote(p_physp); + + if (p_physp == 0) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4505: " + "Can't find remote phys port of %s (GUID " + "0x%016" PRIx64 ") port %d " + "while routing from LID %u to LID %u", + p_node->print_desc, + cl_ntoh64(osm_node_get_node_guid(p_node)), + tmp_pnum, src_lid_ho, dest_lid_ho); + status = IB_ERROR; + goto Exit; + } + + /* update number of hops traversed */ + hops++; + if (hops > MAX_HOPS) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4520: " + "Path from GUID 0x%016" PRIx64 " (%s) to" + " lid %u GUID 0x%016" PRIx64 " (%s) needs" + " more than %d hops, max %d hops allowed\n", + cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), + p_src_physp->p_node->print_desc, dest_lid_ho, + cl_ntoh64(osm_physp_get_port_guid + (p_dest_physp)), + p_dest_physp->p_node->print_desc, hops, + 
MAX_HOPS); + status = IB_NOT_FOUND; + goto Exit; + } + + in_port_num = osm_physp_get_port_num(p_physp); + + /* + This is point to point case (no switch in between) + */ + if (p_physp == p_dest_physp) + break; + + p_node = osm_physp_get_node_ptr(p_physp); + + if (!p_node->sw) { + /* + There is some sort of problem in the subnet object! + If this isn't a switch, we should have reached + the destination by now! + */ + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4503: " + "Internal error, bad path while routing " + "from %s (GUID: 0x%016"PRIx64") port %d " + "to %s (GUID: 0x%016"PRIx64") port %d; " + "ended at %s port %d\n", + p_src_alias_guid->p_base_port->p_node->print_desc, + cl_ntoh64(p_src_alias_guid->p_base_port->p_node->node_info.node_guid), + p_src_alias_guid->p_base_port->p_physp->port_num, + p_dest_alias_guid->p_base_port->p_node->print_desc, + cl_ntoh64(p_dest_alias_guid->p_base_port->p_node->node_info.node_guid), + p_dest_alias_guid->p_base_port->p_physp->port_num, + p_node->print_desc, + p_physp->port_num); + status = IB_ERROR; + goto Exit; + } + + /* + Check parameters for the ingress port in this switch. + */ + p_pi = &p_physp->port_info; + + if (mtu > ib_port_info_get_mtu_cap(p_pi)) + mtu = ib_port_info_get_mtu_cap(p_pi); + + p_physp0 = osm_node_get_physp_ptr((osm_node_t *)p_node, 0); + p_pi0 = &p_physp0->port_info; + p0_extended = p_pi0->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS; + p0_extended_rate = ib_port_info_compute_rate(p_pi, p0_extended); + if (ib_path_compare_rates(rate, p0_extended_rate) > 0) + rate = p0_extended_rate; + + /* + Continue with the egress port on this switch. 
+ */ + p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid); + if (p_physp == 0) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4516: " + "Dead end path on switch " + "%s (GUID: 0x%016"PRIx64") to LID %u\n", + p_node->print_desc, + cl_ntoh64(osm_node_get_node_guid(p_node)), + dest_lid_ho); + status = IB_ERROR; + goto Exit; + } + + p_pi = &p_physp->port_info; + + if (mtu > ib_port_info_get_mtu_cap(p_pi)) + mtu = ib_port_info_get_mtu_cap(p_pi); + + p0_extended_rate = ib_port_info_compute_rate(p_pi, p0_extended); + if (ib_path_compare_rates(rate, p0_extended_rate) > 0) + rate = p0_extended_rate; + + if (sa->p_subn->opt.qos) { + /* + * Check SL2VL table of the switch and update valid SLs + */ + p_slvl_tbl = + osm_physp_get_slvl_tbl(p_physp, in_port_num); + for (i = 0; i < IB_MAX_NUM_VLS; i++) { + if (valid_sl_mask & (1 << i) && + ib_slvl_table_get(p_slvl_tbl, + i) == IB_DROP_VL) + valid_sl_mask &= ~(1 << i); + } + if (!valid_sl_mask) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "All the SLs lead to VL15 " + "on this path\n"); + status = IB_NOT_FOUND; + goto Exit; + } + } + } + + /* + p_physp now points to the destination + */ + p_pi = &p_physp->port_info; + + if (mtu > ib_port_info_get_mtu_cap(p_pi)) + mtu = ib_port_info_get_mtu_cap(p_pi); + + extended = p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS; + dest_rate = ib_port_info_compute_rate(p_pi, extended); + if (ib_path_compare_rates(rate, dest_rate) > 0) + rate = dest_rate; + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Path min MTU = %u, min rate = %u\n", mtu, rate); + + /* + * Get QoS Level object according to the MultiPath request + * and adjust MultiPath parameters according to QoS settings + */ + if (sa->p_subn->opt.qos && sa->p_subn->p_qos_policy && + (p_qos_level = + osm_qos_policy_get_qos_level_by_mpr(sa->p_subn->p_qos_policy, + p_mpr, p_src_physp, + p_dest_physp, comp_mask))) { + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "MultiPathRecord request matches QoS Level '%s' (%s)\n", + p_qos_level->name, + p_qos_level->use 
? p_qos_level->use : "no description"); + + if (p_qos_level->mtu_limit_set + && (mtu > p_qos_level->mtu_limit)) + mtu = p_qos_level->mtu_limit; + + if (p_qos_level->rate_limit_set + && (ib_path_compare_rates(rate, p_qos_level->rate_limit) > 0)) + rate = p_qos_level->rate_limit; + + if (p_qos_level->sl_set) { + required_sl = p_qos_level->sl; + if (!(valid_sl_mask & (1 << required_sl))) { + status = IB_NOT_FOUND; + goto Exit; + } + } + } + + /* + Determine if these values meet the user criteria + */ + + /* we silently ignore cases where only the MTU selector is defined */ + if ((comp_mask & IB_MPR_COMPMASK_MTUSELEC) && + (comp_mask & IB_MPR_COMPMASK_MTU)) { + required_mtu = ib_multipath_rec_mtu(p_mpr); + switch (ib_multipath_rec_mtu_sel(p_mpr)) { + case 0: /* must be greater than */ + if (mtu <= required_mtu) + status = IB_NOT_FOUND; + break; + + case 1: /* must be less than */ + if (mtu >= required_mtu) { + /* adjust to use the highest mtu + lower then the required one */ + if (required_mtu > 1) + mtu = required_mtu - 1; + else + status = IB_NOT_FOUND; + } + break; + + case 2: /* exact match */ + if (mtu < required_mtu) + status = IB_NOT_FOUND; + else + mtu = required_mtu; + break; + + case 3: /* largest available */ + /* can't be disqualified by this one */ + break; + + default: + /* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */ + CL_ASSERT(FALSE); + status = IB_ERROR; + break; + } + } + if (status != IB_SUCCESS) + goto Exit; + + /* we silently ignore cases where only the Rate selector is defined */ + if ((comp_mask & IB_MPR_COMPMASK_RATESELEC) && + (comp_mask & IB_MPR_COMPMASK_RATE)) { + required_rate = ib_multipath_rec_rate(p_mpr); + switch (ib_multipath_rec_rate_sel(p_mpr)) { + case 0: /* must be greater than */ + if (ib_path_compare_rates(rate, required_rate) <= 0) + status = IB_NOT_FOUND; + break; + + case 1: /* must be less than */ + if (ib_path_compare_rates(rate, required_rate) >= 0) { + /* adjust the rate to use the highest rate + lower 
then the required one */ + rate = ib_path_rate_get_prev(required_rate); + if (!rate) + status = IB_NOT_FOUND; + } + break; + + case 2: /* exact match */ + if (ib_path_compare_rates(rate, required_rate)) + status = IB_NOT_FOUND; + else + rate = required_rate; + break; + + case 3: /* largest available */ + /* can't be disqualified by this one */ + break; + + default: + /* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */ + CL_ASSERT(FALSE); + status = IB_ERROR; + break; + } + } + if (status != IB_SUCCESS) + goto Exit; + + /* Verify the pkt_life_time */ + /* According to spec definition IBA 1.2 Table 205 PacketLifeTime description, + for loopback paths, packetLifeTime shall be zero. */ + if (p_src_alias_guid->p_base_port == p_dest_alias_guid->p_base_port) + pkt_life = 0; /* loopback */ + else if (p_qos_level && p_qos_level->pkt_life_set) + pkt_life = p_qos_level->pkt_life; + else + pkt_life = sa->p_subn->opt.subnet_timeout; + + /* we silently ignore cases where only the PktLife selector is defined */ + if ((comp_mask & IB_MPR_COMPMASK_PKTLIFETIMESELEC) && + (comp_mask & IB_MPR_COMPMASK_PKTLIFETIME)) { + required_pkt_life = ib_multipath_rec_pkt_life(p_mpr); + switch (ib_multipath_rec_pkt_life_sel(p_mpr)) { + case 0: /* must be greater than */ + if (pkt_life <= required_pkt_life) + status = IB_NOT_FOUND; + break; + + case 1: /* must be less than */ + if (pkt_life >= required_pkt_life) { + /* adjust the lifetime to use the highest possible + lower then the required one */ + if (required_pkt_life > 1) + pkt_life = required_pkt_life - 1; + else + status = IB_NOT_FOUND; + } + break; + + case 2: /* exact match */ + if (pkt_life < required_pkt_life) + status = IB_NOT_FOUND; + else + pkt_life = required_pkt_life; + break; + + case 3: /* smallest available */ + /* can't be disqualified by this one */ + break; + + default: + /* if we're here, there's a bug in ib_path_rec_pkt_life_sel() */ + CL_ASSERT(FALSE); + status = IB_ERROR; + break; + } + } + + if (status != 
IB_SUCCESS) + goto Exit; + + /* + * set Pkey for this MultiPath record request + */ + + if (comp_mask & IB_MPR_COMPMASK_RAWTRAFFIC && + cl_ntoh32(p_mpr->hop_flow_raw) & (1 << 31)) + required_pkey = + osm_physp_find_common_pkey(p_src_physp, p_dest_physp, + sa->p_subn->opt.allow_both_pkeys); + + else if (comp_mask & IB_MPR_COMPMASK_PKEY) { + /* + * MPR request has a specific pkey: + * Check that source and destination share this pkey. + * If QoS level has pkeys, check that this pkey exists + * in the QoS level pkeys. + * MPR returned pkey is the requested pkey. + */ + required_pkey = p_mpr->pkey; + if (!osm_physp_share_this_pkey + (p_src_physp, p_dest_physp, required_pkey, + sa->p_subn->opt.allow_both_pkeys)) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4518: " + "Ports src 0x%016"PRIx64" (%s port %d) " + "and dst 0x%016"PRIx64" (%s port %d) " + "do not share the specified PKey 0x%04x\n", + cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), + p_src_physp->p_node->print_desc, + p_src_physp->port_num, + cl_ntoh64(osm_physp_get_port_guid + (p_dest_physp)), + p_dest_physp->p_node->print_desc, + p_dest_physp->port_num, + cl_ntoh16(required_pkey)); + status = IB_NOT_FOUND; + goto Exit; + } + if (p_qos_level && p_qos_level->pkey_range_len && + !osm_qos_level_has_pkey(p_qos_level, required_pkey)) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451C: " + "Ports src 0x%016"PRIx64" (%s port %d) " + "and dst 0x%016"PRIx64" (%s port %d) " + "do not share specified PKey (0x%04x) as " + "defined by QoS level \"%s\"\n", + cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), + p_src_physp->p_node->print_desc, + p_src_physp->port_num, + cl_ntoh64(osm_physp_get_port_guid + (p_dest_physp)), + p_dest_physp->p_node->print_desc, + p_dest_physp->port_num, + cl_ntoh16(required_pkey), + p_qos_level->name); + status = IB_NOT_FOUND; + goto Exit; + } + + } else if (p_qos_level && p_qos_level->pkey_range_len) { + /* + * MPR request doesn't have a specific pkey, but QoS level + * has pkeys - get shared pkey 
from QoS level pkeys + */ + required_pkey = osm_qos_level_get_shared_pkey(p_qos_level, + p_src_physp, + p_dest_physp, + sa->p_subn->opt.allow_both_pkeys); + if (!required_pkey) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451D: " + "Ports src 0x%016"PRIx64" (%s port %d) " + "and dst 0x%016"PRIx64" (%s port %d) " + "do not share a PKey as defined by QoS " + "level \"%s\"\n", + cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), + p_src_physp->p_node->print_desc, + p_src_physp->port_num, + cl_ntoh64(osm_physp_get_port_guid + (p_dest_physp)), + p_dest_physp->p_node->print_desc, + p_dest_physp->port_num, + p_qos_level->name); + status = IB_NOT_FOUND; + goto Exit; + } + + } else { + /* + * Neither MPR request nor QoS level have pkey. + * Just get any shared pkey. + */ + required_pkey = + osm_physp_find_common_pkey(p_src_physp, p_dest_physp, + sa->p_subn->opt.allow_both_pkeys); + if (!required_pkey) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4519: " + "Ports src 0x%016"PRIx64" (%s port %d) " + "and dst 0x%016"PRIx64" (%s port %d) " + "do not have any shared PKeys\n", + cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), + p_src_physp->p_node->print_desc, + p_src_physp->port_num, + cl_ntoh64(osm_physp_get_port_guid + (p_dest_physp)), + p_dest_physp->p_node->print_desc, + p_dest_physp->port_num); + status = IB_NOT_FOUND; + goto Exit; + } + } + + if (required_pkey) { + p_prtn = + (osm_prtn_t *) cl_qmap_get(&sa->p_subn->prtn_pkey_tbl, + required_pkey & + cl_ntoh16((uint16_t) ~ 0x8000)); + if (p_prtn == + (osm_prtn_t *) cl_qmap_end(&sa->p_subn->prtn_pkey_tbl)) + p_prtn = NULL; + } + + /* + * Set MultiPathRecord SL. 
+ */ + + if (comp_mask & IB_MPR_COMPMASK_SL) { + /* + * Specific SL was requested + */ + required_sl = ib_multipath_rec_sl(p_mpr); + + if (p_qos_level && p_qos_level->sl_set && + p_qos_level->sl != required_sl) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451E: " + "QoS constraints: required MultiPathRecord SL " + "(%u) doesn't match QoS policy \"%s\" SL (%u) " + "[%s port %d <-> %s port %d]\n", required_sl, + p_qos_level->name, + p_qos_level->sl, + p_src_alias_guid->p_base_port->p_node->print_desc, + p_src_alias_guid->p_base_port->p_physp->port_num, + p_dest_alias_guid->p_base_port->p_node->print_desc, + p_dest_alias_guid->p_base_port->p_physp->port_num); + status = IB_NOT_FOUND; + goto Exit; + } + + } else if (p_qos_level && p_qos_level->sl_set) { + /* + * No specific SL was requested, + * but there is an SL in QoS level. + */ + required_sl = p_qos_level->sl; + + if (required_pkey && p_prtn && p_prtn->sl != p_qos_level->sl) + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "QoS level SL (%u) overrides partition SL (%u)\n", + p_qos_level->sl, p_prtn->sl); + + } else if (required_pkey) { + /* + * No specific SL in request or in QoS level - use partition SL + */ + p_prtn = + (osm_prtn_t *) cl_qmap_get(&sa->p_subn->prtn_pkey_tbl, + required_pkey & + cl_ntoh16((uint16_t) ~ 0x8000)); + if (!p_prtn) { + required_sl = OSM_DEFAULT_SL; + /* this may be possible when pkey tables are created somehow in + previous runs or things are going wrong here */ + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451A: " + "No partition found for PKey 0x%04x - " + "using default SL %d " + "[%s port %d <-> %s port %d]\n", + cl_ntoh16(required_pkey), required_sl, + p_src_alias_guid->p_base_port->p_node->print_desc, + p_src_alias_guid->p_base_port->p_physp->port_num, + p_dest_alias_guid->p_base_port->p_node->print_desc, + p_dest_alias_guid->p_base_port->p_physp->port_num); + } else + required_sl = p_prtn->sl; + + } else if (sa->p_subn->opt.qos) { + if (valid_sl_mask & (1 << OSM_DEFAULT_SL)) + required_sl = 
OSM_DEFAULT_SL; + else { + for (i = 0; i < IB_MAX_NUM_VLS; i++) + if (valid_sl_mask & (1 << i)) + break; + required_sl = i; + } + } else + required_sl = OSM_DEFAULT_SL; + + if (sa->p_subn->opt.qos && !(valid_sl_mask & (1 << required_sl))) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451F: " + "Selected SL (%u) leads to VL15 " + "[%s port %d <-> %s port %d]\n", + required_sl, + p_src_alias_guid->p_base_port->p_node->print_desc, + p_src_alias_guid->p_base_port->p_physp->port_num, + p_dest_alias_guid->p_base_port->p_node->print_desc, + p_dest_alias_guid->p_base_port->p_physp->port_num); + status = IB_NOT_FOUND; + goto Exit; + } + + /* reset pkey when raw traffic */ + if (comp_mask & IB_MPR_COMPMASK_RAWTRAFFIC && + cl_ntoh32(p_mpr->hop_flow_raw) & (1 << 31)) + required_pkey = 0; + + p_parms->mtu = mtu; + p_parms->rate = rate; + p_parms->pkey = required_pkey; + p_parms->pkt_life = pkt_life; + p_parms->sl = required_sl; + p_parms->hops = hops; + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "MultiPath params:" + " mtu = %u, rate = %u, packet lifetime = %u," + " pkey = 0x%04X, sl = %u, hops = %u\n", mtu, rate, + pkt_life, cl_ntoh16(required_pkey), required_sl, hops); + +Exit: + OSM_LOG_EXIT(sa->p_log); + return status; +}
+
+/*
+ * Fill the path record *p_pr for one source/destination LID pair.
+ *
+ * sa                - SA object; supplies the log and the subnet options.
+ * p_src_alias_guid  - source endpoint (alias GUID and its base port).
+ * p_dest_alias_guid - destination endpoint.
+ * src_lid_ho        - source LID, host order (stored with cl_hton16).
+ * dest_lid_ho       - destination LID, host order (stored with cl_hton16).
+ * preference        - value copied into p_pr->preference.
+ * p_parms           - path parameters (mtu, rate, pkey, sl, pkt_life,
+ *                     reversible) computed by the caller.
+ * p_pr              - record to fill in; fields not assigned here are left
+ *                     as the caller provided them.
+ *
+ * The mtu, rate and pkt_life bytes are written with bit 0x80 set
+ * (presumably the selector encoding - confirm against ib_path_rec_t).
+ */
+static void mpr_rcv_build_pr(IN osm_sa_t * sa,
+			     IN const osm_alias_guid_t * p_src_alias_guid,
+			     IN const osm_alias_guid_t * p_dest_alias_guid,
+			     IN uint16_t src_lid_ho, IN uint16_t dest_lid_ho,
+			     IN uint8_t preference,
+			     IN const osm_path_parms_t * p_parms,
+			     OUT ib_path_rec_t * p_pr)
+{
+	const osm_physp_t *p_sphysp = p_src_alias_guid->p_base_port->p_physp;
+	const osm_physp_t *p_dphysp = p_dest_alias_guid->p_base_port->p_physp;
+	uint8_t path_rate = p_parms->rate;
+	uint8_t capped_rate;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	/* GIDs are the port's subnet prefix plus the requested alias GUID */
+	p_pr->sgid.unicast.prefix = osm_physp_get_subnet_prefix(p_sphysp);
+	p_pr->sgid.unicast.interface_id = p_src_alias_guid->alias_guid;
+	p_pr->dgid.unicast.prefix = osm_physp_get_subnet_prefix(p_dphysp);
+	p_pr->dgid.unicast.interface_id = p_dest_alias_guid->alias_guid;
+
+	p_pr->slid = cl_hton16(src_lid_ho);
+	p_pr->dlid = cl_hton16(dest_lid_ho);
+
+	/* keep only the top bit of hop_flow_raw */
+	p_pr->hop_flow_raw &= cl_hton32(1 << 31);
+
+	p_pr->pkey = p_parms->pkey;
+	ib_path_rec_set_qos_class(p_pr, 0);
+	ib_path_rec_set_sl(p_pr, p_parms->sl);
+	p_pr->mtu = (uint8_t) (p_parms->mtu | 0x80);
+
+	if (sa->p_subn->opt.use_original_extended_sa_rates_only) {
+		/* configuration asks for the original extended rates only */
+		capped_rate = ib_path_rate_max_12xedr(path_rate);
+		if (capped_rate != path_rate)
+			OSM_LOG(sa->p_log, OSM_LOG_VERBOSE,
+				"Rate decreased from %u to %u\n",
+				path_rate, capped_rate);
+		path_rate = capped_rate;
+	} else if (path_rate >= IB_PATH_RECORD_RATE_28_GBS) {
+		/*
+		 * One of the new 2x or HDR rates: make sure the source port
+		 * (and the destination port for a reversible path)
+		 * supports it.
+		 */
+		path_rate = ib_path_rate_2x_hdr_fixups(&p_sphysp->port_info,
+						       path_rate);
+		if (p_parms->reversible)
+			path_rate =
+			    ib_path_rate_2x_hdr_fixups(&p_dphysp->port_info,
+						       path_rate);
+	}
+	p_pr->rate = (uint8_t) (path_rate | 0x80);
+
+	/*
+	 * Per the 1.2 spec, Table 205 (PacketLifeTime description), the
+	 * packet lifetime of a loopback path shall be zero.
+	 */
+	p_pr->pkt_life =
+	    (p_src_alias_guid->p_base_port == p_dest_alias_guid->p_base_port) ?
+	    0x80 : (uint8_t) (p_parms->pkt_life | 0x80);
+
+	p_pr->preference = preference;
+
+	/* num_path stays 0; only the reversible bit is reported */
+	if (p_parms->reversible)
+		p_pr->num_path = 0x80;
+
+	OSM_LOG_EXIT(sa->p_log);
+}
+
+/*
+ * Build the response item for a single (src LID, dest LID) pair.
+ *
+ * Allocates a zeroed SA_MPR_RESP_SIZE item, computes the forward path
+ * parameters and then the parameters of the reverse direction; the
+ * forward path is flagged reversible when the reverse computation
+ * succeeds.
+ *
+ * Returns the new item (caller owns it), or NULL when the allocation
+ * fails, when no forward path satisfies the request, or when the request
+ * demands a reversible path and none exists.
+ */
+static osm_sa_item_t *mpr_rcv_get_lid_pair_path(IN osm_sa_t * sa,
+						IN const ib_multipath_rec_t * p_mpr,
+						IN const osm_alias_guid_t * p_src_alias_guid,
+						IN const osm_alias_guid_t * p_dest_alias_guid,
+						IN const uint16_t src_lid_ho,
+						IN const uint16_t dest_lid_ho,
+						IN const ib_net64_t comp_mask,
+						IN const uint8_t preference)
+{
+	osm_path_parms_t fwd_parms;
+	osm_path_parms_t rev_parms;
+	osm_sa_item_t *p_item;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src LID %u, Dest LID %u\n",
+		src_lid_ho, dest_lid_ho);
+
+	p_item = calloc(1, SA_MPR_RESP_SIZE);
+	if (!p_item) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4501: "
+			"Unable to allocate path record\n");
+		goto Exit;
+	}
+
+	/* forward direction must exist, otherwise there is nothing to report */
+	if (mpr_rcv_get_path_parms(sa, p_mpr, p_src_alias_guid,
+				   p_dest_alias_guid,
+				   src_lid_ho, dest_lid_ho,
+				   comp_mask, &fwd_parms) != IB_SUCCESS)
+		goto Discard;
+
+	/* the path is reversible when the opposite direction resolves too */
+	fwd_parms.reversible =
+	    (mpr_rcv_get_path_parms(sa, p_mpr, p_dest_alias_guid,
+				    p_src_alias_guid,
+				    dest_lid_ho, src_lid_ho,
+				    comp_mask, &rev_parms) == IB_SUCCESS);
+
+	/*
+	 * A reversible component of 0 is a "don't care", not a demand for
+	 * a non-reversible path (see Vol1 Ver1.2 p900 l16), so only a
+	 * request with the reversible bit set can be rejected here.
+	 */
+	if ((comp_mask & IB_MPR_COMPMASK_REVERSIBLE) &&
+	    (p_mpr->num_path & 0x80) && !fwd_parms.reversible) {
+		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+			"Requested reversible path but failed to get one\n");
+		goto Discard;
+	}
+
+	p_item->resp.mpr_rec.p_src_port = p_src_alias_guid->p_base_port;
+	p_item->resp.mpr_rec.p_dest_port = p_dest_alias_guid->p_base_port;
+	p_item->resp.mpr_rec.hops = fwd_parms.hops;
+
+	mpr_rcv_build_pr(sa, p_src_alias_guid, p_dest_alias_guid, src_lid_ho,
+			 dest_lid_ho, preference, &fwd_parms,
+			 &p_item->resp.mpr_rec.path_rec);
+	goto Exit;
+
+Discard:
+	free(p_item);
+	p_item = NULL;
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+	return p_item;
+}
+
+static uint32_t mpr_rcv_get_port_pair_paths(IN osm_sa_t * sa, + IN const ib_multipath_rec_t * p_mpr, + IN const osm_port_t * p_req_port, + IN const osm_alias_guid_t * p_src_alias_guid, + IN const osm_alias_guid_t * p_dest_alias_guid, + IN const uint32_t rem_paths, + IN const ib_net64_t comp_mask, + IN cl_qlist_t * p_list) +{ + osm_sa_item_t *p_pr_item; + uint16_t src_lid_min_ho; + uint16_t src_lid_max_ho; + uint16_t dest_lid_min_ho; + uint16_t dest_lid_max_ho; + uint16_t src_lid_ho; + uint16_t dest_lid_ho; + uint32_t path_num = 0; + uint8_t preference; + unsigned src_offset, dest_offset; + + OSM_LOG_ENTER(sa->p_log); + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Src port 0x%016" PRIx64 ", Dst port 0x%016" PRIx64 "\n", + cl_ntoh64(p_src_alias_guid->alias_guid), + cl_ntoh64(p_dest_alias_guid->alias_guid)); + + /* Check that the req_port, src_port and dest_port all share a + pkey. The check is done on the default physical port of the ports. 
*/ + if (osm_port_share_pkey(sa->p_log, p_req_port, + p_src_alias_guid->p_base_port, + sa->p_subn->opt.allow_both_pkeys) == FALSE + || osm_port_share_pkey(sa->p_log, p_req_port, + p_dest_alias_guid->p_base_port, + sa->p_subn->opt.allow_both_pkeys) == FALSE + || osm_port_share_pkey(sa->p_log, p_src_alias_guid->p_base_port, + p_dest_alias_guid->p_base_port, + sa->p_subn->opt.allow_both_pkeys) == FALSE) + /* One of the pairs doesn't share a pkey so the path is disqualified. */ + goto Exit; + + /* + We shouldn't be here if the paths are disqualified in some way... + Thus, we assume every possible connection is valid. + + We desire to return high-quality paths first. + In OpenSM, higher quality means least overlap with other paths. + This is achieved in practice by returning paths with + different LID value on each end, which means these + paths are more redundant than paths with the same LID repeated + on one side. For example, in OpenSM the paths between two + endpoints with LMC = 1 might be as follows: + + Port A, LID 1 <-> Port B, LID 3 + Port A, LID 1 <-> Port B, LID 4 + Port A, LID 2 <-> Port B, LID 3 + Port A, LID 2 <-> Port B, LID 4 + + The OpenSM unicast routing algorithms attempt to disperse each path + to as varied a physical path as is reasonable. 1<->3 and 1<->4 have + more physical overlap (hence less redundancy) than 1<->3 and 2<->4. + + OpenSM ranks paths in three preference groups: + + Preference Value Description + ---------------- ------------------------------------------- + 0 Redundant in both directions with other + pref value = 0 paths + + 1 Redundant in one direction with other + pref value = 0 and pref value = 1 paths + + 2 Not redundant in either direction with + other paths + + 3-FF Unused + + SA clients don't need to know these details, only that the lower + preference paths are preferred, as stated in the spec. 
The paths + may not actually be physically redundant depending on the topology + of the subnet, but the point of LMC > 0 is to offer redundancy, + so I assume the subnet is physically appropriate for the specified + LMC value. A more advanced implementation could inspect for physical + redundancy, but I'm not going to bother with that now. + */ + + osm_port_get_lid_range_ho(p_src_alias_guid->p_base_port, + &src_lid_min_ho, &src_lid_max_ho); + osm_port_get_lid_range_ho(p_dest_alias_guid->p_base_port, + &dest_lid_min_ho, &dest_lid_max_ho); + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src LID [%u-%u], Dest LID [%u-%u]\n", + src_lid_min_ho, src_lid_max_ho, + dest_lid_min_ho, dest_lid_max_ho); + + src_lid_ho = src_lid_min_ho; + dest_lid_ho = dest_lid_min_ho; + + /* + Preferred paths come first in OpenSM + */ + preference = 0; + + while (path_num < rem_paths) { + /* + These paths are "fully redundant" + */ + p_pr_item = mpr_rcv_get_lid_pair_path(sa, p_mpr, + p_src_alias_guid, + p_dest_alias_guid, + src_lid_ho, dest_lid_ho, + comp_mask, preference); + + if (p_pr_item) { + cl_qlist_insert_tail(p_list, &p_pr_item->list_item); + ++path_num; + } + + if (++src_lid_ho > src_lid_max_ho) + break; + + if (++dest_lid_ho > dest_lid_max_ho) + break; + } + + /* + Check if we've accumulated all the paths that the user cares to see + */ + if (path_num == rem_paths) + goto Exit; + + /* + Don't bother reporting preference 1 paths for now. + It's more trouble than it's worth and can only occur + if ports have different LMC values, which isn't supported + by OpenSM right now anyway. 
+ */ + preference = 2; + src_lid_ho = src_lid_min_ho; + dest_lid_ho = dest_lid_min_ho; + src_offset = 0; + dest_offset = 0; + + /* + Iterate over the remaining paths + */ + while (path_num < rem_paths) { + dest_offset++; + dest_lid_ho++; + + if (dest_lid_ho > dest_lid_max_ho) { + src_offset++; + src_lid_ho++; + + if (src_lid_ho > src_lid_max_ho) + break; /* done */ + + dest_offset = 0; + dest_lid_ho = dest_lid_min_ho; + } + + /* + These paths are "fully non-redundant" with paths already + identified above and consequently not of much value. + + Don't return paths we already identified above, as indicated + by the offset values being equal. + */ + if (src_offset == dest_offset) + continue; /* already reported */ + + p_pr_item = mpr_rcv_get_lid_pair_path(sa, p_mpr, + p_src_alias_guid, + p_dest_alias_guid, + src_lid_ho, dest_lid_ho, + comp_mask, preference); + + if (p_pr_item) { + cl_qlist_insert_tail(p_list, &p_pr_item->list_item); + ++path_num; + } + } + +Exit: + OSM_LOG_EXIT(sa->p_log); + return path_num; +}
+
+#undef min
+#define min(x,y) (((x) < (y)) ? (x) : (y))
+
+/*
+ * Find one path between two ports for an APM request.
+ *
+ * The starting LID on each side is the port's lowest LID plus base_offs
+ * reduced modulo the number of LIDs of that port.  Source and destination
+ * LIDs are then advanced together, wrapping around inside each port's LID
+ * range, for at most min(#src LIDs, #dest LIDs) attempts.
+ *
+ * Returns the first item produced by mpr_rcv_get_lid_pair_path(), or NULL
+ * when no attempt yields a path.  p_list is accepted but not touched here;
+ * the caller decides which returned items get queued.
+ */
+static osm_sa_item_t *mpr_rcv_get_apm_port_pair_paths(IN osm_sa_t * sa,
+						      IN const ib_multipath_rec_t * p_mpr,
+						      IN const osm_alias_guid_t * p_src_alias_guid,
+						      IN const osm_alias_guid_t * p_dest_alias_guid,
+						      IN int base_offs,
+						      IN const ib_net64_t comp_mask,
+						      IN cl_qlist_t * p_list)
+{
+	osm_sa_item_t *p_found = NULL;
+	uint16_t slid_first, slid_last, slid;
+	uint16_t dlid_first, dlid_last, dlid;
+	unsigned tries;
+	int n_slids, n_dlids;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src port 0x%016" PRIx64 ", "
+		"Dst port 0x%016" PRIx64 ", base offs %d\n",
+		cl_ntoh64(p_src_alias_guid->alias_guid),
+		cl_ntoh64(p_dest_alias_guid->alias_guid),
+		base_offs);
+
+	osm_port_get_lid_range_ho(p_src_alias_guid->p_base_port,
+				  &slid_first, &slid_last);
+	osm_port_get_lid_range_ho(p_dest_alias_guid->p_base_port,
+				  &dlid_first, &dlid_last);
+
+	n_slids = slid_last - slid_first + 1;
+	n_dlids = dlid_last - dlid_first + 1;
+
+	/* hash the requested offset into each port's LID range */
+	slid = (uint16_t) (slid_first + base_offs % n_slids);
+	dlid = (uint16_t) (dlid_first + base_offs % n_dlids);
+
+	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+		"Src LIDs [%u-%u] hashed %u, "
+		"Dest LIDs [%u-%u] hashed %u\n",
+		slid_first, slid_last, slid,
+		dlid_first, dlid_last, dlid);
+
+	for (tries = min(n_slids, n_dlids); tries != 0; tries--) {
+		/* same offset on both ends: "fully redundant" candidates */
+		p_found = mpr_rcv_get_lid_pair_path(sa, p_mpr,
+						    p_src_alias_guid,
+						    p_dest_alias_guid,
+						    slid, dlid,
+						    comp_mask, 0);
+		if (p_found) {
+			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+				"Found matching path from Src LID %u to Dest LID %u with %d hops\n",
+				slid, dlid, p_found->resp.mpr_rec.hops);
+			break;
+		}
+
+		/* step both LIDs, wrapping to the start of the range */
+		slid++;
+		if (slid > slid_last)
+			slid = slid_first;
+
+		dlid++;
+		if (dlid > dlid_last)
+			dlid = dlid_first;
+	}
+
+	OSM_LOG_EXIT(sa->p_log);
+	return p_found;
+}
+
+/*
+ * Resolve an array of GIDs from a MultiPathRecord request.
+ *
+ * gids          - first GID to resolve.
+ * ngids         - number of GIDs to process.
+ * is_sgid       - non-zero when the GIDs are source GIDs; a multicast
+ *                 GID is then rejected.
+ * pp_alias_guid - receives, at index i, the alias GUID object found for
+ *                 the i-th GID.
+ *
+ * A GID that is not link local must carry the subnet prefix configured
+ * in sa->p_subn->opt.subnet_prefix.  Processing stops at the first bad
+ * or unknown GID and IB_SA_MAD_STATUS_INVALID_GID is returned; entries
+ * already stored in pp_alias_guid are left in place.  IB_SUCCESS is
+ * returned when every GID was resolved.
+ */
+static ib_net16_t mpr_rcv_get_gids(IN osm_sa_t * sa, IN const ib_gid_t * gids,
+				   IN int ngids, IN int is_sgid,
+				   OUT osm_alias_guid_t ** pp_alias_guid)
+{
+	ib_net16_t result = IB_SUCCESS;
+	int idx;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	for (idx = 0; idx < ngids; idx++) {
+		const ib_gid_t *p_gid = &gids[idx];
+		osm_alias_guid_t *p_match;
+
+		if (!ib_gid_is_link_local(p_gid) &&
+		    ((is_sgid && ib_gid_is_multicast(p_gid)) ||
+		     (ib_gid_get_subnet_prefix(p_gid) !=
+		      sa->p_subn->opt.subnet_prefix))) {
+			/*
+			 * The client sent a bad GID; this is not a fault of
+			 * ours, so it is reported at verbose level and an
+			 * error status goes back to the client.
+			 */
+			OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "ERR 451B: "
+				"%sGID 0x%016" PRIx64
+				" is multicast or non local subnet prefix\n",
+				is_sgid ? "S" : "D",
+				cl_ntoh64(p_gid->unicast.prefix));
+
+			result = IB_SA_MAD_STATUS_INVALID_GID;
+			break;
+		}
+
+		p_match = osm_get_alias_guid_by_guid(sa->p_subn,
+						     p_gid->unicast.interface_id);
+		if (!p_match) {
+			/* unknown GUID: answer the client with an error status */
+			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4506: "
+				"No port with GUID 0x%016" PRIx64 "\n",
+				cl_ntoh64(p_gid->unicast.interface_id));
+
+			result = IB_SA_MAD_STATUS_INVALID_GID;
+			break;
+		}
+
+		pp_alias_guid[idx] = p_match;
+	}
+
+	OSM_LOG_EXIT(sa->p_log);
+
+	return result;
+}
+
+static ib_net16_t mpr_rcv_get_end_points(IN osm_sa_t * sa, + IN const osm_madw_t * p_madw, + OUT osm_alias_guid_t ** pp_alias_guids, + OUT int *nsrc, OUT int *ndest) +{ + const ib_multipath_rec_t *p_mpr; + const ib_sa_mad_t *p_sa_mad; + ib_net64_t comp_mask; + ib_net16_t sa_status = IB_SA_MAD_STATUS_SUCCESS; + ib_gid_t *gids; + + OSM_LOG_ENTER(sa->p_log); + + /* + Determine what fields are valid and then get a pointer + to the source and destination port objects, if possible. + */ + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); + p_mpr = (ib_multipath_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); + gids = (ib_gid_t *) p_mpr->gids; + + comp_mask = p_sa_mad->comp_mask; + + /* + Check a few easy disqualifying cases up front before getting + into the endpoints. 
+	 */
+	*nsrc = *ndest = 0;
+
+	if (comp_mask & IB_MPR_COMPMASK_SGIDCOUNT) {
+		*nsrc = p_mpr->sgid_count;
+		if (*nsrc > IB_MULTIPATH_MAX_GIDS)
+			*nsrc = IB_MULTIPATH_MAX_GIDS;
+		sa_status = mpr_rcv_get_gids(sa, gids, *nsrc, 1, pp_alias_guids);
+		if (sa_status != IB_SUCCESS)
+			goto Exit;
+	}
+
+	if (comp_mask & IB_MPR_COMPMASK_DGIDCOUNT) {
+		*ndest = p_mpr->dgid_count;
+		if (*ndest + *nsrc > IB_MULTIPATH_MAX_GIDS)
+			*ndest = IB_MULTIPATH_MAX_GIDS - *nsrc;
+		sa_status =
+		    mpr_rcv_get_gids(sa, gids + *nsrc, *ndest, 0,
+				     pp_alias_guids + *nsrc);
+	}
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+	return sa_status;
+}
+
+/*
+ * Fold two LIDs into a small integer in the range [0, 102].
+ * Both LIDs are first shifted right by lmc; the first one is then shifted
+ * left by 4 bits and OR-ed with the second.  The result is order dependent,
+ * so callers that need a direction-independent value must pass the LIDs in
+ * a canonical order (mpr_rcv_get_apm_paths passes the smaller one first).
+ */
+#define hash_lids(a, b, lmc) \
+	(((((a) >> (lmc)) << 4) | ((b) >> (lmc))) % 103)
+
+/*
+ * Build the reply for an APM style request (2 sources, 2 destinations,
+ * 2 paths).
+ *
+ * sa              - SA object (log, subnet options).
+ * p_mpr           - the received MultiPathRecord.
+ * p_req_port      - requester port; not used by this function.
+ * _pp_alias_guids - four entries: [0],[1] are the sources and [2],[3] the
+ *                   destinations, in request order.
+ * comp_mask       - component mask of the request.
+ * p_list          - list that receives the selected path items.
+ *
+ * One candidate is requested from mpr_rcv_get_apm_port_pair_paths() for
+ * each of the four source/destination pairs.  The two pairs of one diagonal
+ * of that 2x2 matrix are appended to p_list and the two candidates of the
+ * other diagonal are freed.
+ */
+static void mpr_rcv_get_apm_paths(IN osm_sa_t * sa,
+				  IN const ib_multipath_rec_t * p_mpr,
+				  IN const osm_port_t * p_req_port,
+				  IN osm_alias_guid_t ** _pp_alias_guids,
+				  IN const ib_net64_t comp_mask,
+				  IN cl_qlist_t * p_list)
+{
+	osm_alias_guid_t *pp_alias_guids[4];
+	osm_sa_item_t *matrix[2][2];
+	int base_offs, src_lid_ho, dest_lid_ho;
+	int sumA, sumB, minA, minB;
+	int cntA, cntB;
+	int use_diag_a;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	/*
+	 * We want to:
+	 * 1. use different lid offsets (from base) for the resultant paths
+	 *    to increase the probability of redundant paths or in case
+	 *    of Clos - to ensure it (different offset => different spine!)
+	 * 2. keep consistent paths no matter of direction and order of ports
+	 * 3. distribute the lid offsets to balance the load
+	 * So, we sort the ports (within the srcs, and within the dests),
+	 * hash the lids of S0, D0 (after the sort), and call mpr_rcv_get_apm_port_pair_paths
+	 * with base_lid for S0, D0 and base_lid + 1 for S1, D1. This way we will get
+	 * always the same offsets - order independent, and make sure different spines are used.
+	 * Note that the diagonals on a Clos have the same number of hops, so it doesn't
+	 * really matter which diagonal we use.
+	 */
+	if (_pp_alias_guids[0]->p_base_port->guid <
+	    _pp_alias_guids[1]->p_base_port->guid) {
+		pp_alias_guids[0] = _pp_alias_guids[0];
+		pp_alias_guids[1] = _pp_alias_guids[1];
+	} else {
+		pp_alias_guids[0] = _pp_alias_guids[1];
+		pp_alias_guids[1] = _pp_alias_guids[0];
+	}
+	if (_pp_alias_guids[2]->p_base_port->guid <
+	    _pp_alias_guids[3]->p_base_port->guid) {
+		pp_alias_guids[2] = _pp_alias_guids[2];
+		pp_alias_guids[3] = _pp_alias_guids[3];
+	} else {
+		pp_alias_guids[2] = _pp_alias_guids[3];
+		pp_alias_guids[3] = _pp_alias_guids[2];
+	}
+
+	src_lid_ho = osm_port_get_base_lid(pp_alias_guids[0]->p_base_port);
+	dest_lid_ho = osm_port_get_base_lid(pp_alias_guids[2]->p_base_port);
+
+	/* smaller LID first, so that the hash does not depend on direction */
+	base_offs = src_lid_ho < dest_lid_ho ?
+	    hash_lids(src_lid_ho, dest_lid_ho, sa->p_subn->opt.lmc) :
+	    hash_lids(dest_lid_ho, src_lid_ho, sa->p_subn->opt.lmc);
+
+	matrix[0][0] =
+	    mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_alias_guids[0],
+					    pp_alias_guids[2], base_offs,
+					    comp_mask, p_list);
+	matrix[0][1] =
+	    mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_alias_guids[0],
+					    pp_alias_guids[3], base_offs,
+					    comp_mask, p_list);
+	matrix[1][0] =
+	    mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_alias_guids[1],
+					    pp_alias_guids[2], base_offs + 1,
+					    comp_mask, p_list);
+	matrix[1][1] =
+	    mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_alias_guids[1],
+					    pp_alias_guids[3], base_offs + 1,
+					    comp_mask, p_list);
+
+	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "APM matrix:\n"
+		"\t{0,0} 0x%X->0x%X (%d)\t| {0,1} 0x%X->0x%X (%d)\n"
+		"\t{1,0} 0x%X->0x%X (%d)\t| {1,1} 0x%X->0x%X (%d)\n",
+		matrix[0][0] ? matrix[0][0]->resp.mpr_rec.path_rec.slid : 0,
+		matrix[0][0] ? matrix[0][0]->resp.mpr_rec.path_rec.dlid : 0,
+		matrix[0][0] ? matrix[0][0]->resp.mpr_rec.hops : 0,
+		matrix[0][1] ? matrix[0][1]->resp.mpr_rec.path_rec.slid : 0,
+		matrix[0][1] ? matrix[0][1]->resp.mpr_rec.path_rec.dlid : 0,
+		matrix[0][1] ? matrix[0][1]->resp.mpr_rec.hops : 0,
+		matrix[1][0] ? matrix[1][0]->resp.mpr_rec.path_rec.slid : 0,
+		matrix[1][0] ? matrix[1][0]->resp.mpr_rec.path_rec.dlid : 0,
+		matrix[1][0] ? matrix[1][0]->resp.mpr_rec.hops : 0,
+		matrix[1][1] ? matrix[1][1]->resp.mpr_rec.path_rec.slid : 0,
+		matrix[1][1] ? matrix[1][1]->resp.mpr_rec.path_rec.dlid : 0,
+		matrix[1][1] ? matrix[1][1]->resp.mpr_rec.hops : 0);
+
+	sumA = minA = cntA = sumB = minB = cntB = 0;
+
+	/* check diagonal A {(0,0), (1,1)} */
+	if (matrix[0][0]) {
+		sumA += matrix[0][0]->resp.mpr_rec.hops;
+		minA = matrix[0][0]->resp.mpr_rec.hops;
+		cntA++;
+	}
+	if (matrix[1][1]) {
+		sumA += matrix[1][1]->resp.mpr_rec.hops;
+		/* cntA (not minA) tells whether minA was set: 0 hops is valid */
+		if (!cntA || matrix[1][1]->resp.mpr_rec.hops < minA)
+			minA = matrix[1][1]->resp.mpr_rec.hops;
+		cntA++;
+	}
+
+	/* check diagonal B {(0,1), (1,0)} */
+	if (matrix[0][1]) {
+		sumB += matrix[0][1]->resp.mpr_rec.hops;
+		minB = matrix[0][1]->resp.mpr_rec.hops;
+		cntB++;
+	}
+	if (matrix[1][0]) {
+		sumB += matrix[1][0]->resp.mpr_rec.hops;
+		if (!cntB || matrix[1][0]->resp.mpr_rec.hops < minB)
+			minB = matrix[1][0]->resp.mpr_rec.hops;
+		cntB++;
+	}
+
+	/*
+	 * and the winner is...
+	 * 1. the diagonal that actually yields more paths (a diagonal with
+	 *    no path must never beat one that has paths),
+	 * 2. then the one with the smaller minimal hop count,
+	 * 3. then the one with the smaller sum of hops,
+	 * 4. diagonal A on a full tie.
+	 */
+	if (cntA != cntB)
+		use_diag_a = cntA > cntB;
+	else if (minA != minB)
+		use_diag_a = minA < minB;
+	else
+		use_diag_a = sumA <= sumB;
+
+	if (use_diag_a) {
+		/* Diag A */
+		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+			"Diag {0,0} & {1,1} is the best:\n"
+			"\t{0,0} 0x%X->0x%X (%d)\t & {1,1} 0x%X->0x%X (%d)\n",
+			matrix[0][0] ? matrix[0][0]->resp.mpr_rec.path_rec.slid : 0,
+			matrix[0][0] ? matrix[0][0]->resp.mpr_rec.path_rec.dlid : 0,
+			matrix[0][0] ? matrix[0][0]->resp.mpr_rec.hops : 0,
+			matrix[1][1] ? matrix[1][1]->resp.mpr_rec.path_rec.slid : 0,
+			matrix[1][1] ? matrix[1][1]->resp.mpr_rec.path_rec.dlid : 0,
+			matrix[1][1] ? matrix[1][1]->resp.mpr_rec.hops : 0);
+		if (matrix[0][0])
+			cl_qlist_insert_tail(p_list, &matrix[0][0]->list_item);
+		if (matrix[1][1])
+			cl_qlist_insert_tail(p_list, &matrix[1][1]->list_item);
+		/* the losing diagonal is dropped; free(NULL) is a no-op */
+		free(matrix[0][1]);
+		free(matrix[1][0]);
+	} else {
+		/* Diag B */
+		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+			"Diag {0,1} & {1,0} is the best:\n"
+			"\t{0,1} 0x%X->0x%X (%d)\t & {1,0} 0x%X->0x%X (%d)\n",
+			matrix[0][1] ? matrix[0][1]->resp.mpr_rec.path_rec.slid : 0,
+			matrix[0][1] ? matrix[0][1]->resp.mpr_rec.path_rec.dlid : 0,
+			matrix[0][1] ? matrix[0][1]->resp.mpr_rec.hops : 0,
+			matrix[1][0] ? matrix[1][0]->resp.mpr_rec.path_rec.slid : 0,
+			matrix[1][0] ? matrix[1][0]->resp.mpr_rec.path_rec.dlid : 0,
+			matrix[1][0] ? matrix[1][0]->resp.mpr_rec.hops : 0);
+		if (matrix[0][1])
+			cl_qlist_insert_tail(p_list, &matrix[0][1]->list_item);
+		if (matrix[1][0])
+			cl_qlist_insert_tail(p_list, &matrix[1][0]->list_item);
+		free(matrix[0][0]);
+		free(matrix[1][1]);
+	}
+
+	OSM_LOG_EXIT(sa->p_log);
+}
+
+/*
+ * Collect paths for every (source, destination) pair of a general
+ * (non APM) MultiPathRecord request.
+ *
+ * pp_alias_guids holds nsrc source entries followed by ndest destination
+ * entries.  Pairs are visited source-major and each pair is handed to
+ * mpr_rcv_get_port_pair_paths() together with the number of paths still
+ * allowed.  Iteration stops as soon as the total reaches the limit, which
+ * is the low 7 bits of p_mpr->num_path when IB_MPR_COMPMASK_NUMBPATH is
+ * set in comp_mask and OSM_SA_MPR_MAX_NUM_PATH otherwise.
+ */
+static void mpr_rcv_process_pairs(IN osm_sa_t * sa,
+				  IN const ib_multipath_rec_t * p_mpr,
+				  IN osm_port_t * p_req_port,
+				  IN osm_alias_guid_t ** pp_alias_guids,
+				  IN const int nsrc, IN int ndest,
+				  IN ib_net64_t comp_mask,
+				  IN cl_qlist_t * p_list)
+{
+	osm_alias_guid_t **pp_src_alias_guid, **pp_es;
+	osm_alias_guid_t **pp_dest_alias_guid, **pp_ed;
+	uint32_t max_paths, num_paths, total_paths = 0;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	if (comp_mask & IB_MPR_COMPMASK_NUMBPATH)
+		max_paths = p_mpr->num_path & 0x7F;
+	else
+		max_paths = OSM_SA_MPR_MAX_NUM_PATH;
+
+	/* pp_es: end of the sources == start of the destinations */
+	for (pp_src_alias_guid = pp_alias_guids, pp_es = pp_alias_guids + nsrc;
+	     pp_src_alias_guid < pp_es; pp_src_alias_guid++) {
+		for (pp_dest_alias_guid = pp_es, pp_ed = pp_es + ndest;
+		     pp_dest_alias_guid < pp_ed; pp_dest_alias_guid++) {
+			num_paths =
+			    mpr_rcv_get_port_pair_paths(sa, p_mpr, p_req_port,
+							*pp_src_alias_guid,
+							*pp_dest_alias_guid,
+							max_paths - total_paths,
+							comp_mask, p_list);
+			total_paths += num_paths;
+			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+				"%d paths %d total paths %d max paths\n",
+				num_paths, total_paths, max_paths);
+			/* Just take first NumbPaths found */
+			if (total_paths >= max_paths)
+				goto Exit;
+		}
+	}
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+}
+
+/*
+ * Entry point for a received MultiPathRecord SA request.
+ *
+ * context - the osm_sa_t object.
+ * data    - the osm_madw_t wrapper of the received MAD.
+ *
+ * The request is validated (RMPP active flag, GetMulti method, S/DGIDCount
+ * present, rate and MTU values, ServiceID components); a failed check is
+ * answered with osm_sa_send_error().  Otherwise the end points are resolved
+ * under the SA lock, the paths are collected either by the APM code path
+ * (exactly 2 sources, 2 destinations and NumbPath == 2) or by
+ * mpr_rcv_process_pairs(), and the result is sent as PathRecords.
+ * If the requester port cannot be found the request is only logged; no
+ * response is sent.
+ */
+void osm_mpr_rcv_process(IN void *context, IN void *data)
+{
+	osm_sa_t *sa = context;
+	osm_madw_t *p_madw = data;
+	const ib_multipath_rec_t *p_mpr;
+	ib_sa_mad_t *p_sa_mad;
+	osm_port_t *requester_port;
+	osm_alias_guid_t *pp_alias_guids[IB_MULTIPATH_MAX_GIDS];
+	cl_qlist_t pr_list;
+	ib_net16_t sa_status;
+	int nsrc, ndest;
+	uint8_t rate, mtu;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	CL_ASSERT(p_madw);
+
+	p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
+	p_mpr = (ib_multipath_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad);
+
+	CL_ASSERT(p_sa_mad->attr_id == IB_MAD_ATTR_MULTIPATH_RECORD);
+
+	if ((p_sa_mad->rmpp_flags & IB_RMPP_FLAG_ACTIVE) != IB_RMPP_FLAG_ACTIVE) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4510: "
+			"Invalid request since RMPP_FLAG_ACTIVE is not set\n");
+		osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID);
+		goto Exit;
+	}
+
+	/* we only support SubnAdmGetMulti method */
+	if (p_sa_mad->method != IB_MAD_METHOD_GETMULTI) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4513: "
+			"Unsupported Method (%s) for MultiPathRecord request\n",
+			ib_get_sa_method_str(p_sa_mad->method));
+		osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR);
+		goto Exit;
+	}
+
+	if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_DEBUG))
+		osm_dump_multipath_record_v2(sa->p_log, p_mpr, FILE_ID, OSM_LOG_DEBUG);
+
+	/* Make sure required components (S/DGIDCount) are supplied */
+	if (!(p_sa_mad->comp_mask & IB_MPR_COMPMASK_SGIDCOUNT) ||
+	    !(p_sa_mad->comp_mask & IB_MPR_COMPMASK_DGIDCOUNT)) {
+		osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_INSUF_COMPS);
+		goto Exit;
+	}
+
+	/* Validate rate if supplied */
+	if ((p_sa_mad->comp_mask & IB_MPR_COMPMASK_RATESELEC) &&
+	    (p_sa_mad->comp_mask & IB_MPR_COMPMASK_RATE)) {
+		rate = ib_multipath_rec_rate(p_mpr);
+		if (!ib_rate_is_valid(rate)) {
+			osm_sa_send_error(sa, p_madw,
+					  IB_SA_MAD_STATUS_REQ_INVALID);
+			goto Exit;
+		}
+	}
+	/* Validate MTU if supplied */
+	if ((p_sa_mad->comp_mask & IB_MPR_COMPMASK_MTUSELEC) &&
+	    (p_sa_mad->comp_mask & IB_MPR_COMPMASK_MTU)) {
+		mtu = ib_multipath_rec_mtu(p_mpr);
+		if (!ib_mtu_is_valid(mtu)) {
+			osm_sa_send_error(sa, p_madw,
+					  IB_SA_MAD_STATUS_REQ_INVALID);
+			goto Exit;
+		}
+	}
+
+	/* Make sure either none or both ServiceID parameters are supplied */
+	if ((p_sa_mad->comp_mask & IB_MPR_COMPMASK_SERVICEID) != 0 &&
+	    (p_sa_mad->comp_mask & IB_MPR_COMPMASK_SERVICEID) !=
+	     IB_MPR_COMPMASK_SERVICEID) {
+		osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_INSUF_COMPS);
+		goto Exit;
+	}
+
+	cl_qlist_init(&pr_list);
+
+	/*
+	   Most SA functions (including this one) are read-only on the
+	   subnet object, so we grab the lock non-exclusively.
+	 */
+	cl_plock_acquire(sa->p_lock);
+
+	/* update the requester physical port */
+	requester_port = osm_get_port_by_mad_addr(sa->p_log, sa->p_subn,
+						  osm_madw_get_mad_addr_ptr
+						  (p_madw));
+	if (requester_port == NULL) {
+		cl_plock_release(sa->p_lock);
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4517: "
+			"Cannot find requester physical port\n");
+		goto Exit;
+	}
+
+	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+		"Requester port GUID 0x%" PRIx64 "\n",
+		cl_ntoh64(osm_port_get_guid(requester_port)));
+
+	sa_status = mpr_rcv_get_end_points(sa, p_madw, pp_alias_guids,
+					   &nsrc, &ndest);
+
+	/* a request without at least one source and one destination is invalid */
+	if (sa_status != IB_SA_MAD_STATUS_SUCCESS || !nsrc || !ndest) {
+		cl_plock_release(sa->p_lock);
+		if (sa_status == IB_SA_MAD_STATUS_SUCCESS && (!nsrc || !ndest))
+			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4512: "
+				"mpr_rcv_get_end_points failed, # GIDs found; "
+				"src %d; dest %d)\n", nsrc, ndest);
+		if (sa_status == IB_SA_MAD_STATUS_SUCCESS)
+			osm_sa_send_error(sa, p_madw,
+					  IB_SA_MAD_STATUS_REQ_INVALID);
+		else
+			osm_sa_send_error(sa, p_madw, sa_status);
+		goto Exit;
+	}
+
+	/* APM request */
+	if (nsrc == 2 && ndest == 2 && (p_mpr->num_path & 0x7F) == 2)
+		mpr_rcv_get_apm_paths(sa, p_mpr, requester_port, pp_alias_guids,
+				      p_sa_mad->comp_mask, &pr_list);
+	else
+		mpr_rcv_process_pairs(sa, p_mpr, requester_port, pp_alias_guids,
+				      nsrc, ndest, p_sa_mad->comp_mask,
+				      &pr_list);
+
+	cl_plock_release(sa->p_lock);
+
+	/* o15-0.2.7: If MultiPath is supported, then SA shall respond to a
+	   SubnAdmGetMulti() containing a valid MultiPathRecord attribute with
+	   a set of zero or more PathRecords satisfying the constraints
+	   indicated in the MultiPathRecord received. The PathRecord Attribute
+	   ID shall be used in the response.
+	 */
+	p_sa_mad->attr_id = IB_MAD_ATTR_PATH_RECORD;
+	osm_sa_respond(sa, p_madw, sizeof(ib_path_rec_t), &pr_list);
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+}
+#endif
diff --git a/opensm/osm_sa_node_record.c b/opensm/osm_sa_node_record.c
new file mode 100644
index 0000000..0cb0ffb
--- /dev/null
+++ b/opensm/osm_sa_node_record.c
@@ -0,0 +1,352 @@
+/*
+ * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2010 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * Abstract:
+ * Implementation of osm_nr_rcv_t.
+ * This object represents the NodeInfo Receiver object.
+ * This object is part of the opensm family of objects.
+ */
+
+#if HAVE_CONFIG_H
+# include
+#endif /* HAVE_CONFIG_H */
+
+#include
+#include
+#include
+#include
+#include
+#define FILE_ID OSM_FILE_SA_NODE_RECORD_C
+#include
+#include
+#include
+#include
+#include
+
+#define SA_NR_RESP_SIZE SA_ITEM_RESP_SIZE(node_rec)
+
+/*
+ * Per-request state handed to nr_rcv_by_comp_mask() for every node in the
+ * subnet's node table.
+ */
+typedef struct osm_nr_search_ctxt {
+	const ib_node_record_t *p_rcvd_rec;	/* record received in the query */
+	ib_net64_t comp_mask;			/* component mask of the query */
+	cl_qlist_t *p_list;			/* result list being built */
+	osm_sa_t *sa;
+	const osm_physp_t *p_req_physp;		/* requester physical port */
+} osm_nr_search_ctxt_t;
+
+/*
+ * Allocate one NodeRecord response item for (p_node, port) and append it
+ * to p_list.
+ *
+ * The record carries the node's NodeInfo and NodeDescription, with the
+ * port GUID and the local port number replaced by port_guid / port_num,
+ * and lid as the record LID.
+ *
+ * Returns IB_SUCCESS, or IB_INSUFFICIENT_RESOURCES when the allocation
+ * fails (ERR 1D02 is logged, nothing is appended).
+ */
+static ib_api_status_t nr_rcv_new_nr(osm_sa_t * sa,
+				     IN const osm_node_t * p_node,
+				     IN cl_qlist_t * p_list,
+				     IN ib_net64_t port_guid, IN ib_net16_t lid,
+				     IN unsigned int port_num)
+{
+	osm_sa_item_t *p_rec_item;
+	ib_api_status_t status = IB_SUCCESS;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	p_rec_item = malloc(SA_NR_RESP_SIZE);
+	if (p_rec_item == NULL) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1D02: "
+			"rec_item alloc failed\n");
+		status = IB_INSUFFICIENT_RESOURCES;
+		goto Exit;
+	}
+
+	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+		"New NodeRecord: node 0x%016" PRIx64
+		", port 0x%016" PRIx64 ", lid %u\n",
+		cl_ntoh64(osm_node_get_node_guid(p_node)),
+		cl_ntoh64(port_guid), cl_ntoh16(lid));
+
+	memset(p_rec_item, 0, SA_NR_RESP_SIZE);
+
+	p_rec_item->resp.node_rec.lid = lid;
+
+	p_rec_item->resp.node_rec.node_info = p_node->node_info;
+	p_rec_item->resp.node_rec.node_info.port_guid = port_guid;
+	/* keep the vendor id bits, substitute the local port number bits */
+	p_rec_item->resp.node_rec.node_info.port_num_vendor_id =
+		(p_rec_item->resp.node_rec.node_info.port_num_vendor_id & IB_NODE_INFO_VEND_ID_MASK) |
+		((port_num << IB_NODE_INFO_PORT_NUM_SHIFT) & IB_NODE_INFO_PORT_NUM_MASK);
+	memcpy(&(p_rec_item->resp.node_rec.node_desc), &(p_node->node_desc),
+	       IB_NODE_DESCRIPTION_SIZE);
+	cl_qlist_insert_tail(p_list, &p_rec_item->list_item);
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+	return status;
+}
+
+/*
+ * Emit a NodeRecord for every port of p_node that passes the per-port
+ * filters of the query.
+ *
+ * A port is reported when it shares a pkey with the requester port and
+ * matches each of PortGUID, LID (anywhere inside the port's LMC range) and
+ * PortNum that is selected in comp_mask.  For switches only port 0 is
+ * considered.
+ *
+ * match_port_guid and match_lid are in network order; match_port_num is a
+ * plain port number.
+ */
+static void nr_rcv_create_nr(IN osm_sa_t * sa, IN osm_node_t * p_node,
+			     IN cl_qlist_t * p_list,
+			     IN ib_net64_t const match_port_guid,
+			     IN ib_net16_t const match_lid,
+			     IN unsigned int const match_port_num,
+			     IN const osm_physp_t * p_req_physp,
+			     IN const ib_net64_t comp_mask)
+{
+	const osm_physp_t *p_physp;
+	uint8_t port_num;
+	uint8_t num_ports;
+	/* the *_ho values are host order, hence uint16_t and not ib_net16_t */
+	uint16_t match_lid_ho;
+	ib_net16_t base_lid;
+	uint16_t base_lid_ho;
+	uint16_t max_lid_ho;
+	uint8_t lmc;
+	ib_net64_t port_guid;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+		"Looking for NodeRecord with LID: %u GUID: 0x%016"
+		PRIx64 "\n", cl_ntoh16(match_lid), cl_ntoh64(match_port_guid));
+
+	/*
+	   For switches, do not return the NodeInfo record
+	   for each port on the switch, just for port 0.
+	 */
+	if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH)
+		num_ports = 1;
+	else
+		num_ports = osm_node_get_num_physp(p_node);
+
+	for (port_num = 0; port_num < num_ports; port_num++) {
+		p_physp = osm_node_get_physp_ptr(p_node, port_num);
+		if (!p_physp)
+			continue;
+
+		/* Check to see if the found p_physp and the requester physp
+		   share a pkey. If not - continue */
+		if (!osm_physp_share_pkey(sa->p_log, p_physp, p_req_physp,
+					  sa->p_subn->opt.allow_both_pkeys))
+			continue;
+
+		port_guid = osm_physp_get_port_guid(p_physp);
+
+		if ((comp_mask & IB_NR_COMPMASK_PORTGUID)
+		    && (port_guid != match_port_guid))
+			continue;
+
+		base_lid = osm_physp_get_base_lid(p_physp);
+
+		if (comp_mask & IB_NR_COMPMASK_LID) {
+			base_lid_ho = cl_ntoh16(base_lid);
+			lmc = osm_physp_get_lmc(p_physp);
+			max_lid_ho = (uint16_t) (base_lid_ho + (1 << lmc) - 1);
+			match_lid_ho = cl_ntoh16(match_lid);
+
+			/*
+			   We validate that the lid belongs to this node.
+			 */
+			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+				"Comparing LID: %u <= %u <= %u\n",
+				base_lid_ho, match_lid_ho, max_lid_ho);
+
+			if (match_lid_ho < base_lid_ho
+			    || match_lid_ho > max_lid_ho)
+				continue;
+		}
+
+		if ((comp_mask & IB_NR_COMPMASK_PORTNUM) &&
+		    (port_num != match_port_num))
+			continue;
+
+		/*
+		 * The only failure is an allocation failure, which has
+		 * already been logged; do not retry (and re-log) it for
+		 * every remaining port of this node.
+		 */
+		if (nr_rcv_new_nr(sa, p_node, p_list, port_guid, base_lid,
+				  port_num) != IB_SUCCESS)
+			break;
+	}
+
+	OSM_LOG_EXIT(sa->p_log);
+}
+
+/*
+ * Per-node callback for cl_qmap_apply_func() over the node GUID table.
+ *
+ * p_map_item - the map item of a node; it is cast directly to osm_node_t.
+ * context    - the osm_nr_search_ctxt_t of the current query.
+ *
+ * Node-level components selected in comp_mask (NodeGUID, SystemImageGUID,
+ * versions, node type, port count, PartitionCap, DeviceID, Revision,
+ * VendorID, NodeDescription) are compared against the node; on the first
+ * mismatch the node is skipped.  Port-level components (LID, PortGUID,
+ * PortNum) are only extracted here and evaluated by nr_rcv_create_nr().
+ */
+static void nr_rcv_by_comp_mask(IN cl_map_item_t * p_map_item, IN void *context)
+{
+	const osm_nr_search_ctxt_t *p_ctxt = context;
+	osm_node_t *p_node = (osm_node_t *) p_map_item;
+	const ib_node_record_t *const p_rcvd_rec = p_ctxt->p_rcvd_rec;
+	const osm_physp_t *const p_req_physp = p_ctxt->p_req_physp;
+	osm_sa_t *sa = p_ctxt->sa;
+	ib_net64_t comp_mask = p_ctxt->comp_mask;
+	ib_net64_t match_port_guid = 0;
+	ib_net16_t match_lid = 0;
+	unsigned int match_port_num = 0;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	osm_dump_node_info_v2(sa->p_log, &p_node->node_info,
+			      FILE_ID, OSM_LOG_DEBUG);
+
+	if (comp_mask & IB_NR_COMPMASK_LID)
+		match_lid = p_rcvd_rec->lid;
+
+	if (comp_mask & IB_NR_COMPMASK_NODEGUID) {
+		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+			"Looking for node 0x%016" PRIx64
+			", found 0x%016" PRIx64 "\n",
+			cl_ntoh64(p_rcvd_rec->node_info.node_guid),
+			cl_ntoh64(osm_node_get_node_guid(p_node)));
+
+		if (p_node->node_info.node_guid !=
+		    p_rcvd_rec->node_info.node_guid)
+			goto Exit;
+	}
+
+	if (comp_mask & IB_NR_COMPMASK_PORTGUID)
+		match_port_guid = p_rcvd_rec->node_info.port_guid;
+
+	if ((comp_mask & IB_NR_COMPMASK_SYSIMAGEGUID) &&
+	    p_node->node_info.sys_guid != p_rcvd_rec->node_info.sys_guid)
+		goto Exit;
+
+	if ((comp_mask & IB_NR_COMPMASK_BASEVERSION) &&
+	    p_node->node_info.base_version !=
+	    p_rcvd_rec->node_info.base_version)
+		goto Exit;
+
+	if ((comp_mask & IB_NR_COMPMASK_CLASSVERSION) &&
+	    p_node->node_info.class_version !=
+	    p_rcvd_rec->node_info.class_version)
+		goto Exit;
+
+	if ((comp_mask & IB_NR_COMPMASK_NODETYPE) &&
+	    p_node->node_info.node_type != p_rcvd_rec->node_info.node_type)
+		goto Exit;
+
+	if ((comp_mask & IB_NR_COMPMASK_NUMPORTS) &&
+	    p_node->node_info.num_ports != p_rcvd_rec->node_info.num_ports)
+		goto Exit;
+
+	if ((comp_mask & IB_NR_COMPMASK_PARTCAP) &&
+	    p_node->node_info.partition_cap !=
+	    p_rcvd_rec->node_info.partition_cap)
+		goto Exit;
+
+	if ((comp_mask & IB_NR_COMPMASK_DEVID) &&
+	    p_node->node_info.device_id != p_rcvd_rec->node_info.device_id)
+		goto Exit;
+
+	if ((comp_mask & IB_NR_COMPMASK_REV) &&
+	    p_node->node_info.revision !=
+	    p_rcvd_rec->node_info.revision)
+		goto Exit;
+
+	if (comp_mask & IB_NR_COMPMASK_PORTNUM)
+		match_port_num = ib_node_info_get_local_port_num(&p_rcvd_rec->node_info);
+
+	if ((comp_mask & IB_NR_COMPMASK_VENDID) &&
+	    ib_node_info_get_vendor_id(&p_node->node_info) !=
+	    ib_node_info_get_vendor_id(&p_rcvd_rec->node_info))
+		goto Exit;
+
+	if ((comp_mask & IB_NR_COMPMASK_NODEDESC) &&
+	    strncmp((char *)&p_node->node_desc, (char *)&p_rcvd_rec->node_desc,
+		    sizeof(ib_node_desc_t)))
+		goto Exit;
+
+	nr_rcv_create_nr(sa, p_node, p_ctxt->p_list, match_port_guid,
+			 match_lid, match_port_num, p_req_physp, comp_mask);
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+}
+
+/*
+ * Entry point for a received NodeRecord SA query.
+ *
+ * ctx  - the osm_sa_t object.
+ * data - the osm_madw_t wrapper of the received MAD.
+ *
+ * Only SubnAdmGet and SubnAdmGetTable are accepted; any other method is
+ * answered with IB_MAD_STATUS_UNSUP_METHOD_ATTR.  Under the SA lock every
+ * node of the subnet is run through nr_rcv_by_comp_mask() and the
+ * collected records are sent with osm_sa_respond().  If the requester
+ * physical port cannot be found the request is only logged; no response
+ * is sent.
+ */
+void osm_nr_rcv_process(IN void *ctx, IN void *data)
+{
+	osm_sa_t *sa = ctx;
+	osm_madw_t *p_madw = data;
+	const ib_sa_mad_t *p_rcvd_mad;
+	const ib_node_record_t *p_rcvd_rec;
+	cl_qlist_t rec_list;
+	osm_nr_search_ctxt_t context;
+	osm_physp_t *p_req_physp;
+
+	CL_ASSERT(sa);
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	CL_ASSERT(p_madw);
+
+	p_rcvd_mad = osm_madw_get_sa_mad_ptr(p_madw);
+	p_rcvd_rec = (ib_node_record_t *) ib_sa_mad_get_payload_ptr(p_rcvd_mad);
+
+	CL_ASSERT(p_rcvd_mad->attr_id == IB_MAD_ATTR_NODE_RECORD);
+
+	/* we only support SubnAdmGet and SubnAdmGetTable methods */
+	if (p_rcvd_mad->method != IB_MAD_METHOD_GET &&
+	    p_rcvd_mad->method != IB_MAD_METHOD_GETTABLE) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1D05: "
+			"Unsupported Method (%s) for NodeRecord request\n",
+			ib_get_sa_method_str(p_rcvd_mad->method));
+		osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR);
+		goto Exit;
+	}
+
+	cl_plock_acquire(sa->p_lock);
+
+	/* update the requester physical port */
+	p_req_physp = osm_get_physp_by_mad_addr(sa->p_log, sa->p_subn,
+						osm_madw_get_mad_addr_ptr
+						(p_madw));
+	if (p_req_physp == NULL) {
+		cl_plock_release(sa->p_lock);
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1D04: "
+			"Cannot find requester physical port\n");
+		goto Exit;
+	}
+
+	if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_DEBUG)) {
+		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+			"Requester port GUID 0x%" PRIx64 "\n",
+			cl_ntoh64(osm_physp_get_port_guid(p_req_physp)));
+		osm_dump_node_record_v2(sa->p_log, p_rcvd_rec, FILE_ID, OSM_LOG_DEBUG);
+	}
+
+	cl_qlist_init(&rec_list);
+
+	context.p_rcvd_rec = p_rcvd_rec;
+	context.p_list = &rec_list;
+	context.comp_mask = p_rcvd_mad->comp_mask;
+	context.sa = sa;
+	context.p_req_physp = p_req_physp;
+
+	cl_qmap_apply_func(&sa->p_subn->node_guid_tbl, nr_rcv_by_comp_mask,
+			   &context);
+
+	cl_plock_release(sa->p_lock);
+
+	osm_sa_respond(sa, p_madw, sizeof(ib_node_record_t), &rec_list);
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+}
diff --git a/opensm/osm_sa_path_record.c b/opensm/osm_sa_path_record.c
new file mode 100644
index 0000000..bb3290f
--- /dev/null
+++ b/opensm/osm_sa_path_record.c
@@ -0,0 +1,2002 @@
+/*
+ * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * Copyright (c) 2010 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2009-2011 ZIH, TU Dresden, Federal Republic of Germany. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_pr_rcv_t. + * This object represents the PathRecord Receiver object. 
+ * This object is part of the opensm family of objects.
+ */
+
+#if HAVE_CONFIG_H
+# include
+#endif /* HAVE_CONFIG_H */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#define FILE_ID OSM_FILE_SA_PATH_RECORD_C
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define SA_PR_RESP_SIZE SA_ITEM_RESP_SIZE(path_rec)
+
+#define MAX_HOPS 64
+
+/*
+ * Return TRUE when the node owning p_port reports device ID 23108 together
+ * with one of the Mellanox, Topspin, SilverStorm or Voltaire vendor IDs
+ * (the combination this file treats as a "Tavor" device).
+ */
+static inline boolean_t sa_path_rec_is_tavor_port(IN const osm_port_t * p_port)
+{
+	osm_node_t const *p_node;
+	ib_net32_t vend_id;
+
+	p_node = p_port->p_node;
+	vend_id = ib_node_info_get_vendor_id(&p_node->node_info);
+
+	return ((p_node->node_info.device_id == CL_HTON16(23108)) &&
+		((vend_id == CL_HTON32(OSM_VENDOR_ID_MELLANOX)) ||
+		 (vend_id == CL_HTON32(OSM_VENDOR_ID_TOPSPIN)) ||
+		 (vend_id == CL_HTON32(OSM_VENDOR_ID_SILVERSTORM)) ||
+		 (vend_id == CL_HTON32(OSM_VENDOR_ID_VOLTAIRE))));
+}
+
+/*
+ * Decide whether the path MTU may be capped at 1K for a Tavor end point.
+ *
+ * p_pr        - the PathRecord request.
+ * p_src_port  - source port of the path.
+ * p_dest_port - destination port of the path.
+ * comp_mask   - component mask of the request.
+ *
+ * Returns TRUE when at least one end is a Tavor port and the MTU
+ * constraint of the request (if both MTU and MTUSelector are present) can
+ * still be met by a 1K path; FALSE otherwise.
+ */
+static boolean_t
+sa_path_rec_apply_tavor_mtu_limit(IN const ib_path_rec_t * p_pr,
+				  IN const osm_port_t * p_src_port,
+				  IN const osm_port_t * p_dest_port,
+				  IN const ib_net64_t comp_mask)
+{
+	uint8_t required_mtu;
+
+	/* only if at least one of the ports is a Tavor device */
+	if (!sa_path_rec_is_tavor_port(p_src_port) &&
+	    !sa_path_rec_is_tavor_port(p_dest_port))
+		return FALSE;
+
+	/*
+	   we can apply the patch if either:
+	   1. No MTU required
+	   2. Required MTU < (any value)
+	   3. Required MTU = 1K or 512 or 256
+	   4. Required MTU > 256 or 512
+	 */
+	required_mtu = ib_path_rec_mtu(p_pr);
+	if ((comp_mask & IB_PR_COMPMASK_MTUSELEC) &&
+	    (comp_mask & IB_PR_COMPMASK_MTU)) {
+		switch (ib_path_rec_mtu_sel(p_pr)) {
+		case 0:	/* must be greater than */
+			/*
+			 * A 1K path is only "greater than" 256 or 512.
+			 * Capping a "greater than 1K" request would make
+			 * the later MTU check reject the path altogether.
+			 */
+			if (required_mtu >= IB_MTU_LEN_1024)
+				return FALSE;
+			break;
+
+		case 2:	/* exact match */
+			if (IB_MTU_LEN_1024 < required_mtu)
+				return FALSE;
+			break;
+
+		case 1:	/* must be less than */
+			/* can't be disqualified by this one */
+			break;
+
+		case 3:	/* largest available */
+			/* the ULP intentionally requested */
+			/* the largest MTU possible */
+			return FALSE;
+
+		default:
+			/* if we're here, there's a bug in ib_path_rec_mtu_sel() */
+			CL_ASSERT(FALSE);
+			break;
+		}
+	}
+
+	return TRUE;
+}
+
+static ib_api_status_t pr_rcv_get_path_parms(IN osm_sa_t * sa,
+					     IN const ib_path_rec_t * p_pr,
+					     IN const osm_alias_guid_t * p_src_alias_guid,
+					     IN const uint16_t src_lid_ho,
+					     IN const osm_alias_guid_t * p_dest_alias_guid,
+					     IN const uint16_t dest_lid_ho,
+					     IN const ib_net64_t comp_mask,
+					     OUT osm_path_parms_t * p_parms)
+{
+	const osm_node_t *p_node;
+	const osm_physp_t *p_physp, *p_physp0;
+	const osm_physp_t *p_src_physp;
+	const osm_physp_t *p_dest_physp;
+	const osm_prtn_t *p_prtn = NULL;
+	osm_opensm_t *p_osm;
+	struct osm_routing_engine *p_re;
+	const ib_port_info_t *p_pi, *p_pi0;
+	ib_api_status_t status = IB_SUCCESS;
+	ib_net16_t pkey;
+	uint8_t mtu;
+	uint8_t rate, p0_extended_rate, dest_rate;
+	uint8_t pkt_life;
+	uint8_t required_mtu;
+	uint8_t required_rate;
+	uint8_t required_pkt_life;
+	uint8_t sl;
+	uint8_t in_port_num;
+	ib_net16_t dest_lid;
+	uint8_t i;
+	ib_slvl_table_t *p_slvl_tbl = NULL;
+	osm_qos_level_t *p_qos_level = NULL;
+	uint16_t valid_sl_mask = 0xffff;
+	int hops = 0;
+	int extended, p0_extended;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	dest_lid = cl_hton16(dest_lid_ho);
+
+	p_dest_physp = p_dest_alias_guid->p_base_port->p_physp;
+	p_physp = p_src_alias_guid->p_base_port->p_physp;
+	p_src_physp = p_physp;
+	p_pi = &p_physp->port_info;
+	p_osm = sa->p_subn->p_osm;
+	p_re =
p_osm->routing_engine_used; + + mtu = ib_port_info_get_mtu_cap(p_pi); + extended = p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS; + rate = ib_port_info_compute_rate(p_pi, extended); + + /* + Mellanox Tavor device performance is better using 1K MTU. + If required MTU and MTU selector are such that 1K is OK + and at least one end of the path is Tavor we override the + port MTU with 1K. + */ + if (sa->p_subn->opt.enable_quirks && + sa_path_rec_apply_tavor_mtu_limit(p_pr, + p_src_alias_guid->p_base_port, + p_dest_alias_guid->p_base_port, + comp_mask)) + if (mtu > IB_MTU_LEN_1024) { + mtu = IB_MTU_LEN_1024; + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Optimized Path MTU to 1K for Mellanox Tavor device\n"); + } + + /* + Walk the subnet object from source to destination, + tracking the most restrictive rate and mtu values along the way... + + If source port node is a switch, then p_physp should + point to the port that routes the destination lid + */ + + p_node = osm_physp_get_node_ptr(p_physp); + + if (p_node->sw) { + /* + * Source node is a switch. + * Make sure that p_physp points to the out port of the + * switch that routes to the destination lid (dest_lid_ho) + */ + p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid); + if (p_physp == 0) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F02: " + "Cannot find routing from LID %u to LID %u on " + "switch %s (GUID: 0x%016" PRIx64 ")\n", + src_lid_ho, dest_lid_ho, p_node->print_desc, + cl_ntoh64(osm_node_get_node_guid(p_node))); + status = IB_NOT_FOUND; + goto Exit; + } + } + + if (sa->p_subn->opt.qos) { + /* + * Whether this node is switch or CA, the IN port for + * the sl2vl table is 0, because this is a source node. 
+ */ + p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, 0); + + /* update valid SLs that still exist on this route */ + for (i = 0; i < IB_MAX_NUM_VLS; i++) { + if (valid_sl_mask & (1 << i) && + ib_slvl_table_get(p_slvl_tbl, i) == IB_DROP_VL) + valid_sl_mask &= ~(1 << i); + } + if (!valid_sl_mask) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "All the SLs lead to VL15 on this path\n"); + status = IB_NOT_FOUND; + goto Exit; + } + } + + /* + * Same as above + */ + p_node = osm_physp_get_node_ptr(p_dest_physp); + + if (p_node->sw) { + /* + * if destination is switch, we want p_dest_physp to point to port 0 + */ + p_dest_physp = + osm_switch_get_route_by_lid(p_node->sw, dest_lid); + + if (p_dest_physp == 0) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F03: " + "Can't find routing from LID %u to LID %u on " + "switch %s (GUID: 0x%016" PRIx64 ")\n", + src_lid_ho, dest_lid_ho, p_node->print_desc, + cl_ntoh64(osm_node_get_node_guid(p_node))); + status = IB_NOT_FOUND; + goto Exit; + } + + } + + /* + * Now go through the path step by step + */ + + while (p_physp != p_dest_physp) { + + int tmp_pnum = p_physp->port_num; + p_node = osm_physp_get_node_ptr(p_physp); + p_physp = osm_physp_get_remote(p_physp); + + if (p_physp == 0) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F05: " + "Can't find remote phys port of %s (GUID: " + "0x%016"PRIx64") port %d " + "while routing from LID %u to LID %u\n", + p_node->print_desc, + cl_ntoh64(osm_node_get_node_guid(p_node)), + tmp_pnum, src_lid_ho, dest_lid_ho); + status = IB_ERROR; + goto Exit; + } + + in_port_num = osm_physp_get_port_num(p_physp); + + /* + This is point to point case (no switch in between) + */ + if (p_physp == p_dest_physp) + break; + + p_node = osm_physp_get_node_ptr(p_physp); + + if (!p_node->sw) { + /* + There is some sort of problem in the subnet object! + If this isn't a switch, we should have reached + the destination by now! 
+ */ + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F06: " + "Internal error, bad path while routing " + "%s (GUID: 0x%016"PRIx64") port %d to " + "%s (GUID: 0x%016"PRIx64") port %d; " + "ended at %s port %d\n", + p_src_alias_guid->p_base_port->p_node->print_desc, + cl_ntoh64(p_src_alias_guid->p_base_port->p_node->node_info.node_guid), + p_src_alias_guid->p_base_port->p_physp->port_num, + p_dest_alias_guid->p_base_port->p_node->print_desc, + cl_ntoh64(p_dest_alias_guid->p_base_port->p_node->node_info.node_guid), + p_dest_alias_guid->p_base_port->p_physp->port_num, + p_node->print_desc, + p_physp->port_num); + status = IB_ERROR; + goto Exit; + } + + /* + Check parameters for the ingress port in this switch. + */ + p_pi = &p_physp->port_info; + + if (mtu > ib_port_info_get_mtu_cap(p_pi)) + mtu = ib_port_info_get_mtu_cap(p_pi); + + p_physp0 = osm_node_get_physp_ptr((osm_node_t *)p_node, 0); + p_pi0 = &p_physp0->port_info; + p0_extended = p_pi0->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS; + p0_extended_rate = ib_port_info_compute_rate(p_pi, p0_extended); + if (ib_path_compare_rates(rate, p0_extended_rate) > 0) + rate = p0_extended_rate; + + /* + Continue with the egress port on this switch. 
+ */ + p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid); + if (p_physp == 0) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F07: " + "Dead end path on switch " + "%s (GUID: 0x%016"PRIx64") to LID %u\n", + p_node->print_desc, + cl_ntoh64(osm_node_get_node_guid(p_node)), + dest_lid_ho); + status = IB_ERROR; + goto Exit; + } + + p_pi = &p_physp->port_info; + + if (mtu > ib_port_info_get_mtu_cap(p_pi)) + mtu = ib_port_info_get_mtu_cap(p_pi); + + p0_extended_rate = ib_port_info_compute_rate(p_pi, p0_extended); + if (ib_path_compare_rates(rate, p0_extended_rate) > 0) + rate = p0_extended_rate; + + if (sa->p_subn->opt.qos) { + /* + * Check SL2VL table of the switch and update valid SLs + */ + p_slvl_tbl = + osm_physp_get_slvl_tbl(p_physp, in_port_num); + for (i = 0; i < IB_MAX_NUM_VLS; i++) { + if (valid_sl_mask & (1 << i) && + ib_slvl_table_get(p_slvl_tbl, + i) == IB_DROP_VL) + valid_sl_mask &= ~(1 << i); + } + if (!valid_sl_mask) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "All the SLs " + "lead to VL15 on this path\n"); + status = IB_NOT_FOUND; + goto Exit; + } + } + + /* update number of hops traversed */ + hops++; + if (hops > MAX_HOPS) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F25: " + "Path from GUID 0x%016" PRIx64 " (%s port %d) " + "to lid %u GUID 0x%016" PRIx64 " (%s port %d) " + "needs more than %d hops, max %d hops allowed\n", + cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), + p_src_physp->p_node->print_desc, + p_src_physp->port_num, + dest_lid_ho, + cl_ntoh64(osm_physp_get_port_guid + (p_dest_physp)), + p_dest_physp->p_node->print_desc, + p_dest_physp->port_num, + hops, + MAX_HOPS); + status = IB_NOT_FOUND; + goto Exit; + } + } + + /* + p_physp now points to the destination + */ + p_pi = &p_physp->port_info; + + if (mtu > ib_port_info_get_mtu_cap(p_pi)) + mtu = ib_port_info_get_mtu_cap(p_pi); + + extended = p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS; + dest_rate = ib_port_info_compute_rate(p_pi, extended); + if (ib_path_compare_rates(rate, 
dest_rate) > 0) + rate = dest_rate; + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Path min MTU = %u, min rate = %u\n", mtu, rate); + + /* + * Get QoS Level object according to the path request + * and adjust path parameters according to QoS settings + */ + if (sa->p_subn->opt.qos && + sa->p_subn->p_qos_policy && + (p_qos_level = + osm_qos_policy_get_qos_level_by_pr(sa->p_subn->p_qos_policy, + p_pr, p_src_physp, p_dest_physp, + comp_mask))) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "PathRecord request matches QoS Level '%s' (%s)\n", + p_qos_level->name, p_qos_level->use ? + p_qos_level->use : "no description"); + + if (p_qos_level->mtu_limit_set + && (mtu > p_qos_level->mtu_limit)) + mtu = p_qos_level->mtu_limit; + + if (p_qos_level->rate_limit_set + && (ib_path_compare_rates(rate, p_qos_level->rate_limit) > 0)) + rate = p_qos_level->rate_limit; + + if (p_qos_level->sl_set) { + sl = p_qos_level->sl; + if (!(valid_sl_mask & (1 << sl))) { + status = IB_NOT_FOUND; + goto Exit; + } + } + } + + /* + * Set packet lifetime. + * According to spec definition IBA 1.2 Table 205 + * PacketLifeTime description, for loopback paths, + * packetLifeTime shall be zero. 
+ */ + if (p_src_alias_guid->p_base_port == p_dest_alias_guid->p_base_port) + pkt_life = 0; + else if (p_qos_level && p_qos_level->pkt_life_set) + pkt_life = p_qos_level->pkt_life; + else + pkt_life = sa->p_subn->opt.subnet_timeout; + + /* + Determine if these values meet the user criteria + and adjust appropriately + */ + + /* we silently ignore cases where only the MTU selector is defined */ + if ((comp_mask & IB_PR_COMPMASK_MTUSELEC) && + (comp_mask & IB_PR_COMPMASK_MTU)) { + required_mtu = ib_path_rec_mtu(p_pr); + switch (ib_path_rec_mtu_sel(p_pr)) { + case 0: /* must be greater than */ + if (mtu <= required_mtu) + status = IB_NOT_FOUND; + break; + + case 1: /* must be less than */ + if (mtu >= required_mtu) { + /* adjust to use the highest mtu + lower than the required one */ + if (required_mtu > 1) + mtu = required_mtu - 1; + else + status = IB_NOT_FOUND; + } + break; + + case 2: /* exact match */ + if (mtu < required_mtu) + status = IB_NOT_FOUND; + else + mtu = required_mtu; + break; + + case 3: /* largest available */ + /* can't be disqualified by this one */ + break; + + default: + /* if we're here, there's a bug in ib_path_rec_mtu_sel() */ + CL_ASSERT(FALSE); + status = IB_ERROR; + break; + } + } + if (status != IB_SUCCESS) + goto Exit; + + /* we silently ignore cases where only the Rate selector is defined */ + if ((comp_mask & IB_PR_COMPMASK_RATESELEC) && + (comp_mask & IB_PR_COMPMASK_RATE)) { + required_rate = ib_path_rec_rate(p_pr); + switch (ib_path_rec_rate_sel(p_pr)) { + case 0: /* must be greater than */ + if (ib_path_compare_rates(rate, required_rate) <= 0) + status = IB_NOT_FOUND; + break; + + case 1: /* must be less than */ + if (ib_path_compare_rates(rate, required_rate) >= 0) { + /* adjust the rate to use the highest rate + lower than the required one */ + rate = ib_path_rate_get_prev(required_rate); + if (!rate) + status = IB_NOT_FOUND; + } + break; + + case 2: /* exact match */ + if (ib_path_compare_rates(rate, required_rate)) + status = 
IB_NOT_FOUND; + else + rate = required_rate; + break; + + case 3: /* largest available */ + /* can't be disqualified by this one */ + break; + + default: + /* if we're here, there's a bug in ib_path_rec_mtu_sel() */ + CL_ASSERT(FALSE); + status = IB_ERROR; + break; + } + } + if (status != IB_SUCCESS) + goto Exit; + + /* we silently ignore cases where only the PktLife selector is defined */ + if ((comp_mask & IB_PR_COMPMASK_PKTLIFETIMESELEC) && + (comp_mask & IB_PR_COMPMASK_PKTLIFETIME)) { + required_pkt_life = ib_path_rec_pkt_life(p_pr); + switch (ib_path_rec_pkt_life_sel(p_pr)) { + case 0: /* must be greater than */ + if (pkt_life <= required_pkt_life) + status = IB_NOT_FOUND; + break; + + case 1: /* must be less than */ + if (pkt_life >= required_pkt_life) { + /* adjust the lifetime to use the highest possible + lower than the required one */ + if (required_pkt_life > 1) + pkt_life = required_pkt_life - 1; + else + status = IB_NOT_FOUND; + } + break; + + case 2: /* exact match */ + if (pkt_life < required_pkt_life) + status = IB_NOT_FOUND; + else + pkt_life = required_pkt_life; + break; + + case 3: /* smallest available */ + /* can't be disqualified by this one */ + break; + + default: + /* if we're here, there's a bug in ib_path_rec_pkt_life_sel() */ + CL_ASSERT(FALSE); + status = IB_ERROR; + break; + } + } + + if (status != IB_SUCCESS) + goto Exit; + + /* + * set Pkey for this path record request + */ + + if ((comp_mask & IB_PR_COMPMASK_RAWTRAFFIC) && + (cl_ntoh32(p_pr->hop_flow_raw) & (1 << 31))) + pkey = osm_physp_find_common_pkey(p_src_physp, p_dest_physp, + sa->p_subn->opt.allow_both_pkeys); + + else if (comp_mask & IB_PR_COMPMASK_PKEY) { + /* + * PR request has a specific pkey: + * Check that source and destination share this pkey. + * If QoS level has pkeys, check that this pkey exists + * in the QoS level pkeys. + * PR returned pkey is the requested pkey. 
+ */ + pkey = p_pr->pkey; + if (!osm_physp_share_this_pkey(p_src_physp, p_dest_physp, pkey, + sa->p_subn->opt.allow_both_pkeys)) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1A: " + "Ports 0x%016" PRIx64 " (%s port %d) and " + "0x%016" PRIx64 " (%s port %d) " + "do not share specified PKey 0x%04x\n", + cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), + p_src_physp->p_node->print_desc, + p_src_physp->port_num, + cl_ntoh64(osm_physp_get_port_guid + (p_dest_physp)), + p_dest_physp->p_node->print_desc, + p_dest_physp->port_num, + cl_ntoh16(pkey)); + status = IB_NOT_FOUND; + goto Exit; + } + if (p_qos_level && p_qos_level->pkey_range_len && + !osm_qos_level_has_pkey(p_qos_level, pkey)) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1D: " + "QoS level \"%s\" doesn't define specified PKey 0x%04x " + "for ports 0x%016" PRIx64 " (%s port %d) and " + "0x%016"PRIx64" (%s port %d)\n", + p_qos_level->name, + cl_ntoh16(pkey), + cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), + p_src_physp->p_node->print_desc, + p_src_alias_guid->p_base_port->p_physp->port_num, + cl_ntoh64(osm_physp_get_port_guid + (p_dest_physp)), + p_dest_physp->p_node->print_desc, + p_dest_alias_guid->p_base_port->p_physp->port_num); + status = IB_NOT_FOUND; + goto Exit; + } + + } else if (p_qos_level && p_qos_level->pkey_range_len) { + /* + * PR request doesn't have a specific pkey, but QoS level + * has pkeys - get shared pkey from QoS level pkeys + */ + pkey = osm_qos_level_get_shared_pkey(p_qos_level, + p_src_physp, p_dest_physp, + sa->p_subn->opt.allow_both_pkeys); + if (!pkey) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1E: " + "Ports 0x%016" PRIx64 " (%s) and " + "0x%016" PRIx64 " (%s) do not share " + "PKeys defined by QoS level \"%s\"\n", + cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), + p_src_physp->p_node->print_desc, + cl_ntoh64(osm_physp_get_port_guid + (p_dest_physp)), + p_dest_physp->p_node->print_desc, + p_qos_level->name); + status = IB_NOT_FOUND; + goto Exit; + } + } else { + /* + * 
Neither PR request nor QoS level have pkey. + * Just get any shared pkey. + */ + pkey = osm_physp_find_common_pkey(p_src_physp, p_dest_physp, + sa->p_subn->opt.allow_both_pkeys); + if (!pkey) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1B: " + "Ports src 0x%016"PRIx64" (%s port %d) and " + "dst 0x%016"PRIx64" (%s port %d) do not have " + "any shared PKeys\n", + cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), + p_src_physp->p_node->print_desc, + p_src_physp->port_num, + cl_ntoh64(osm_physp_get_port_guid + (p_dest_physp)), + p_dest_physp->p_node->print_desc, + p_dest_physp->port_num); + status = IB_NOT_FOUND; + goto Exit; + } + } + + if (pkey) { + p_prtn = + (osm_prtn_t *) cl_qmap_get(&sa->p_subn->prtn_pkey_tbl, + pkey & cl_hton16((uint16_t) ~ + 0x8000)); + if (p_prtn == + (osm_prtn_t *) cl_qmap_end(&sa->p_subn->prtn_pkey_tbl)) + p_prtn = NULL; + } + + /* + * Set PathRecord SL + */ + + if (comp_mask & IB_PR_COMPMASK_SL) { + /* + * Specific SL was requested + */ + sl = ib_path_rec_sl(p_pr); + + if (p_qos_level && p_qos_level->sl_set + && (p_qos_level->sl != sl)) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1F: " + "QoS constraints: required PathRecord SL (%u) " + "doesn't match QoS policy \"%s\" SL (%u) " + "[%s port %d <-> %s port %d]\n", sl, + p_qos_level->name, + p_qos_level->sl, + p_src_alias_guid->p_base_port->p_node->print_desc, + p_src_alias_guid->p_base_port->p_physp->port_num, + p_dest_alias_guid->p_base_port->p_node->print_desc, + p_dest_alias_guid->p_base_port->p_physp->port_num); + status = IB_NOT_FOUND; + goto Exit; + } + + } else if (p_qos_level && p_qos_level->sl_set) { + /* + * No specific SL was requested, but there is an SL in + * QoS level. 
+ */ + sl = p_qos_level->sl; + + if (pkey && p_prtn && p_prtn->sl != p_qos_level->sl) + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "QoS level SL (%u) overrides partition SL (%u)\n", + p_qos_level->sl, p_prtn->sl); + + } else if (pkey) { + /* + * No specific SL in request or in QoS level - use partition SL + */ + if (!p_prtn) { + sl = OSM_DEFAULT_SL; + /* this may be possible when pkey tables are created somehow in + previous runs or things are going wrong here */ + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1C: " + "No partition found for PKey 0x%04x - " + "using default SL %d " + "[%s port %d <-> %s port %d]\n", + cl_ntoh16(pkey), sl, + p_src_alias_guid->p_base_port->p_node->print_desc, + p_src_alias_guid->p_base_port->p_physp->port_num, + p_dest_alias_guid->p_base_port->p_node->print_desc, + p_dest_alias_guid->p_base_port->p_physp->port_num); + } else + sl = p_prtn->sl; + } else if (sa->p_subn->opt.qos) { + if (valid_sl_mask & (1 << OSM_DEFAULT_SL)) + sl = OSM_DEFAULT_SL; + else { + for (i = 0; i < IB_MAX_NUM_VLS; i++) + if (valid_sl_mask & (1 << i)) + break; + sl = i; + } + } else + sl = OSM_DEFAULT_SL; + + if (sa->p_subn->opt.qos && !(valid_sl_mask & (1 << sl))) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F24: " + "Selected SL (%u) leads to VL15 " + "[%s port %d <-> %s port %d]\n", + sl, + p_src_alias_guid->p_base_port->p_node->print_desc, + p_src_alias_guid->p_base_port->p_physp->port_num, + p_dest_alias_guid->p_base_port->p_node->print_desc, + p_dest_alias_guid->p_base_port->p_physp->port_num); + status = IB_NOT_FOUND; + goto Exit; + } + + /* + * If the routing engine wants to have a say in path SL selection, + * send the currently computed SL value as a hint and let the routing + * engine override it. 
+	 */
+	if (p_re && p_re->path_sl) {
+		uint8_t pr_sl;
+		pr_sl = sl;
+
+		sl = p_re->path_sl(p_re->context, sl,
+				   cl_hton16(src_lid_ho), cl_hton16(dest_lid_ho));
+
+		if ((comp_mask & IB_PR_COMPMASK_SL) && (sl != pr_sl)) {
+			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F2A: "
+				"Requested SL (%u) doesn't match SL calculated "
+				"by routing engine (%u) "
+				"[%s port %d <-> %s port %d]\n",
+				pr_sl,
+				sl,
+				p_src_alias_guid->p_base_port->p_node->print_desc,
+				p_src_alias_guid->p_base_port->p_physp->port_num,
+				p_dest_alias_guid->p_base_port->p_node->print_desc,
+				p_dest_alias_guid->p_base_port->p_physp->port_num);
+			status = IB_NOT_FOUND;
+			goto Exit;
+		}
+	}
+	/* reset pkey when raw traffic */
+	if (comp_mask & IB_PR_COMPMASK_RAWTRAFFIC &&
+	    cl_ntoh32(p_pr->hop_flow_raw) & (1 << 31))
+		pkey = 0;
+
+	p_parms->mtu = mtu;
+	p_parms->rate = rate;
+	p_parms->pkt_life = pkt_life;
+	p_parms->pkey = pkey;
+	p_parms->sl = sl;
+
+	OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Path params: mtu = %u, rate = %u,"
+		" packet lifetime = %u, pkey = 0x%04X, sl = %u\n",
+		mtu, rate, pkt_life, cl_ntoh16(pkey), sl);
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+	return status;
+}
+
+/*
+ * Compute the path parameters (MTU, rate, packet lifetime, PKey, SL)
+ * between two ports without an SA PathRecord request behind the call.
+ *
+ * sa          - SA object; its subnet is used for the alias GUID lookups.
+ * p_src_port  - source port, must not be NULL.
+ * slid_ho     - source LID in host order, must not be 0.
+ * p_dest_port - destination port, must not be NULL.
+ * dlid_ho     - destination LID in host order, must not be 0.
+ * p_parms     - filled in by pr_rcv_get_path_parms() on success.
+ *
+ * The lookup is driven with an all-zero PathRecord and an empty component
+ * mask, so none of the comp_mask-gated request constraints apply.
+ *
+ * Returns IB_INVALID_PARAMETER when a port pointer is NULL or a LID is 0,
+ * IB_NOT_FOUND when either port GUID has no alias GUID entry in the subnet,
+ * otherwise whatever pr_rcv_get_path_parms() returns.
+ */
+ib_api_status_t osm_get_path_params(IN osm_sa_t * sa,
+				    IN const osm_port_t * p_src_port,
+				    IN const uint16_t slid_ho,
+				    IN const osm_port_t * p_dest_port,
+				    IN const uint16_t dlid_ho,
+				    OUT osm_path_parms_t * p_parms)
+{
+	osm_alias_guid_t *p_src_alias_guid, *p_dest_alias_guid;
+	ib_path_rec_t pr;
+
+	if (!p_src_port || !slid_ho || !p_dest_port || !dlid_ho)
+		return IB_INVALID_PARAMETER;
+
+	memset(&pr, 0, sizeof(ib_path_rec_t));
+
+	p_src_alias_guid = osm_get_alias_guid_by_guid(sa->p_subn,
+						      osm_port_get_guid(p_src_port));
+	p_dest_alias_guid = osm_get_alias_guid_by_guid(sa->p_subn,
+						       osm_port_get_guid(p_dest_port));
+
+	/*
+	 * pr_rcv_get_path_parms() dereferences both alias GUID entries
+	 * unconditionally, so a failed lookup must be rejected here.
+	 */
+	if (!p_src_alias_guid || !p_dest_alias_guid)
+		return IB_NOT_FOUND;
+
+	return pr_rcv_get_path_parms(sa, &pr,
+				     p_src_alias_guid, slid_ho,
+				     p_dest_alias_guid, dlid_ho, 0, p_parms);
+}
+
+static void pr_rcv_build_pr(IN osm_sa_t * sa,
+			    IN const osm_alias_guid_t * p_src_alias_guid,
+			    IN const
osm_alias_guid_t * p_dest_alias_guid, + IN const ib_gid_t * p_sgid, + IN const ib_gid_t * p_dgid, + IN const uint16_t src_lid_ho, + IN const uint16_t dest_lid_ho, + IN const uint8_t preference, + IN const osm_path_parms_t * p_parms, + OUT ib_path_rec_t * p_pr) +{ + const osm_physp_t *p_src_physp = NULL, *p_dest_physp = NULL; + osm_port_t *p_port; + uint8_t rate, new_rate; + + OSM_LOG_ENTER(sa->p_log); + + if (p_dgid) + p_pr->dgid = *p_dgid; + else { + p_dest_physp = p_dest_alias_guid->p_base_port->p_physp; + + p_pr->dgid.unicast.prefix = + osm_physp_get_subnet_prefix(p_dest_physp); + p_pr->dgid.unicast.interface_id = p_dest_alias_guid->alias_guid; + } + if (p_sgid) + p_pr->sgid = *p_sgid; + else { + p_src_physp = p_src_alias_guid->p_base_port->p_physp; + + p_pr->sgid.unicast.prefix = osm_physp_get_subnet_prefix(p_src_physp); + p_pr->sgid.unicast.interface_id = p_src_alias_guid->alias_guid; + } + + p_pr->dlid = cl_hton16(dest_lid_ho); + p_pr->slid = cl_hton16(src_lid_ho); + + p_pr->hop_flow_raw &= cl_hton32(1 << 31); + + /* Only set HopLimit if going through a router */ + if (p_dgid) + p_pr->hop_flow_raw |= cl_hton32(IB_HOPLIMIT_MAX); + + p_pr->pkey = p_parms->pkey; + ib_path_rec_set_sl(p_pr, p_parms->sl); + ib_path_rec_set_qos_class(p_pr, 0); + p_pr->mtu = (uint8_t) (p_parms->mtu | 0x80); + rate = p_parms->rate; + if (sa->p_subn->opt.use_original_extended_sa_rates_only) { + new_rate = ib_path_rate_max_12xedr(rate); + if (new_rate != rate) { + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, + "Rate decreased from %u to %u\n", + rate, new_rate); + rate = new_rate; + } + } else if (rate >= IB_PATH_RECORD_RATE_28_GBS) { + /* + * If one of the new 2x or HDR rates, make sure that + * src (and dest if reversible) ports support this + */ + if (p_src_physp == NULL) { + p_port = osm_get_port_by_lid_ho(sa->p_subn, src_lid_ho); + if (p_port) + p_src_physp = p_port->p_physp; + } + + if (p_src_physp) + rate = ib_path_rate_2x_hdr_fixups(&p_src_physp->port_info, rate); + if 
(p_parms->reversible) { + if (p_dest_physp == NULL) { + p_port = osm_get_port_by_lid_ho(sa->p_subn, + dest_lid_ho); + if (p_port) + p_dest_physp = p_port->p_physp; + } + if (p_dest_physp) + rate = ib_path_rate_2x_hdr_fixups(&p_dest_physp->port_info, rate); + } + } + p_pr->rate = (uint8_t) (rate | 0x80); + + /* According to 1.2 spec definition Table 205 PacketLifeTime description, + for loopback paths, packetLifeTime shall be zero. */ + if (p_src_alias_guid->p_base_port == p_dest_alias_guid->p_base_port) + p_pr->pkt_life = 0x80; /* loopback */ + else + p_pr->pkt_life = (uint8_t) (p_parms->pkt_life | 0x80); + + p_pr->preference = preference; + + /* always return num_path = 0 so this is only the reversible component */ + if (p_parms->reversible) + p_pr->num_path = 0x80; + + OSM_LOG_EXIT(sa->p_log); +} + +static osm_sa_item_t *pr_rcv_get_lid_pair_path(IN osm_sa_t * sa, + IN const ib_path_rec_t * p_pr, + IN const osm_alias_guid_t * p_src_alias_guid, + IN const osm_alias_guid_t * p_dest_alias_guid, + IN const ib_gid_t * p_sgid, + IN const ib_gid_t * p_dgid, + IN const uint16_t src_lid_ho, + IN const uint16_t dest_lid_ho, + IN const ib_net64_t comp_mask, + IN const uint8_t preference) +{ + osm_path_parms_t path_parms; + osm_path_parms_t rev_path_parms; + osm_sa_item_t *p_pr_item; + ib_api_status_t status, rev_path_status; + + OSM_LOG_ENTER(sa->p_log); + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src LID %u, Dest LID %u\n", + src_lid_ho, dest_lid_ho); + + p_pr_item = malloc(SA_PR_RESP_SIZE); + if (p_pr_item == NULL) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F01: " + "Unable to allocate path record\n"); + goto Exit; + } + memset(p_pr_item, 0, SA_PR_RESP_SIZE); + + status = pr_rcv_get_path_parms(sa, p_pr, p_src_alias_guid, src_lid_ho, + p_dest_alias_guid, dest_lid_ho, + comp_mask, &path_parms); + + if (status != IB_SUCCESS) { + free(p_pr_item); + p_pr_item = NULL; + goto Exit; + } + + /* now try the reversible path */ + rev_path_status = pr_rcv_get_path_parms(sa, p_pr, 
p_dest_alias_guid, + dest_lid_ho, p_src_alias_guid, + src_lid_ho, comp_mask, + &rev_path_parms); + + path_parms.reversible = (rev_path_status == IB_SUCCESS); + + /* did we get a Reversible Path compmask ? */ + /* + NOTE that if the reversible component = 0, it is a don't care + rather than requiring non-reversible paths ... + see Vol1 Ver1.2 p900 l16 + */ + if ((comp_mask & IB_PR_COMPMASK_REVERSIBLE) && + !path_parms.reversible && (p_pr->num_path & 0x80)) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Requested reversible path but failed to get one\n"); + free(p_pr_item); + p_pr_item = NULL; + goto Exit; + } + + pr_rcv_build_pr(sa, p_src_alias_guid, p_dest_alias_guid, p_sgid, p_dgid, + src_lid_ho, dest_lid_ho, preference, &path_parms, + &p_pr_item->resp.path_rec); + +Exit: + OSM_LOG_EXIT(sa->p_log); + return p_pr_item; +} + +static void pr_rcv_get_port_pair_paths(IN osm_sa_t * sa, + IN const ib_sa_mad_t *sa_mad, + IN const osm_port_t * p_req_port, + IN const osm_alias_guid_t * p_src_alias_guid, + IN const osm_alias_guid_t * p_dest_alias_guid, + IN const ib_gid_t * p_sgid, + IN const ib_gid_t * p_dgid, + IN cl_qlist_t * p_list) +{ + const ib_path_rec_t *p_pr = ib_sa_mad_get_payload_ptr(sa_mad); + ib_net64_t comp_mask = sa_mad->comp_mask; + osm_sa_item_t *p_pr_item; + uint16_t src_lid_min_ho; + uint16_t src_lid_max_ho; + uint16_t dest_lid_min_ho; + uint16_t dest_lid_max_ho; + uint16_t src_lid_ho; + uint16_t dest_lid_ho; + uint32_t path_num; + uint8_t preference; + unsigned iterations, src_offset, dest_offset; + + OSM_LOG_ENTER(sa->p_log); + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Src port 0x%016" PRIx64 ", Dst port 0x%016" PRIx64 "\n", + cl_ntoh64(p_src_alias_guid->alias_guid), + cl_ntoh64(p_dest_alias_guid->alias_guid)); + + /* Check that the req_port, src_port and dest_port all share a + pkey. The check is done on the default physical port of the ports. 
*/ + if (osm_port_share_pkey(sa->p_log, p_req_port, + p_src_alias_guid->p_base_port, + sa->p_subn->opt.allow_both_pkeys) == FALSE + || osm_port_share_pkey(sa->p_log, p_req_port, + p_dest_alias_guid->p_base_port, + sa->p_subn->opt.allow_both_pkeys) == FALSE + || osm_port_share_pkey(sa->p_log, p_src_alias_guid->p_base_port, + p_dest_alias_guid->p_base_port, + sa->p_subn->opt.allow_both_pkeys) == FALSE) + /* One of the pairs doesn't share a pkey so the path is disqualified. */ + goto Exit; + + /* + We shouldn't be here if the paths are disqualified in some way... + Thus, we assume every possible connection is valid. + + We desire to return high-quality paths first. + In OpenSM, higher quality means least overlap with other paths. + This is achieved in practice by returning paths with + different LID value on each end, which means these + paths are more redundant than paths with the same LID repeated + on one side. For example, in OpenSM the paths between two + endpoints with LMC = 1 might be as follows: + + Port A, LID 1 <-> Port B, LID 3 + Port A, LID 1 <-> Port B, LID 4 + Port A, LID 2 <-> Port B, LID 3 + Port A, LID 2 <-> Port B, LID 4 + + The OpenSM unicast routing algorithms attempt to disperse each path + to as varied a physical path as is reasonable. 1<->3 and 1<->4 have + more physical overlap (hence less redundancy) than 1<->3 and 2<->4. + + OpenSM ranks paths in three preference groups: + + Preference Value Description + ---------------- ------------------------------------------- + 0 Redundant in both directions with other + pref value = 0 paths + + 1 Redundant in one direction with other + pref value = 0 and pref value = 1 paths + + 2 Not redundant in either direction with + other paths + + 3-FF Unused + + SA clients don't need to know these details, only that the lower + preference paths are preferred, as stated in the spec.
The paths + may not actually be physically redundant depending on the topology + of the subnet, but the point of LMC > 0 is to offer redundancy, + so it is assumed that the subnet is physically appropriate for the + specified LMC value. A more advanced implementation would inspect for + physical redundancy, but I'm not going to bother with that now. + */ + + /* + Refine our search if the client specified end-point LIDs + */ + if (comp_mask & IB_PR_COMPMASK_DLID) + dest_lid_max_ho = dest_lid_min_ho = cl_ntoh16(p_pr->dlid); + else + osm_port_get_lid_range_ho(p_dest_alias_guid->p_base_port, + &dest_lid_min_ho, &dest_lid_max_ho); + + if (comp_mask & IB_PR_COMPMASK_SLID) + src_lid_max_ho = src_lid_min_ho = cl_ntoh16(p_pr->slid); + else + osm_port_get_lid_range_ho(p_src_alias_guid->p_base_port, + &src_lid_min_ho, &src_lid_max_ho); + + if (src_lid_min_ho == 0) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Obtained source LID of 0. No such LID possible " + "(%s port %d)\n", + p_src_alias_guid->p_base_port->p_node->print_desc, + p_src_alias_guid->p_base_port->p_physp->port_num); + goto Exit; + } + + if (dest_lid_min_ho == 0) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Obtained destination LID of 0. 
No such LID possible " + "(%s port %d)\n", + p_dest_alias_guid->p_base_port->p_node->print_desc, + p_dest_alias_guid->p_base_port->p_physp->port_num); + goto Exit; + } + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Src LIDs [%u-%u], Dest LIDs [%u-%u]\n", + src_lid_min_ho, src_lid_max_ho, + dest_lid_min_ho, dest_lid_max_ho); + + src_lid_ho = src_lid_min_ho; + dest_lid_ho = dest_lid_min_ho; + + /* + Preferred paths come first in OpenSM + */ + preference = 0; + path_num = 0; + + /* If SubnAdmGet, assume NumbPaths 1 (1.2 erratum) */ + if (sa_mad->method == IB_MAD_METHOD_GET) + iterations = 1; + else if (comp_mask & IB_PR_COMPMASK_NUMBPATH) + iterations = ib_path_rec_num_path(p_pr); + else + iterations = (unsigned) (-1); + + while (path_num < iterations) { + /* + These paths are "fully redundant" + */ + + p_pr_item = pr_rcv_get_lid_pair_path(sa, p_pr, p_src_alias_guid, + p_dest_alias_guid, + p_sgid, p_dgid, + src_lid_ho, dest_lid_ho, + comp_mask, preference); + + if (p_pr_item) { + cl_qlist_insert_tail(p_list, &p_pr_item->list_item); + ++path_num; + } + + if (++src_lid_ho > src_lid_max_ho) + break; + + if (++dest_lid_ho > dest_lid_max_ho) + break; + } + + /* + Check if we've accumulated all the paths that the user cares to see + */ + if (path_num == iterations) + goto Exit; + + /* + Don't bother reporting preference 1 paths for now. + It's more trouble than it's worth and can only occur + if ports have different LMC values, which isn't supported + by OpenSM right now anyway. 
+	 */
+	preference = 2;
+	src_lid_ho = src_lid_min_ho;
+	dest_lid_ho = dest_lid_min_ho;
+	src_offset = 0;
+	dest_offset = 0;
+
+	/*
+	   Iterate over the remaining paths
+	 */
+	while (path_num < iterations) {
+		dest_offset++;
+		dest_lid_ho++;
+
+		if (dest_lid_ho > dest_lid_max_ho) {
+			src_offset++;
+			src_lid_ho++;
+
+			if (src_lid_ho > src_lid_max_ho)
+				break;	/* done */
+
+			dest_offset = 0;
+			dest_lid_ho = dest_lid_min_ho;
+		}
+
+		/*
+		   These paths are "fully non-redundant" with paths already
+		   identified above and consequently not of much value.
+
+		   Don't return paths we already identified above, as indicated
+		   by the offset values being equal.
+		 */
+		if (src_offset == dest_offset)
+			continue;	/* already reported */
+
+		p_pr_item = pr_rcv_get_lid_pair_path(sa, p_pr, p_src_alias_guid,
+						     p_dest_alias_guid, p_sgid,
+						     p_dgid, src_lid_ho,
+						     dest_lid_ho, comp_mask,
+						     preference);
+
+		if (p_pr_item) {
+			cl_qlist_insert_tail(p_list, &p_pr_item->list_item);
+			++path_num;
+		}
+	}
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+}
+
+/*
+ * Return the port GUID of the router configured for an off-subnet prefix.
+ *
+ * The subnet's prefix route list is scanned in order and the first entry
+ * whose prefix is zero or equals the requested prefix is used.  If that
+ * entry carries a zero GUID the head of the router table is taken,
+ * otherwise the router with that GUID is looked up.
+ *
+ * Returns 0 when no route entry matches or no router is found.
+ */
+static ib_net64_t find_router(const osm_sa_t *sa, ib_net64_t prefix)
+{
+	cl_qlist_t *p_routes = &sa->p_subn->prefix_routes_list;
+	osm_prefix_route_t *p_match = NULL;
+	cl_list_item_t *p_item;
+	osm_router_t *p_rtr;
+
+	OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Non local DGID subnet prefix "
+		"0x%016" PRIx64 "\n", cl_ntoh64(prefix));
+
+	p_item = cl_qlist_head(p_routes);
+	while (p_item != cl_qlist_end(p_routes)) {
+		osm_prefix_route_t *p_cand = (osm_prefix_route_t *)p_item;
+
+		/* a zero prefix in the route entry matches any request */
+		if (p_cand->prefix == 0 || p_cand->prefix == prefix) {
+			p_match = p_cand;
+			break;
+		}
+		p_item = cl_qlist_next(p_item);
+	}
+
+	if (p_match == NULL)
+		return 0;
+
+	/* zero GUID in the route entry: fall back to the first router */
+	if (p_match->guid != 0)
+		p_rtr = (osm_router_t *) cl_qmap_get(&sa->p_subn->rtr_guid_tbl,
+						     p_match->guid);
+	else
+		p_rtr = (osm_router_t *) cl_qmap_head(&sa->p_subn->rtr_guid_tbl);
+
+	if (p_rtr == (osm_router_t *) cl_qmap_end(&sa->p_subn->rtr_guid_tbl))
+		return 0;
+
+	return osm_port_get_guid(osm_router_get_port_ptr(p_rtr));
+}
+
+ib_net16_t osm_pr_get_end_points(IN osm_sa_t * sa, + IN const ib_sa_mad_t *sa_mad, + OUT const osm_alias_guid_t ** pp_src_alias_guid, + OUT const osm_alias_guid_t ** pp_dest_alias_guid, + OUT const osm_port_t ** pp_src_port, + OUT const osm_port_t ** pp_dest_port, + OUT const ib_gid_t ** pp_sgid, + OUT const ib_gid_t ** pp_dgid) +{ + const ib_path_rec_t *p_pr = ib_sa_mad_get_payload_ptr(sa_mad); + ib_net64_t comp_mask = sa_mad->comp_mask; + ib_net64_t dest_guid; + ib_net16_t sa_status = IB_SA_MAD_STATUS_SUCCESS; + + OSM_LOG_ENTER(sa->p_log); + + /* + Determine what fields are valid and then get a pointer + to the source and destination port objects, if possible. + */ + + /* + Check a few easy disqualifying cases up front before getting + into the endpoints. + */ + + *pp_src_alias_guid = NULL; + *pp_src_port = NULL; + if (comp_mask & IB_PR_COMPMASK_SGID) { + if (!ib_gid_is_link_local(&p_pr->sgid)) { + if (ib_gid_get_subnet_prefix(&p_pr->sgid) != + sa->p_subn->opt.subnet_prefix) { + /* + This 'error' is the client's fault (bad gid) + so don't enter it as an error in our own log. + Return an error response to the client. + */ + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, + "Non local SGID subnet prefix 0x%016" + PRIx64 "\n", + cl_ntoh64(p_pr->sgid.unicast.prefix)); + sa_status = IB_SA_MAD_STATUS_INVALID_GID; + goto Exit; + } + } + + *pp_src_alias_guid = osm_get_alias_guid_by_guid(sa->p_subn, + p_pr->sgid.unicast.interface_id); + if (!*pp_src_alias_guid) { + /* + This 'error' is the client's fault (bad gid) so + don't enter it as an error in our own log. + Return an error response to the client. 
+ */ + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, + "No source port with GUID 0x%016" PRIx64 "\n", + cl_ntoh64(p_pr->sgid.unicast.interface_id)); + sa_status = IB_SA_MAD_STATUS_INVALID_GID; + goto Exit; + } + if (pp_sgid) + *pp_sgid = &p_pr->sgid; + } + + if (comp_mask & IB_PR_COMPMASK_SLID) { + *pp_src_port = osm_get_port_by_lid(sa->p_subn, p_pr->slid); + if (!*pp_src_port) { + /* + This 'error' is the client's fault (bad lid) so + don't enter it as an error in our own log. + Return an error response to the client. + */ + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "No source port " + "with LID %u\n", cl_ntoh16(p_pr->slid)); + sa_status = IB_SA_MAD_STATUS_NO_RECORDS; + goto Exit; + } + } + + *pp_dest_alias_guid = NULL; + *pp_dest_port = NULL; + if (comp_mask & IB_PR_COMPMASK_DGID) { + if (!ib_gid_is_link_local(&p_pr->dgid) && + !ib_gid_is_multicast(&p_pr->dgid) && + ib_gid_get_subnet_prefix(&p_pr->dgid) != + sa->p_subn->opt.subnet_prefix) { + dest_guid = find_router(sa, p_pr->dgid.unicast.prefix); + if (!dest_guid) { + char gid_str[INET6_ADDRSTRLEN]; + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, + "Off subnet DGID %s, but router not " + "found\n", + inet_ntop(AF_INET6, p_pr->dgid.raw, + gid_str, sizeof(gid_str))); + sa_status = IB_SA_MAD_STATUS_INVALID_GID; + goto Exit; + } + if (pp_dgid) + *pp_dgid = &p_pr->dgid; + } else + dest_guid = p_pr->dgid.unicast.interface_id; + + *pp_dest_alias_guid = osm_get_alias_guid_by_guid(sa->p_subn, + dest_guid); + if (!*pp_dest_alias_guid) { + /* + This 'error' is the client's fault (bad gid) so + don't enter it as an error in our own log. + Return an error response to the client. 
+			 */
+			OSM_LOG(sa->p_log, OSM_LOG_VERBOSE,
+				"No dest port with GUID 0x%016" PRIx64 "\n",
+				cl_ntoh64(dest_guid));
+			sa_status = IB_SA_MAD_STATUS_INVALID_GID;
+			goto Exit;
+		}
+	}
+
+	if (comp_mask & IB_PR_COMPMASK_DLID) {
+		*pp_dest_port = osm_get_port_by_lid(sa->p_subn, p_pr->dlid);
+		if (!*pp_dest_port) {
+			/*
+			   This 'error' is the client's fault (bad lid)
+			   so don't enter it as an error in our own log.
+			   Return an error response to the client.
+			 */
+			OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "No dest port "
+				"with LID %u\n", cl_ntoh16(p_pr->dlid));
+			sa_status = IB_SA_MAD_STATUS_NO_RECORDS;
+			goto Exit;
+		}
+	}
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+	return sa_status;
+}
+
+/*
+ * Build path records for every (source, destination) pairing of the
+ * entries in the subnet's alias port GUID table, appending them to p_list.
+ *
+ * A port paired with itself is a legitimate path, so it is not skipped.
+ * Both A -> B and B -> A are computed because nothing here checks the
+ * reversibility of a path.  For IB_MAD_METHOD_GET the walk ends as soon
+ * as the list holds at least one record.
+ */
+static void pr_rcv_process_world(IN osm_sa_t * sa, IN const ib_sa_mad_t * sa_mad,
+				 IN const osm_port_t * requester_port,
+				 IN const ib_gid_t * p_sgid,
+				 IN const ib_gid_t * p_dgid,
+				 IN cl_qlist_t * p_list)
+{
+	const cl_qmap_t *p_tbl;
+	const osm_alias_guid_t *p_dst, *p_src;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	p_tbl = &sa->p_subn->alias_port_guid_tbl;
+
+	/* outer walk: destinations; inner walk: sources */
+	for (p_dst = (osm_alias_guid_t *) cl_qmap_head(p_tbl);
+	     p_dst != (osm_alias_guid_t *) cl_qmap_end(p_tbl);
+	     p_dst = (osm_alias_guid_t *) cl_qmap_next(&p_dst->map_item)) {
+		for (p_src = (osm_alias_guid_t *) cl_qmap_head(p_tbl);
+		     p_src != (osm_alias_guid_t *) cl_qmap_end(p_tbl);
+		     p_src = (osm_alias_guid_t *) cl_qmap_next(&p_src->map_item)) {
+			pr_rcv_get_port_pair_paths(sa, sa_mad, requester_port,
+						   p_src, p_dst,
+						   p_sgid, p_dgid, p_list);
+
+			/* a Get needs only one record, so stop at the first */
+			if (sa_mad->method == IB_MAD_METHOD_GET &&
+			    cl_qlist_count(p_list) > 0)
+				goto Exit;
+		}
+	}
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+}
+
+/*
+ * Build path records when only one end of the path is fixed.
+ *
+ * If p_src_alias_guid is non-NULL it is the source and every entry of the
+ * subnet's alias port GUID table is tried as the destination; otherwise
+ * p_dest_alias_guid is the destination and every entry is tried as the
+ * source.  A path record from a port to itself is legitimate, so that
+ * pairing is not special-cased.  Results are appended to p_list; for
+ * IB_MAD_METHOD_GET the walk ends once the list holds at least one record.
+ */
+void osm_pr_process_half(IN osm_sa_t * sa, IN const ib_sa_mad_t * sa_mad,
+			 IN const osm_port_t * requester_port,
+			 IN const osm_alias_guid_t * p_src_alias_guid,
+			 IN const osm_alias_guid_t * p_dest_alias_guid,
+			 IN const ib_gid_t * p_sgid,
+			 IN const ib_gid_t * p_dgid,
+			 IN cl_qlist_t * p_list)
+{
+	const cl_qmap_t *p_tbl;
+	const osm_alias_guid_t *p_cur, *p_src, *p_dst;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	p_tbl = &sa->p_subn->alias_port_guid_tbl;
+
+	for (p_cur = (osm_alias_guid_t *) cl_qmap_head(p_tbl);
+	     p_cur != (osm_alias_guid_t *) cl_qmap_end(p_tbl);
+	     p_cur = (osm_alias_guid_t *) cl_qmap_next(&p_cur->map_item)) {
+		/*
+		   The end supplied by the caller stays fixed; the table
+		   entry being visited fills the other end.
+		 */
+		p_src = p_src_alias_guid ? p_src_alias_guid : p_cur;
+		p_dst = p_src_alias_guid ? p_cur : p_dest_alias_guid;
+
+		pr_rcv_get_port_pair_paths(sa, sa_mad, requester_port,
+					   p_src, p_dst,
+					   p_sgid, p_dgid, p_list);
+
+		if (sa_mad->method == IB_MAD_METHOD_GET &&
+		    cl_qlist_count(p_list) > 0)
+			break;
+	}
+
+	OSM_LOG_EXIT(sa->p_log);
+}
+
+/*
+ * Build path records for one fully specified (source, destination) pair.
+ * This only wraps pr_rcv_get_port_pair_paths() in function entry/exit
+ * logging; results are appended to p_list.
+ */
+void osm_pr_process_pair(IN osm_sa_t * sa, IN const ib_sa_mad_t * sa_mad,
+			 IN const osm_port_t * requester_port,
+			 IN const osm_alias_guid_t * p_src_alias_guid,
+			 IN const osm_alias_guid_t * p_dest_alias_guid,
+			 IN const ib_gid_t * p_sgid,
+			 IN const ib_gid_t * p_dgid,
+			 IN cl_qlist_t * p_list)
+{
+	OSM_LOG_ENTER(sa->p_log);
+
+	pr_rcv_get_port_pair_paths(sa, sa_mad, requester_port,
+				   p_src_alias_guid, p_dest_alias_guid,
+				   p_sgid, p_dgid, p_list);
+
+	OSM_LOG_EXIT(sa->p_log);
+}
+
+static ib_api_status_t pr_match_mgrp_attributes(IN osm_sa_t * sa,
+						IN const ib_sa_mad_t * sa_mad,
+						IN const osm_mgrp_t * p_mgrp)
+{
+	const ib_path_rec_t *p_pr = ib_sa_mad_get_payload_ptr(sa_mad);
+	ib_net64_t comp_mask = sa_mad->comp_mask;
+	const osm_port_t *port;
+	ib_api_status_t status = IB_ERROR;
+	uint32_t flow_label;
+	uint8_t sl, hop_limit;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	/* check that MLID of the MC group matches the PathRecord DLID */
+	if ((comp_mask & IB_PR_COMPMASK_DLID) && p_mgrp->mlid != p_pr->dlid) {
+		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+			"DLID 0x%x is not MLID 0x%x for MC group\n",
+			cl_ntoh16(p_pr->dlid), cl_ntoh16(p_mgrp->mlid));
+		goto Exit;
+	}
+
+	/* If SGID and/or SLID specified, should validate as member of MC group */
+	if (comp_mask & IB_PR_COMPMASK_SGID) {
+		if (!osm_mgrp_get_mcm_alias_guid(p_mgrp,
+						 p_pr->sgid.unicast.interface_id)) {
+			char gid_str[INET6_ADDRSTRLEN];
+			OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+				"SGID %s is not a member of MC group\n",
+				inet_ntop(AF_INET6, p_pr->sgid.raw,
+					  gid_str, sizeof
gid_str)); + goto Exit; + } + } + + if (comp_mask & IB_PR_COMPMASK_SLID) { + port = osm_get_port_by_lid(sa->p_subn, p_pr->slid); + if (!port || !osm_mgrp_get_mcm_port(p_mgrp, port->guid)) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Either no port with SLID %u found or " + "SLID not a member of MC group\n", + cl_ntoh16(p_pr->slid)); + goto Exit; + } + } + + /* Also, MTU, rate, packet lifetime, and raw traffic requested are not currently checked */ + if ((comp_mask & IB_PR_COMPMASK_PKEY) && + p_pr->pkey != p_mgrp->mcmember_rec.pkey) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Pkey 0x%x doesn't match MC group Pkey 0x%x\n", + cl_ntoh16(p_pr->pkey), + cl_ntoh16(p_mgrp->mcmember_rec.pkey)); + goto Exit; + } + + ib_member_get_sl_flow_hop(p_mgrp->mcmember_rec.sl_flow_hop, + &sl, &flow_label, &hop_limit); + + if ((comp_mask & IB_PR_COMPMASK_SL) && ib_path_rec_sl(p_pr) != sl) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "SL %d doesn't match MC group SL %d\n", + ib_path_rec_sl(p_pr), sl); + goto Exit; + } + + /* If SubnAdmGet, assume NumbPaths of 1 (1.2 erratum) */ + if ((comp_mask & IB_PR_COMPMASK_NUMBPATH) && + sa_mad->method != IB_MAD_METHOD_GET && + ib_path_rec_num_path(p_pr) == 0) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Number of paths requested is 0\n"); + goto Exit; + } + + if ((comp_mask & IB_PR_COMPMASK_FLOWLABEL) && + ib_path_rec_flow_lbl(p_pr) != flow_label) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Flow label 0x%x doesn't match MC group " + " flow label 0x%x\n", + ib_path_rec_flow_lbl(p_pr), flow_label); + goto Exit; + } + + if ((comp_mask & IB_PR_COMPMASK_HOPLIMIT) && + ib_path_rec_hop_limit(p_pr) != hop_limit) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Hop limit %u doesn't match MC group hop limit %u\n", + ib_path_rec_hop_limit(p_pr), hop_limit); + goto Exit; + } + + + if ((comp_mask & IB_PR_COMPMASK_TCLASS) && + p_pr->tclass != p_mgrp->mcmember_rec.tclass) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "TClass 0x%02x doesn't match MC group TClass 0x%02x\n", + p_pr->tclass, 
p_mgrp->mcmember_rec.tclass); + goto Exit; + } + + status = IB_SUCCESS; + +Exit: + OSM_LOG_EXIT(sa->p_log); + return status; +} + +static void pr_process_multicast(osm_sa_t * sa, const ib_sa_mad_t *sa_mad, + cl_qlist_t *list) +{ + ib_path_rec_t *pr = ib_sa_mad_get_payload_ptr(sa_mad); + osm_mgrp_t *mgrp; + ib_api_status_t status; + osm_sa_item_t *pr_item; + uint32_t flow_label; + uint8_t sl, hop_limit; + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Multicast destination requested\n"); + + mgrp = osm_get_mgrp_by_mgid(sa->p_subn, &pr->dgid); + if (!mgrp) { + char gid_str[INET6_ADDRSTRLEN]; + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F09: " + "No MC group found for PathRecord destination GID %s\n", + inet_ntop(AF_INET6, pr->dgid.raw, gid_str, + sizeof gid_str)); + return; + } + + /* Make sure the rest of the PathRecord matches the MC group attributes */ + status = pr_match_mgrp_attributes(sa, sa_mad, mgrp); + if (status != IB_SUCCESS) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F19: " + "MC group attributes don't match PathRecord request\n"); + return; + } + + pr_item = malloc(SA_PR_RESP_SIZE); + if (pr_item == NULL) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F18: " + "Unable to allocate path record for MC group\n"); + return; + } + memset(pr_item, 0, sizeof(cl_list_item_t)); + + /* Copy PathRecord request into response */ + pr_item->resp.path_rec = *pr; + + /* Now, use the MC info to cruft up the PathRecord response */ + pr_item->resp.path_rec.dgid = mgrp->mcmember_rec.mgid; + pr_item->resp.path_rec.dlid = mgrp->mcmember_rec.mlid; + pr_item->resp.path_rec.tclass = mgrp->mcmember_rec.tclass; + pr_item->resp.path_rec.num_path = 1; + pr_item->resp.path_rec.pkey = mgrp->mcmember_rec.pkey; + + /* MTU, rate, and packet lifetime should be exactly */ + pr_item->resp.path_rec.mtu = (IB_PATH_SELECTOR_EXACTLY << 6) | mgrp->mcmember_rec.mtu; + pr_item->resp.path_rec.rate = (IB_PATH_SELECTOR_EXACTLY << 6) | mgrp->mcmember_rec.rate; + pr_item->resp.path_rec.pkt_life = 
(IB_PATH_SELECTOR_EXACTLY << 6) | mgrp->mcmember_rec.pkt_life; + + /* SL, Hop Limit, and Flow Label */ + ib_member_get_sl_flow_hop(mgrp->mcmember_rec.sl_flow_hop, + &sl, &flow_label, &hop_limit); + ib_path_rec_set_sl(&pr_item->resp.path_rec, sl); + ib_path_rec_set_qos_class(&pr_item->resp.path_rec, 0); + + /* HopLimit is not yet set in non link local MC groups */ + /* If it were, this would not be needed */ + if (ib_mgid_get_scope(&mgrp->mcmember_rec.mgid) != + IB_MC_SCOPE_LINK_LOCAL) + hop_limit = IB_HOPLIMIT_MAX; + + /* + hop_limit occupies the low byte and flow_label is shifted left by 8. + Combine both in host order and convert the whole word once, so the + flow label is not left unswapped next to a swapped hop limit. + */ + pr_item->resp.path_rec.hop_flow_raw = + cl_hton32(hop_limit | (flow_label << 8)); + + cl_qlist_insert_tail(list, &pr_item->list_item); +} + +void osm_pr_rcv_process(IN void *context, IN void *data) +{ + osm_sa_t *sa = context; + osm_madw_t *p_madw = data; + const ib_sa_mad_t *p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); + ib_path_rec_t *p_pr = ib_sa_mad_get_payload_ptr(p_sa_mad); + cl_qlist_t pr_list; + const ib_gid_t *p_sgid = NULL, *p_dgid = NULL; + const osm_alias_guid_t *p_src_alias_guid, *p_dest_alias_guid; + const osm_port_t *p_src_port, *p_dest_port; + osm_port_t *requester_port; + uint8_t rate, mtu; + + OSM_LOG_ENTER(sa->p_log); + + CL_ASSERT(p_madw); + + CL_ASSERT(p_sa_mad->attr_id == IB_MAD_ATTR_PATH_RECORD); + + /* we only support SubnAdmGet and SubnAdmGetTable methods */ + if (p_sa_mad->method != IB_MAD_METHOD_GET && + p_sa_mad->method != IB_MAD_METHOD_GETTABLE) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F17: " + "Unsupported Method (%s) for PathRecord request\n", + ib_get_sa_method_str(p_sa_mad->method)); + osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR); + goto Exit; + } + + /* Validate rate if supplied */ + if ((p_sa_mad->comp_mask & IB_PR_COMPMASK_RATESELEC) && + (p_sa_mad->comp_mask & IB_PR_COMPMASK_RATE)) { + rate = ib_path_rec_rate(p_pr); + if (!ib_rate_is_valid(rate)) { + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_REQ_INVALID); + goto Exit; + } + } + /* Validate MTU if supplied */ + if ((p_sa_mad->comp_mask & 
IB_PR_COMPMASK_MTUSELEC) && + (p_sa_mad->comp_mask & IB_PR_COMPMASK_MTU)) { + mtu = ib_path_rec_mtu(p_pr); + if (!ib_mtu_is_valid(mtu)) { + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_REQ_INVALID); + goto Exit; + } + } + + /* Make sure either none or both ServiceID parameters are supplied */ + if ((p_sa_mad->comp_mask & IB_PR_COMPMASK_SERVICEID) != 0 && + (p_sa_mad->comp_mask & IB_PR_COMPMASK_SERVICEID) != + IB_PR_COMPMASK_SERVICEID) { + osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_INSUF_COMPS); + goto Exit; + } + + cl_qlist_init(&pr_list); + + /* + Most SA functions (including this one) are read-only on the + subnet object, so we grab the lock non-exclusively. + */ + cl_plock_acquire(sa->p_lock); + + /* update the requester physical port */ + requester_port = osm_get_port_by_mad_addr(sa->p_log, sa->p_subn, + osm_madw_get_mad_addr_ptr + (p_madw)); + if (requester_port == NULL) { + cl_plock_release(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F16: " + "Cannot find requester physical port\n"); + goto Exit; + } + + if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_DEBUG)) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Requester port GUID 0x%" PRIx64 "\n", + cl_ntoh64(osm_port_get_guid(requester_port))); + osm_dump_path_record_v2(sa->p_log, p_pr, FILE_ID, OSM_LOG_DEBUG); + } + + /* Handle multicast destinations separately */ + if ((p_sa_mad->comp_mask & IB_PR_COMPMASK_DGID) && + ib_gid_is_multicast(&p_pr->dgid)) { + pr_process_multicast(sa, p_sa_mad, &pr_list); + goto Unlock; + } + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Unicast destination requested\n"); + + if (osm_pr_get_end_points(sa, p_sa_mad, + &p_src_alias_guid, &p_dest_alias_guid, + &p_src_port, &p_dest_port, + &p_sgid, &p_dgid) != IB_SA_MAD_STATUS_SUCCESS) + goto Unlock; + + if (p_src_alias_guid && p_src_port && + p_src_alias_guid->p_base_port != p_src_port) { + cl_plock_release(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, + "Requester port GUID 0x%" PRIx64 ": Port for SGUID " + "0x%" PRIx64 " not same 
as port for SLID %u\n", + cl_ntoh64(osm_port_get_guid(requester_port)), + cl_ntoh64(p_pr->sgid.unicast.interface_id), + cl_ntoh16(p_pr->slid)); + osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID); + goto Exit; + } + + if (p_dest_alias_guid && p_dest_port && + p_dest_alias_guid->p_base_port != p_dest_port) { + cl_plock_release(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, + "Requester port GUID 0x%" PRIx64 ": Port for DGUID " + "0x%" PRIx64 " not same as port for DLID %u\n", + cl_ntoh64(osm_port_get_guid(requester_port)), + cl_ntoh64(p_pr->dgid.unicast.interface_id), + cl_ntoh16(p_pr->dlid)); + osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID); + goto Exit; + } + + /* + What happens next depends on the type of endpoint information + that was specified.... + */ + if (p_src_alias_guid) { + if (p_dest_alias_guid) + osm_pr_process_pair(sa, p_sa_mad, requester_port, + p_src_alias_guid, p_dest_alias_guid, + p_sgid, p_dgid, &pr_list); + else if (!p_dest_port) + osm_pr_process_half(sa, p_sa_mad, requester_port, + p_src_alias_guid, NULL, p_sgid, + p_dgid, &pr_list); + else { + /* Get all alias GUIDs for the dest port */ + p_dest_alias_guid = (osm_alias_guid_t *) cl_qmap_head(&sa->p_subn->alias_port_guid_tbl); + while (p_dest_alias_guid != + (osm_alias_guid_t *) cl_qmap_end(&sa->p_subn->alias_port_guid_tbl)) { + if (osm_get_port_by_alias_guid(sa->p_subn, p_dest_alias_guid->alias_guid) == + p_dest_port) + osm_pr_process_pair(sa, p_sa_mad, + requester_port, + p_src_alias_guid, + p_dest_alias_guid, + p_sgid, p_dgid, + &pr_list); + if (p_sa_mad->method == IB_MAD_METHOD_GET && + cl_qlist_count(&pr_list) > 0) + break; + + p_dest_alias_guid = (osm_alias_guid_t *) cl_qmap_next(&p_dest_alias_guid->map_item); + } + } + } else { + if (p_dest_alias_guid && !p_src_port) + osm_pr_process_half(sa, p_sa_mad, requester_port, + NULL, p_dest_alias_guid, p_sgid, + p_dgid, &pr_list); + else if (!p_src_port && !p_dest_port) + /* + Katie, bar the door! 
+ */ + pr_rcv_process_world(sa, p_sa_mad, requester_port, + p_sgid, p_dgid, &pr_list); + else if (p_dest_alias_guid && p_src_port) { + /* Get all alias GUIDs for the src port */ + p_src_alias_guid = (osm_alias_guid_t *) cl_qmap_head(&sa->p_subn->alias_port_guid_tbl); + while (p_src_alias_guid != + (osm_alias_guid_t *) cl_qmap_end(&sa->p_subn->alias_port_guid_tbl)) { + if (osm_get_port_by_alias_guid(sa->p_subn, + p_src_alias_guid->alias_guid) == + p_src_port) + osm_pr_process_pair(sa, p_sa_mad, + requester_port, + p_src_alias_guid, + p_dest_alias_guid, + p_sgid, p_dgid, + &pr_list); + if (p_sa_mad->method == IB_MAD_METHOD_GET && + cl_qlist_count(&pr_list) > 0) + break; + p_src_alias_guid = (osm_alias_guid_t *) cl_qmap_next(&p_src_alias_guid->map_item); + } + } else if (p_src_port && !p_dest_port) { + /* Get all alias GUIDs for the src port */ + p_src_alias_guid = (osm_alias_guid_t *) cl_qmap_head(&sa->p_subn->alias_port_guid_tbl); + while (p_src_alias_guid != + (osm_alias_guid_t *) cl_qmap_end(&sa->p_subn->alias_port_guid_tbl)) { + if (osm_get_port_by_alias_guid(sa->p_subn, + p_src_alias_guid->alias_guid) == + p_src_port) + osm_pr_process_half(sa, p_sa_mad, + requester_port, + p_src_alias_guid, + NULL, p_sgid, + p_dgid, &pr_list); + p_src_alias_guid = (osm_alias_guid_t *) cl_qmap_next(&p_src_alias_guid->map_item); + } + } else if (p_dest_port && !p_src_port) { + /* Get all alias GUIDs for the dest port */ + p_dest_alias_guid = (osm_alias_guid_t *) cl_qmap_head(&sa->p_subn->alias_port_guid_tbl); + while (p_dest_alias_guid != + (osm_alias_guid_t *) cl_qmap_end(&sa->p_subn->alias_port_guid_tbl)) { + if (osm_get_port_by_alias_guid(sa->p_subn, + p_dest_alias_guid->alias_guid) == + p_dest_port) + osm_pr_process_half(sa, p_sa_mad, + requester_port, + NULL, + p_dest_alias_guid, + p_sgid, p_dgid, + &pr_list); + p_dest_alias_guid = (osm_alias_guid_t *) cl_qmap_next(&p_dest_alias_guid->map_item); + } + } else { + /* Get all alias GUIDs for the src port */ + p_src_alias_guid = 
(osm_alias_guid_t *) cl_qmap_head(&sa->p_subn->alias_port_guid_tbl); + while (p_src_alias_guid != + (osm_alias_guid_t *) cl_qmap_end(&sa->p_subn->alias_port_guid_tbl)) { + if (osm_get_port_by_alias_guid(sa->p_subn, + p_src_alias_guid->alias_guid) == + p_src_port) { + /* Get all alias GUIDs for the dest port */ + p_dest_alias_guid = (osm_alias_guid_t *) cl_qmap_head(&sa->p_subn->alias_port_guid_tbl); + while (p_dest_alias_guid != + (osm_alias_guid_t *) cl_qmap_end(&sa->p_subn->alias_port_guid_tbl)) { + if (osm_get_port_by_alias_guid(sa->p_subn, + p_dest_alias_guid->alias_guid) == + p_dest_port) + osm_pr_process_pair(sa, + p_sa_mad, + requester_port, + p_src_alias_guid, + p_dest_alias_guid, + p_sgid, + p_dgid, + &pr_list); + if (p_sa_mad->method == IB_MAD_METHOD_GET && + cl_qlist_count(&pr_list) > 0) + break; + p_dest_alias_guid = (osm_alias_guid_t *) cl_qmap_next(&p_dest_alias_guid->map_item); + } + } + if (p_sa_mad->method == IB_MAD_METHOD_GET && + cl_qlist_count(&pr_list) > 0) + break; + p_src_alias_guid = (osm_alias_guid_t *) cl_qmap_next(&p_src_alias_guid->map_item); + } + } + } + +Unlock: + cl_plock_release(sa->p_lock); + + /* Now, (finally) respond to the PathRecord request */ + osm_sa_respond(sa, p_madw, sizeof(ib_path_rec_t), &pr_list); + +Exit: + OSM_LOG_EXIT(sa->p_log); +} diff --git a/opensm/osm_sa_pkey_record.c b/opensm/osm_sa_pkey_record.c new file mode 100644 index 0000000..296d0a5 --- /dev/null +++ b/opensm/osm_sa_pkey_record.c @@ -0,0 +1,316 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_SA_PKEY_RECORD_C +#include +#include +#include +#include +#include +#include + +#define SA_PKEY_RESP_SIZE SA_ITEM_RESP_SIZE(pkey_rec) + +typedef struct osm_pkey_search_ctxt { + const ib_pkey_table_record_t *p_rcvd_rec; + ib_net64_t comp_mask; + uint16_t block_num; + cl_qlist_t *p_list; + osm_sa_t *sa; + const osm_physp_t *p_req_physp; +} osm_pkey_search_ctxt_t; + +static void sa_pkey_create(IN osm_sa_t * sa, IN osm_physp_t * p_physp, + IN osm_pkey_search_ctxt_t * p_ctxt, + IN uint16_t block) +{ + osm_sa_item_t *p_rec_item; + uint16_t lid; + ib_pkey_table_t *tbl; + + OSM_LOG_ENTER(sa->p_log); + + p_rec_item = malloc(SA_PKEY_RESP_SIZE); + if (p_rec_item == NULL) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4602: " + "rec_item alloc failed\n"); + goto Exit; + } + + if (p_physp->p_node->node_info.node_type != IB_NODE_TYPE_SWITCH) + lid = p_physp->port_info.base_lid; + else + lid = osm_node_get_base_lid(p_physp->p_node, 0); + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "New P_Key table for: port 0x%016" PRIx64 + ", lid %u, port %u Block:%u\n", + cl_ntoh64(osm_physp_get_port_guid(p_physp)), + cl_ntoh16(lid), osm_physp_get_port_num(p_physp), block); + + memset(p_rec_item, 0, SA_PKEY_RESP_SIZE); + + p_rec_item->resp.pkey_rec.lid = lid; + /* + lid is already in network order (it is logged through cl_ntoh16), + but block is a host-order index, so convert it before storing it + in the record that goes out on the wire. + */ + p_rec_item->resp.pkey_rec.block_num = cl_hton16(block); + p_rec_item->resp.pkey_rec.port_num = osm_physp_get_port_num(p_physp); + /* FIXME: There are ninf.PartitionCap or swinf.PartitionEnforcementCap + pkey entries so everything in that range is a valid block number + even if opensm is not using it. Return 0. However things outside + that range should return no entries. Not sure how to figure that + here? The range of pkey_tbl can be less than the cap, so + this falsely triggers. 
*/ + tbl = osm_pkey_tbl_block_get(osm_physp_get_pkey_tbl(p_physp), block); + if (tbl) + p_rec_item->resp.pkey_rec.pkey_tbl = *tbl; + + cl_qlist_insert_tail(p_ctxt->p_list, &p_rec_item->list_item); + +Exit: + OSM_LOG_EXIT(sa->p_log); +} + +static void sa_pkey_check_physp(IN osm_sa_t * sa, IN osm_physp_t * p_physp, + osm_pkey_search_ctxt_t * p_ctxt) +{ + ib_net64_t comp_mask = p_ctxt->comp_mask; + uint16_t block, num_blocks; + + OSM_LOG_ENTER(sa->p_log); + + /* we got here with the phys port - all is left is to get the right block */ + if (comp_mask & IB_PKEY_COMPMASK_BLOCK) { + sa_pkey_create(sa, p_physp, p_ctxt, p_ctxt->block_num); + } else { + num_blocks = + osm_pkey_tbl_get_num_blocks(osm_physp_get_pkey_tbl + (p_physp)); + for (block = 0; block < num_blocks; block++) + sa_pkey_create(sa, p_physp, p_ctxt, block); + } + + OSM_LOG_EXIT(sa->p_log); +} + +static void sa_pkey_by_comp_mask(IN osm_sa_t * sa, IN const osm_port_t * p_port, + osm_pkey_search_ctxt_t * p_ctxt) +{ + const ib_pkey_table_record_t *p_rcvd_rec; + ib_net64_t comp_mask; + osm_physp_t *p_physp; + uint8_t port_num; + uint8_t num_ports; + const osm_physp_t *p_req_physp; + + OSM_LOG_ENTER(sa->p_log); + + p_rcvd_rec = p_ctxt->p_rcvd_rec; + comp_mask = p_ctxt->comp_mask; + port_num = p_rcvd_rec->port_num; + p_req_physp = p_ctxt->p_req_physp; + + /* if this is a switch port we can search all ports + otherwise we must be looking on port 0 */ + if (p_port->p_node->node_info.node_type != IB_NODE_TYPE_SWITCH) { + /* we put it in the comp mask and port num */ + port_num = p_port->p_physp->port_num; + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Using Physical Default Port Number: 0x%X (for End Node)\n", + port_num); + comp_mask |= IB_PKEY_COMPMASK_PORT; + } + + if (comp_mask & IB_PKEY_COMPMASK_PORT) { + if (port_num < osm_node_get_num_physp(p_port->p_node)) { + p_physp = + osm_node_get_physp_ptr(p_port->p_node, port_num); + /* Check that the p_physp is valid, and that is shares a pkey + with the p_req_physp. 
*/ + if (p_physp && + osm_physp_share_pkey(sa->p_log, p_req_physp, + p_physp, sa->p_subn->opt.allow_both_pkeys)) + sa_pkey_check_physp(sa, p_physp, p_ctxt); + } else { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4603: " + "Given Physical Port Number: 0x%X is out of range should be < 0x%X\n", + port_num, + osm_node_get_num_physp(p_port->p_node)); + goto Exit; + } + } else { + num_ports = osm_node_get_num_physp(p_port->p_node); + for (port_num = 0; port_num < num_ports; port_num++) { + p_physp = + osm_node_get_physp_ptr(p_port->p_node, port_num); + if (!p_physp) + continue; + + /* if the requester and the p_physp don't share a pkey - + continue */ + if (!osm_physp_share_pkey + (sa->p_log, p_req_physp, p_physp, sa->p_subn->opt.allow_both_pkeys)) + continue; + + sa_pkey_check_physp(sa, p_physp, p_ctxt); + } + } +Exit: + OSM_LOG_EXIT(sa->p_log); +} + +static void sa_pkey_by_comp_mask_cb(IN cl_map_item_t * p_map_item, IN void *cxt) +{ + const osm_port_t *p_port = (osm_port_t *) p_map_item; + osm_pkey_search_ctxt_t *p_ctxt = cxt; + + sa_pkey_by_comp_mask(p_ctxt->sa, p_port, p_ctxt); +} + +void osm_pkey_rec_rcv_process(IN void *ctx, IN void *data) +{ + osm_sa_t *sa = ctx; + osm_madw_t *p_madw = data; + const ib_sa_mad_t *p_rcvd_mad; + const ib_pkey_table_record_t *p_rcvd_rec; + const osm_port_t *p_port = NULL; + cl_qlist_t rec_list; + osm_pkey_search_ctxt_t context; + ib_net64_t comp_mask; + osm_physp_t *p_req_physp; + + CL_ASSERT(sa); + + OSM_LOG_ENTER(sa->p_log); + + CL_ASSERT(p_madw); + + p_rcvd_mad = osm_madw_get_sa_mad_ptr(p_madw); + p_rcvd_rec = + (ib_pkey_table_record_t *) ib_sa_mad_get_payload_ptr(p_rcvd_mad); + comp_mask = p_rcvd_mad->comp_mask; + + CL_ASSERT(p_rcvd_mad->attr_id == IB_MAD_ATTR_PKEY_TBL_RECORD); + + /* we only support SubnAdmGet and SubnAdmGetTable methods */ + if (p_rcvd_mad->method != IB_MAD_METHOD_GET && + p_rcvd_mad->method != IB_MAD_METHOD_GETTABLE) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4605: " + "Unsupported Method (%s) for PKeyRecord 
request\n", + ib_get_sa_method_str(p_rcvd_mad->method)); + osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR); + goto Exit; + } + + /* + p922 - P_KeyTableRecords shall only be provided in response + to trusted requests. + Check that the requester is a trusted one. + */ + if (p_rcvd_mad->sm_key != sa->p_subn->opt.sa_key) { + /* This is not a trusted requester! */ + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4608: " + "Ignoring PKeyRecord request from non-trusted requester" + " with SM_Key 0x%016" PRIx64 "\n", + cl_ntoh64(p_rcvd_mad->sm_key)); + osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID); + goto Exit; + } + + cl_plock_acquire(sa->p_lock); + + /* update the requester physical port */ + p_req_physp = osm_get_physp_by_mad_addr(sa->p_log, sa->p_subn, + osm_madw_get_mad_addr_ptr + (p_madw)); + if (p_req_physp == NULL) { + cl_plock_release(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4604: " + "Cannot find requester physical port\n"); + goto Exit; + } + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Requester port GUID 0x%" PRIx64 "\n", + cl_ntoh64(osm_physp_get_port_guid(p_req_physp))); + + cl_qlist_init(&rec_list); + + context.p_rcvd_rec = p_rcvd_rec; + context.p_list = &rec_list; + context.comp_mask = p_rcvd_mad->comp_mask; + context.sa = sa; + context.block_num = cl_ntoh16(p_rcvd_rec->block_num); + context.p_req_physp = p_req_physp; + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Got Query Lid:%u(%02X), Block:0x%02X(%02X), Port:0x%02X(%02X)\n", + cl_ntoh16(p_rcvd_rec->lid), + (comp_mask & IB_PKEY_COMPMASK_LID) != 0, p_rcvd_rec->port_num, + (comp_mask & IB_PKEY_COMPMASK_PORT) != 0, context.block_num, + (comp_mask & IB_PKEY_COMPMASK_BLOCK) != 0); + + /* + If the user specified a LID, it obviously narrows our + work load, since we don't have to search every port + */ + if (comp_mask & IB_PKEY_COMPMASK_LID) { + p_port = osm_get_port_by_lid(sa->p_subn, p_rcvd_rec->lid); + if (!p_port) + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 460B: " + "No port found with 
LID %u\n", + cl_ntoh16(p_rcvd_rec->lid)); + else + sa_pkey_by_comp_mask(sa, p_port, &context); + } else + cl_qmap_apply_func(&sa->p_subn->port_guid_tbl, + sa_pkey_by_comp_mask_cb, &context); + + cl_plock_release(sa->p_lock); + + osm_sa_respond(sa, p_madw, sizeof(ib_pkey_table_record_t), &rec_list); + +Exit: + OSM_LOG_EXIT(sa->p_log); +} diff --git a/opensm/osm_sa_portinfo_record.c b/opensm/osm_sa_portinfo_record.c new file mode 100644 index 0000000..c362c62 --- /dev/null +++ b/opensm/osm_sa_portinfo_record.c @@ -0,0 +1,608 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_pir_rcv_t. + * This object represents the PortInfoRecord Receiver object. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_SA_PORTINFO_RECORD_C +#include +#include +#include +#include +#include +#include +#include + +#define SA_PIR_RESP_SIZE SA_ITEM_RESP_SIZE(port_rec) + +typedef struct osm_pir_search_ctxt { + const ib_portinfo_record_t *p_rcvd_rec; + ib_net64_t comp_mask; + cl_qlist_t *p_list; + osm_sa_t *sa; + const osm_physp_t *p_req_physp; + boolean_t is_enhanced_comp_mask; +} osm_pir_search_ctxt_t; + +static ib_api_status_t pir_rcv_new_pir(IN osm_sa_t * sa, + IN const osm_physp_t * p_physp, + IN osm_pir_search_ctxt_t * p_ctxt, + IN ib_net16_t const lid) +{ + osm_sa_item_t *p_rec_item; + ib_port_info_t *p_pi; + osm_physp_t *p_physp0; + ib_api_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(sa->p_log); + + p_rec_item = malloc(SA_PIR_RESP_SIZE); + if (p_rec_item == NULL) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2102: " + "rec_item alloc failed\n"); + status = IB_INSUFFICIENT_RESOURCES; + goto Exit; + } + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "New PortInfoRecord: port 0x%016" PRIx64 + ", lid %u, port %u\n", + cl_ntoh64(osm_physp_get_port_guid(p_physp)), + cl_ntoh16(lid), osm_physp_get_port_num(p_physp)); + + memset(p_rec_item, 0, SA_PIR_RESP_SIZE); + + p_rec_item->resp.port_rec.lid = lid; + p_rec_item->resp.port_rec.port_info = p_physp->port_info; + if (p_ctxt->comp_mask & IB_PIR_COMPMASK_OPTIONS) + p_rec_item->resp.port_rec.options = p_ctxt->p_rcvd_rec->options; + if 
((p_ctxt->comp_mask & IB_PIR_COMPMASK_OPTIONS) == 0 || + (p_ctxt->p_rcvd_rec->options & 0x80) == 0) { + /* Does requested port have an extended link speed active ? */ + if (osm_node_get_type(p_physp->p_node) == + IB_NODE_TYPE_SWITCH) { + p_physp0 = osm_node_get_physp_ptr(p_physp->p_node, 0); + p_pi = &p_physp0->port_info; + } else + p_pi = (ib_port_info_t *) &p_physp->port_info; + if ((p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS) > 0) { + if (ib_port_info_get_link_speed_ext_active(&p_physp->port_info)) { + /* Add QDR bits to original link speed components */ + p_pi = &p_rec_item->resp.port_rec.port_info; + ib_port_info_set_link_speed_enabled(p_pi, + ib_port_info_get_link_speed_enabled(p_pi) | IB_LINK_SPEED_ACTIVE_10); + p_pi->state_info1 = + (uint8_t) ((p_pi->state_info1 & IB_PORT_STATE_MASK) | + (ib_port_info_get_link_speed_sup(p_pi) | IB_LINK_SPEED_ACTIVE_10) << IB_PORT_LINK_SPEED_SHIFT); + p_pi->link_speed = + (uint8_t) ((p_pi->link_speed & IB_PORT_LINK_SPEED_ENABLED_MASK) | + (ib_port_info_get_link_speed_active(p_pi) | IB_LINK_SPEED_ACTIVE_10) << IB_PORT_LINK_SPEED_SHIFT); + } + } + } + p_rec_item->resp.port_rec.port_num = osm_physp_get_port_num(p_physp); + + cl_qlist_insert_tail(p_ctxt->p_list, &p_rec_item->list_item); + +Exit: + OSM_LOG_EXIT(sa->p_log); + return status; +} + +static void sa_pir_create(IN osm_sa_t * sa, IN const osm_physp_t * p_physp, + IN osm_pir_search_ctxt_t * p_ctxt) +{ + uint8_t lmc; + uint16_t max_lid_ho; + uint16_t base_lid_ho; + uint16_t match_lid_ho; + osm_physp_t *p_node_physp; + + OSM_LOG_ENTER(sa->p_log); + + if (p_physp->p_node->sw) { + p_node_physp = osm_node_get_physp_ptr(p_physp->p_node, 0); + base_lid_ho = cl_ntoh16(osm_physp_get_base_lid(p_node_physp)); + lmc = + osm_switch_sp0_is_lmc_capable(p_physp->p_node->sw, + sa->p_subn) ? 
+ osm_physp_get_lmc(p_node_physp) : 0; + } else { + lmc = osm_physp_get_lmc(p_physp); + base_lid_ho = cl_ntoh16(osm_physp_get_base_lid(p_physp)); + } + max_lid_ho = (uint16_t) (base_lid_ho + (1 << lmc) - 1); + + if (p_ctxt->comp_mask & IB_PIR_COMPMASK_LID) { + match_lid_ho = cl_ntoh16(p_ctxt->p_rcvd_rec->lid); + + /* + We validate that the lid belongs to this node. + */ + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Comparing LID: %u <= %u <= %u\n", + base_lid_ho, match_lid_ho, max_lid_ho); + + if (match_lid_ho < base_lid_ho || match_lid_ho > max_lid_ho) + goto Exit; + } + + pir_rcv_new_pir(sa, p_physp, p_ctxt, cl_hton16(base_lid_ho)); + +Exit: + OSM_LOG_EXIT(sa->p_log); +} + +static void sa_pir_check_physp(IN osm_sa_t * sa, IN const osm_physp_t * p_physp, + osm_pir_search_ctxt_t * p_ctxt) +{ + const ib_portinfo_record_t *p_rcvd_rec; + ib_net64_t comp_mask; + const ib_port_info_t *p_comp_pi; + const ib_port_info_t *p_pi; + const osm_physp_t * p_physp0; + ib_net32_t cap_mask; + + OSM_LOG_ENTER(sa->p_log); + + p_rcvd_rec = p_ctxt->p_rcvd_rec; + comp_mask = p_ctxt->comp_mask; + p_comp_pi = &p_rcvd_rec->port_info; + p_pi = &p_physp->port_info; + + osm_dump_port_info_v2(sa->p_log, osm_node_get_node_guid(p_physp->p_node), + p_physp->port_guid, p_physp->port_num, + &p_physp->port_info, FILE_ID, OSM_LOG_DEBUG); + + /* We have to re-check the base_lid, since if the given + base_lid in p_pi is zero - we are comparing on all ports. 
*/ + if (comp_mask & IB_PIR_COMPMASK_BASELID) { + if (p_comp_pi->base_lid != p_pi->base_lid) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_MKEY) { + if (p_comp_pi->m_key != p_pi->m_key) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_GIDPRE) { + if (p_comp_pi->subnet_prefix != p_pi->subnet_prefix) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_SMLID) { + if (p_comp_pi->master_sm_base_lid != p_pi->master_sm_base_lid) + goto Exit; + } + + /* IBTA 1.2 errata provides support for bitwise compare if the bit 31 + of the attribute modifier of the Get/GetTable is set */ + if (comp_mask & IB_PIR_COMPMASK_CAPMASK) { + if (p_ctxt->is_enhanced_comp_mask) { + if ((p_comp_pi->capability_mask & p_pi-> + capability_mask) != p_comp_pi->capability_mask) + goto Exit; + } else { + if (p_comp_pi->capability_mask != p_pi->capability_mask) + goto Exit; + } + } + + if (comp_mask & IB_PIR_COMPMASK_DIAGCODE) { + if (p_comp_pi->diag_code != p_pi->diag_code) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_MKEYLEASEPRD) { + if (p_comp_pi->m_key_lease_period != p_pi->m_key_lease_period) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_LOCALPORTNUM) { + if (p_comp_pi->local_port_num != p_pi->local_port_num) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_LNKWIDTHSUPPORT) { + if (p_comp_pi->link_width_supported != + p_pi->link_width_supported) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_LNKWIDTHACTIVE) { + if (p_comp_pi->link_width_active != p_pi->link_width_active) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_LINKWIDTHENABLED) { + if (p_comp_pi->link_width_enabled != p_pi->link_width_enabled) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_LNKSPEEDSUPPORT) { + if (ib_port_info_get_link_speed_sup(p_comp_pi) != + ib_port_info_get_link_speed_sup(p_pi)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_PORTSTATE) { + if (ib_port_info_get_port_state(p_comp_pi) != + ib_port_info_get_port_state(p_pi)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_PORTPHYSTATE) 
{ + if (ib_port_info_get_port_phys_state(p_comp_pi) != + ib_port_info_get_port_phys_state(p_pi)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_LINKDWNDFLTSTATE) { + if (ib_port_info_get_link_down_def_state(p_comp_pi) != + ib_port_info_get_link_down_def_state(p_pi)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_MKEYPROTBITS) { + if (ib_port_info_get_mpb(p_comp_pi) != + ib_port_info_get_mpb(p_pi)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_LMC) { + if (ib_port_info_get_lmc(p_comp_pi) != + ib_port_info_get_lmc(p_pi)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_LINKSPEEDACTIVE) { + if (ib_port_info_get_link_speed_active(p_comp_pi) != + ib_port_info_get_link_speed_active(p_pi)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_LINKSPEEDENABLE) { + if (ib_port_info_get_link_speed_enabled(p_comp_pi) != + ib_port_info_get_link_speed_enabled(p_pi)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_NEIGHBORMTU) { + if (ib_port_info_get_neighbor_mtu(p_comp_pi) != + ib_port_info_get_neighbor_mtu(p_pi)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_MASTERSMSL) { + if (ib_port_info_get_master_smsl(p_comp_pi) != + ib_port_info_get_master_smsl(p_pi)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_VLCAP) { + if (ib_port_info_get_vl_cap(p_comp_pi) != + ib_port_info_get_vl_cap(p_pi)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_INITTYPE) { + if (ib_port_info_get_init_type(p_comp_pi) != + ib_port_info_get_init_type(p_pi)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_VLHIGHLIMIT) { + if (p_comp_pi->vl_high_limit != p_pi->vl_high_limit) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_VLARBHIGHCAP) { + if (p_comp_pi->vl_arb_high_cap != p_pi->vl_arb_high_cap) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_VLARBLOWCAP) { + if (p_comp_pi->vl_arb_low_cap != p_pi->vl_arb_low_cap) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_MTUCAP) { + if (ib_port_info_get_mtu_cap(p_comp_pi) != + ib_port_info_get_mtu_cap(p_pi)) + goto Exit; + } + if 
(comp_mask & IB_PIR_COMPMASK_VLSTALLCNT) { + if (ib_port_info_get_vl_stall_count(p_comp_pi) != + ib_port_info_get_vl_stall_count(p_pi)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_HOQLIFE) { + if ((p_comp_pi->vl_stall_life & 0x1F) != + (p_pi->vl_stall_life & 0x1F)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_OPVLS) { + if ((p_comp_pi->vl_enforce & 0xF0) != (p_pi->vl_enforce & 0xF0)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_PARENFIN) { + if ((p_comp_pi->vl_enforce & 0x08) != (p_pi->vl_enforce & 0x08)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_PARENFOUT) { + if ((p_comp_pi->vl_enforce & 0x04) != (p_pi->vl_enforce & 0x04)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_FILTERRAWIN) { + if ((p_comp_pi->vl_enforce & 0x02) != (p_pi->vl_enforce & 0x02)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_FILTERRAWOUT) { + if ((p_comp_pi->vl_enforce & 0x01) != (p_pi->vl_enforce & 0x01)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_MKEYVIO) { + if (p_comp_pi->m_key_violations != p_pi->m_key_violations) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_PKEYVIO) { + if (p_comp_pi->p_key_violations != p_pi->p_key_violations) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_QKEYVIO) { + if (p_comp_pi->q_key_violations != p_pi->q_key_violations) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_GUIDCAP) { + if (p_comp_pi->guid_cap != p_pi->guid_cap) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_SUBNTO) { + if (ib_port_info_get_timeout(p_comp_pi) != + ib_port_info_get_timeout(p_pi)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_RESPTIME) { + if ((p_comp_pi->resp_time_value & 0x1F) != + (p_pi->resp_time_value & 0x1F)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_LOCALPHYERR) { + if (ib_port_info_get_local_phy_err_thd(p_comp_pi) != + ib_port_info_get_local_phy_err_thd(p_pi)) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_OVERRUNERR) { + if (ib_port_info_get_overrun_err_thd(p_comp_pi) != + 
ib_port_info_get_overrun_err_thd(p_pi)) + goto Exit; + } + + /* IBTA 1.2 errata provides support for bitwise compare if the bit 31 + of the attribute modifier of the Get/GetTable is set */ + if (comp_mask & IB_PIR_COMPMASK_CAPMASK2) { + if (p_ctxt->is_enhanced_comp_mask) { + if ((cl_ntoh16(p_comp_pi->capability_mask2) & + cl_ntoh16(p_pi->capability_mask2)) != + cl_ntoh16(p_comp_pi->capability_mask2)) + goto Exit; + } else { + if (cl_ntoh16(p_comp_pi->capability_mask2) != + cl_ntoh16(p_pi->capability_mask2)) + goto Exit; + } + } + if (osm_node_get_type(p_physp->p_node) == IB_NODE_TYPE_SWITCH) { + p_physp0 = osm_node_get_physp_ptr(p_physp->p_node, 0); + cap_mask = p_physp0->port_info.capability_mask; + } else + cap_mask = p_pi->capability_mask; + if (comp_mask & IB_PIR_COMPMASK_LINKSPDEXTACT) { + if (((cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS) > 0) && + (ib_port_info_get_link_speed_ext_active(p_comp_pi) != + ib_port_info_get_link_speed_ext_active(p_pi))) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_LINKSPDEXTSUPP) { + if (((cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS) > 0) && + (ib_port_info_get_link_speed_ext_sup(p_comp_pi) != + ib_port_info_get_link_speed_ext_sup(p_pi))) + goto Exit; + } + if (comp_mask & IB_PIR_COMPMASK_LINKSPDEXTENAB) { + if (((cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS) > 0) && + (ib_port_info_get_link_speed_ext_enabled(p_comp_pi) != + ib_port_info_get_link_speed_ext_enabled(p_pi))) + goto Exit; + } + sa_pir_create(sa, p_physp, p_ctxt); + +Exit: + OSM_LOG_EXIT(sa->p_log); +} + +static void sa_pir_by_comp_mask(IN osm_sa_t * sa, IN osm_node_t * p_node, + osm_pir_search_ctxt_t * p_ctxt) +{ + const ib_portinfo_record_t *p_rcvd_rec; + ib_net64_t comp_mask; + const osm_physp_t *p_physp; + uint8_t port_num; + uint8_t num_ports; + const osm_physp_t *p_req_physp; + + OSM_LOG_ENTER(sa->p_log); + + p_rcvd_rec = p_ctxt->p_rcvd_rec; + comp_mask = p_ctxt->comp_mask; + p_req_physp = p_ctxt->p_req_physp; + + num_ports = osm_node_get_num_physp(p_node); + + if (comp_mask 
& IB_PIR_COMPMASK_PORTNUM) { + /* A port number was given: examine only that port, and only + if it is within this node's port range. */ + if (p_rcvd_rec->port_num < num_ports) { + p_physp = + osm_node_get_physp_ptr(p_node, + p_rcvd_rec->port_num); + /* Check that the p_physp is valid, and that the + p_physp and the p_req_physp share a pkey. */ + if (p_physp && + osm_physp_share_pkey(sa->p_log, p_req_physp, p_physp, + sa->p_subn->opt.allow_both_pkeys)) + sa_pir_check_physp(sa, p_physp, p_ctxt); + } + } else { + /* No port number given: examine every physical port of the node. */ + for (port_num = 0; port_num < num_ports; port_num++) { + p_physp = osm_node_get_physp_ptr(p_node, port_num); + if (!p_physp) + continue; + + /* if the requester and the p_physp don't share a pkey - + continue */ + if (!osm_physp_share_pkey(sa->p_log, p_req_physp, p_physp, + sa->p_subn->opt.allow_both_pkeys)) + continue; + + sa_pir_check_physp(sa, p_physp, p_ctxt); + } + } + + OSM_LOG_EXIT(sa->p_log); +} + +/* + * Node-map iteration callback: casts the map item to an osm_node_t and runs + * sa_pir_by_comp_mask() on it, taking the osm_sa_t from the search context. + */ +static void sa_pir_by_comp_mask_cb(IN cl_map_item_t * p_map_item, IN void *cxt) +{ + osm_node_t *p_node = (osm_node_t *) p_map_item; + osm_pir_search_ctxt_t *p_ctxt = cxt; + + sa_pir_by_comp_mask(p_ctxt->sa, p_node, p_ctxt); +} + +/* + * Handle a received PortInfoRecord SA query. ctx is the osm_sa_t and data is + * the osm_madw_t wrapping the request MAD. Only Get and GetTable are accepted; + * any other method is answered with IB_MAD_STATUS_UNSUP_METHOD_ATTR. + */ +void osm_pir_rcv_process(IN void *ctx, IN void *data) +{ + osm_sa_t *sa = ctx; + osm_madw_t *p_madw = data; + const ib_sa_mad_t *p_rcvd_mad; + const ib_portinfo_record_t *p_rcvd_rec; + const osm_port_t *p_port = NULL; + cl_qlist_t rec_list; + osm_pir_search_ctxt_t context; + ib_net64_t comp_mask; + osm_physp_t *p_req_physp; + + CL_ASSERT(sa); + + OSM_LOG_ENTER(sa->p_log); + + CL_ASSERT(p_madw); + + p_rcvd_mad = osm_madw_get_sa_mad_ptr(p_madw); + p_rcvd_rec = + (ib_portinfo_record_t *) ib_sa_mad_get_payload_ptr(p_rcvd_mad); + comp_mask = p_rcvd_mad->comp_mask; + + CL_ASSERT(p_rcvd_mad->attr_id == IB_MAD_ATTR_PORTINFO_RECORD); + + /* we only support SubnAdmGet and SubnAdmGetTable methods */ + if (p_rcvd_mad->method != IB_MAD_METHOD_GET && + p_rcvd_mad->method != IB_MAD_METHOD_GETTABLE) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2105: " + "Unsupported Method (%s) for PortInfoRecord request\n", + ib_get_sa_method_str(p_rcvd_mad->method));
+ osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR); + goto Exit; + } + + cl_plock_acquire(sa->p_lock); + + /* update the requester physical port */ + p_req_physp = osm_get_physp_by_mad_addr(sa->p_log, sa->p_subn, + osm_madw_get_mad_addr_ptr + (p_madw)); + if (p_req_physp == NULL) { + cl_plock_release(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2104: " + "Cannot find requester physical port\n"); + goto Exit; + } + + if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_DEBUG)) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Requester port GUID 0x%" PRIx64 "\n", + cl_ntoh64(osm_physp_get_port_guid(p_req_physp))); + osm_dump_portinfo_record_v2(sa->p_log, p_rcvd_rec, FILE_ID, OSM_LOG_DEBUG); + } + + cl_qlist_init(&rec_list); + + context.p_rcvd_rec = p_rcvd_rec; + context.p_list = &rec_list; + context.comp_mask = p_rcvd_mad->comp_mask; + context.sa = sa; + context.p_req_physp = p_req_physp; + context.is_enhanced_comp_mask = + cl_ntoh32(p_rcvd_mad->attr_mod) & (1 << 31); + + /* + If the user specified a LID, it obviously narrows our + work load, since we don't have to search every port + */ + if (comp_mask & (IB_PIR_COMPMASK_LID | IB_PIR_COMPMASK_BASELID)) { + p_port = osm_get_port_by_lid(sa->p_subn, p_rcvd_rec->lid); + if (p_port) + sa_pir_by_comp_mask(sa, p_port->p_node, &context); + else + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2109: " + "No port found with requested LID %u\n", + cl_ntoh16(p_rcvd_rec->lid)); + } else + cl_qmap_apply_func(&sa->p_subn->node_guid_tbl, + sa_pir_by_comp_mask_cb, &context); + + cl_plock_release(sa->p_lock); + + /* + p922 - The M_Key returned shall be zero, except in the case of a + trusted request. + Note: In the mad controller we check that the SM_Key received on + the mad is valid. Meaning - is either zero or equal to the local + sm_key. 
+ */ + if (!p_rcvd_mad->sm_key) { + osm_sa_item_t *item; + for (item = (osm_sa_item_t *) cl_qlist_head(&rec_list); + item != (osm_sa_item_t *) cl_qlist_end(&rec_list); + item = (osm_sa_item_t *) cl_qlist_next(&item->list_item)) + item->resp.port_rec.port_info.m_key = 0; + } + + osm_sa_respond(sa, p_madw, sizeof(ib_portinfo_record_t), &rec_list); + +Exit: + OSM_LOG_EXIT(sa->p_log); +} diff --git a/opensm/osm_sa_service_record.c b/opensm/osm_sa_service_record.c new file mode 100644 index 0000000..e55e203 --- /dev/null +++ b/opensm/osm_sa_service_record.c @@ -0,0 +1,802 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_sr_rcv_t. + * This object represents the ServiceRecord Receiver object. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_SA_SERVICE_RECORD_C +#include +#include +#include +#include +#include +#include +#include +#include + +#define SA_SR_RESP_SIZE SA_ITEM_RESP_SIZE(service_rec) + +typedef struct osm_sr_match_item { + cl_qlist_t sr_list; + ib_service_record_t *p_service_rec; + ib_net64_t comp_mask; + osm_sa_t *sa; +} osm_sr_match_item_t; + +typedef struct osm_sr_search_ctxt { + osm_sr_match_item_t *p_sr_item; + const osm_physp_t *p_req_physp; +} osm_sr_search_ctxt_t; + +static boolean_t +match_service_pkey_with_ports_pkey(IN osm_sa_t * sa, + IN const osm_madw_t * p_madw, + ib_service_record_t * p_service_rec, + ib_net64_t const comp_mask) +{ + boolean_t valid = TRUE; + osm_physp_t *p_req_physp; + ib_net64_t service_guid; + osm_port_t *service_port; + + /* update the requester physical port */ + p_req_physp = osm_get_physp_by_mad_addr(sa->p_log, sa->p_subn, + osm_madw_get_mad_addr_ptr + (p_madw)); + if (p_req_physp == NULL) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2404: " + "Cannot find requester physical port\n"); + valid = FALSE; + goto Exit; + } + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Requester port GUID 0x%" PRIx64 "\n", + cl_ntoh64(osm_physp_get_port_guid(p_req_physp))); + + if ((comp_mask & IB_SR_COMPMASK_SPKEY) == IB_SR_COMPMASK_SPKEY) { + /* We have a ServiceP_Key - check matching on requester port, + and ServiceGid port (if such exists) */ + /* Make sure it matches 
the p_req_physp */ + if (!osm_physp_has_pkey + (sa->p_log, p_service_rec->service_pkey, p_req_physp)) { + valid = FALSE; + goto Exit; + } + + /* If unicast, make sure it matches the port of the ServiceGid */ + if (comp_mask & IB_SR_COMPMASK_SGID && + !ib_gid_is_multicast(&p_service_rec->service_gid)) { + service_guid = + p_service_rec->service_gid.unicast.interface_id; + service_port = + osm_get_port_by_alias_guid(sa->p_subn, service_guid); + if (!service_port) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2405: " + "No port object for port 0x%016" PRIx64 + "\n", cl_ntoh64(service_guid)); + valid = FALSE; + goto Exit; + } + /* check on the table of the default physical port of the service port */ + if (!osm_physp_has_pkey(sa->p_log, + p_service_rec->service_pkey, + service_port->p_physp)) { + valid = FALSE; + goto Exit; + } + } + } + +Exit: + return valid; +} + +/* + * Placeholder for the ServiceName/ServiceKey association check. + * sa and p_service_rec are unused and every path returns TRUE. + */ +static boolean_t +match_name_to_key_association(IN osm_sa_t * sa, + ib_service_record_t * p_service_rec, + ib_net64_t comp_mask) +{ + UNUSED_PARAM(p_service_rec); + UNUSED_PARAM(sa); + + if ((comp_mask & (IB_SR_COMPMASK_SKEY | IB_SR_COMPMASK_SNAME)) == + (IB_SR_COMPMASK_SKEY | IB_SR_COMPMASK_SNAME)) { + /* For now, we are not maintaining the ServiceAssociation record + * so just return TRUE + */ + return TRUE; + } + + return TRUE; +} + +/* + * Validate a ServiceRecord request MAD: first the P_Key check + * (match_service_pkey_with_ports_pkey), then the name/key association + * check. Returns TRUE only if both pass. + */ +static boolean_t validate_sr(IN osm_sa_t * sa, IN const osm_madw_t * p_madw) +{ + boolean_t valid = TRUE; + ib_sa_mad_t *p_sa_mad; + ib_service_record_t *p_recvd_service_rec; + + OSM_LOG_ENTER(sa->p_log); + + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); + p_recvd_service_rec = + (ib_service_record_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); + + valid = match_service_pkey_with_ports_pkey(sa, p_madw, + p_recvd_service_rec, + p_sa_mad->comp_mask); + if (!valid) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "No Match for Service Pkey\n"); + valid = FALSE; + goto Exit; + } + + valid = match_name_to_key_association(sa, p_recvd_service_rec, + p_sa_mad->comp_mask); + if (!valid) { +
OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Service Record Name to key matching failed\n"); + valid = FALSE; + goto Exit; + } + +Exit: + OSM_LOG_EXIT(sa->p_log); + return valid; +} + +/* + * Send the ServiceRecord response built from p_list. When the request + * MAD's sm_key is zero, the service_key of every item is cleared first. + */ +static void sr_rcv_respond(IN osm_sa_t * sa, IN osm_madw_t * p_madw, + IN cl_qlist_t * p_list) +{ + /* p923 - The ServiceKey shall be set to 0, except in the case of + a trusted request. + Note: In the mad controller we check that the SM_Key received on + the mad is valid. Meaning - is either zero or equal to the local + sm_key. + */ + if (!osm_madw_get_sa_mad_ptr(p_madw)->sm_key) { + osm_sa_item_t *item; + for (item = (osm_sa_item_t *) cl_qlist_head(p_list); + item != (osm_sa_item_t *) cl_qlist_end(p_list); + item = (osm_sa_item_t *) cl_qlist_next(&item->list_item)) + memset(item->resp.service_rec.service_key, 0, + sizeof(item->resp.service_rec.service_key)); + } + + osm_sa_respond(sa, p_madw, sizeof(ib_service_record_t), p_list); +} + +/* + * cl_qlist_apply_func() callback used by sr_rcv_process_get_method(): + * compares one stored service record against the query under the component + * mask and returns at the first mismatching component. A record that + * matches, and whose pkey the requester holds, is copied onto the context's + * sr_list. + */ +static void get_matching_sr(IN cl_list_item_t * p_list_item, IN void *context) +{ + osm_sr_search_ctxt_t *p_ctxt = context; + osm_svcr_t *p_svcr = (osm_svcr_t *) p_list_item; + osm_sa_item_t *p_sr_pool_item; + osm_sr_match_item_t *p_sr_item = p_ctxt->p_sr_item; + ib_net64_t comp_mask = p_sr_item->comp_mask; + const osm_physp_t *p_req_physp = p_ctxt->p_req_physp; + + if (comp_mask & IB_SR_COMPMASK_SID) { + if (p_sr_item->p_service_rec->service_id != + p_svcr->service_record.service_id) + return; + } + if (comp_mask & IB_SR_COMPMASK_SGID) { + if (memcmp(&p_sr_item->p_service_rec->service_gid, + &p_svcr->service_record.service_gid, + sizeof(p_svcr->service_record.service_gid)) != 0) + return; + } + if (comp_mask & IB_SR_COMPMASK_SPKEY) { + if (p_sr_item->p_service_rec->service_pkey != + p_svcr->service_record.service_pkey) + return; + } + + if (comp_mask & IB_SR_COMPMASK_SKEY) { + if (memcmp(p_sr_item->p_service_rec->service_key, + p_svcr->service_record.service_key, + 16 * sizeof(uint8_t))) + return; + } + if (comp_mask & IB_SR_COMPMASK_SNAME) { + if
(memcmp(p_sr_item->p_service_rec->service_name, + p_svcr->service_record.service_name, + sizeof(p_svcr->service_record.service_name)) != 0) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA8_0) { + if (p_sr_item->p_service_rec->service_data8[0] != + p_svcr->service_record.service_data8[0]) + return; + } + + if (comp_mask & IB_SR_COMPMASK_SDATA8_1) { + if (p_sr_item->p_service_rec->service_data8[1] != + p_svcr->service_record.service_data8[1]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA8_2) { + if (p_sr_item->p_service_rec->service_data8[2] != + p_svcr->service_record.service_data8[2]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA8_3) { + if (p_sr_item->p_service_rec->service_data8[3] != + p_svcr->service_record.service_data8[3]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA8_4) { + if (p_sr_item->p_service_rec->service_data8[4] != + p_svcr->service_record.service_data8[4]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA8_5) { + if (p_sr_item->p_service_rec->service_data8[5] != + p_svcr->service_record.service_data8[5]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA8_6) { + if (p_sr_item->p_service_rec->service_data8[6] != + p_svcr->service_record.service_data8[6]) + return; + } + + if (comp_mask & IB_SR_COMPMASK_SDATA8_7) { + if (p_sr_item->p_service_rec->service_data8[7] != + p_svcr->service_record.service_data8[7]) + return; + } + + if (comp_mask & IB_SR_COMPMASK_SDATA8_8) { + if (p_sr_item->p_service_rec->service_data8[8] != + p_svcr->service_record.service_data8[8]) + return; + } + + if (comp_mask & IB_SR_COMPMASK_SDATA8_9) { + if (p_sr_item->p_service_rec->service_data8[9] != + p_svcr->service_record.service_data8[9]) + return; + } + + if (comp_mask & IB_SR_COMPMASK_SDATA8_10) { + if (p_sr_item->p_service_rec->service_data8[10] != + p_svcr->service_record.service_data8[10]) + return; + } + + if (comp_mask & IB_SR_COMPMASK_SDATA8_11) { + if (p_sr_item->p_service_rec->service_data8[11] != + 
p_svcr->service_record.service_data8[11]) + return; + } + + if (comp_mask & IB_SR_COMPMASK_SDATA8_12) { + if (p_sr_item->p_service_rec->service_data8[12] != + p_svcr->service_record.service_data8[12]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA8_13) { + if (p_sr_item->p_service_rec->service_data8[13] != + p_svcr->service_record.service_data8[13]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA8_14) { + if (p_sr_item->p_service_rec->service_data8[14] != + p_svcr->service_record.service_data8[14]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA8_15) { + if (p_sr_item->p_service_rec->service_data8[15] != + p_svcr->service_record.service_data8[15]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA16_0) { + if (p_sr_item->p_service_rec->service_data16[0] != + p_svcr->service_record.service_data16[0]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA16_1) { + if (p_sr_item->p_service_rec->service_data16[1] != + p_svcr->service_record.service_data16[1]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA16_2) { + if (p_sr_item->p_service_rec->service_data16[2] != + p_svcr->service_record.service_data16[2]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA16_3) { + if (p_sr_item->p_service_rec->service_data16[3] != + p_svcr->service_record.service_data16[3]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA16_4) { + if (p_sr_item->p_service_rec->service_data16[4] != + p_svcr->service_record.service_data16[4]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA16_5) { + if (p_sr_item->p_service_rec->service_data16[5] != + p_svcr->service_record.service_data16[5]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA16_6) { + if (p_sr_item->p_service_rec->service_data16[6] != + p_svcr->service_record.service_data16[6]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA16_7) { + if (p_sr_item->p_service_rec->service_data16[7] != + p_svcr->service_record.service_data16[7]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA32_0) { + if 
(p_sr_item->p_service_rec->service_data32[0] != + p_svcr->service_record.service_data32[0]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA32_1) { + if (p_sr_item->p_service_rec->service_data32[1] != + p_svcr->service_record.service_data32[1]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA32_2) { + if (p_sr_item->p_service_rec->service_data32[2] != + p_svcr->service_record.service_data32[2]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA32_3) { + if (p_sr_item->p_service_rec->service_data32[3] != + p_svcr->service_record.service_data32[3]) + return; + } + + if (comp_mask & IB_SR_COMPMASK_SDATA64_0) { + if (p_sr_item->p_service_rec->service_data64[0] != + p_svcr->service_record.service_data64[0]) + return; + } + if (comp_mask & IB_SR_COMPMASK_SDATA64_1) { + if (p_sr_item->p_service_rec->service_data64[1] != + p_svcr->service_record.service_data64[1]) + return; + } + + /* Check that the requester port has the pkey which is the service_pkey. + If not - then it cannot receive this ServiceRecord. 
*/ + /* The check is relevant only if the service_pkey is valid */ + if (!ib_pkey_is_invalid(p_svcr->service_record.service_pkey)) { + if (!osm_physp_has_pkey(p_sr_item->sa->p_log, + p_svcr->service_record.service_pkey, + p_req_physp)) { + OSM_LOG(p_sr_item->sa->p_log, OSM_LOG_VERBOSE, + "requester port doesn't have the service_pkey: 0x%X\n", + cl_ntoh16(p_svcr->service_record.service_pkey)); + return; + } + } + + /* an allocation failure only leaves this record out of the result */ + p_sr_pool_item = malloc(SA_SR_RESP_SIZE); + if (p_sr_pool_item == NULL) { + OSM_LOG(p_sr_item->sa->p_log, OSM_LOG_ERROR, "ERR 2408: " + "Unable to acquire Service Record from pool\n"); + goto Exit; + } + + p_sr_pool_item->resp.service_rec = p_svcr->service_record; + + cl_qlist_insert_tail(&p_sr_item->sr_list, &p_sr_pool_item->list_item); + +Exit: + return; +} + +/* + * Handle Get/GetTable for ServiceRecord: under the lock, collect every + * stored record that matches the query into a list, then respond. A Get + * that matches nothing is answered with IB_SA_MAD_STATUS_NO_RECORDS. + */ +static void sr_rcv_process_get_method(osm_sa_t * sa, IN osm_madw_t * p_madw) +{ + ib_sa_mad_t *p_sa_mad; + ib_service_record_t *p_recvd_service_rec; + osm_sr_match_item_t sr_match_item; + osm_sr_search_ctxt_t context; + osm_physp_t *p_req_physp; + + OSM_LOG_ENTER(sa->p_log); + + CL_ASSERT(p_madw); + + /* Grab the lock */ + cl_plock_acquire(sa->p_lock); + + /* look up the requester physical port; if it is unknown the request + is dropped without a response */ + p_req_physp = osm_get_physp_by_mad_addr(sa->p_log, sa->p_subn, + osm_madw_get_mad_addr_ptr + (p_madw)); + if (p_req_physp == NULL) { + cl_plock_release(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2409: " + "Cannot find requester physical port\n"); + goto Exit; + } + + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); + p_recvd_service_rec = + (ib_service_record_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); + + if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_DEBUG)) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Requester port GUID 0x%" PRIx64 "\n", + cl_ntoh64(osm_physp_get_port_guid(p_req_physp))); + osm_dump_service_record_v2(sa->p_log, p_recvd_service_rec, + FILE_ID, OSM_LOG_DEBUG); + } + + cl_qlist_init(&sr_match_item.sr_list); + sr_match_item.p_service_rec = p_recvd_service_rec; + sr_match_item.comp_mask =
p_sa_mad->comp_mask; + sr_match_item.sa = sa; + + context.p_sr_item = &sr_match_item; + context.p_req_physp = p_req_physp; + + cl_qlist_apply_func(&sa->p_subn->sa_sr_list, get_matching_sr, &context); + + cl_plock_release(sa->p_lock); + + /* An empty result is an error only for Get; GetTable falls through + and responds with the empty list. */ + if (p_sa_mad->method == IB_MAD_METHOD_GET && + cl_qlist_count(&sr_match_item.sr_list) == 0) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "No records matched the Service Record query\n"); + osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_NO_RECORDS); + goto Exit; + } + + sr_rcv_respond(sa, p_madw, &sr_match_item.sr_list); + +Exit: + OSM_LOG_EXIT(sa->p_log); + return; +} + +/* + * Handle Set for ServiceRecord. The caller (osm_sr_rcv_process) has already + * taken sa->p_lock exclusively; this function releases it on every path. + * Both the ServiceID and ServiceGID component bits are required. A stored + * record with a matching RID is re-initialized, otherwise a new one is + * created, and the received record is echoed back in the response. + */ +static void sr_rcv_process_set_method(osm_sa_t * sa, IN osm_madw_t * p_madw) +{ + ib_sa_mad_t *p_sa_mad; + ib_service_record_t *p_recvd_service_rec; + ib_net64_t comp_mask; + osm_svcr_t *p_svcr; + osm_sa_item_t *p_sr_item; + cl_qlist_t sr_list; + + OSM_LOG_ENTER(sa->p_log); + + CL_ASSERT(p_madw); + + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); + p_recvd_service_rec = + (ib_service_record_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); + + comp_mask = p_sa_mad->comp_mask; + + if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_DEBUG)) + osm_dump_service_record_v2(sa->p_log, p_recvd_service_rec, + FILE_ID, OSM_LOG_DEBUG); + + /* Set needs the full RID: both ServiceID and ServiceGID bits */ + if ((comp_mask & (IB_SR_COMPMASK_SID | IB_SR_COMPMASK_SGID)) != + (IB_SR_COMPMASK_SID | IB_SR_COMPMASK_SGID)) { + cl_plock_release(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, + "Component Mask RID check failed for METHOD_SET\n"); + osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID); + goto Exit; + } + + /* if we were not provided with a service lease make it infinite */ + if (!(comp_mask & IB_SR_COMPMASK_SLEASE)) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "ServiceLease Component Mask not set - using infinite lease\n"); + p_recvd_service_rec->service_lease = 0xFFFFFFFF; + } + + /* If Record exists with matching RID */ + p_svcr = osm_svcr_get_by_rid(sa->p_subn, sa->p_log, + p_recvd_service_rec); + + if (p_svcr == NULL) { + /* Create the instance of the
osm_svcr_t object */ + p_svcr = osm_svcr_new(p_recvd_service_rec); + if (p_svcr == NULL) { + cl_plock_release(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2411: " + "Failed to create new service record\n"); + + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_NO_RESOURCES); + goto Exit; + } + + /* Add this new osm_svcr_t object to subnet object */ + osm_svcr_insert_to_db(sa->p_subn, sa->p_log, p_svcr); + + } else /* Update the old instance of the osm_svcr_t object */ + osm_svcr_init(p_svcr, p_recvd_service_rec); + + /* Stamp the record while the lock still protects it: once the lock is + dropped, the lease callback or a Delete request may remove and free + p_svcr, so it must not be touched afterwards. */ + if (p_recvd_service_rec->service_lease != 0xFFFFFFFF) + p_svcr->modified_time = cl_get_time_stamp_sec(); + + cl_plock_release(sa->p_lock); + + if (p_recvd_service_rec->service_lease != 0xFFFFFFFF) { +#if 0 + cl_timer_trim(&sa->sr_timer, + p_recvd_service_rec->service_lease * 1000); +#endif + /* This was a bug since no check was made to see if too long */ + /* just make sure the timer works - get a call back within a second */ + cl_timer_trim(&sa->sr_timer, 1000); + } + + p_sr_item = malloc(SA_SR_RESP_SIZE); + if (p_sr_item == NULL) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2412: " + "Unable to acquire Service record\n"); + osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_NO_RESOURCES); + goto Exit; + } + + if ((comp_mask & IB_SR_COMPMASK_SPKEY) != IB_SR_COMPMASK_SPKEY) + /* Set the Default Service P_Key in the response */ + p_recvd_service_rec->service_pkey = IB_DEFAULT_PKEY; + + /* the response echoes the received record, not the stored copy */ + p_sr_item->resp.service_rec = *p_recvd_service_rec; + cl_qlist_init(&sr_list); + + cl_qlist_insert_tail(&sr_list, &p_sr_item->list_item); + + sr_rcv_respond(sa, p_madw, &sr_list); + +Exit: + OSM_LOG_EXIT(sa->p_log); +} + +/* + * Handle Delete for ServiceRecord. The caller (osm_sr_rcv_process) has + * already taken sa->p_lock exclusively; this function releases it. The + * stored record matching the RID is removed and echoed back; if none + * matches, IB_SA_MAD_STATUS_NO_RECORDS is returned. + */ +static void sr_rcv_process_delete_method(osm_sa_t * sa, IN osm_madw_t * p_madw) +{ + ib_sa_mad_t *p_sa_mad; + ib_service_record_t *p_recvd_service_rec; + osm_svcr_t *p_svcr; + osm_sa_item_t *p_sr_item; + cl_qlist_t sr_list; + + OSM_LOG_ENTER(sa->p_log); + + CL_ASSERT(p_madw); + + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); + p_recvd_service_rec = + (ib_service_record_t *) ib_sa_mad_get_payload_ptr(p_sa_mad);
+ + if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_DEBUG)) + osm_dump_service_record_v2(sa->p_log, p_recvd_service_rec, + FILE_ID, OSM_LOG_DEBUG); + + /* If Record exists with matching RID */ + p_svcr = osm_svcr_get_by_rid(sa->p_subn, sa->p_log, + p_recvd_service_rec); + + if (p_svcr == NULL) { + cl_plock_release(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "No records matched the RID\n"); + osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_NO_RECORDS); + goto Exit; + } + + /* after removal from the db the record is deleted below on both + the failure and the success path */ + osm_svcr_remove_from_db(sa->p_subn, sa->p_log, p_svcr); + cl_plock_release(sa->p_lock); + + p_sr_item = malloc(SA_SR_RESP_SIZE); + if (p_sr_item == NULL) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2413: " + "Unable to acquire Service record\n"); + osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_NO_RESOURCES); + osm_svcr_delete(p_svcr); + goto Exit; + } + + /* provide back the copy of the record */ + p_sr_item->resp.service_rec = p_svcr->service_record; + cl_qlist_init(&sr_list); + + cl_qlist_insert_tail(&sr_list, &p_sr_item->list_item); + + osm_svcr_delete(p_svcr); + + sr_rcv_respond(sa, p_madw, &sr_list); + +Exit: + OSM_LOG_EXIT(sa->p_log); + return; +} + +/* + * Entry point for ServiceRecord SA requests: context is the osm_sa_t and + * data is the osm_madw_t. Dispatches on the MAD method. For Set and Delete + * the exclusive lock is taken here and released either on validation + * failure or inside the method handler. + */ +void osm_sr_rcv_process(IN void *context, IN void *data) +{ + osm_sa_t *sa = context; + osm_madw_t *p_madw = data; + ib_sa_mad_t *p_sa_mad; + boolean_t valid; + + OSM_LOG_ENTER(sa->p_log); + + CL_ASSERT(p_madw); + + p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); + + CL_ASSERT(p_sa_mad->attr_id == IB_MAD_ATTR_SERVICE_RECORD); + + switch (p_sa_mad->method) { + case IB_MAD_METHOD_SET: + cl_plock_excl_acquire(sa->p_lock); + valid = validate_sr(sa, p_madw); + if (!valid) { + cl_plock_release(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, + "Component Mask check failed for set request\n"); + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_REQ_INVALID); + goto Exit; + } + /* the handler releases sa->p_lock */ + sr_rcv_process_set_method(sa, p_madw); + break; + case IB_MAD_METHOD_DELETE: + cl_plock_excl_acquire(sa->p_lock); + valid = validate_sr(sa, p_madw); + if (!valid) { +
cl_plock_release(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Component Mask check failed for delete request\n"); + osm_sa_send_error(sa, p_madw, + IB_SA_MAD_STATUS_REQ_INVALID); + goto Exit; + } + /* the handler releases sa->p_lock */ + sr_rcv_process_delete_method(sa, p_madw); + break; + case IB_MAD_METHOD_GET: + case IB_MAD_METHOD_GETTABLE: + /* the get handler takes and releases the lock itself */ + sr_rcv_process_get_method(sa, p_madw); + break; + default: + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Unsupported Method (%s) for ServiceRecord request\n", + ib_get_sa_method_str(p_sa_mad->method)); + osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR); + break; + } + +Exit: + OSM_LOG_EXIT(sa->p_log); +} + +/* + * Service lease expiry callback; context is the osm_sa_t (presumably the + * sa->sr_timer handler - the registration is not in this file). Under the + * exclusive lock it walks the stored service records, reduces the remaining + * lease of each finite-lease record by the time elapsed since its last + * update, deletes the records whose lease has run out, and re-arms the timer. + */ +void osm_sr_rcv_lease_cb(IN void *context) +{ + osm_sa_t *sa = context; + cl_list_item_t *p_list_item; + cl_list_item_t *p_next_list_item; + osm_svcr_t *p_svcr; + uint32_t curr_time; + uint32_t elapsed_time; + uint32_t trim_time = 20; /* maximal timer refresh is 20 seconds */ + + OSM_LOG_ENTER(sa->p_log); + + cl_plock_excl_acquire(sa->p_lock); + + p_list_item = cl_qlist_head(&sa->p_subn->sa_sr_list); + + while (p_list_item != cl_qlist_end(&sa->p_subn->sa_sr_list)) { + p_svcr = (osm_svcr_t *) p_list_item; + + /* records with an infinite lease never expire - skip them */ + if (p_svcr->service_record.service_lease == 0xFFFFFFFF) { + p_list_item = cl_qlist_next(p_list_item); + continue; + } + + /* current time in seconds */ + curr_time = cl_get_time_stamp_sec(); + /* elapsed time from last modify */ + elapsed_time = curr_time - p_svcr->modified_time; + /* but it can not be less than 1 */ + if (elapsed_time < 1) + elapsed_time = 1; + + if (elapsed_time < p_svcr->lease_period) { + /* + Just update the service lease period + note: for simplicity we work with a uint32_t field + external to the network order lease_period of the MAD + */ + p_svcr->lease_period -= elapsed_time; + + /* NOTE(review): service_name is printed with %s - assumes it is + NUL-terminated; confirm for names that fill the whole field */ + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Remaining time for Service Name:%s is:0x%X\n", + p_svcr->service_record.service_name, + p_svcr->lease_period); + + p_svcr->modified_time = curr_time; + + /* Update the trim timer */ + if (trim_time >
p_svcr->lease_period) { + trim_time = p_svcr->lease_period; + if (trim_time < 1) + trim_time = 1; + } + + p_list_item = cl_qlist_next(p_list_item); + continue; + + } else { + p_next_list_item = cl_qlist_next(p_list_item); + + /* Remove the service Record */ + osm_svcr_remove_from_db(sa->p_subn, sa->p_log, p_svcr); + + osm_svcr_delete(p_svcr); + + p_list_item = p_next_list_item; + continue; + } + } + + /* Release the Lock */ + cl_plock_release(sa->p_lock); + + if (trim_time != 0xFFFFFFFF) { + cl_timer_trim(&sa->sr_timer, trim_time * 1000); /* Convert to milli seconds */ + } + + OSM_LOG_EXIT(sa->p_log); +} diff --git a/opensm/osm_sa_slvl_record.c b/opensm/osm_sa_slvl_record.c new file mode 100644 index 0000000..83f0cb1 --- /dev/null +++ b/opensm/osm_sa_slvl_record.c @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_slvl_rec_rcv_t. + * This object represents the SLtoVL Mapping Query Receiver object. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_SA_SLVL_RECORD_C +#include +#include +#include +#include +#include +#include + +#define SA_SLVL_RESP_SIZE SA_ITEM_RESP_SIZE(slvl_rec) + +typedef struct osm_slvl_search_ctxt { + const ib_slvl_table_record_t *p_rcvd_rec; + ib_net64_t comp_mask; + uint8_t in_port_num; + cl_qlist_t *p_list; + osm_sa_t *sa; + const osm_physp_t *p_req_physp; +} osm_slvl_search_ctxt_t; + +static void sa_slvl_create(IN osm_sa_t * sa, IN const osm_physp_t * p_physp, + IN osm_slvl_search_ctxt_t * p_ctxt, + IN uint8_t in_port_idx) +{ + osm_sa_item_t *p_rec_item; + uint16_t lid; + + OSM_LOG_ENTER(sa->p_log); + + p_rec_item = malloc(SA_SLVL_RESP_SIZE); + if (p_rec_item == NULL) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2602: " + "rec_item alloc failed\n"); + goto Exit; + } + + if (p_physp->p_node->node_info.node_type != IB_NODE_TYPE_SWITCH) + lid = p_physp->port_info.base_lid; + else + lid = osm_node_get_base_lid(p_physp->p_node, 0); + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "New SLtoVL Map for: OUT port 0x%016" PRIx64 + ", lid 0x%X, port %u to In Port:%u\n", + cl_ntoh64(osm_physp_get_port_guid(p_physp)), + cl_ntoh16(lid), osm_physp_get_port_num(p_physp), 
in_port_idx); + + memset(p_rec_item, 0, SA_SLVL_RESP_SIZE); + + p_rec_item->resp.slvl_rec.lid = lid; + if (p_physp->p_node->node_info.node_type == IB_NODE_TYPE_SWITCH) { + p_rec_item->resp.slvl_rec.out_port_num = osm_physp_get_port_num(p_physp); + p_rec_item->resp.slvl_rec.in_port_num = in_port_idx; + } + p_rec_item->resp.slvl_rec.slvl_tbl = + *(osm_physp_get_slvl_tbl(p_physp, in_port_idx)); + + cl_qlist_insert_tail(p_ctxt->p_list, &p_rec_item->list_item); + +Exit: + OSM_LOG_EXIT(sa->p_log); +} + +static void sa_slvl_by_comp_mask(IN osm_sa_t * sa, IN const osm_port_t * p_port, + osm_slvl_search_ctxt_t * p_ctxt) +{ + const ib_slvl_table_record_t *p_rcvd_rec; + ib_net64_t comp_mask; + const osm_physp_t *p_out_physp, *p_in_physp; + uint8_t in_port_num, out_port_num; + uint8_t num_ports; + uint8_t in_port_start, in_port_end; + uint8_t out_port_start, out_port_end; + const osm_physp_t *p_req_physp; + + OSM_LOG_ENTER(sa->p_log); + + p_rcvd_rec = p_ctxt->p_rcvd_rec; + comp_mask = p_ctxt->comp_mask; + num_ports = osm_node_get_num_physp(p_port->p_node); + in_port_start = 0; + in_port_end = num_ports - 1; + out_port_start = 0; + out_port_end = num_ports - 1; + p_req_physp = p_ctxt->p_req_physp; + + if (p_port->p_node->node_info.node_type != IB_NODE_TYPE_SWITCH) { + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Using Physical Default Port Number: 0x%X (for End Node)\n", + p_port->p_physp->port_num); + p_out_physp = p_port->p_physp; + /* check that the p_out_physp and the p_req_physp share a pkey */ + if (osm_physp_share_pkey(sa->p_log, p_req_physp, p_out_physp, + sa->p_subn->opt.allow_both_pkeys)) + sa_slvl_create(sa, p_out_physp, p_ctxt, 0); + } else { + if (comp_mask & IB_SLVL_COMPMASK_OUT_PORT) + out_port_start = out_port_end = + p_rcvd_rec->out_port_num; + if (comp_mask & IB_SLVL_COMPMASK_IN_PORT) + in_port_start = in_port_end = p_rcvd_rec->in_port_num; + + for (out_port_num = out_port_start; + out_port_num <= out_port_end; out_port_num++) { + p_out_physp = + 
osm_node_get_physp_ptr(p_port->p_node, + out_port_num); + if (!p_out_physp) + continue; + + for (in_port_num = in_port_start; + in_port_num <= in_port_end; in_port_num++) { +#if 0 + if (out_port_num && out_port_num == in_port_num) + continue; +#endif + + p_in_physp = + osm_node_get_physp_ptr(p_port->p_node, + in_port_num); + if (!p_in_physp) + continue; + + /* if the requester and the p_out_physp don't share a pkey - + continue */ + if (!osm_physp_share_pkey(sa->p_log, p_req_physp, p_out_physp, + sa->p_subn->opt.allow_both_pkeys)) + continue; + + sa_slvl_create(sa, p_out_physp, p_ctxt, + in_port_num); + } + } + } + OSM_LOG_EXIT(sa->p_log); +} + +static void sa_slvl_by_comp_mask_cb(IN cl_map_item_t * p_map_item, IN void *cxt) +{ + const osm_port_t *p_port = (osm_port_t *) p_map_item; + osm_slvl_search_ctxt_t *p_ctxt = cxt; + + sa_slvl_by_comp_mask(p_ctxt->sa, p_port, p_ctxt); +} + +void osm_slvl_rec_rcv_process(IN void *ctx, IN void *data) +{ + osm_sa_t *sa = ctx; + osm_madw_t *p_madw = data; + const ib_sa_mad_t *p_rcvd_mad; + const ib_slvl_table_record_t *p_rcvd_rec; + const osm_port_t *p_port = NULL; + cl_qlist_t rec_list; + osm_slvl_search_ctxt_t context; + ib_api_status_t status = IB_SUCCESS; + ib_net64_t comp_mask; + osm_physp_t *p_req_physp; + + CL_ASSERT(sa); + + OSM_LOG_ENTER(sa->p_log); + + CL_ASSERT(p_madw); + + p_rcvd_mad = osm_madw_get_sa_mad_ptr(p_madw); + p_rcvd_rec = + (ib_slvl_table_record_t *) ib_sa_mad_get_payload_ptr(p_rcvd_mad); + comp_mask = p_rcvd_mad->comp_mask; + + CL_ASSERT(p_rcvd_mad->attr_id == IB_MAD_ATTR_SLVL_RECORD); + + /* we only support SubnAdmGet and SubnAdmGetTable methods */ + if (p_rcvd_mad->method != IB_MAD_METHOD_GET && + p_rcvd_mad->method != IB_MAD_METHOD_GETTABLE) { + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2604: " + "Unsupported Method (%s) for SL2VLRecord request\n", + ib_get_sa_method_str(p_rcvd_mad->method)); + osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR); + goto Exit; + } + + 
cl_plock_acquire(sa->p_lock); + + /* update the requester physical port */ + p_req_physp = osm_get_physp_by_mad_addr(sa->p_log, sa->p_subn, + osm_madw_get_mad_addr_ptr + (p_madw)); + if (p_req_physp == NULL) { + cl_plock_release(sa->p_lock); + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2603: " + "Cannot find requester physical port\n"); + goto Exit; + } + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Requester port GUID 0x%" PRIx64 "\n", + cl_ntoh64(osm_physp_get_port_guid(p_req_physp))); + + cl_qlist_init(&rec_list); + + context.p_rcvd_rec = p_rcvd_rec; + context.p_list = &rec_list; + context.comp_mask = p_rcvd_mad->comp_mask; + context.sa = sa; + context.in_port_num = p_rcvd_rec->in_port_num; + context.p_req_physp = p_req_physp; + + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Got Query Lid:%u(%02X), In-Port:0x%02X(%02X), Out-Port:0x%02X(%02X)\n", + cl_ntoh16(p_rcvd_rec->lid), + (comp_mask & IB_SLVL_COMPMASK_LID) != 0, + p_rcvd_rec->in_port_num, + (comp_mask & IB_SLVL_COMPMASK_IN_PORT) != 0, + p_rcvd_rec->out_port_num, + (comp_mask & IB_SLVL_COMPMASK_OUT_PORT) != 0); + + /* + If the user specified a LID, it obviously narrows our + work load, since we don't have to search every port + */ + if (comp_mask & IB_SLVL_COMPMASK_LID) { + p_port = osm_get_port_by_lid(sa->p_subn, p_rcvd_rec->lid); + if (!p_port) { + status = IB_NOT_FOUND; + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2608: " + "No port found with LID %u\n", + cl_ntoh16(p_rcvd_rec->lid)); + } + } + + if (status == IB_SUCCESS) { + /* if we have a unique port - no need for a port search */ + if (p_port) + /* this does the loop on all the port phys ports */ + sa_slvl_by_comp_mask(sa, p_port, &context); + else + cl_qmap_apply_func(&sa->p_subn->port_guid_tbl, + sa_slvl_by_comp_mask_cb, &context); + } + + cl_plock_release(sa->p_lock); + + osm_sa_respond(sa, p_madw, sizeof(ib_slvl_table_record_t), &rec_list); + +Exit: + OSM_LOG_EXIT(sa->p_log); +} diff --git a/opensm/osm_sa_sminfo_record.c b/opensm/osm_sa_sminfo_record.c new file mode 
100644 index 0000000..243b6a4 --- /dev/null +++ b/opensm/osm_sa_sminfo_record.c @@ -0,0 +1,328 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_smir_rcv_t. + * This object represents the SMInfo Receiver object. + * This object is part of the opensm family of objects. 
+ */
+
+#if HAVE_CONFIG_H
+#  include
+#endif				/* HAVE_CONFIG_H */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#define FILE_ID OSM_FILE_SA_SMINFO_RECORD_C
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define SA_SMIR_RESP_SIZE SA_ITEM_RESP_SIZE(sminfo_rec)
+
+typedef struct osm_smir_search_ctxt {
+	const ib_sminfo_record_t *p_rcvd_rec;	/* record carried by the request */
+	ib_net64_t comp_mask;	/* component mask of the request */
+	cl_qlist_t *p_list;	/* result list the records are appended to */
+	osm_sa_t *sa;
+	const osm_physp_t *p_req_physp;	/* physical port of the requester */
+} osm_smir_search_ctxt_t;
+/*
+ * smir_rcv_new_smir - append one SMInfo record to p_list.
+ *
+ * The record LID is the base LID of p_port; guid, act_count and pri_state
+ * are copied in as given. p_req_physp is accepted but not used in the body.
+ *
+ * Returns IB_SUCCESS, or IB_INSUFFICIENT_RESOURCES when the item cannot be
+ * allocated (logged as ERR 2801, nothing is appended).
+ */
+static ib_api_status_t smir_rcv_new_smir(IN osm_sa_t * sa,
+					 IN const osm_port_t * p_port,
+					 IN cl_qlist_t * p_list,
+					 IN ib_net64_t const guid,
+					 IN ib_net32_t const act_count,
+					 IN uint8_t const pri_state,
+					 IN const osm_physp_t * p_req_physp)
+{
+	osm_sa_item_t *p_rec_item;
+	ib_api_status_t status = IB_SUCCESS;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	p_rec_item = malloc(SA_SMIR_RESP_SIZE);
+	if (p_rec_item == NULL) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2801: "
+			"rec_item alloc failed\n");
+		status = IB_INSUFFICIENT_RESOURCES;
+		goto Exit;
+	}
+
+	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+		"New SMInfo: GUID 0x%016" PRIx64 "\n", cl_ntoh64(guid));
+
+	memset(p_rec_item, 0, SA_SMIR_RESP_SIZE);
+
+	p_rec_item->resp.sminfo_rec.lid = osm_port_get_base_lid(p_port);
+	p_rec_item->resp.sminfo_rec.sm_info.guid = guid;
+	p_rec_item->resp.sminfo_rec.sm_info.act_count = act_count;
+	p_rec_item->resp.sminfo_rec.sm_info.pri_state = pri_state;
+
+	cl_qlist_insert_tail(p_list, &p_rec_item->list_item);
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+	return status;
+}
+/*
+ * sa_smir_by_comp_mask - add the record of one remote SM if it matches.
+ *
+ * The remote SM is dropped when a GUID, PRIORITY or SMSTATE component is
+ * set in the mask and differs from the value in the request record.
+ * Otherwise the port is looked up by the SM GUID and a record is appended;
+ * a missing port is logged as ERR 2810. The status returned by
+ * smir_rcv_new_smir() is not checked.
+ */
+static void sa_smir_by_comp_mask(IN osm_sa_t * sa,
+				 IN const osm_remote_sm_t * p_rem_sm,
+				 osm_smir_search_ctxt_t * p_ctxt)
+{
+	const ib_sminfo_record_t *const p_rcvd_rec = p_ctxt->p_rcvd_rec;
+	const osm_physp_t *const p_req_physp = p_ctxt->p_req_physp;
+	ib_net64_t const comp_mask = p_ctxt->comp_mask;
+	osm_port_t *p_port;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	if (comp_mask & IB_SMIR_COMPMASK_GUID) {
+		if (p_rem_sm->smi.guid != p_rcvd_rec->sm_info.guid)
+			goto Exit;
+	}
+
+	if (comp_mask & IB_SMIR_COMPMASK_PRIORITY) {
+		if (ib_sminfo_get_priority(&p_rem_sm->smi) !=
+		    ib_sminfo_get_priority(&p_rcvd_rec->sm_info))
+			goto Exit;
+	}
+
+	if (comp_mask & IB_SMIR_COMPMASK_SMSTATE) {
+		if (ib_sminfo_get_state(&p_rem_sm->smi) !=
+		    ib_sminfo_get_state(&p_rcvd_rec->sm_info))
+			goto Exit;
+	}
+
+	/* Implement any other needed search cases */
+	p_port = osm_get_port_by_guid(sa->p_subn, p_rem_sm->smi.guid);
+
+	if (p_port == NULL) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2810: "
+			"No port for remote sm\n");
+		goto Exit;
+	}
+
+	smir_rcv_new_smir(sa, p_port, p_ctxt->p_list,
+			  p_rem_sm->smi.guid, p_rem_sm->smi.act_count,
+			  p_rem_sm->smi.pri_state, p_req_physp);
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+}
+/*
+ * sa_smir_by_comp_mask_cb - cl_qmap_apply_func() adapter: treats the map
+ * item as an osm_remote_sm_t and forwards it to sa_smir_by_comp_mask().
+ */
+static void sa_smir_by_comp_mask_cb(IN cl_map_item_t * p_map_item, IN void *cxt)
+{
+	const osm_remote_sm_t *p_rem_sm = (osm_remote_sm_t *) p_map_item;
+	osm_smir_search_ctxt_t *p_ctxt = cxt;
+
+	sa_smir_by_comp_mask(p_ctxt->sa, p_rem_sm, p_ctxt);
+}
+/*
+ * osm_smir_rcv_process - handle an SMInfoRecord query.
+ *
+ * ctx: the osm_sa_t instance; data: the osm_madw_t wrapping the request.
+ *
+ * Only GET and GETTABLE are served; other methods get UNSUP_METHOD_ATTR.
+ * With sa->p_lock acquired the local SM is considered first (when no LID
+ * was given, or the LID is the local SM port), then either the single
+ * remote SM owning the requested LID or every entry of sm_guid_tbl.
+ * The response is sent after the lock is released.
+ *
+ * Three paths release the lock and return without sending any response:
+ * requester port not found (ERR 2803), local SM port not found (ERR 2809)
+ * and pkey mismatch between requester and local SM port (ERR 2805).
+ */
+void osm_smir_rcv_process(IN void *ctx, IN void *data)
+{
+	osm_sa_t *sa = ctx;
+	osm_madw_t *p_madw = data;
+	const ib_sa_mad_t *sad_mad;
+	const ib_sminfo_record_t *p_rcvd_rec;
+	const osm_port_t *p_port = NULL;
+	const ib_sm_info_t *p_smi;
+	cl_qlist_t rec_list;
+	osm_smir_search_ctxt_t context;
+	ib_api_status_t status = IB_SUCCESS;
+	ib_net64_t comp_mask;
+	ib_net64_t port_guid;
+	osm_physp_t *p_req_physp;
+	osm_port_t *local_port;
+	osm_remote_sm_t *p_rem_sm;
+	cl_qmap_t *p_sm_guid_tbl;
+	uint8_t pri_state;
+
+	CL_ASSERT(sa);
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	CL_ASSERT(p_madw);
+
+	sad_mad = osm_madw_get_sa_mad_ptr(p_madw);
+	p_rcvd_rec = (ib_sminfo_record_t *) ib_sa_mad_get_payload_ptr(sad_mad);
+	comp_mask = sad_mad->comp_mask;
+
+	CL_ASSERT(sad_mad->attr_id == IB_MAD_ATTR_SMINFO_RECORD);
+
+	/* we only support SubnAdmGet and SubnAdmGetTable methods */
+	if (sad_mad->method != IB_MAD_METHOD_GET &&
+	    sad_mad->method != IB_MAD_METHOD_GETTABLE) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2804: "
+			"Unsupported Method (%s) for SMInfoRecord request\n",
+			ib_get_sa_method_str(sad_mad->method));
+		osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR);
+		goto Exit;
+	}
+
+	cl_plock_acquire(sa->p_lock);
+
+	/* update the requester physical port */
+	p_req_physp = osm_get_physp_by_mad_addr(sa->p_log, sa->p_subn,
+						osm_madw_get_mad_addr_ptr
+						(p_madw));
+	if (p_req_physp == NULL) {
+		cl_plock_release(sa->p_lock);
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2803: "
+			"Cannot find requester physical port\n");
+		goto Exit;
+	}
+
+	if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_DEBUG)) {
+		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+			"Requester port GUID 0x%" PRIx64 "\n",
+			cl_ntoh64(osm_physp_get_port_guid(p_req_physp)));
+		osm_dump_sm_info_record_v2(sa->p_log, p_rcvd_rec, FILE_ID, OSM_LOG_DEBUG);
+	}
+
+	p_smi = &p_rcvd_rec->sm_info;
+
+	cl_qlist_init(&rec_list);
+
+	context.p_rcvd_rec = p_rcvd_rec;
+	context.p_list = &rec_list;
+	context.comp_mask = sad_mad->comp_mask;
+	context.sa = sa;
+	context.p_req_physp = p_req_physp;
+
+	/*
+	   If the user specified a LID, it obviously narrows our
+	   work load, since we don't have to search every port
+	 */
+	if (comp_mask & IB_SMIR_COMPMASK_LID) {
+		p_port = osm_get_port_by_lid(sa->p_subn, p_rcvd_rec->lid);
+		if (!p_port) {
+			status = IB_NOT_FOUND;
+			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2806: "
+				"No port found with LID %u\n",
+				cl_ntoh16(p_rcvd_rec->lid));
+		}
+	}
+
+	if (status == IB_SUCCESS) {
+		/* Handle our own SM first */
+		local_port = osm_get_port_by_guid(sa->p_subn,
+						  sa->p_subn->sm_port_guid);
+		if (!local_port) {
+			cl_plock_release(sa->p_lock);
+			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2809: "
+				"No port found with GUID 0x%016" PRIx64 "\n",
+				cl_ntoh64(sa->p_subn->sm_port_guid));
+			goto Exit;
+		}
+
+		if (!p_port || local_port == p_port) {
+			if (FALSE ==
+			    osm_physp_share_pkey(sa->p_log, p_req_physp,
+						 local_port->p_physp,
+						 sa->p_subn->opt.allow_both_pkeys)) {
+				cl_plock_release(sa->p_lock);
+				OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2805: "
+					"Cannot get SMInfo record due to pkey violation\n");
+				goto Exit;
+			}
+
+			/* Check that other search components specified match */
+			if ((comp_mask & IB_SMIR_COMPMASK_GUID) &&
+			    sa->p_subn->sm_port_guid != p_smi->guid)
+				goto Remotes;
+			if ((comp_mask & IB_SMIR_COMPMASK_PRIORITY) &&
+			    sa->p_subn->opt.sm_priority !=
+			    ib_sminfo_get_priority(p_smi))
+				goto Remotes;
+			if ((comp_mask & IB_SMIR_COMPMASK_SMSTATE) &&
+			    sa->p_subn->sm_state != ib_sminfo_get_state(p_smi))
+				goto Remotes;
+
+			/* Now, add local SMInfo to list */
+			pri_state = sa->p_subn->sm_state & 0x0F;	/* low nibble: SM state */
+			pri_state |= (sa->p_subn->opt.sm_priority & 0x0F) << 4;	/* high nibble: priority */
+			smir_rcv_new_smir(sa, local_port, context.p_list,
+					  sa->p_subn->sm_port_guid,
+					  cl_ntoh32(sa->p_subn->p_osm->stats.
+						    qp0_mads_sent), pri_state,
+					  p_req_physp);
+		}
+
+Remotes:
+		if (p_port && p_port != local_port) {
+			/* Find remote SM corresponding to p_port */
+			port_guid = osm_port_get_guid(p_port);
+			p_sm_guid_tbl = &sa->p_subn->sm_guid_tbl;
+			p_rem_sm =
+			    (osm_remote_sm_t *) cl_qmap_get(p_sm_guid_tbl,
+							    port_guid);
+			if (p_rem_sm !=
+			    (osm_remote_sm_t *) cl_qmap_end(p_sm_guid_tbl))
+				sa_smir_by_comp_mask(sa, p_rem_sm, &context);
+			else
+				OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 280A: "
+					"No remote SM for GUID 0x%016" PRIx64
+					"\n", cl_ntoh64(port_guid));
+		} else if (!p_port) {
+			/* Go over all other known (remote) SMs */
+			cl_qmap_apply_func(&sa->p_subn->sm_guid_tbl,
+					   sa_smir_by_comp_mask_cb, &context);
+		}
+	}
+
+	cl_plock_release(sa->p_lock);
+
+	osm_sa_respond(sa, p_madw, sizeof(ib_sminfo_record_t), &rec_list);
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+}
diff --git a/opensm/osm_sa_sw_info_record.c b/opensm/osm_sa_sw_info_record.c
new file mode 100644
index 0000000..acbe3a6
--- /dev/null
+++ b/opensm/osm_sa_sw_info_record.c
@@ -0,0 +1,264 @@
+/*
+ * Copyright (c) 2004-2009 Voltaire, Inc.
All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_sir_rcv_t. + * This object represents the SwitchInfo Receiver object. + * This object is part of the opensm family of objects. 
+ */
+
+#if HAVE_CONFIG_H
+#  include
+#endif				/* HAVE_CONFIG_H */
+
+#include
+#include
+#include
+#include
+#include
+#define FILE_ID OSM_FILE_SA_SW_INFO_RECORD_C
+#include
+#include
+#include
+#include
+#include
+
+#define SA_SIR_RESP_SIZE SA_ITEM_RESP_SIZE(swinfo_rec)
+
+typedef struct osm_sir_search_ctxt {
+	const ib_switch_info_record_t *p_rcvd_rec;	/* record carried by the request */
+	ib_net64_t comp_mask;	/* component mask of the request */
+	cl_qlist_t *p_list;	/* result list the records are appended to */
+	osm_sa_t *sa;
+	const osm_physp_t *p_req_physp;	/* physical port of the requester */
+} osm_sir_search_ctxt_t;
+/*
+ * sir_rcv_new_sir - append one SwitchInfo record for p_sw to p_list.
+ *
+ * lid is stored in the record as given; the switch_info of p_sw is copied.
+ *
+ * Returns IB_SUCCESS, or IB_INSUFFICIENT_RESOURCES when the item cannot be
+ * allocated (logged as ERR 5308, nothing is appended).
+ */
+static ib_api_status_t sir_rcv_new_sir(IN osm_sa_t * sa,
+				       IN const osm_switch_t * p_sw,
+				       IN cl_qlist_t * p_list,
+				       IN ib_net16_t lid)
+{
+	osm_sa_item_t *p_rec_item;
+	ib_api_status_t status = IB_SUCCESS;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	p_rec_item = malloc(SA_SIR_RESP_SIZE);
+	if (p_rec_item == NULL) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5308: "
+			"rec_item alloc failed\n");
+		status = IB_INSUFFICIENT_RESOURCES;
+		goto Exit;
+	}
+
+	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+		"New SwitchInfoRecord: lid %u\n", cl_ntoh16(lid));
+
+	memset(p_rec_item, 0, SA_SIR_RESP_SIZE);
+
+	p_rec_item->resp.swinfo_rec.lid = lid;
+	p_rec_item->resp.swinfo_rec.switch_info = p_sw->switch_info;
+
+	cl_qlist_insert_tail(p_list, &p_rec_item->list_item);
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+	return status;
+}
+/*
+ * sir_rcv_create_sir - add the record of one switch if the requester may
+ * see it and the optional LID filter matches.
+ *
+ * match_lid: LID to match; 0 disables the LID filter.
+ *
+ * The switch port is looked up by node_info.port_guid. Nothing is added
+ * when the port or its default physical port is missing (ERR 530A / 530B),
+ * when requester and switch port share no pkey, or when a non-zero
+ * match_lid lies outside the LID range of the port. The status returned by
+ * sir_rcv_new_sir() is not checked.
+ */
+static void sir_rcv_create_sir(IN osm_sa_t * sa, IN const osm_switch_t * p_sw,
+			       IN cl_qlist_t * p_list, IN ib_net16_t match_lid,
+			       IN const osm_physp_t * p_req_physp)
+{
+	osm_port_t *p_port;
+	const osm_physp_t *p_physp;
+	uint16_t match_lid_ho;
+	ib_net16_t min_lid_ho;	/* NOTE(review): typed ib_net16_t but compared with a host order value below */
+	ib_net16_t max_lid_ho;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+		"Looking for SwitchInfoRecord with LID: %u\n",
+		cl_ntoh16(match_lid));
+
+	/* In switches, the port guid is the node guid. */
+	p_port =
+	    osm_get_port_by_guid(sa->p_subn, p_sw->p_node->node_info.port_guid);
+	if (!p_port) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 530A: "
+			"Failed to find Port by Node Guid:0x%016" PRIx64
+			"\n", cl_ntoh64(p_sw->p_node->node_info.node_guid));
+		goto Exit;
+	}
+
+	/* check that the requester physp and the current physp are under
+	   the same partition. */
+	p_physp = p_port->p_physp;
+	if (!p_physp) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 530B: "
+			"Failed to find default physical Port by Node Guid:0x%016"
+			PRIx64 "\n",
+			cl_ntoh64(p_sw->p_node->node_info.node_guid));
+		goto Exit;
+	}
+	if (!osm_physp_share_pkey(sa->p_log, p_req_physp, p_physp,
+				  sa->p_subn->opt.allow_both_pkeys))
+		goto Exit;
+
+	/* get the port 0 of the switch */
+	osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
+
+	match_lid_ho = cl_ntoh16(match_lid);
+	if (match_lid_ho) {
+		/*
+		   We validate that the lid belongs to this switch.
+		 */
+		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+			"Comparing LID: %u <= %u <= %u\n",
+			min_lid_ho, match_lid_ho, max_lid_ho);
+
+		if (match_lid_ho < min_lid_ho || match_lid_ho > max_lid_ho)
+			goto Exit;
+
+	}
+
+	sir_rcv_new_sir(sa, p_sw, p_list, osm_port_get_base_lid(p_port));
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+}
+/*
+ * sir_rcv_by_comp_mask - cl_qmap_apply_func() callback run for each switch.
+ *
+ * Dumps the switch info at VERBOSE level, then forwards the switch to
+ * sir_rcv_create_sir(). When the LID component is set the requested LID is
+ * used as filter; a requested LID of 0 matches no switch.
+ */
+static void sir_rcv_by_comp_mask(IN cl_map_item_t * p_map_item, IN void *cxt)
+{
+	const osm_sir_search_ctxt_t *p_ctxt = cxt;
+	const osm_switch_t *p_sw = (osm_switch_t *) p_map_item;
+	const ib_switch_info_record_t *const p_rcvd_rec = p_ctxt->p_rcvd_rec;
+	const osm_physp_t *const p_req_physp = p_ctxt->p_req_physp;
+	osm_sa_t *sa = p_ctxt->sa;
+	ib_net64_t const comp_mask = p_ctxt->comp_mask;
+	ib_net16_t match_lid = 0;
+
+	OSM_LOG_ENTER(p_ctxt->sa->p_log);
+
+	osm_dump_switch_info_v2(p_ctxt->sa->p_log, &p_sw->switch_info,
+				FILE_ID, OSM_LOG_VERBOSE);
+
+	if (comp_mask & IB_SWIR_COMPMASK_LID) {
+		match_lid = p_rcvd_rec->lid;
+		if (!match_lid)
+			goto Exit;
+	}
+
+	sir_rcv_create_sir(sa, p_sw, p_ctxt->p_list, match_lid, p_req_physp);
+
+Exit:
+	OSM_LOG_EXIT(p_ctxt->sa->p_log);
+}
+/*
+ * osm_sir_rcv_process - handle a SwitchInfoRecord query.
+ *
+ * ctx: the osm_sa_t instance; data: the osm_madw_t wrapping the request.
+ *
+ * Only GET and GETTABLE are served; other methods get UNSUP_METHOD_ATTR.
+ * With sa->p_lock acquired every entry of sw_guid_tbl is passed through
+ * sir_rcv_by_comp_mask(); the response is sent after the lock is released.
+ * If the requester physical port cannot be resolved the function logs
+ * ERR 5304 and returns without sending any response.
+ */
+void osm_sir_rcv_process(IN void *ctx, IN void *data)
+{
+	osm_sa_t *sa = ctx;
+	osm_madw_t *p_madw = data;
+	const ib_sa_mad_t *sad_mad;
+	const ib_switch_info_record_t *p_rcvd_rec;
+	cl_qlist_t rec_list;
+	osm_sir_search_ctxt_t context;
+	osm_physp_t *p_req_physp;
+
+	CL_ASSERT(sa);
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	CL_ASSERT(p_madw);
+
+	sad_mad = osm_madw_get_sa_mad_ptr(p_madw);
+	p_rcvd_rec =
+	    (ib_switch_info_record_t *) ib_sa_mad_get_payload_ptr(sad_mad);
+
+	CL_ASSERT(sad_mad->attr_id == IB_MAD_ATTR_SWITCH_INFO_RECORD);
+
+	/* we only support SubnAdmGet and SubnAdmGetTable methods */
+	if (sad_mad->method != IB_MAD_METHOD_GET &&
+	    sad_mad->method != IB_MAD_METHOD_GETTABLE) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5305: "
+			"Unsupported Method (%s) for SwitchInfoRecord request\n",
+			ib_get_sa_method_str(sad_mad->method));
+		osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR);
+		goto Exit;
+	}
+
+	cl_plock_acquire(sa->p_lock);
+
+	/* update the requester physical port */
+	p_req_physp = osm_get_physp_by_mad_addr(sa->p_log, sa->p_subn,
+						osm_madw_get_mad_addr_ptr
+						(p_madw));
+	if (p_req_physp == NULL) {
+		cl_plock_release(sa->p_lock);
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5304: "
+			"Cannot find requester physical port\n");
+		goto Exit;
+	}
+
+	if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_DEBUG)) {
+		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+			"Requester port GUID 0x%" PRIx64 "\n",
+			cl_ntoh64(osm_physp_get_port_guid(p_req_physp)));
+		osm_dump_switch_info_record_v2(sa->p_log, p_rcvd_rec,
+					       FILE_ID, OSM_LOG_DEBUG);
+	}
+
+	cl_qlist_init(&rec_list);
+
+	context.p_rcvd_rec = p_rcvd_rec;
+	context.p_list = &rec_list;
+	context.comp_mask = sad_mad->comp_mask;
+	context.sa = sa;
+	context.p_req_physp = p_req_physp;
+
+	/* Go over all switches */
+	cl_qmap_apply_func(&sa->p_subn->sw_guid_tbl, sir_rcv_by_comp_mask,
+			   &context);
+
+	cl_plock_release(sa->p_lock);
+
+	osm_sa_respond(sa, p_madw, sizeof(ib_switch_info_record_t), &rec_list);
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+}
diff --git a/opensm/osm_sa_vlarb_record.c b/opensm/osm_sa_vlarb_record.c
new file mode 100644
index 0000000..8cb25fc
--- /dev/null
+++ b/opensm/osm_sa_vlarb_record.c
@@ -0,0 +1,305 @@
+/*
+ * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * Abstract:
+ *    Implementation of osm_vlarb_rec_rcv_t.
+ * This object represents the VLArbitrationRecord Receiver object.
+ * This object is part of the opensm family of objects.
+ */
+
+#if HAVE_CONFIG_H
+#  include
+#endif				/* HAVE_CONFIG_H */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#define FILE_ID OSM_FILE_SA_VLARB_RECORD_C
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define SA_VLA_RESP_SIZE SA_ITEM_RESP_SIZE(vlarb_rec)
+
+typedef struct osm_vl_arb_search_ctxt {
+	const ib_vl_arb_table_record_t *p_rcvd_rec;	/* record carried by the request */
+	ib_net64_t comp_mask;	/* component mask of the request */
+	uint8_t block_num;	/* copy of p_rcvd_rec->block_num */
+	cl_qlist_t *p_list;	/* result list the records are appended to */
+	osm_sa_t *sa;
+	const osm_physp_t *p_req_physp;	/* physical port of the requester */
+} osm_vl_arb_search_ctxt_t;
+/*
+ * sa_vl_arb_create - append one VLArbitration record for (p_physp, block)
+ * to p_ctxt->p_list.
+ *
+ * The record LID is the base LID of the physical port, or the base LID of
+ * port 0 of the node when the node is a switch. An allocation failure is
+ * logged (ERR 2A02) and no record is added.
+ */
+static void sa_vl_arb_create(IN osm_sa_t * sa, IN osm_physp_t * p_physp,
+			     IN osm_vl_arb_search_ctxt_t * p_ctxt,
+			     IN uint8_t block)
+{
+	osm_sa_item_t *p_rec_item;
+	uint16_t lid;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	p_rec_item = malloc(SA_VLA_RESP_SIZE);
+	if (p_rec_item == NULL) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2A02: "
+			"rec_item alloc failed\n");
+		goto Exit;
+	}
+
+	if (p_physp->p_node->node_info.node_type != IB_NODE_TYPE_SWITCH)
+		lid = p_physp->port_info.base_lid;
+	else
+		lid = osm_node_get_base_lid(p_physp->p_node, 0);
+
+	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+		"New VLArbitration for: port 0x%016" PRIx64
+		", lid %u, port %u Block:%u\n",
+		cl_ntoh64(osm_physp_get_port_guid(p_physp)),
+		cl_ntoh16(lid), osm_physp_get_port_num(p_physp), block);
+
+	memset(p_rec_item, 0, SA_VLA_RESP_SIZE);
+
+	p_rec_item->resp.vlarb_rec.lid = lid;
+	p_rec_item->resp.vlarb_rec.port_num = osm_physp_get_port_num(p_physp);
+	p_rec_item->resp.vlarb_rec.block_num = block;
+	p_rec_item->resp.vlarb_rec.vl_arb_tbl = *(osm_physp_get_vla_tbl(p_physp, block));
+
+	cl_qlist_insert_tail(p_ctxt->p_list, &p_rec_item->list_item);
+
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+}
+/*
+ * sa_vl_arb_check_physp - create the records of one physical port.
+ *
+ * Blocks 1 to 4 are visited. All of them are reported unless the BLOCK
+ * component is set, in which case only the block equal to
+ * p_ctxt->block_num is reported (none if that value is outside 1..4).
+ */
+static void sa_vl_arb_check_physp(IN osm_sa_t * sa, IN osm_physp_t * p_physp,
+				  osm_vl_arb_search_ctxt_t * p_ctxt)
+{
+	ib_net64_t comp_mask = p_ctxt->comp_mask;
+	uint8_t block;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	/* we got here with the phys port - all that's left is to get the right block */
+	for (block = 1; block <= 4; block++) {
+		if (!(comp_mask & IB_VLA_COMPMASK_BLOCK)
+		    || block == p_ctxt->block_num)
+			sa_vl_arb_create(sa, p_physp, p_ctxt, block);
+	}
+
+	OSM_LOG_EXIT(sa->p_log);
+}
+/*
+ * sa_vl_arb_by_comp_mask - collect the VLArbitration records of one port.
+ *
+ * For a non-switch node the search is forced to the port's default
+ * physical port. When an out port is selected (by the request or by that
+ * forcing) it is range-checked against the number of physical ports of the
+ * node; an out of range value is logged as ERR 2A03 and yields nothing.
+ * Otherwise every physical port of the node is visited. Ports that are
+ * missing or share no pkey with the requester are skipped.
+ */
+static void sa_vl_arb_by_comp_mask(osm_sa_t * sa, IN const osm_port_t * p_port,
+				   osm_vl_arb_search_ctxt_t * p_ctxt)
+{
+	const ib_vl_arb_table_record_t *p_rcvd_rec;
+	ib_net64_t comp_mask;
+	osm_physp_t *p_physp;
+	uint8_t port_num;
+	uint8_t num_ports;
+	const osm_physp_t *p_req_physp;
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	p_rcvd_rec = p_ctxt->p_rcvd_rec;
+	comp_mask = p_ctxt->comp_mask;
+	port_num = p_rcvd_rec->port_num;
+	p_req_physp = p_ctxt->p_req_physp;
+
+	/* if this is a switch port we can search all ports
+	   otherwise we must be looking on port 0 */
+	if (p_port->p_node->node_info.node_type != IB_NODE_TYPE_SWITCH) {
+		/* we put it in the comp mask and port num */
+		port_num = p_port->p_physp->port_num;
+		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+			"Using Physical Default Port Number: 0x%X (for End Node)\n",
+			port_num);
+		comp_mask |= IB_VLA_COMPMASK_OUT_PORT;	/* local copy only; p_ctxt->comp_mask is unchanged */
+	}
+
+	if (comp_mask & IB_VLA_COMPMASK_OUT_PORT) {
+		if (port_num < osm_node_get_num_physp(p_port->p_node)) {
+			p_physp =
+			    osm_node_get_physp_ptr(p_port->p_node, port_num);
+			/* check that the p_physp is valid, and that the requester
+			   and the p_physp share a pkey. */
+			if (p_physp &&
+			    osm_physp_share_pkey(sa->p_log, p_req_physp, p_physp,
+						 sa->p_subn->opt.allow_both_pkeys))
+				sa_vl_arb_check_physp(sa, p_physp, p_ctxt);
+		} else {
+			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2A03: "
+				"Given Physical Port Number: 0x%X is out of range should be < 0x%X\n",
+				port_num,
+				osm_node_get_num_physp(p_port->p_node));
+			goto Exit;
+		}
+	} else {
+		num_ports = osm_node_get_num_physp(p_port->p_node);
+		for (port_num = 0; port_num < num_ports; port_num++) {
+			p_physp =
+			    osm_node_get_physp_ptr(p_port->p_node, port_num);
+			if (!p_physp)
+				continue;
+
+			/* if the requester and the p_physp don't share a pkey -
+			   continue */
+			if (!osm_physp_share_pkey(sa->p_log, p_req_physp, p_physp,
+						  sa->p_subn->opt.allow_both_pkeys))
+				continue;
+
+			sa_vl_arb_check_physp(sa, p_physp, p_ctxt);
+		}
+	}
+Exit:
+	OSM_LOG_EXIT(sa->p_log);
+}
+/*
+ * sa_vl_arb_by_comp_mask_cb - cl_qmap_apply_func() adapter: treats the map
+ * item as an osm_port_t and forwards it to sa_vl_arb_by_comp_mask().
+ */
+static void sa_vl_arb_by_comp_mask_cb(IN cl_map_item_t * p_map_item, void *cxt)
+{
+	const osm_port_t *p_port = (osm_port_t *) p_map_item;
+	osm_vl_arb_search_ctxt_t *p_ctxt = cxt;
+
+	sa_vl_arb_by_comp_mask(p_ctxt->sa, p_port, p_ctxt);
+}
+
+void osm_vlarb_rec_rcv_process(IN void *ctx, IN void *data)
+{
+	osm_sa_t *sa = ctx;
+	osm_madw_t *p_madw = data;
+	const ib_sa_mad_t *sad_mad;
+	const ib_vl_arb_table_record_t *p_rcvd_rec;
+	const osm_port_t *p_port = NULL;
+	cl_qlist_t rec_list;
+	osm_vl_arb_search_ctxt_t context;
+	ib_api_status_t status = IB_SUCCESS;
+	ib_net64_t comp_mask;
+	osm_physp_t *p_req_physp;
+
+	CL_ASSERT(sa);
+
+	OSM_LOG_ENTER(sa->p_log);
+
+	CL_ASSERT(p_madw);
+
+	sad_mad = osm_madw_get_sa_mad_ptr(p_madw);
+	p_rcvd_rec =
+	    (ib_vl_arb_table_record_t *) ib_sa_mad_get_payload_ptr(sad_mad);
+	comp_mask = sad_mad->comp_mask;
+
+	CL_ASSERT(sad_mad->attr_id == IB_MAD_ATTR_VLARB_RECORD);
+
+	/* we only support SubnAdmGet and SubnAdmGetTable methods */
+	if (sad_mad->method != IB_MAD_METHOD_GET &&
+	    sad_mad->method != IB_MAD_METHOD_GETTABLE) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2A05: "
+			"Unsupported Method (%s) for a VLArbRecord request\n",
+			ib_get_sa_method_str(sad_mad->method));
+		osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR);
+		goto Exit;
+	}
+
+	cl_plock_acquire(sa->p_lock);
+
+	/* update the requester physical port */
+	p_req_physp = osm_get_physp_by_mad_addr(sa->p_log, sa->p_subn,
+						osm_madw_get_mad_addr_ptr
+						(p_madw));
+	if (p_req_physp == NULL) {
+		cl_plock_release(sa->p_lock);
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2A04: "
+			"Cannot find requester physical port\n");
+		goto Exit;
+	}
+	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+		"Requester port GUID 0x%" PRIx64 "\n",
+		cl_ntoh64(osm_physp_get_port_guid(p_req_physp)));
+
+	cl_qlist_init(&rec_list);
+
+	context.p_rcvd_rec = p_rcvd_rec;
+	context.p_list = &rec_list;
+	context.comp_mask = sad_mad->comp_mask;
+	context.sa = sa;
+	context.block_num = p_rcvd_rec->block_num;
+	context.p_req_physp = p_req_physp;
+
+	OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
+		"Got Query Lid:%u(%02X), Port:0x%02X(%02X), Block:0x%02X(%02X)\n",
+		cl_ntoh16(p_rcvd_rec->lid),
+		(comp_mask & IB_VLA_COMPMASK_LID) != 0, p_rcvd_rec->port_num,
+		(comp_mask & IB_VLA_COMPMASK_OUT_PORT) != 0,
+		p_rcvd_rec->block_num,
+		(comp_mask & IB_VLA_COMPMASK_BLOCK) != 0);
+
+	/*
+	   If the user specified a LID, it obviously narrows our
+	   work load, since we don't have to search every port
+	 */
+	if (comp_mask & IB_VLA_COMPMASK_LID) {
+		p_port = osm_get_port_by_lid(sa->p_subn, p_rcvd_rec->lid);
+		if (!p_port) {
+			status = IB_NOT_FOUND;
+			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2A09: "
+				"No port found with LID %u\n",
+				cl_ntoh16(p_rcvd_rec->lid));
+		}
+	}
+
+	if (status == IB_SUCCESS) {
+		/* if we got a unique port - no need for a port search */
+		if (p_port)
+			/*  this does the loop on all the port phys ports */
+			sa_vl_arb_by_comp_mask(sa, p_port, &context);
+		else
+			cl_qmap_apply_func(&sa->p_subn->port_guid_tbl,
+					   sa_vl_arb_by_comp_mask_cb, &context);
+	}
+
+	cl_plock_release(sa->p_lock);
+
+	osm_sa_respond(sa, p_madw,
sizeof(ib_vl_arb_table_record_t), &rec_list); + +Exit: + OSM_LOG_EXIT(sa->p_log); +} diff --git a/opensm/osm_service.c b/opensm/osm_service.c new file mode 100644 index 0000000..ba5a982 --- /dev/null +++ b/opensm/osm_service.c @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of service record functions. 
+ */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#define FILE_ID OSM_FILE_SERVICE_C +#include +#include + +void osm_svcr_delete(IN osm_svcr_t * p_svcr) +{ + free(p_svcr); +} + +void osm_svcr_init(IN osm_svcr_t * p_svcr, + IN const ib_service_record_t * p_svc_rec) +{ + CL_ASSERT(p_svcr); + + p_svcr->modified_time = cl_get_time_stamp_sec(); + + /* We track the time left for this service in + an external field to avoid extra cl_ntoh/hton + required for working with the MAD field */ + p_svcr->lease_period = cl_ntoh32(p_svc_rec->service_lease); + p_svcr->service_record = *p_svc_rec; +} + +osm_svcr_t *osm_svcr_new(IN const ib_service_record_t * p_svc_rec) +{ + osm_svcr_t *p_svcr; + + CL_ASSERT(p_svc_rec); + + p_svcr = (osm_svcr_t *) malloc(sizeof(*p_svcr)); + if (p_svcr) { + memset(p_svcr, 0, sizeof(*p_svcr)); + osm_svcr_init(p_svcr, p_svc_rec); + } + + return p_svcr; +} + +static cl_status_t match_rid_of_svc_rec(IN const cl_list_item_t * p_list_item, + IN void *context) +{ + ib_service_record_t *p_svc_rec = (ib_service_record_t *) context; + osm_svcr_t *p_svcr = (osm_svcr_t *) p_list_item; + + if (memcmp(&p_svcr->service_record, p_svc_rec, + sizeof(p_svc_rec->service_id) + + sizeof(p_svc_rec->service_gid) + + sizeof(p_svc_rec->service_pkey))) + return CL_NOT_FOUND; + else + return CL_SUCCESS; +} + +osm_svcr_t *osm_svcr_get_by_rid(IN osm_subn_t const *p_subn, + IN osm_log_t * p_log, + IN ib_service_record_t * p_svc_rec) +{ + cl_list_item_t *p_list_item; + + OSM_LOG_ENTER(p_log); + + p_list_item = cl_qlist_find_from_head(&p_subn->sa_sr_list, + match_rid_of_svc_rec, p_svc_rec); + if (p_list_item == cl_qlist_end(&p_subn->sa_sr_list)) + p_list_item = NULL; + + OSM_LOG_EXIT(p_log); + return (osm_svcr_t *) p_list_item; +} + +void osm_svcr_insert_to_db(IN osm_subn_t * p_subn, IN osm_log_t * p_log, + IN osm_svcr_t * p_svcr) +{ + OSM_LOG_ENTER(p_log); + + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Inserting new Service Record into 
Database\n"); + + cl_qlist_insert_head(&p_subn->sa_sr_list, &p_svcr->list_item); + p_subn->p_osm->sa.dirty = TRUE; + + OSM_LOG_EXIT(p_log); +} + +void osm_svcr_remove_from_db(IN osm_subn_t * p_subn, IN osm_log_t * p_log, + IN osm_svcr_t * p_svcr) +{ + OSM_LOG_ENTER(p_log); + + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Removing Service Record Name:%s ID:0x%016" PRIx64 + " from Database\n", p_svcr->service_record.service_name, + cl_ntoh64(p_svcr->service_record.service_id)); + + cl_qlist_remove_item(&p_subn->sa_sr_list, &p_svcr->list_item); + p_subn->p_osm->sa.dirty = TRUE; + + OSM_LOG_EXIT(p_log); +} diff --git a/opensm/osm_slvl_map_rcv.c b/opensm/osm_slvl_map_rcv.c new file mode 100644 index 0000000..67f0e19 --- /dev/null +++ b/opensm/osm_slvl_map_rcv.c @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_slvl_rcv_t. + * This object represents the SLtoVL Receiver object. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_SLVL_MAP_RCV_C +#include +#include +#include +#include +#include +#include + +/* + * WE ONLY RECEIVE GET or SET responses + */ +void osm_slvl_rcv_process(IN void *context, IN void *p_data) +{ + osm_sm_t *sm = context; + osm_madw_t *p_madw = p_data; + ib_slvl_table_t *p_slvl_tbl; + ib_smp_t *p_smp; + osm_port_t *p_port; + osm_physp_t *p_physp; + osm_node_t *p_node; + osm_slvl_context_t *p_context; + ib_net64_t port_guid; + ib_net64_t node_guid; + uint32_t attr_mod; + uint8_t startinport, endinport, startoutport, endoutport; + uint8_t in_port, out_port; + + CL_ASSERT(sm); + + OSM_LOG_ENTER(sm->p_log); + + CL_ASSERT(p_madw); + + p_smp = osm_madw_get_smp_ptr(p_madw); + p_context = osm_madw_get_slvl_context_ptr(p_madw); + p_slvl_tbl = ib_smp_get_payload_ptr(p_smp); + + port_guid = p_context->port_guid; + node_guid = p_context->node_guid; + + CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_SLVL_TABLE); + + if (!sm->p_subn->opt.suppress_sl2vl_mad_status_errors && + ib_smp_get_status(p_smp)) { + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "MAD status 0x%x received\n", + cl_ntoh16(ib_smp_get_status(p_smp))); + goto Exit2; + } + + cl_plock_excl_acquire(sm->p_lock); + p_port = 
osm_get_port_by_guid(sm->p_subn, port_guid); + + if (!p_port) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2C06: " + "No port object for port with GUID 0x%" PRIx64 + "\n\t\t\t\tfor parent node GUID 0x%" PRIx64 + ", TID 0x%" PRIx64 "\n", cl_ntoh64(port_guid), + cl_ntoh64(node_guid), cl_ntoh64(p_smp->trans_id)); + goto Exit; + } + + p_node = p_port->p_node; + CL_ASSERT(p_node); + + /* in case of a non switch node the attr modifier should be ignored */ + if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) { + unsigned num_ports = osm_node_get_num_physp(p_node) - 1; /* highest valid port number */ + attr_mod = cl_ntoh32(p_smp->attr_mod); /* host byte order from here on */ + + if (attr_mod & 0x10000) { /* bit 16 set: response applies to all output ports */ + startoutport = ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info) ? 0 : 1; + endoutport = osm_node_get_num_physp(p_node) - 1; + } else + startoutport = endoutport = attr_mod & 0xff; + + if (attr_mod & 0x20000) { /* bit 17 set: response applies to all input ports */ + startinport = ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info) ? 0 : 1; + endinport = osm_node_get_num_physp(p_node) - 1; + } else + startinport = endinport = (attr_mod >> 8) & 0xff; + + if (startinport > num_ports || startoutport > num_ports) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2C07: " + "Invalid attribute modifier 0x%x received in" + " response from switch 0x%" PRIx64 "\n", + attr_mod, cl_ntoh64(node_guid)); /* attr_mod was already converted above; do not swap it twice */ + goto Exit; + } + + } else { + startoutport = endoutport = p_port->p_physp->port_num; + startinport = endinport = 0; + } + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Received SLtoVL GetResp" + " in_port_num %u out_port_num %u with GUID 0x%" PRIx64 + " for parent node GUID 0x%" PRIx64 ", TID 0x%" PRIx64 "\n", + startinport == endinport ? startinport : 0xff, + startoutport == endoutport ? startoutport : 0xff, + cl_ntoh64(port_guid), cl_ntoh64(node_guid), + cl_ntoh64(p_smp->trans_id)); + + osm_dump_slvl_map_table_v2(sm->p_log, port_guid, + startinport == endinport ? startinport : 0xff, + startoutport == endoutport ?
startoutport : 0xff, + p_slvl_tbl, FILE_ID, OSM_LOG_DEBUG); + + for (out_port = startoutport; out_port <= endoutport; out_port++) { + p_physp = osm_node_get_physp_ptr(p_node, out_port); + for (in_port = startinport; in_port <= endinport; in_port++) + osm_physp_set_slvl_tbl(p_physp, p_slvl_tbl, in_port); + } + +Exit: + cl_plock_release(sm->p_lock); + +Exit2: + OSM_LOG_EXIT(sm->p_log); +} diff --git a/opensm/osm_sm.c b/opensm/osm_sm.c new file mode 100644 index 0000000..e5e14ea --- /dev/null +++ b/opensm/osm_sm.c @@ -0,0 +1,464 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_sm_t. + * This object represents the SM Receiver object. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_SM_C +#include +#include +#include +#include +#include +#include +#include + +#define OSM_SM_INITIAL_TID_VALUE 0x1233 + +extern void osm_lft_rcv_process(IN void *context, IN void *data); +extern void osm_mft_rcv_process(IN void *context, IN void *data); +extern void osm_nd_rcv_process(IN void *context, IN void *data); +extern void osm_ni_rcv_process(IN void *context, IN void *data); +extern void osm_pkey_rcv_process(IN void *context, IN void *data); +extern void osm_pi_rcv_process(IN void *context, IN void *data); +extern void osm_gi_rcv_process(IN void *context, IN void *data); +extern void osm_slvl_rcv_process(IN void *context, IN void *p_data); +extern void osm_sminfo_rcv_process(IN void *context, IN void *data); +extern void osm_si_rcv_process(IN void *context, IN void *data); +extern void osm_trap_rcv_process(IN void *context, IN void *data); +extern void osm_vla_rcv_process(IN void *context, IN void *data); +extern void osm_mlnx_epi_rcv_process(IN void *context, IN void *data); + +extern void osm_state_mgr_process(IN osm_sm_t * sm, IN osm_signal_t signal); +extern void osm_sm_state_mgr_polling_callback(IN void *context); + +static void sm_process(osm_sm_t * sm, osm_signal_t signal) +{ +#ifdef ENABLE_OSM_PERF_MGR + if (signal == OSM_SIGNAL_PERFMGR_SWEEP) + osm_perfmgr_process(&sm->p_subn->p_osm->perfmgr); + else +#endif + osm_state_mgr_process(sm, 
signal); +} + +static void sm_sweeper(IN void *p_ptr) +{ + ib_api_status_t status; + osm_sm_t * p_sm = p_ptr; + unsigned signals, i; + + OSM_LOG_ENTER(p_sm->p_log); + + while (p_sm->thread_state == OSM_THREAD_STATE_RUN) { + /* + * Block on the event with no timeout (EVENT_NO_TIMEOUT). + * Sweeps may be initiated "off schedule" by simply + * signaling the event. + */ + status = cl_event_wait_on(&p_sm->signal_event, + EVENT_NO_TIMEOUT, TRUE); + + if (status == CL_SUCCESS) + OSM_LOG(p_sm->p_log, OSM_LOG_DEBUG, + "Off schedule sweep signalled\n"); + else { + OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR 2E01: " + "Event wait failed (%s)\n", + CL_STATUS_MSG(status)); + continue; /* retry the wait; thread_state is re-checked by the loop */ + } + + if (osm_exit_flag) + break; + + cl_spinlock_acquire(&p_sm->signal_lock); /* snapshot and clear the pending mask atomically */ + signals = p_sm->signal_mask; + p_sm->signal_mask = 0; + cl_spinlock_release(&p_sm->signal_lock); + + for (i = 0; signals; signals >>= 1, i++) /* bit position i is passed on as the signal number */ + if (signals & 1) + sm_process(p_sm, i); + } + + OSM_LOG_EXIT(p_sm->p_log); +} + +static void sm_sweep(void *arg) +{ + osm_sm_t *sm = arg; + + /* do the sweep only if we are in MASTER or DISCOVERING state */ + if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER || + sm->p_subn->sm_state == IB_SMINFO_STATE_DISCOVERING) + osm_sm_signal(sm, OSM_SIGNAL_SWEEP); + cl_timer_start(&sm->sweep_timer, sm->p_subn->opt.sweep_interval * 1000); /* re-arm regardless of state */ +} + +static void sweep_fail_process(IN void *context, IN void *p_data) +{ + osm_sm_t *sm = context; + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "light sweep failed\n"); + sm->p_subn->force_heavy_sweep = TRUE; /* p_data is unused here */ +} + +void osm_sm_construct(IN osm_sm_t * p_sm) +{ + memset(p_sm, 0, sizeof(*p_sm)); + p_sm->thread_state = OSM_THREAD_STATE_NONE; + p_sm->sm_trans_id = OSM_SM_INITIAL_TID_VALUE; + cl_spinlock_construct(&p_sm->signal_lock); + cl_spinlock_construct(&p_sm->state_lock); + cl_timer_construct(&p_sm->polling_timer); /* NOTE(review): sweep_timer gets no cl_timer_construct() here, only the memset above - confirm that is sufficient */ + cl_event_construct(&p_sm->signal_event); + cl_event_construct(&p_sm->subnet_up_event); + cl_event_wheel_construct(&p_sm->trap_aging_tracker); + cl_thread_construct(&p_sm->sweeper);
+ osm_sm_mad_ctrl_construct(&p_sm->mad_ctrl); + osm_lid_mgr_construct(&p_sm->lid_mgr); + osm_ucast_mgr_construct(&p_sm->ucast_mgr); +} + +void osm_sm_shutdown(IN osm_sm_t * p_sm) +{ + boolean_t signal_event = FALSE; + + OSM_LOG_ENTER(p_sm->p_log); + + /* + * Signal our threads that we're leaving. + */ + if (p_sm->thread_state != OSM_THREAD_STATE_NONE) + signal_event = TRUE; + + p_sm->thread_state = OSM_THREAD_STATE_EXIT; + + /* + * Don't trigger unless event has been initialized. + * Destroy the thread before we tear down the other objects. + */ + if (signal_event) + cl_event_signal(&p_sm->signal_event); + + cl_timer_stop(&p_sm->polling_timer); + cl_timer_stop(&p_sm->sweep_timer); + cl_thread_destroy(&p_sm->sweeper); + + /* + * Always destroy controllers before the corresponding + * receiver to guarantee that all callbacks from the + * dispatcher are complete. + */ + osm_sm_mad_ctrl_destroy(&p_sm->mad_ctrl); + cl_disp_unregister(p_sm->ni_disp_h); + cl_disp_unregister(p_sm->pi_disp_h); + cl_disp_unregister(p_sm->gi_disp_h); + cl_disp_unregister(p_sm->si_disp_h); + cl_disp_unregister(p_sm->nd_disp_h); + cl_disp_unregister(p_sm->lft_disp_h); + cl_disp_unregister(p_sm->mft_disp_h); + cl_disp_unregister(p_sm->sm_info_disp_h); + cl_disp_unregister(p_sm->trap_disp_h); + cl_disp_unregister(p_sm->slvl_disp_h); + cl_disp_unregister(p_sm->vla_disp_h); + cl_disp_unregister(p_sm->pkey_disp_h); + cl_disp_unregister(p_sm->mlnx_epi_disp_h); + cl_disp_unregister(p_sm->sweep_fail_disp_h); + + OSM_LOG_EXIT(p_sm->p_log); +} + +void osm_sm_destroy(IN osm_sm_t * p_sm) +{ + OSM_LOG_ENTER(p_sm->p_log); + osm_lid_mgr_destroy(&p_sm->lid_mgr); + osm_ucast_mgr_destroy(&p_sm->ucast_mgr); + cl_event_wheel_destroy(&p_sm->trap_aging_tracker); + cl_timer_destroy(&p_sm->sweep_timer); + cl_timer_destroy(&p_sm->polling_timer); + cl_event_destroy(&p_sm->signal_event); + cl_event_destroy(&p_sm->subnet_up_event); + cl_spinlock_destroy(&p_sm->signal_lock); + cl_spinlock_destroy(&p_sm->state_lock); + 
free(p_sm->mlids_req); + + osm_log_v2(p_sm->p_log, OSM_LOG_SYS, FILE_ID, "Exiting SM\n"); /* Format Waived */ + OSM_LOG_EXIT(p_sm->p_log); +} + +ib_api_status_t osm_sm_init(IN osm_sm_t * p_sm, IN osm_subn_t * p_subn, + IN osm_db_t * p_db, IN osm_vendor_t * p_vendor, + IN osm_mad_pool_t * p_mad_pool, + IN osm_vl15_t * p_vl15, IN osm_log_t * p_log, + IN osm_stats_t * p_stats, + IN cl_dispatcher_t * p_disp, IN cl_plock_t * p_lock) +{ + ib_api_status_t status; + + OSM_LOG_ENTER(p_log); + + p_sm->p_subn = p_subn; /* record the collaborating objects before any sub-object is initialized */ + p_sm->p_db = p_db; + p_sm->p_vendor = p_vendor; + p_sm->p_mad_pool = p_mad_pool; + p_sm->p_vl15 = p_vl15; + p_sm->p_log = p_log; + p_sm->p_disp = p_disp; + p_sm->p_lock = p_lock; + + status = cl_spinlock_init(&p_sm->signal_lock); + if (status != CL_SUCCESS) + goto Exit; + + status = cl_spinlock_init(&p_sm->state_lock); + if (status != CL_SUCCESS) + goto Exit; + + status = cl_event_init(&p_sm->signal_event, FALSE); + if (status != CL_SUCCESS) + goto Exit; + + status = cl_event_init(&p_sm->subnet_up_event, FALSE); + if (status != CL_SUCCESS) + goto Exit; + + status = cl_timer_init(&p_sm->sweep_timer, sm_sweep, p_sm); + if (status != CL_SUCCESS) + goto Exit; + + status = cl_timer_init(&p_sm->polling_timer, + osm_sm_state_mgr_polling_callback, p_sm); + if (status != CL_SUCCESS) + goto Exit; + + p_sm->mlids_req_max = 0; + /* one zero-initialized request flag per multicast LID; calloc() does the clearing */ + p_sm->mlids_req = calloc(IB_LID_MCAST_END_HO - IB_LID_MCAST_START_HO + 1, + sizeof(p_sm->mlids_req[0])); + if (!p_sm->mlids_req) { + status = IB_INSUFFICIENT_MEMORY; /* status still held CL_SUCCESS from cl_timer_init */ + goto Exit; + } + + status = osm_sm_mad_ctrl_init(&p_sm->mad_ctrl, p_sm->p_subn, + p_sm->p_mad_pool, p_sm->p_vl15, + p_sm->p_vendor, + p_log, p_stats, p_lock, p_disp); + if (status != IB_SUCCESS) + goto Exit; + + status = cl_event_wheel_init(&p_sm->trap_aging_tracker); + if (status != IB_SUCCESS) + goto Exit; + + status = osm_lid_mgr_init(&p_sm->lid_mgr, p_sm); + if (status != IB_SUCCESS) + goto Exit; + +
status = osm_ucast_mgr_init(&p_sm->ucast_mgr, p_sm); + if (status != IB_SUCCESS) + goto Exit; + + status = IB_INSUFFICIENT_RESOURCES; + p_sm->sweep_fail_disp_h = cl_disp_register(p_disp, + OSM_MSG_LIGHT_SWEEP_FAIL, + sweep_fail_process, p_sm); + if (p_sm->sweep_fail_disp_h == CL_DISP_INVALID_HANDLE) + goto Exit; + + p_sm->ni_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_NODE_INFO, + osm_ni_rcv_process, p_sm); + if (p_sm->ni_disp_h == CL_DISP_INVALID_HANDLE) + goto Exit; + + p_sm->pi_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_PORT_INFO, + osm_pi_rcv_process, p_sm); + if (p_sm->pi_disp_h == CL_DISP_INVALID_HANDLE) + goto Exit; + + p_sm->gi_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_GUID_INFO, + osm_gi_rcv_process, p_sm); + if (p_sm->gi_disp_h == CL_DISP_INVALID_HANDLE) + goto Exit; + + p_sm->si_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_SWITCH_INFO, + osm_si_rcv_process, p_sm); + if (p_sm->si_disp_h == CL_DISP_INVALID_HANDLE) + goto Exit; + + p_sm->nd_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_NODE_DESC, + osm_nd_rcv_process, p_sm); + if (p_sm->nd_disp_h == CL_DISP_INVALID_HANDLE) + goto Exit; + + p_sm->lft_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_LFT, + osm_lft_rcv_process, p_sm); + if (p_sm->lft_disp_h == CL_DISP_INVALID_HANDLE) + goto Exit; + + p_sm->mft_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_MFT, + osm_mft_rcv_process, p_sm); + if (p_sm->mft_disp_h == CL_DISP_INVALID_HANDLE) + goto Exit; + + p_sm->sm_info_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_SM_INFO, + osm_sminfo_rcv_process, p_sm); + if (p_sm->sm_info_disp_h == CL_DISP_INVALID_HANDLE) + goto Exit; + + p_sm->trap_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_NOTICE, + osm_trap_rcv_process, p_sm); + if (p_sm->trap_disp_h == CL_DISP_INVALID_HANDLE) + goto Exit; + + p_sm->slvl_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_SLVL, + osm_slvl_rcv_process, p_sm); + if (p_sm->slvl_disp_h == CL_DISP_INVALID_HANDLE) + goto Exit; + + p_sm->vla_disp_h = cl_disp_register(p_disp, 
OSM_MSG_MAD_VL_ARB, + osm_vla_rcv_process, p_sm); + if (p_sm->vla_disp_h == CL_DISP_INVALID_HANDLE) + goto Exit; + + p_sm->pkey_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_PKEY, + osm_pkey_rcv_process, p_sm); + if (p_sm->pkey_disp_h == CL_DISP_INVALID_HANDLE) + goto Exit; + + p_sm->mlnx_epi_disp_h = cl_disp_register(p_disp, + OSM_MSG_MAD_MLNX_EXT_PORT_INFO, + osm_mlnx_epi_rcv_process, p_sm); + if (p_sm->mlnx_epi_disp_h == CL_DISP_INVALID_HANDLE) + goto Exit; + + p_subn->sm_state = p_subn->opt.sm_inactive ? + IB_SMINFO_STATE_NOTACTIVE : IB_SMINFO_STATE_DISCOVERING; + osm_report_sm_state(p_sm); + + /* + * Now that the component objects are initialized, start + * the sweeper thread if the user wants sweeping. + */ + p_sm->thread_state = OSM_THREAD_STATE_RUN; + status = cl_thread_init(&p_sm->sweeper, sm_sweeper, p_sm, + "opensm sweeper"); + if (status != IB_SUCCESS) + goto Exit; + + if (p_sm->p_subn->opt.sweep_interval) + cl_timer_start(&p_sm->sweep_timer, + p_sm->p_subn->opt.sweep_interval * 1000); + +Exit: + OSM_LOG_EXIT(p_log); + return status; +} + +void osm_sm_signal(osm_sm_t * p_sm, osm_signal_t signal) +{ + cl_spinlock_acquire(&p_sm->signal_lock); + p_sm->signal_mask |= 1 << signal; + cl_event_signal(&p_sm->signal_event); + cl_spinlock_release(&p_sm->signal_lock); +} + +void osm_sm_sweep(IN osm_sm_t * p_sm) +{ + OSM_LOG_ENTER(p_sm->p_log); + osm_sm_signal(p_sm, OSM_SIGNAL_SWEEP); + OSM_LOG_EXIT(p_sm->p_log); +} + +ib_api_status_t osm_sm_bind(IN osm_sm_t * p_sm, IN ib_net64_t port_guid) +{ + ib_api_status_t status; + + OSM_LOG_ENTER(p_sm->p_log); + + status = osm_sm_mad_ctrl_bind(&p_sm->mad_ctrl, port_guid); + + if (status != IB_SUCCESS) { + OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR 2E10: " + "SM MAD Controller bind failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + +Exit: + OSM_LOG_EXIT(p_sm->p_log); + return status; +} + +void osm_sm_reroute_mlid(osm_sm_t * sm, ib_net16_t mlid) +{ + mlid = cl_ntoh16(mlid) - IB_LID_MCAST_START_HO; + 
sm->mlids_req[mlid] = 1; + if (sm->mlids_req_max < mlid) + sm->mlids_req_max = mlid; + osm_sm_signal(sm, OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST); + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "rerouting requested for MLID 0x%x\n", + mlid + IB_LID_MCAST_START_HO); +} + +void osm_set_sm_priority(osm_sm_t * sm, uint8_t priority) +{ + uint8_t old_pri = sm->p_subn->opt.sm_priority; + + sm->p_subn->opt.sm_priority = priority; + + if (old_pri < priority && + sm->p_subn->sm_state == IB_SMINFO_STATE_STANDBY) + osm_send_trap144(sm, TRAP_144_MASK_SM_PRIORITY_CHANGE); +} diff --git a/opensm/osm_sm_mad_ctrl.c b/opensm/osm_sm_mad_ctrl.c new file mode 100644 index 0000000..57ba7ee --- /dev/null +++ b/opensm/osm_sm_mad_ctrl.c @@ -0,0 +1,926 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_sm_mad_ctrl_t. + * This object represents the SM MAD request controller object. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#define FILE_ID OSM_FILE_SM_MAD_CTRL_C +#include +#include +#include +#include +#include +#include + +/****f* opensm: SM/sm_mad_ctrl_retire_trans_mad + * NAME + * sm_mad_ctrl_retire_trans_mad + * + * DESCRIPTION + * This function handles clean-up of MADs associated with the SM's + * outstanding transactions on the wire. + * + * SYNOPSIS + */ + +static void sm_mad_ctrl_retire_trans_mad(IN osm_sm_mad_ctrl_t * p_ctrl, + IN osm_madw_t * p_madw) +{ + uint32_t outstanding; + + OSM_LOG_ENTER(p_ctrl->p_log); + + CL_ASSERT(p_madw); + /* + Return the MAD & wrapper to the pool. + */ + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, + "Retiring MAD with TID 0x%" PRIx64 "\n", + cl_ntoh64(osm_madw_get_smp_ptr(p_madw)->trans_id)); + + osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); + + outstanding = osm_stats_dec_qp0_outstanding(p_ctrl->p_stats); + + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "%u QP0 MADs outstanding%s\n", + outstanding, /* log the value just returned so the count matches the suffix below */ + outstanding ?
"" : ": wire is clean."); + + OSM_LOG_EXIT(p_ctrl->p_log); +} + +/************/ + +/****f* opensm: SM/sm_mad_ctrl_disp_done_callback + * NAME + * sm_mad_ctrl_disp_done_callback + * + * DESCRIPTION + * This function is the Dispatcher callback that indicates + * a received MAD has been processed by the recipient. + * + * SYNOPSIS + */ +static void sm_mad_ctrl_disp_done_callback(IN void *context, IN void *p_data) +{ + osm_sm_mad_ctrl_t *p_ctrl = context; + osm_madw_t *p_madw = p_data; + ib_smp_t *p_smp; + + OSM_LOG_ENTER(p_ctrl->p_log); + + /* + If the MAD that just finished processing was a response, + then retire the transaction, since we must have generated + the request. + + Otherwise, retire the transaction if a response was expected, + as in the case of a send failure. If a response was not expected, + just put the MAD back in the pool, because the MAD was a query + from some outside agent, e.g. Get(SMInfo) from another SM. + */ + p_smp = osm_madw_get_smp_ptr(p_madw); + if (ib_smp_is_response(p_smp)) { + CL_ASSERT(p_madw->resp_expected == FALSE); + sm_mad_ctrl_retire_trans_mad(p_ctrl, p_madw); + } else if (p_madw->resp_expected == TRUE) + sm_mad_ctrl_retire_trans_mad(p_ctrl, p_madw); + else + osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); + + OSM_LOG_EXIT(p_ctrl->p_log); +} + +/************/ + +/****f* opensm: SM/sm_mad_ctrl_update_wire_stats + * NAME + * sm_mad_ctrl_update_wire_stats + * + * DESCRIPTION + * Updates wire stats for outstanding MADs and calls the VL15 poller. + * + * SYNOPSIS + */ +static void sm_mad_ctrl_update_wire_stats(IN osm_sm_mad_ctrl_t * p_ctrl) +{ + uint32_t mads_on_wire; + + OSM_LOG_ENTER(p_ctrl->p_log); + + mads_on_wire = + cl_atomic_dec(&p_ctrl->p_stats->qp0_mads_outstanding_on_wire); + + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, + "%u SMPs on the wire, %u outstanding\n", mads_on_wire, + p_ctrl->p_stats->qp0_mads_outstanding); + + /* + We can signal the VL15 controller to send another MAD + if any are waiting for transmission. 
+	 */
+	osm_vl15_poll(p_ctrl->p_vl15);
+	OSM_LOG_EXIT(p_ctrl->p_log);
+}
+
+/****f* opensm: SM/sm_mad_ctrl_process_get_resp
+ * NAME
+ * sm_mad_ctrl_process_get_resp
+ *
+ * DESCRIPTION
+ * Handles a received MAD whose method is GetResp(), the most common
+ * QP0 path: the request's context is moved onto the response, the
+ * request wrapper is returned to the pool, and the response is posted
+ * to the dispatcher under the message id matching its attribute.
+ *
+ * p_ctrl              - SM MAD controller.
+ * p_madw              - the received response MAD wrapper.
+ * transaction_context - the request MAD wrapper this response answers.
+ *
+ * SYNOPSIS
+ */
+static void sm_mad_ctrl_process_get_resp(IN osm_sm_mad_ctrl_t * p_ctrl,
+					 IN osm_madw_t * p_madw,
+					 IN void *transaction_context)
+{
+	cl_disp_msgid_t disp_msg = CL_DISP_MSGID_NONE;
+	osm_madw_t *p_req_madw;
+	cl_status_t post_status;
+	ib_smp_t *p_smp;
+
+	OSM_LOG_ENTER(p_ctrl->p_log);
+
+	CL_ASSERT(p_madw);
+	CL_ASSERT(transaction_context);
+
+	p_req_madw = transaction_context;
+	p_smp = osm_madw_get_smp_ptr(p_madw);
+
+	/* A directed-route SMP without the 'D' bit is reported and dumped. */
+	if (p_smp->mgmt_class == IB_MCLASS_SUBN_DIR && !ib_smp_is_d(p_smp)) {
+		OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3102: "
+			"'D' bit not set in returned SMP\n");
+		osm_dump_dr_smp_v2(p_ctrl->p_log, p_smp, FILE_ID, OSM_LOG_ERROR);
+	}
+
+	sm_mad_ctrl_update_wire_stats(p_ctrl);
+
+	/*
+	   Hand the requester's context over to the response wrapper so the
+	   receiving controller can recover what it knew about this
+	   transaction; after that the request wrapper is no longer needed
+	   and goes back to the pool.
+	 */
+	osm_madw_copy_context(p_madw, p_req_madw);
+	osm_mad_pool_put(p_ctrl->p_mad_pool, p_req_madw);
+
+	/*
+	   attr_id is compared as found in the MAD, i.e. in network
+	   byte order.
+	 */
+	switch (p_smp->attr_id) {
+	case IB_MAD_ATTR_NODE_DESC:
+		disp_msg = OSM_MSG_MAD_NODE_DESC;
+		break;
+	case IB_MAD_ATTR_NODE_INFO:
+		disp_msg = OSM_MSG_MAD_NODE_INFO;
+		break;
+	case IB_MAD_ATTR_GUID_INFO:
+		disp_msg = OSM_MSG_MAD_GUID_INFO;
+		break;
+	case IB_MAD_ATTR_SWITCH_INFO:
+		disp_msg = OSM_MSG_MAD_SWITCH_INFO;
+		break;
+	case IB_MAD_ATTR_PORT_INFO:
+		disp_msg = OSM_MSG_MAD_PORT_INFO;
+		break;
+	case IB_MAD_ATTR_LIN_FWD_TBL:
+		disp_msg = OSM_MSG_MAD_LFT;
+		break;
+	case IB_MAD_ATTR_MCAST_FWD_TBL:
+		disp_msg = OSM_MSG_MAD_MFT;
+		break;
+	case IB_MAD_ATTR_SM_INFO:
+		disp_msg = OSM_MSG_MAD_SM_INFO;
+		break;
+	case IB_MAD_ATTR_SLVL_TABLE:
+		disp_msg = OSM_MSG_MAD_SLVL;
+		break;
+	case IB_MAD_ATTR_VL_ARBITRATION:
+		disp_msg = OSM_MSG_MAD_VL_ARB;
+		break;
+	case IB_MAD_ATTR_P_KEY_TABLE:
+		disp_msg = OSM_MSG_MAD_PKEY;
+		break;
+	case IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO:
+		disp_msg = OSM_MSG_MAD_MLNX_EXT_PORT_INFO;
+		break;
+	case IB_MAD_ATTR_CLASS_PORT_INFO:
+	case IB_MAD_ATTR_NOTICE:
+	case IB_MAD_ATTR_INFORM_INFO:
+	default:
+		/* No controller for this attribute: count, log, drop. */
+		cl_atomic_inc(&p_ctrl->p_stats->qp0_mads_rcvd_unknown);
+		OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3103: "
+			"Unsupported attribute 0x%X (%s)\n",
+			cl_ntoh16(p_smp->attr_id),
+			ib_get_sm_attr_str(p_smp->attr_id));
+		osm_dump_dr_smp_v2(p_ctrl->p_log, p_smp, FILE_ID, OSM_LOG_ERROR);
+		osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw);
+		goto Exit;
+	}
+
+	/*
+	   Queue the MAD on the dispatcher; the controller registered for
+	   disp_msg processes it asynchronously and
+	   sm_mad_ctrl_disp_done_callback runs once it is done.
+	 */
+	OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "Posting Dispatcher message %s\n",
+		osm_get_disp_msg_str(disp_msg));
+
+	post_status = cl_disp_post(p_ctrl->h_disp, disp_msg, p_madw,
+				   sm_mad_ctrl_disp_done_callback, p_ctrl);
+
+	/*
+	   NOTE(review): on failure p_madw is neither returned to the pool
+	   nor retired here - presumably the done callback does not run for
+	   a message that was never queued; confirm against cl_disp_post().
+	 */
+	if (post_status != CL_SUCCESS)
+		OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3104: "
+			"Dispatcher post message failed (%s) for attribute 0x%X (%s)\n",
+			CL_STATUS_MSG(post_status), cl_ntoh16(p_smp->attr_id),
+			ib_get_sm_attr_str(p_smp->attr_id));
+
+Exit:
+	OSM_LOG_EXIT(p_ctrl->p_log);
+}
+
+/****f* opensm: SM/sm_mad_ctrl_process_get
+ * NAME
+ * sm_mad_ctrl_process_get
+ *
+ * DESCRIPTION
+ * This function handles method Get() for received MADs.
+ *
+ * SYNOPSIS
+ */
+static void sm_mad_ctrl_process_get(IN osm_sm_mad_ctrl_t * p_ctrl,
+				    IN osm_madw_t * p_madw)
+{
+	ib_smp_t *p_smp;
+	cl_status_t status;
+	cl_disp_msgid_t msg_id = CL_DISP_MSGID_NONE;
+
+	OSM_LOG_ENTER(p_ctrl->p_log);
+
+	p_smp = osm_madw_get_smp_ptr(p_madw);
+
+	/*
+	   Note that attr_id (like the rest of the MAD) is in
+	   network byte order.
+	 */
+	switch (p_smp->attr_id) {
+	case IB_MAD_ATTR_SM_INFO:
+		msg_id = OSM_MSG_MAD_SM_INFO;
+		break;
+	default:
+		cl_atomic_inc(&p_ctrl->p_stats->qp0_mads_rcvd_unknown);
+		OSM_LOG(p_ctrl->p_log, OSM_LOG_VERBOSE,
+			"Ignoring SubnGet MAD - unsupported attribute 0x%X\n",
+			cl_ntoh16(p_smp->attr_id));
+		osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw);
+		goto Exit;
+	}
+
+	/*
+	   Post this MAD to the dispatcher for asynchronous
+	   processing by the appropriate controller.
+	 */
+
+	OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "Posting Dispatcher message %s\n",
+		osm_get_disp_msg_str(msg_id));
+
+	status = cl_disp_post(p_ctrl->h_disp, msg_id, p_madw,
+			      sm_mad_ctrl_disp_done_callback, p_ctrl);
+
+	if (status != CL_SUCCESS) {
+		OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3106: "
+			"Dispatcher post message failed (%s)\n",
+			CL_STATUS_MSG(status));
+		goto Exit;
+	}
+
+Exit:
+	OSM_LOG_EXIT(p_ctrl->p_log);
+}
+
+/*
+ * PARAMETERS
+ *
+ * RETURN VALUES
+ *
+ * NOTES
+ *
+ * SEE ALSO
+ *********/
+
+/****f* opensm: SM/sm_mad_ctrl_process_set
+ * NAME
+ * sm_mad_ctrl_process_set
+ *
+ * DESCRIPTION
+ * Handles a received MAD whose method is Set(). Only the SMInfo
+ * attribute is accepted and posted to the dispatcher; any other
+ * attribute is counted as unknown, logged, and the MAD is returned
+ * to the pool.
+ *
+ * p_ctrl - SM MAD controller.
+ * p_madw - the received MAD wrapper.
+ *
+ * SYNOPSIS
+ */
+static void sm_mad_ctrl_process_set(IN osm_sm_mad_ctrl_t * p_ctrl,
+				    IN osm_madw_t * p_madw)
+{
+	cl_disp_msgid_t disp_msg = CL_DISP_MSGID_NONE;
+	cl_status_t post_status;
+	ib_smp_t *p_smp;
+
+	OSM_LOG_ENTER(p_ctrl->p_log);
+
+	p_smp = osm_madw_get_smp_ptr(p_madw);
+
+	/*
+	   attr_id is compared as found in the MAD, i.e. in network
+	   byte order.
+	 */
+	switch (p_smp->attr_id) {
+	case IB_MAD_ATTR_SM_INFO:
+		disp_msg = OSM_MSG_MAD_SM_INFO;
+		break;
+	default:
+		/* Unsupported attribute: count, log, dump and drop. */
+		cl_atomic_inc(&p_ctrl->p_stats->qp0_mads_rcvd_unknown);
+		OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3107: "
+			"Unsupported attribute 0x%X (%s)\n",
+			cl_ntoh16(p_smp->attr_id),
+			ib_get_sm_attr_str(p_smp->attr_id));
+		osm_dump_dr_smp_v2(p_ctrl->p_log, p_smp, FILE_ID, OSM_LOG_ERROR);
+		osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw);
+		goto Exit;
+	}
+
+	/*
+	   Queue the MAD on the dispatcher for asynchronous processing;
+	   sm_mad_ctrl_disp_done_callback runs when the recipient is done.
+	 */
+	OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "Posting Dispatcher message %s\n",
+		osm_get_disp_msg_str(disp_msg));
+
+	post_status = cl_disp_post(p_ctrl->h_disp, disp_msg, p_madw,
+				   sm_mad_ctrl_disp_done_callback, p_ctrl);
+
+	if (post_status != CL_SUCCESS)
+		OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3108: "
+			"Dispatcher post message failed (%s)\n",
+			CL_STATUS_MSG(post_status));
+
+Exit:
+	OSM_LOG_EXIT(p_ctrl->p_log);
+}
+
+/*
+ * PARAMETERS
+ *
+ * RETURN VALUES
+ *
+ * NOTES
+ *
+ * SEE ALSO
+ *********/
+
+/****f* opensm: SM/sm_mad_ctrl_process_trap
+ * NAME
+ * sm_mad_ctrl_process_trap
+ *
+ * DESCRIPTION
+ * This function handles method Trap() for received MADs.
+ *
+ * SYNOPSIS
+ */
+static void sm_mad_ctrl_process_trap(IN osm_sm_mad_ctrl_t * p_ctrl,
+				     IN osm_madw_t * p_madw)
+{
+	ib_smp_t *p_smp;
+	cl_status_t status;
+	cl_disp_msgid_t msg_id = CL_DISP_MSGID_NONE;
+
+	OSM_LOG_ENTER(p_ctrl->p_log);
+
+	p_smp = osm_madw_get_smp_ptr(p_madw);
+
+	/* Make sure OpenSM is master. If not - then we should not process the trap */
+	if (p_ctrl->p_subn->sm_state != IB_SMINFO_STATE_MASTER) {
+		OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG,
+			"Received trap but OpenSM is not in MASTER state. "
+			"Dropping mad\n");
+		osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw);
+		goto Exit;
+	}
+
+	/*
+	   Note that attr_id (like the rest of the MAD) is in
+	   network byte order.
+	 */
+	switch (p_smp->attr_id) {
+	case IB_MAD_ATTR_NOTICE:
+		msg_id = OSM_MSG_MAD_NOTICE;
+		break;
+	default:
+		cl_atomic_inc(&p_ctrl->p_stats->qp0_mads_rcvd_unknown);
+		OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3109: "
+			"Unsupported attribute 0x%X (%s)\n",
+			cl_ntoh16(p_smp->attr_id),
+			ib_get_sm_attr_str(p_smp->attr_id));
+		osm_dump_dr_smp_v2(p_ctrl->p_log, p_smp, FILE_ID, OSM_LOG_ERROR);
+		osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw);
+		goto Exit;
+	}
+
+	/*
+	   Post this MAD to the dispatcher for asynchronous
+	   processing by the appropriate controller.
+	 */
+
+	OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "Posting Dispatcher message %s\n",
+		osm_get_disp_msg_str(msg_id));
+
+	status = cl_disp_post(p_ctrl->h_disp, msg_id, p_madw,
+			      sm_mad_ctrl_disp_done_callback, p_ctrl);
+
+	if (status != CL_SUCCESS) {
+		OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3110: "
+			"Dispatcher post message failed (%s)\n",
+			CL_STATUS_MSG(status));
+		goto Exit;
+	}
+
+Exit:
+	OSM_LOG_EXIT(p_ctrl->p_log);
+}
+
+/*
+ * PARAMETERS
+ *
+ * RETURN VALUES
+ *
+ * NOTES
+ *
+ * SEE ALSO
+ *********/
+
+/****f* opensm: SM/sm_mad_ctrl_process_trap_repress
+ * NAME
+ * sm_mad_ctrl_process_trap_repress
+ *
+ * DESCRIPTION
+ * This function handles method TrapRepress() for received MADs.
+ * For the Notice attribute the wire statistics are updated and the
+ * MAD is retired; any other attribute is counted as unknown, logged,
+ * and the MAD is returned to the pool.
+ *
+ * p_ctrl - SM MAD controller.
+ * p_madw - the received MAD wrapper.
+ *
+ * SYNOPSIS
+ */
+static void sm_mad_ctrl_process_trap_repress(IN osm_sm_mad_ctrl_t * p_ctrl,
+					     IN osm_madw_t * p_madw)
+{
+	ib_smp_t *p_smp;
+
+	OSM_LOG_ENTER(p_ctrl->p_log);
+
+	p_smp = osm_madw_get_smp_ptr(p_madw);
+
+	/*
+	   Note that attr_id (like the rest of the MAD) is in
+	   network byte order.
+	 */
+	switch (p_smp->attr_id) {
+	case IB_MAD_ATTR_NOTICE:
+		sm_mad_ctrl_update_wire_stats(p_ctrl);
+		sm_mad_ctrl_retire_trans_mad(p_ctrl, p_madw);
+		break;
+	default:
+		cl_atomic_inc(&p_ctrl->p_stats->qp0_mads_rcvd_unknown);
+		OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3105: "
+			"Unsupported attribute 0x%X (%s)\n",
+			cl_ntoh16(p_smp->attr_id),
+			ib_get_sm_attr_str(p_smp->attr_id));
+		osm_dump_dr_smp_v2(p_ctrl->p_log, p_smp, FILE_ID, OSM_LOG_ERROR);
+		osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw);
+		break;
+	}
+
+	OSM_LOG_EXIT(p_ctrl->p_log);
+}
+
+/*
+ * Log (ERR 3111) a received SMP that carries a non-zero status.
+ *
+ * p_log  - log object to write to.
+ * p_smp  - the received SMP.
+ * status - the status extracted by the caller, in network byte order.
+ *
+ * For directed-route SMPs the initial and return paths are appended to
+ * the message as comma-separated hop lists.
+ */
+static void log_rcv_cb_error(osm_log_t *p_log, ib_smp_t *p_smp, ib_net16_t status)
+{
+	char buf[BUF_SIZE];
+	uint32_t i;
+
+	if (p_smp->mgmt_class == IB_MCLASS_SUBN_DIR) {
+		char ipath[IB_SUBNET_PATH_HOPS_MAX * 4];
+		char rpath[IB_SUBNET_PATH_HOPS_MAX * 4];
+		uint32_t hops = p_smp->hop_count;
+		int ni, nr;
+
+		/*
+		   hop_count comes straight from the received MAD. Clamp it so
+		   that a malformed SMP can neither overrun ipath/rpath (sized
+		   for IB_SUBNET_PATH_HOPS_MAX entries of at most ",255") nor
+		   index past the path arrays.
+		   NOTE(review): presumably initial_path/return_path hold
+		   IB_SUBNET_PATH_HOPS_MAX entries - confirm in ib_types.h.
+		 */
+		if (hops >= IB_SUBNET_PATH_HOPS_MAX)
+			hops = IB_SUBNET_PATH_HOPS_MAX - 1;
+
+		ni = snprintf(ipath, sizeof(ipath), "%d",
+			      p_smp->initial_path[0]);
+		nr = snprintf(rpath, sizeof(rpath), "%d",
+			      p_smp->return_path[0]);
+		for (i = 1; i <= hops; i++) {
+			ni += snprintf(ipath + ni, sizeof(ipath) - ni, ",%d",
+				       p_smp->initial_path[i]);
+			nr += snprintf(rpath + nr, sizeof(rpath) - nr, ",%d",
+				       p_smp->return_path[i]);
+		}
+		snprintf(buf, sizeof(buf),
+			 "\n\t\t\tInitial path: %s Return path: %s",
+			 ipath, rpath);
+	}
+
+	/* buf is only read when it was filled in above. */
+	OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3111: "
+		"Received MAD with error status = 0x%X\n"
+		"\t\t\t%s(%s), attr_mod 0x%x, TID 0x%" PRIx64 "%s\n",
+		cl_ntoh16(status), ib_get_sm_method_str(p_smp->method),
+		ib_get_sm_attr_str(p_smp->attr_id), cl_ntoh32(p_smp->attr_mod),
+		cl_ntoh64(p_smp->trans_id),
+		p_smp->mgmt_class == IB_MCLASS_SUBN_DIR ? buf : "");
+
+	osm_dump_dr_smp_v2(p_log, p_smp, FILE_ID, OSM_LOG_VERBOSE);
+}
+
+/*
+ * PARAMETERS
+ *
+ * RETURN VALUES
+ *
+ * NOTES
+ *
+ * SEE ALSO
+ *********/
+
+/****f* opensm: SM/sm_mad_ctrl_rcv_callback
+ * NAME
+ * sm_mad_ctrl_rcv_callback
+ *
+ * DESCRIPTION
+ * This is the callback from the transport layer for received MADs.
+ *
+ * SYNOPSIS
+ */
+static void sm_mad_ctrl_rcv_callback(IN osm_madw_t * p_madw,
+				     IN void *bind_context,
+				     IN osm_madw_t * p_req_madw)
+{
+	osm_sm_mad_ctrl_t *p_ctrl = bind_context;
+	ib_net16_t mad_status;
+	ib_smp_t *p_smp;
+
+	OSM_LOG_ENTER(p_ctrl->p_log);
+
+	CL_ASSERT(p_madw);
+
+	/* Count every MAD that arrives, whether solicited or not. */
+	cl_atomic_inc(&p_ctrl->p_stats->qp0_mads_rcvd);
+
+	OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "%u QP0 MADs received\n",
+		p_ctrl->p_stats->qp0_mads_rcvd);
+
+	p_smp = osm_madw_get_smp_ptr(p_madw);
+
+	/* While shutting down, only release the MAD - no processing. */
+	if (osm_exit_flag) {
+		OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR,
+			"Ignoring received mad - since we are exiting\n");
+
+		osm_dump_dr_smp_v2(p_ctrl->p_log, p_smp, FILE_ID, OSM_LOG_DEBUG);
+
+		/*
+		   Responses and MADs that expected a response belong to one
+		   of our transactions and are retired; anything else simply
+		   goes back to the pool.
+		 */
+		if (ib_smp_is_response(p_smp)) {
+			CL_ASSERT(p_madw->resp_expected == FALSE);
+			sm_mad_ctrl_retire_trans_mad(p_ctrl, p_madw);
+		} else if (p_madw->resp_expected == TRUE)
+			sm_mad_ctrl_retire_trans_mad(p_ctrl, p_madw);
+		else
+			osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw);
+
+		goto Exit;
+	}
+
+	if (OSM_LOG_IS_ACTIVE_V2(p_ctrl->p_log, OSM_LOG_FRAMES))
+		osm_dump_dr_smp_v2(p_ctrl->p_log, p_smp, FILE_ID, OSM_LOG_FRAMES);
+
+	/* Directed-route SMPs go through the accessor; others use the field. */
+	mad_status = (p_smp->mgmt_class == IB_MCLASS_SUBN_DIR) ?
+	    ib_smp_get_status(p_smp) : p_smp->status;
+
+	if (mad_status != 0)
+		log_rcv_cb_error(p_ctrl->p_log, p_smp, mad_status);
+
+	/* Dispatch on the method; each handler takes ownership of p_madw. */
+	switch (p_smp->method) {
+	case IB_MAD_METHOD_GET_RESP:
+		CL_ASSERT(p_req_madw != NULL);
+		sm_mad_ctrl_process_get_resp(p_ctrl, p_madw, p_req_madw);
+		break;
+	case IB_MAD_METHOD_GET:
+		CL_ASSERT(p_req_madw == NULL);
+		sm_mad_ctrl_process_get(p_ctrl, p_madw);
+		break;
+	case IB_MAD_METHOD_TRAP:
+		CL_ASSERT(p_req_madw == NULL);
+		sm_mad_ctrl_process_trap(p_ctrl, p_madw);
+		break;
+	case IB_MAD_METHOD_SET:
+		CL_ASSERT(p_req_madw == NULL);
+		sm_mad_ctrl_process_set(p_ctrl, p_madw);
+		break;
+	case IB_MAD_METHOD_TRAP_REPRESS:
+		CL_ASSERT(p_req_madw != NULL);
+		sm_mad_ctrl_process_trap_repress(p_ctrl, p_madw);
+		break;
+	case IB_MAD_METHOD_SEND:
+	case IB_MAD_METHOD_REPORT:
+	case IB_MAD_METHOD_REPORT_RESP:
+	default:
+		/* Unsupported method: count, log, dump and drop. */
+		cl_atomic_inc(&p_ctrl->p_stats->qp0_mads_rcvd_unknown);
+		OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3112: "
+			"Unsupported method = 0x%X\n", p_smp->method);
+		osm_dump_dr_smp_v2(p_ctrl->p_log, p_smp, FILE_ID, OSM_LOG_ERROR);
+		osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw);
+		break;
+	}
+
+Exit:
+	OSM_LOG_EXIT(p_ctrl->p_log);
+}
+
+/*
+ * PARAMETERS
+ *
+ * RETURN VALUES
+ *
+ * NOTES
+ *
+ * SEE ALSO
+ *********/
+
+/****f* opensm: SM/sm_mad_ctrl_send_err_cb
+ * NAME
+ * sm_mad_ctrl_send_err_cb
+ *
+ * DESCRIPTION
+ * This is the callback from the transport layer for send errors
+ * on MADs that were expecting a response.
+ *
+ * SYNOPSIS
+ */
+static void sm_mad_ctrl_send_err_cb(IN void *context, IN osm_madw_t * p_madw)
+{
+	osm_sm_mad_ctrl_t *p_ctrl = context;
+	ib_api_status_t status;
+	ib_smp_t *p_smp;
+
+	OSM_LOG_ENTER(p_ctrl->p_log);
+
+	CL_ASSERT(p_madw);
+
+	p_smp = osm_madw_get_smp_ptr(p_madw);
+	OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3113: "
+		"MAD completed in error (%s): "
+		"%s(%s), attr_mod 0x%x, TID 0x%" PRIx64 "\n",
+		ib_get_err_str(p_madw->status),
+		ib_get_sm_method_str(p_smp->method),
+		ib_get_sm_attr_str(p_smp->attr_id), cl_ntoh32(p_smp->attr_mod),
+		cl_ntoh64(p_smp->trans_id));
+
+	/*
+	   If this was a SubnSet MAD, then this error might indicate a problem
+	   in configuring the subnet. In this case - need to mark that there was
+	   such a problem. The subnet will not be up, and the next sweep should
+	   be a heavy sweep as well.
+	 */
+	if (p_smp->method == IB_MAD_METHOD_SET &&
+	    (p_smp->attr_id == IB_MAD_ATTR_PORT_INFO ||
+	     p_smp->attr_id == IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO ||
+	     p_smp->attr_id == IB_MAD_ATTR_MCAST_FWD_TBL ||
+	     p_smp->attr_id == IB_MAD_ATTR_SWITCH_INFO ||
+	     p_smp->attr_id == IB_MAD_ATTR_LIN_FWD_TBL ||
+	     p_smp->attr_id == IB_MAD_ATTR_P_KEY_TABLE)) {
+		OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3119: "
+			"Set method failed for attribute 0x%X (%s)\n",
+			cl_ntoh16(p_smp->attr_id),
+			ib_get_sm_attr_str(p_smp->attr_id));
+		p_ctrl->p_subn->subnet_initialization_error = TRUE;
+	} else if (p_madw->status == IB_TIMEOUT &&
+		   p_smp->method == IB_MAD_METHOD_GET) {
+		OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3120: "
+			"Timeout while getting attribute 0x%X (%s); "
+			"Possible mis-set mkey?\n",
+			cl_ntoh16(p_smp->attr_id),
+			ib_get_sm_attr_str(p_smp->attr_id));
+	}
+
+	osm_dump_dr_smp_v2(p_ctrl->p_log, p_smp, FILE_ID, OSM_LOG_VERBOSE);
+
+	/*
+	   Since we did not get any response we suspect the DR path
+	   used for the target port.
+	   Find it and replace it with an alternate path.
+	   This is true only if the destination lid is not 0xFFFF, since
+	   then we are aiming for a specific path and not specific destination
+	   lid.
+	 */
+	/* For now - do not add the alternate dr path to the release */
+#if 0
+	if (p_madw->mad_addr.dest_lid != 0xFFFF) {
+		osm_physp_t *p_physp = osm_get_physp_by_mad_addr(p_ctrl->p_log,
+								 p_ctrl->p_subn,
+								 &(p_madw->
+								   mad_addr));
+		if (!p_physp) {
+			OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3114: "
+				"Failed to find the corresponding phys port\n");
+		} else {
+			osm_physp_replace_dr_path_with_alternate_dr_path
+			    (p_ctrl->p_log, p_ctrl->p_subn, p_physp,
+			     p_madw->h_bind);
+		}
+	}
+#endif
+
+	/*
+	   An error occurred. No response was received to a request MAD.
+	   Retire the original request MAD.
+	 */
+	sm_mad_ctrl_update_wire_stats(p_ctrl);
+
+	if (osm_madw_get_err_msg(p_madw) != CL_DISP_MSGID_NONE) {
+		OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG,
+			"Posting Dispatcher message %s\n",
+			osm_get_disp_msg_str(osm_madw_get_err_msg(p_madw)));
+
+		status = cl_disp_post(p_ctrl->h_disp,
+				      osm_madw_get_err_msg(p_madw), p_madw,
+				      sm_mad_ctrl_disp_done_callback, p_ctrl);
+		if (status != CL_SUCCESS)
+			OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3115: "
+				"Dispatcher post message failed (%s)\n",
+				CL_STATUS_MSG(status));
+	} else
+		/*
+		   No error message was provided, just retire the MAD.
+		 */
+		sm_mad_ctrl_retire_trans_mad(p_ctrl, p_madw);
+
+	OSM_LOG_EXIT(p_ctrl->p_log);
+}
+
+/*
+ * PARAMETERS
+ *
+ * RETURN VALUES
+ *
+ * NOTES
+ *
+ * SEE ALSO
+ *********/
+
+/*
+ * Put the controller into a known empty state: every field zeroed and
+ * the dispatcher handle marked invalid. Must precede any other use.
+ */
+void osm_sm_mad_ctrl_construct(IN osm_sm_mad_ctrl_t * p_ctrl)
+{
+	CL_ASSERT(p_ctrl);
+	memset(p_ctrl, 0, sizeof(*p_ctrl));
+	p_ctrl->h_disp = CL_DISP_INVALID_HANDLE;
+}
+
+/*
+ * Release what the controller holds: the vendor bind, if one was made,
+ * and the dispatcher registration.
+ */
+void osm_sm_mad_ctrl_destroy(IN osm_sm_mad_ctrl_t * p_ctrl)
+{
+	CL_ASSERT(p_ctrl);
+
+	/*
+	   h_bind is a vendor bind handle, so compare it against the bind
+	   sentinel that osm_sm_mad_ctrl_bind() uses rather than the
+	   dispatcher's invalid-handle constant.
+	 */
+	if (p_ctrl->h_bind != OSM_BIND_INVALID_HANDLE)
+		osm_vendor_unbind(p_ctrl->h_bind);
+	cl_disp_unregister(p_ctrl->h_disp);
+}
+
+/*
+ * Initialize the controller: record the collaborating objects and
+ * register with the dispatcher.
+ *
+ * Returns IB_SUCCESS, or IB_INSUFFICIENT_RESOURCES when the dispatcher
+ * registration fails.
+ */
+ib_api_status_t osm_sm_mad_ctrl_init(IN osm_sm_mad_ctrl_t * p_ctrl,
+				     IN osm_subn_t * p_subn,
+				     IN osm_mad_pool_t * p_mad_pool,
+				     IN osm_vl15_t * p_vl15,
+				     IN osm_vendor_t * p_vendor,
+				     IN osm_log_t * p_log,
+				     IN osm_stats_t * p_stats,
+				     IN cl_plock_t * p_lock,
+				     IN cl_dispatcher_t * p_disp)
+{
+	ib_api_status_t status = IB_SUCCESS;
+
+	OSM_LOG_ENTER(p_log);
+
+	osm_sm_mad_ctrl_construct(p_ctrl);
+
+	p_ctrl->p_subn = p_subn;
+	p_ctrl->p_log = p_log;
+	p_ctrl->p_disp = p_disp;
+	p_ctrl->p_mad_pool = p_mad_pool;
+	p_ctrl->p_vendor = p_vendor;
+	p_ctrl->p_stats = p_stats;
+	p_ctrl->p_lock = p_lock;
+	p_ctrl->p_vl15 = p_vl15;
+
+	/* Registered without a message id or callback: this handle only posts. */
+	p_ctrl->h_disp = cl_disp_register(p_disp, CL_DISP_MSGID_NONE, NULL,
+					  NULL);
+
+	if (p_ctrl->h_disp == CL_DISP_INVALID_HANDLE) {
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3116: "
+			"Dispatcher registration failed\n");
+		status = IB_INSUFFICIENT_RESOURCES;
+	}
+
+	OSM_LOG_EXIT(p_log);
+	return status;
+}
+
+/*
+ * Bind the controller to the given port GUID for the directed-route
+ * subnet management class, installing the receive and send-error
+ * callbacks of this file.
+ *
+ * Returns IB_SUCCESS, or IB_ERROR when the controller is already bound
+ * or the vendor layer refuses the bind.
+ */
+ib_api_status_t osm_sm_mad_ctrl_bind(IN osm_sm_mad_ctrl_t * p_ctrl,
+				     IN ib_net64_t port_guid)
+{
+	osm_bind_info_t bind_info;
+	ib_api_status_t status = IB_SUCCESS;
+
+	OSM_LOG_ENTER(p_ctrl->p_log);
+
+	if (p_ctrl->h_bind != OSM_BIND_INVALID_HANDLE) {
+		OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3117: "
+			"Multiple binds not allowed\n");
+		status = IB_ERROR;
+		goto Exit;
+	}
+
+	bind_info.class_version = 1;
+	bind_info.is_report_processor = FALSE;
+	bind_info.is_responder = TRUE;
+	bind_info.is_trap_processor = TRUE;
+	bind_info.mad_class = IB_MCLASS_SUBN_DIR;
+	bind_info.port_guid = port_guid;
+	bind_info.recv_q_size = OSM_SM_DEFAULT_QP0_RCV_SIZE;
+	bind_info.send_q_size = OSM_SM_DEFAULT_QP0_SEND_SIZE;
+	bind_info.timeout = p_ctrl->p_subn->opt.transaction_timeout;
+	bind_info.retries = p_ctrl->p_subn->opt.transaction_retries;
+
+	OSM_LOG(p_ctrl->p_log, OSM_LOG_VERBOSE,
+		"Binding to port 0x%" PRIx64 "\n", cl_ntoh64(port_guid));
+
+	p_ctrl->h_bind = osm_vendor_bind(p_ctrl->p_vendor, &bind_info,
+					 p_ctrl->p_mad_pool,
+					 sm_mad_ctrl_rcv_callback,
+					 sm_mad_ctrl_send_err_cb, p_ctrl);
+
+	if (p_ctrl->h_bind == OSM_BIND_INVALID_HANDLE) {
+		status = IB_ERROR;
+		OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3118: "
+			"Vendor specific bind failed\n");
+		goto Exit;
+	}
+
+Exit:
+	OSM_LOG_EXIT(p_ctrl->p_log);
+	return status;
+}
diff --git a/opensm/osm_sm_state_mgr.c b/opensm/osm_sm_state_mgr.c
new file mode 100644
index 0000000..2d9c1af
--- /dev/null
+++ b/opensm/osm_sm_state_mgr.c
@@ -0,0 +1,588 @@
+/*
+ * Copyright (c) 2002-2013 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_sm_state_mgr_t. + * This file implements the SM State Manager object. 
+ */
+
+#if HAVE_CONFIG_H
+# include
+#endif /* HAVE_CONFIG_H */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#define FILE_ID OSM_FILE_SM_STATE_MGR_C
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Announce the SM state currently stored in the subnet object: once
+ * through osm_log_v2 at OSM_LOG_SYS level and once as a message box
+ * at verbose level.
+ */
+void osm_report_sm_state(osm_sm_t * sm)
+{
+	const char *state_name;
+	char banner[64];
+
+	state_name = osm_get_sm_mgr_state_str(sm->p_subn->sm_state);
+
+	osm_log_v2(sm->p_log, OSM_LOG_SYS, FILE_ID, "Entering %s state\n", state_name);
+	snprintf(banner, sizeof(banner), "ENTERING SM %s STATE", state_name);
+	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, banner);
+}
+
+/*
+ * Send a SubnGet(SMInfo) to the SM being polled.
+ *
+ * sm       - the SM object.
+ * sm_state - the state the caller observed; selects which GUID to poll.
+ *
+ * Returns TRUE when the request was handed to osm_req_get successfully,
+ * FALSE when nothing was sent (polling ourselves, unknown port, or a
+ * request failure).
+ */
+static boolean_t sm_state_mgr_send_master_sm_info_req(osm_sm_t * sm, uint8_t sm_state)
+{
+	boolean_t request_sent = FALSE;
+	osm_madw_context_t mad_ctx;
+	const osm_port_t *p_target;
+	ib_api_status_t req_status;
+	osm_dr_path_t path;
+	ib_net64_t target_guid;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	memset(&mad_ctx, 0, sizeof(mad_ctx));
+
+	/*
+	 * In STANDBY we poll the master SM (master_sm_guid). Otherwise we
+	 * are a MASTER waiting on a remote SM, whose GUID is kept in
+	 * polling_sm_guid.
+	 */
+	target_guid = (sm_state == IB_SMINFO_STATE_STANDBY) ?
+	    sm->master_sm_guid : sm->polling_sm_guid;
+
+	/* Never poll our own port. */
+	if (target_guid == sm->p_subn->sm_port_guid) {
+		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+			"OpenSM doesn't poll itself\n");
+		goto Exit;
+	}
+
+	p_target = osm_get_port_by_guid(sm->p_subn, target_guid);
+	if (p_target == NULL) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3203: "
+			"No port object for GUID 0x%016" PRIx64 "\n",
+			cl_ntoh64(target_guid));
+		goto Exit;
+	}
+
+	mad_ctx.smi_context.port_guid = target_guid;
+	mad_ctx.smi_context.set_method = FALSE;
+
+	/* Work on a private copy of the port's directed-route path. */
+	memcpy(&path, osm_physp_get_dr_path_ptr(p_target->p_physp), sizeof(path));
+
+	req_status = osm_req_get(sm, &path,
+				 IB_MAD_ATTR_SM_INFO, 0, FALSE,
+				 ib_port_info_get_m_key(&p_target->p_physp->port_info),
+				 0, CL_DISP_MSGID_NONE, &mad_ctx);
+
+	if (req_status == IB_SUCCESS)
+		request_sent = TRUE;
+	else
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3204: "
+			"Failure requesting SMInfo (%s)\n",
+			ib_get_err_str(req_status));
+
+Exit:
+	OSM_LOG_EXIT(sm->p_log);
+
+	return (request_sent);
+}
+
+static void sm_state_mgr_start_polling(osm_sm_t * sm)
+{
+	uint32_t timeout;
+	cl_status_t cl_status;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	/*
+	 * Init the retry_number back to zero - need to restart counting
+	 */
+	sm->retry_number = 0;
+
+	/*
+	 * Send a SubnGet(SMInfo) query to the current (or new) master found.
+	 */
+	CL_PLOCK_ACQUIRE(sm->p_lock);
+	timeout = sm->p_subn->opt.sminfo_polling_timeout;
+	sm_state_mgr_send_master_sm_info_req(sm, sm->p_subn->sm_state);
+	CL_PLOCK_RELEASE(sm->p_lock);
+
+	/*
+	 * Start a timer that will wake up every sminfo_polling_timeout milliseconds.
+	 * The callback of the timer will send a SubnGet(SMInfo) to the Master SM
+	 * and restart the timer
+	 */
+	cl_status = cl_timer_start(&sm->polling_timer, timeout);
+	if (cl_status != CL_SUCCESS)
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3210: "
+			"Failed to start polling timer\n");
+
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/*
+ * Polling timer callback. Sends the next SubnGet(SMInfo) to the SM
+ * being polled and re-arms the timer, or raises
+ * OSM_SM_SIGNAL_POLLING_TIMEOUT once the retry budget is used up.
+ *
+ * context - the osm_sm_t that owns the polling timer.
+ */
+void osm_sm_state_mgr_polling_callback(IN void *context)
+{
+	osm_sm_t *sm = context;
+	cl_status_t timer_status;
+	uint32_t poll_interval;
+	uint8_t state_snapshot;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	/* Take a snapshot of the SM state under the state lock. */
+	cl_spinlock_acquire(&sm->state_lock);
+	state_snapshot = sm->p_subn->sm_state;
+	cl_spinlock_release(&sm->state_lock);
+
+	CL_PLOCK_ACQUIRE(sm->p_lock);
+	poll_interval = sm->p_subn->opt.sminfo_polling_timeout;
+
+	/*
+	 * Polling only makes sense in two situations:
+	 * 1. We are a STANDBY sm polling on the master SM.
+	 * 2. We are a MASTER sm, waiting for a handover from a remote master sm.
+	 * In any other situation the poller is not restarted.
+	 */
+	if (state_snapshot != IB_SMINFO_STATE_STANDBY &&
+	    (state_snapshot != IB_SMINFO_STATE_MASTER ||
+	     sm->polling_sm_guid == 0)) {
+		CL_PLOCK_RELEASE(sm->p_lock);
+		goto Exit;
+	}
+
+	/*
+	 * If we are a STANDBY sm and the osm_exit_flag is set, then let's
+	 * signal the subnet_up. This is relevant for the case of running only
+	 * once. In that case - the program is stuck until this signal is
+	 * received. In other cases - it is not relevant whether or not the
+	 * signal is on - since we are currently in exit flow
+	 */
+	if (state_snapshot == IB_SMINFO_STATE_STANDBY && osm_exit_flag) {
+		CL_PLOCK_RELEASE(sm->p_lock);
+		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+			"Signalling subnet_up_event\n");
+		cl_event_signal(&sm->subnet_up_event);
+		goto Exit;
+	}
+
+	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "SM State %d (%s), Retry number:%d\n",
+		sm->p_subn->sm_state, osm_get_sm_mgr_state_str(sm->p_subn->sm_state),
+		sm->retry_number);
+
+	/*
+	 * Once retry_number exceeds the configured polling_retry_number,
+	 * hand over to the state machine with OSM_SM_SIGNAL_POLLING_TIMEOUT.
+	 * The lock is dropped first.
+	 */
+	if (sm->retry_number > sm->p_subn->opt.polling_retry_number) {
+		CL_PLOCK_RELEASE(sm->p_lock);
+		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+			"Reached polling_retry_number value in retry_number. "
+			"Go to DISCOVERY state\n");
+		osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_POLLING_TIMEOUT);
+		goto Exit;
+	}
+
+	/* Poll the remote sm; only a request that went out counts as a retry. */
+	if (sm_state_mgr_send_master_sm_info_req(sm, state_snapshot))
+		sm->retry_number++;
+
+	CL_PLOCK_RELEASE(sm->p_lock);
+
+	/* Re-arm the timer for the next poll. */
+	timer_status = cl_timer_start(&sm->polling_timer, poll_interval);
+	if (timer_status != CL_SUCCESS)
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3211: "
+			"Failed to restart polling timer\n");
+
+Exit:
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/* Log (ERR 3207) a signal that is not valid in the current SM state. */
+static void sm_state_mgr_signal_error(osm_sm_t * sm, IN osm_sm_signal_t signal)
+{
+	OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3207: "
+		"Invalid signal %s in state %s\n",
+		osm_get_sm_mgr_signal_str(signal),
+		osm_get_sm_mgr_state_str(sm->p_subn->sm_state));
+}
+
+/* Reset the polling retry counter to zero. */
+void osm_sm_state_mgr_signal_master_is_alive(osm_sm_t * sm)
+{
+	OSM_LOG_ENTER(sm->p_log);
+	sm->retry_number = 0;
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+ib_api_status_t osm_sm_state_mgr_process(osm_sm_t * sm,
+					 IN osm_sm_signal_t signal)
+{
+	ib_api_status_t status =
IB_SUCCESS; + + CL_ASSERT(sm); + + OSM_LOG_ENTER(sm->p_log); + + /* + * The state lock prevents many race conditions from screwing + * up the state transition process. + */ + cl_spinlock_acquire(&sm->state_lock); + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Received signal %s in state %s\n", + osm_get_sm_mgr_signal_str(signal), + osm_get_sm_mgr_state_str(sm->p_subn->sm_state)); + + switch (sm->p_subn->sm_state) { + case IB_SMINFO_STATE_DISCOVERING: + switch (signal) { + case OSM_SM_SIGNAL_DISCOVERY_COMPLETED: + /* + * Update the state of the SM to MASTER + */ + /* Turn on the first_time_master_sweep flag */ + sm->p_subn->sm_state = IB_SMINFO_STATE_MASTER; + osm_report_sm_state(sm); + /* + * Make sure to set the subnet master_sm_base_lid + * to the sm_base_lid value + */ + CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); + sm->p_subn->first_time_master_sweep = TRUE; + sm->p_subn->master_sm_base_lid = + sm->p_subn->sm_base_lid; + CL_PLOCK_RELEASE(sm->p_lock); + break; + case OSM_SM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED: + /* + * Finished all discovery actions - move to STANDBY + * start the polling + */ + sm->p_subn->sm_state = IB_SMINFO_STATE_STANDBY; + osm_report_sm_state(sm); + /* + * Since another SM is doing the LFT config - we should not + * ignore the results of it + */ + CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); + sm->p_subn->ignore_existing_lfts = FALSE; + CL_PLOCK_RELEASE(sm->p_lock); + sm_state_mgr_start_polling(sm); + break; + case OSM_SM_SIGNAL_HANDOVER: + /* + * Signal for a new sweep. We need to discover the other SM. + * If we already discovered this SM, and got the + * HANDOVER - this means the remote SM is of lower priority. + * In this case we will stop polling it (since it is a lower + * priority SM in STANDBY state). 
+ */ + osm_sm_signal(sm, OSM_SIGNAL_SWEEP); + break; + default: + sm_state_mgr_signal_error(sm, signal); + status = IB_INVALID_PARAMETER; + break; + } + break; + + case IB_SMINFO_STATE_STANDBY: + switch (signal) { + case OSM_SM_SIGNAL_POLLING_TIMEOUT: + case OSM_SM_SIGNAL_DISCOVER: + /* + * case 1: Polling timeout occured - this means that the Master SM + * is no longer alive. + * case 2: Got a signal to move to DISCOVERING + * Move to DISCOVERING state and start sweeping + */ + sm->p_subn->sm_state = IB_SMINFO_STATE_DISCOVERING; + osm_report_sm_state(sm); + CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); + sm->p_subn->coming_out_of_standby = TRUE; + CL_PLOCK_RELEASE(sm->p_lock); + osm_sm_signal(sm, OSM_SIGNAL_SWEEP); + break; + case OSM_SM_SIGNAL_DISABLE: + /* + * Update the state to NOT_ACTIVE + */ + sm->p_subn->sm_state = IB_SMINFO_STATE_NOTACTIVE; + osm_report_sm_state(sm); + break; + case OSM_SM_SIGNAL_HANDOVER: + /* + * Update the state to MASTER, and start sweeping + * OPTIONAL: send ACKNOWLEDGE + */ + /* Turn on the force_first_time_master_sweep flag */ + /* We want full reconfiguration to occur on the first */ + /* master sweep of this SM */ + CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); + /* + * Make sure to set the subnet master_sm_base_lid + * to the sm_base_lid value + */ + sm->p_subn->master_sm_base_lid = + sm->p_subn->sm_base_lid; + + sm->p_subn->force_first_time_master_sweep = TRUE; + CL_PLOCK_RELEASE(sm->p_lock); + + sm->p_subn->sm_state = IB_SMINFO_STATE_MASTER; + osm_report_sm_state(sm); + osm_sm_signal(sm, OSM_SIGNAL_SWEEP); + break; + case OSM_SM_SIGNAL_ACKNOWLEDGE: + /* + * Do nothing - already moved to STANDBY + */ + break; + default: + sm_state_mgr_signal_error(sm, signal); + status = IB_INVALID_PARAMETER; + break; + } + break; + + case IB_SMINFO_STATE_NOTACTIVE: + switch (signal) { + case OSM_SM_SIGNAL_STANDBY: + /* + * Update the state to STANDBY + * start the polling + */ + sm->p_subn->sm_state = IB_SMINFO_STATE_STANDBY; + osm_report_sm_state(sm); + 
sm_state_mgr_start_polling(sm); + break; + default: + sm_state_mgr_signal_error(sm, signal); + status = IB_INVALID_PARAMETER; + break; + } + break; + + case IB_SMINFO_STATE_MASTER: + switch (signal) { + case OSM_SM_SIGNAL_POLLING_TIMEOUT: + /* + * We received a polling timeout - this means that we + * waited for a remote master sm to send us a handover, + * but didn't get it, and didn't get a response from + * that remote sm. + * We want to force a heavy sweep - hopefully this + * occurred because the remote sm died, and we'll find + * this out and configure the subnet after a heavy sweep. + * We also want to clear the polling_sm_guid - since + * we are done polling on that remote sm - we are + * sweeping again. + */ + case OSM_SM_SIGNAL_HANDOVER: + /* + * If we received a handover in a master state - then we + * want to force a heavy sweep. This means that either + * we are in a sweep currently - in this case - no + * change, or we are in idle state - since we + * recognized a master SM before - so we want to make a + * heavy sweep and reconfigure the new subnet. + * We also want to clear the polling_sm_guid - since + * we are done polling on that remote sm - we got a + * handover from it. + */ + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Forcing heavy sweep. Received signal %s\n", + osm_get_sm_mgr_signal_str(signal)); + CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); + sm->polling_sm_guid = 0; + sm->p_subn->force_first_time_master_sweep = TRUE; + CL_PLOCK_RELEASE(sm->p_lock); + osm_sm_signal(sm, OSM_SIGNAL_SWEEP); + break; + case OSM_SM_SIGNAL_HANDOVER_SENT: + /* + * Just sent a HANDOVER signal - move to STANDBY + * start the polling + */ + sm->p_subn->sm_state = IB_SMINFO_STATE_STANDBY; + osm_report_sm_state(sm); + sm_state_mgr_start_polling(sm); + break; + case OSM_SM_SIGNAL_WAIT_FOR_HANDOVER: + /* + * We found a remote master SM, and we are waiting for + * it to handover the mastership to us. 
Need to start + * polling that SM, to make sure it is alive, if it + * isn't - then we should move back to discovering, + * since something must have happened to it. + */ + sm_state_mgr_start_polling(sm); + break; + case OSM_SM_SIGNAL_DISCOVER: + sm->p_subn->sm_state = IB_SMINFO_STATE_DISCOVERING; + osm_report_sm_state(sm); + break; + default: + sm_state_mgr_signal_error(sm, signal); + status = IB_INVALID_PARAMETER; + break; + } + break; + + default: + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3208: " + "Invalid state %s\n", + osm_get_sm_mgr_state_str(sm->p_subn->sm_state)); + + } + + cl_spinlock_release(&sm->state_lock); + + OSM_LOG_EXIT(sm->p_log); + return status; +} + +ib_api_status_t osm_sm_state_mgr_check_legality(osm_sm_t * sm, + IN osm_sm_signal_t signal) +{ + ib_api_status_t status = IB_SUCCESS; + + CL_ASSERT(sm); + + OSM_LOG_ENTER(sm->p_log); + + /* + * The state lock prevents many race conditions from screwing + * up the state transition process. + */ + cl_spinlock_acquire(&sm->state_lock); + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Received signal %s in state %s\n", + osm_get_sm_mgr_signal_str(signal), + osm_get_sm_mgr_state_str(sm->p_subn->sm_state)); + + switch (sm->p_subn->sm_state) { + case IB_SMINFO_STATE_DISCOVERING: + switch (signal) { + case OSM_SM_SIGNAL_DISCOVERY_COMPLETED: + case OSM_SM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED: + case OSM_SM_SIGNAL_HANDOVER: + status = IB_SUCCESS; + break; + default: + sm_state_mgr_signal_error(sm, signal); + status = IB_INVALID_PARAMETER; + break; + } + break; + + case IB_SMINFO_STATE_STANDBY: + switch (signal) { + case OSM_SM_SIGNAL_POLLING_TIMEOUT: + case OSM_SM_SIGNAL_DISCOVER: + case OSM_SM_SIGNAL_DISABLE: + case OSM_SM_SIGNAL_HANDOVER: + case OSM_SM_SIGNAL_ACKNOWLEDGE: + status = IB_SUCCESS; + break; + default: + sm_state_mgr_signal_error(sm, signal); + status = IB_INVALID_PARAMETER; + break; + } + break; + + case IB_SMINFO_STATE_NOTACTIVE: + switch (signal) { + case OSM_SM_SIGNAL_STANDBY: + status = IB_SUCCESS; + 
break; + default: + sm_state_mgr_signal_error(sm, signal); + status = IB_INVALID_PARAMETER; + break; + } + break; + + case IB_SMINFO_STATE_MASTER: + switch (signal) { + case OSM_SM_SIGNAL_HANDOVER: + case OSM_SM_SIGNAL_HANDOVER_SENT: + status = IB_SUCCESS; + break; + default: + sm_state_mgr_signal_error(sm, signal); + status = IB_INVALID_PARAMETER; + break; + } + break; + + default: + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3209: " + "Invalid state %s\n", + osm_get_sm_mgr_state_str(sm->p_subn->sm_state)); + status = IB_INVALID_PARAMETER; + + } + + cl_spinlock_release(&sm->state_lock); + + OSM_LOG_EXIT(sm->p_log); + return status; +} diff --git a/opensm/osm_sminfo_rcv.c b/opensm/osm_sminfo_rcv.c new file mode 100644 index 0000000..2a1f10e --- /dev/null +++ b/opensm/osm_sminfo_rcv.c @@ -0,0 +1,657 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * Abstract:
+ * Implementation of osm_sminfo_rcv_t.
+ * This object represents the SMInfo Receiver object.
+ * This object is part of the opensm family of objects.
+ */
+
+#if HAVE_CONFIG_H
+# include
+#endif /* HAVE_CONFIG_H */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#define FILE_ID OSM_FILE_SMINFO_RCV_C
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/**********************************************************************
+ Tell whether the remote SM described by p_rem_smi outranks us.
+ The remote priority and GUID are handed to osm_sm_is_greater_than()
+ together with our configured sm_priority and our SM port GUID.
+ "Higher" means: higher priority, or equal priority and a lower GUID.
+ Returns TRUE when the remote SM is higher, FALSE otherwise.
+**********************************************************************/
+static boolean_t smi_rcv_remote_sm_is_higher(IN osm_sm_t * sm,
+					     IN const ib_sm_info_t * p_rem_smi)
+{
+	osm_subn_t *p_subn = sm->p_subn;
+
+	return osm_sm_is_greater_than(ib_sminfo_get_priority(p_rem_smi),
+				      p_rem_smi->guid,
+				      p_subn->opt.sm_priority,
+				      p_subn->sm_port_guid);
+}
+
+/**********************************************************************
+ Answer a SubnGet(SMInfo) request with our own SMInfo.
+ sm          - the SM object; its subnet supplies GUID, state,
+               priority and SM_Key.
+ p_madw      - the request MAD wrapper (method must be GET).
+ fill_sm_key - when FALSE the SM_Key field of the reply is left 0.
+ A failure to send the response is logged (ERR 2F02) and otherwise
+ ignored.
+**********************************************************************/
+static void smi_rcv_process_get_request(IN osm_sm_t * sm,
+					IN const osm_madw_t * p_madw,
+					IN boolean_t fill_sm_key)
+{
+	uint8_t payload[IB_SMP_DATA_SIZE];
+	ib_sm_info_t *p_reply = (ib_sm_info_t *) payload;
+	osm_subn_t *p_subn = sm->p_subn;
+	ib_api_status_t rc;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	CL_ASSERT(p_madw);
+	CL_ASSERT(osm_madw_get_smp_ptr(p_madw)->method == IB_MAD_METHOD_GET);
+
+	/* No lock is taken: there is no real need for one in this function. */
+	memset(payload, 0, sizeof(payload));
+
+	p_reply->guid = p_subn->sm_port_guid;
+	p_reply->act_count = cl_hton32(p_subn->p_osm->stats.qp0_mads_sent);
+	/* state in the low bits, priority shifted into the high nibble */
+	p_reply->pri_state = (uint8_t) (p_subn->sm_state |
+					p_subn->opt.sm_priority << 4);
+	/* the zeroed payload already holds SM_Key 0 for the "hide" case */
+	if (fill_sm_key)
+		p_reply->sm_key = p_subn->opt.sm_key;
+
+	rc = osm_resp_send(sm, p_madw, 0, payload);
+	if (rc != IB_SUCCESS)
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F02: "
+			"Error sending SMInfo response (%s)\n",
+			ib_get_err_str(rc));
+
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/**********************************************************************
+ * Check if the p_smp received is legal.
+ * Current checks:
+ * MADHeader:AttributeModifier of ACKNOWLEDGE that was not sent by a
+ * Standby SM.
+ * MADHeader:AttributeModifiers of HANDOVER/DISABLE/STANDBY/DISCOVER
+ * that was not sent by a Master SM.
+ * FUTURE - TO DO:
+ * Check that the SM_Key matches.
+ **********************************************************************/
+static ib_api_status_t smi_rcv_check_set_req_legality(IN const ib_smp_t * p_smp)
+{
+	ib_sm_info_t *p_smi;
+
+	/* The sender's own SMInfo travels in the payload of the Set. */
+	p_smi = ib_smp_get_payload_ptr(p_smp);
+
+	if (p_smp->attr_mod == IB_SMINFO_ATTR_MOD_ACKNOWLEDGE) {
+		/* ACKNOWLEDGE is only accepted from a sender in STANDBY */
+		if (ib_sminfo_get_state(p_smi) == IB_SMINFO_STATE_STANDBY)
+			return IB_SUCCESS;
+	} else if (p_smp->attr_mod == IB_SMINFO_ATTR_MOD_HANDOVER ||
+		   p_smp->attr_mod == IB_SMINFO_ATTR_MOD_DISABLE ||
+		   p_smp->attr_mod == IB_SMINFO_ATTR_MOD_STANDBY ||
+		   p_smp->attr_mod == IB_SMINFO_ATTR_MOD_DISCOVER) {
+		/* these four are only accepted from a sender in MASTER */
+		if (ib_sminfo_get_state(p_smi) == IB_SMINFO_STATE_MASTER)
+			return IB_SUCCESS;
+	}
+
+	/* unknown modifier, or modifier/sender-state mismatch */
+	return IB_INVALID_PARAMETER;
+}
+
+/**********************************************************************
+ * Handle a SubnSet(SMInfo) request addressed to this SM.
+ *
+ * Steps, in order:
+ *  1. build our own SMInfo as the response payload,
+ *  2. validate the AttributeModifier against the sender's state
+ *     (smi_rcv_check_set_req_legality),
+ *  3. map the AttributeModifier to an osm_sm_signal_t,
+ *  4. ask the SM state manager whether that signal is legal in our
+ *     current state,
+ *  5. send the response, then feed the signal to the state manager.
+ *
+ * A failure in step 2 or 4 is answered with status value 7
+ * (NOTE(review): presumably the "invalid attribute/modifier value"
+ * MAD status - confirm against the MAD status definitions).
+ * Nothing is returned; all failures are logged.
+ **********************************************************************/
+static void smi_rcv_process_set_request(IN osm_sm_t * sm,
+					IN const osm_madw_t * p_madw)
+{
+	uint8_t payload[IB_SMP_DATA_SIZE];
+	ib_smp_t *p_smp;
+	ib_sm_info_t *p_smi = (ib_sm_info_t *) payload;	/* our reply */
+	ib_sm_info_t *sm_smi;	/* SMInfo of the requesting SM */
+	ib_api_status_t status;
+	osm_sm_signal_t sm_signal;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	CL_ASSERT(p_madw);
+
+	memset(payload, 0, sizeof(payload));
+
+	p_smp = osm_madw_get_smp_ptr(p_madw);
+	sm_smi = ib_smp_get_payload_ptr(p_smp);
+
+	if (p_smp->method != IB_MAD_METHOD_SET) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F03: "
+			"Unsupported set method 0x%X\n", p_smp->method);
+		goto Exit;
+	}
+
+	CL_PLOCK_ACQUIRE(sm->p_lock);
+
+	p_smi->guid = sm->p_subn->sm_port_guid;
+	p_smi->act_count = cl_hton32(sm->p_subn->p_osm->stats.qp0_mads_sent);
+	p_smi->pri_state = (uint8_t) (sm->p_subn->sm_state |
+				      sm->p_subn->opt.sm_priority << 4);
+	p_smi->sm_key = sm->p_subn->opt.sm_key;
+
+	/* Check the legality of the packet */
+	status = smi_rcv_check_set_req_legality(p_smp);
+	if (status != IB_SUCCESS) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F04: "
+			"Check legality failed. AttributeModifier:0x%X RemoteState:%s\n",
+			p_smp->attr_mod,
+			osm_get_sm_mgr_state_str(ib_sminfo_get_state(sm_smi)));
+		/* on this path the reply is sent while the lock is still held */
+		status = osm_resp_send(sm, p_madw, 7, payload);
+		if (status != IB_SUCCESS)
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F05: "
+				"Error sending SMInfo response (%s)\n",
+				ib_get_err_str(status));
+		CL_PLOCK_RELEASE(sm->p_lock);
+		goto Exit;
+	}
+
+	/* translate from IB_SMINFO_ATTR to OSM_SM_SIGNAL */
+	switch (p_smp->attr_mod) {
+	case IB_SMINFO_ATTR_MOD_HANDOVER:
+		sm_signal = OSM_SM_SIGNAL_HANDOVER;
+		break;
+	case IB_SMINFO_ATTR_MOD_ACKNOWLEDGE:
+		sm_signal = OSM_SM_SIGNAL_ACKNOWLEDGE;
+		break;
+	case IB_SMINFO_ATTR_MOD_DISABLE:
+		sm_signal = OSM_SM_SIGNAL_DISABLE;
+		break;
+	case IB_SMINFO_ATTR_MOD_STANDBY:
+		sm_signal = OSM_SM_SIGNAL_STANDBY;
+		break;
+	case IB_SMINFO_ATTR_MOD_DISCOVER:
+		sm_signal = OSM_SM_SIGNAL_DISCOVER;
+		break;
+	default:
+		/*
+		   This code shouldn't be reached - checked in the
+		   check legality
+		 */
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F06: "
+			"THIS CODE SHOULD NOT BE REACHED!!\n");
+		CL_PLOCK_RELEASE(sm->p_lock);
+		goto Exit;
+	}
+
+	CL_PLOCK_RELEASE(sm->p_lock);
+
+	/* check legality of the needed transition in the SM state machine */
+	status = osm_sm_state_mgr_check_legality(sm, sm_signal);
+	if (status != IB_SUCCESS) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F07: "
+			"Failed check of legality of needed SM transition. "
+			"AttributeModifier:0x%X RemoteState:%s\n",
+			p_smp->attr_mod,
+			osm_get_sm_mgr_state_str(ib_sminfo_get_state(sm_smi)));
+		status = osm_resp_send(sm, p_madw, 7, payload);
+		if (status != IB_SUCCESS)
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F08: "
+				"Error sending SMInfo response (%s)\n",
+				ib_get_err_str(status));
+		goto Exit;
+	}
+
+	/* the SubnSet(SMInfo) command is ok. Send a response. */
+	status = osm_resp_send(sm, p_madw, 0, payload);
+	if (status != IB_SUCCESS)
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F09: "
+			"Error sending SMInfo response (%s)\n",
+			ib_get_err_str(status));
+
+	/* it is a legal packet - act according to it (even if the
+	   response above could not be sent) */
+
+	/* if the AttributeModifier is STANDBY - need to save on the sm in */
+	/* the master_sm_guid variable - the guid of the current master. */
+	if (p_smp->attr_mod == IB_SMINFO_ATTR_MOD_STANDBY) {
+		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+			"Received a STANDBY signal. Updating "
+			"sm_state_mgr master_guid: 0x%016" PRIx64 "\n",
+			cl_ntoh64(sm_smi->guid));
+		CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
+		sm->master_sm_guid = sm_smi->guid;
+		CL_PLOCK_RELEASE(sm->p_lock);
+	}
+
+	status = osm_sm_state_mgr_process(sm, sm_signal);
+
+	if (status != IB_SUCCESS)
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F10: "
+			"Error in SM state transition (%s)\n",
+			ib_get_err_str(status));
+
+Exit:
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/**********************************************************************
+ * React to the SMInfo of a remote SM (p_sm->smi), depending on our own
+ * SM state and on the state the remote SM reports.
+ *
+ * light_sweep - TRUE when the SMInfo was obtained during a light sweep;
+ *               only consulted while we are MASTER.
+ *
+ * The function updates sm->master_sm_found / sm->master_sm_guid, may
+ * set p_subn->force_heavy_sweep, may send trap 144 and may signal
+ * "master is alive" to the SM state manager.
+ * The only visible caller, smi_rcv_process_get_response(), invokes it
+ * with sm->p_lock held exclusively.
+ **********************************************************************/
+static void smi_rcv_process_get_sm(IN osm_sm_t * sm,
+				   IN const osm_remote_sm_t * p_sm,
+				   boolean_t light_sweep)
+{
+	const ib_sm_info_t *p_smi;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	p_smi = &p_sm->smi;
+
+	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+		"Detected SM 0x%016" PRIx64 " in state %u (%s)\n",
+		cl_ntoh64(p_smi->guid), ib_sminfo_get_state(p_smi),
+		osm_get_sm_mgr_state_str(ib_sminfo_get_state(p_smi)));
+
+	/* Check the state of this SM vs. our own. */
+	switch (sm->p_subn->sm_state) {
+	case IB_SMINFO_STATE_NOTACTIVE:
+		break;
+
+	case IB_SMINFO_STATE_DISCOVERING:
+		switch (ib_sminfo_get_state(p_smi)) {
+		case IB_SMINFO_STATE_NOTACTIVE:
+			break;
+		case IB_SMINFO_STATE_MASTER:
+			sm->master_sm_found = 1;
+			/* save on the sm the guid of the current master. */
+			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+				"Found master SM. Updating sm_state_mgr master_guid: 0x%016"
+				PRIx64 "\n", cl_ntoh64(p_smi->guid));
+			sm->master_sm_guid = p_smi->guid;
+			break;
+		case IB_SMINFO_STATE_DISCOVERING:
+		case IB_SMINFO_STATE_STANDBY:
+			if (smi_rcv_remote_sm_is_higher(sm, p_smi)) {
+				/* the remote is a higher sm - need to stop sweeping */
+				sm->master_sm_found = 1;
+				/* save on the sm the guid of the higher SM we found - */
+				/* we will poll it - as long as it lives - we should be in Standby. */
+				OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+					"Found higher SM. Updating sm_state_mgr master_guid:"
+					" 0x%016" PRIx64 "\n",
+					cl_ntoh64(p_smi->guid));
+				sm->master_sm_guid = p_smi->guid;
+			}
+			break;
+		default:
+			break;
+		}
+		break;
+
+	case IB_SMINFO_STATE_STANDBY:
+		/* While in STANDBY, a response from a MASTER, or from the */
+		/* STANDBY SM recorded in master_sm_guid that still outranks */
+		/* us, is reported to the state manager as "master is alive". */
+		switch (ib_sminfo_get_state(p_smi)) {
+		case IB_SMINFO_STATE_MASTER:
+			/* This means the master is alive */
+			/* Signal that to the SM state mgr */
+			osm_sm_state_mgr_signal_master_is_alive(sm);
+
+			/* the master does not outrank us: send trap 144
+			   (SM priority change) */
+			if (!smi_rcv_remote_sm_is_higher(sm, p_smi))
+				osm_send_trap144(sm,
+						 TRAP_144_MASK_SM_PRIORITY_CHANGE);
+			break;
+		case IB_SMINFO_STATE_STANDBY:
+			/* This should be the response from the sm we are polling. */
+			/* If it is - then signal master is alive */
+			if (sm->master_sm_guid == p_sm->smi.guid) {
+				/* Make sure that it is an SM with higher priority than us.
+				   If we started polling it when it was master, and it moved
+				   to standby - then it might be with a lower priority than
+				   us - and then we don't want to continue polling it. */
+				if (smi_rcv_remote_sm_is_higher(sm, p_smi))
+					osm_sm_state_mgr_signal_master_is_alive
+					    (sm);
+			}
+			break;
+		default:
+			/* any other state - do nothing */
+			break;
+		}
+		break;
+
+	case IB_SMINFO_STATE_MASTER:
+		switch (ib_sminfo_get_state(p_smi)) {
+		case IB_SMINFO_STATE_MASTER:
+			/* If this is a response due to our polling, this means that we are
+			 * waiting for a handover from this SM, and it is still alive -
+			 * signal that. If we detected the remote SM with higher priority
+			 * we should init a heavy sweep in order to go STANDBY. If we
+			 * detected a remote SM with lower priority, we should resend trap144
+			 * as it might not get it and we don't want to wait for a HANDOVER
+			 * forever.
+			 */
+			if (sm->polling_sm_guid) {
+				if (smi_rcv_remote_sm_is_higher(sm, p_smi))
+					sm->p_subn->force_heavy_sweep = TRUE;
+				else {
+					/* Update master_sm_guid to the GUID of the newly
+					 * found MASTER SM and send trap 144 to it.
+					 */
+					sm->master_sm_guid = sm->polling_sm_guid;
+					osm_send_trap144(sm, TRAP_144_MASK_SM_PRIORITY_CHANGE);
+				}
+				osm_sm_state_mgr_signal_master_is_alive(sm);
+			} else {
+				/* This is a response we got while sweeping the subnet.
+				 *
+				 * If this is during a heavy sweep, we will handle a case of
+				 * handover needed later on, when the sweep is done and all
+				 * SMs are recognized.
+				 *
+				 * If this is during a light sweep, initiate a heavy sweep
+				 * to initiate handover scenarios.
+				 *
+				 * Note that it does not matter if the remote SM is lower
+				 * or higher priority. If it is lower priority, we must
+				 * wait for its HANDOVER. If it is higher priority, we need
+				 * to HANDOVER to it. Both cases are handled after doing
+				 * a heavy sweep.
+				 */
+				if (light_sweep)
+					sm->p_subn->force_heavy_sweep = TRUE;
+			}
+			break;
+		case IB_SMINFO_STATE_STANDBY:
+			/* a higher STANDBY SM seen on a light sweep forces a
+			   heavy sweep */
+			if (light_sweep &&
+			    smi_rcv_remote_sm_is_higher(sm, p_smi))
+				sm->p_subn->force_heavy_sweep = TRUE;
+			break;
+		default:
+			/* any other state - do nothing */
+			break;
+		}
+		break;
+
+	default:
+		break;
+	}
+
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/**********************************************************************
+ * Handle a GetResp(SMInfo) that answers one of our SubnGet(SMInfo)
+ * requests.
+ *
+ * The response is dropped (with a log entry) when the method is not
+ * GET_RESP, when the SM_Key check fails, when no port object exists
+ * for the reported GUID, or when the response is our own.
+ * Otherwise the remote SM is looked up in sm_guid_tbl - a new
+ * osm_remote_sm_t is allocated and inserted on first sight, an existing
+ * one gets its SMInfo refreshed - and smi_rcv_process_get_sm() is run
+ * on it. The table work is done under the exclusive subnet lock.
+ **********************************************************************/
+static void smi_rcv_process_get_response(IN osm_sm_t * sm,
+					 IN const osm_madw_t * p_madw)
+{
+	const ib_smp_t *p_smp;
+	const ib_sm_info_t *p_smi;
+	cl_qmap_t *p_sm_tbl;
+	osm_port_t *p_port;
+	ib_net64_t port_guid;
+	osm_remote_sm_t *p_sm;
+	char buf[256];	/* printable sender address for the log */
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	CL_ASSERT(p_madw);
+
+	p_smp = osm_madw_get_smp_ptr(p_madw);
+
+	if (p_smp->method != IB_MAD_METHOD_GET_RESP) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F11: "
+			"Unsupported response method 0x%X\n", p_smp->method);
+		goto Exit;
+	}
+
+	p_smi = ib_smp_get_payload_ptr(p_smp);
+	p_sm_tbl = &sm->p_subn->sm_guid_tbl;
+	port_guid = p_smi->guid;
+
+	osm_dump_sm_info_v2(sm->p_log, p_smi, FILE_ID, OSM_LOG_DEBUG);
+
+	/* SM_Key check: skipped when our own sm_key is zero; otherwise
+	   the remote sm_key must equal ours. If not - OpenSM ignores
+	   this SM */
+	if (sm->p_subn->opt.sm_key != 0 && p_smi->sm_key != sm->p_subn->opt.sm_key) {
+		if (p_smp->mgmt_class == IB_MCLASS_SUBN_DIR)
+			sprint_uint8_arr(buf, sizeof(buf),
+				p_smp->initial_path, p_smp->hop_count + 1);
+		else
+			sprintf(buf, "LID %u",
+				cl_ntoh16(p_madw->mad_addr.addr_type.smi.source_lid));
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F18: "
+			"Got SM (%s) with sm_key 0x%016" PRIx64 " that doesn't match our "
+			"local sm_key. Ignoring SMInfo\n", buf, cl_ntoh64(p_smi->sm_key));
+		osm_log_v2(sm->p_log, OSM_LOG_SYS, FILE_ID,
+			   "Found remote SM (%s) with non-matching sm_key\n", buf);
+		goto Exit;
+	}
+
+	/* Determine if we already have another SM object for this SM. */
+	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
+
+	p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
+	if (!p_port) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F12: "
+			"No port object for this SM\n");
+		goto _unlock_and_exit;
+	}
+
+	if (osm_port_get_guid(p_port) != p_smi->guid) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F13: "
+			"Bogus SM port GUID, Expected 0x%016" PRIx64
+			", Received 0x%016" PRIx64 "\n",
+			cl_ntoh64(osm_port_get_guid(p_port)),
+			cl_ntoh64(p_smi->guid));
+		goto _unlock_and_exit;
+	}
+
+	/* our own SMInfo - nothing to record */
+	if (port_guid == sm->p_subn->sm_port_guid) {
+		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+			"Self query response received - SM port 0x%016" PRIx64
+			"\n", cl_ntoh64(port_guid));
+		goto _unlock_and_exit;
+	}
+
+	p_sm = (osm_remote_sm_t *) cl_qmap_get(p_sm_tbl, port_guid);
+	if (p_sm == (osm_remote_sm_t *) cl_qmap_end(p_sm_tbl)) {
+		/* first time we see this SM: create and register it */
+		p_sm = malloc(sizeof(*p_sm));
+		if (p_sm == NULL) {
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F14: "
+				"Unable to allocate SM object\n");
+			goto _unlock_and_exit;
+		}
+
+		osm_remote_sm_init(p_sm, p_smi);
+
+		cl_qmap_insert(p_sm_tbl, port_guid, &p_sm->map_item);
+	} else
+		/* We already know this SM. Update the SMInfo attribute. */
+		p_sm->smi = *p_smi;
+
+	smi_rcv_process_get_sm(sm, p_sm,
+			       osm_madw_get_smi_context_ptr(p_madw)->
+			       light_sweep);
+
+_unlock_and_exit:
+	CL_PLOCK_RELEASE(sm->p_lock);
+
+Exit:
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/**********************************************************************
+ * Handle the response to a SubnSet(SMInfo) we sent.
+ * Only a response carrying the HANDOVER attribute modifier is
+ * expected, and it needs no action; the function merely validates the
+ * MAD status, method and modifier and logs anything unexpected.
+ **********************************************************************/
+static void smi_rcv_process_set_response(IN osm_sm_t * sm,
+					 IN const osm_madw_t * p_madw)
+{
+	const ib_smp_t *p_smp;
+	const ib_sm_info_t *p_smi;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	CL_ASSERT(p_madw);
+
+	p_smp = osm_madw_get_smp_ptr(p_madw);
+	if (ib_smp_get_status(p_smp)) {
+		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+			"MAD status 0x%x received\n",
+			cl_ntoh16(ib_smp_get_status(p_smp)));
+		goto Exit;
+	}
+
+	if (p_smp->method != IB_MAD_METHOD_GET_RESP) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F16: "
+			"Unsupported response method 0x%X\n", p_smp->method);
+		goto Exit;
+	}
+
+	p_smi = ib_smp_get_payload_ptr(p_smp);
+	osm_dump_sm_info_v2(sm->p_log, p_smi, FILE_ID, OSM_LOG_DEBUG);
+
+	/* Check the AttributeModifier */
+	if (p_smp->attr_mod != IB_SMINFO_ATTR_MOD_HANDOVER) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F17: "
+			"Unsupported attribute modifier 0x%X, "
+			"expected ATTR_MOD_HANDOVER\n",
+			p_smp->attr_mod);
+		goto Exit;
+	}
+
+	/* This is a response on a HANDOVER request - Nothing to do. */
+
+Exit:
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/**********************************************************************
+ * Entry point for every received SMInfo MAD.
+ *
+ * context - the osm_sm_t the receiver belongs to.
+ * data    - the osm_madw_t wrapping the received MAD.
+ *
+ * MADs with a non-zero status are only logged. Responses are checked
+ * against the port GUID stored in the request context and passed to
+ * the Get/Set response handlers. Requests are authenticated against
+ * the M_Key of our own SM port before being passed to the Get/Set
+ * request handlers:
+ *  - Get: rejected when the M_Key mismatches and the protection bits
+ *    value is above 1; with protection bits equal to 1 and a mismatch
+ *    the request is answered but with SM_Key 0.
+ *  - Set (any non-Get method): rejected on any M_Key mismatch.
+ * A zero local M_Key disables the checks.
+ **********************************************************************/
+void osm_sminfo_rcv_process(IN void *context, IN void *data)
+{
+	osm_sm_t *sm = context;
+	osm_madw_t *p_madw = data;
+	ib_smp_t *p_smp;
+	osm_smi_context_t *p_smi_context;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	CL_ASSERT(p_madw);
+
+	p_smp = osm_madw_get_smp_ptr(p_madw);
+	if (ib_smp_get_status(p_smp)) {
+		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+			"MAD status 0x%x received\n",
+			cl_ntoh16(ib_smp_get_status(p_smp)));
+		goto Exit;
+	}
+
+	/* Determine if this is a request for our own SMInfo or if
+	   this is a response to our request for another SM's SMInfo. */
+	if (ib_smp_is_response(p_smp)) {
+		const ib_sm_info_t *p_smi = ib_smp_get_payload_ptr(p_smp);
+
+		/* Get the context - to see if this is a response to a Get or Set method */
+		p_smi_context = osm_madw_get_smi_context_ptr(p_madw);
+
+		/* Verify that response is from expected port and there is
+		   no port moving issue. */
+		if (p_smi_context->port_guid != p_smi->guid) {
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F19: "
+				"Unexpected SM port GUID in response"
+				", Expected 0x%016" PRIx64
+				", Received 0x%016" PRIx64 "\n",
+				cl_ntoh64(p_smi_context->port_guid),
+				cl_ntoh64(p_smi->guid));
+			goto Exit;
+		}
+
+		if (p_smi_context->set_method == FALSE)
+			/* this is a response to a Get method */
+			smi_rcv_process_get_response(sm, p_madw);
+		else
+			/* this is a response to a Set method */
+			smi_rcv_process_set_response(sm, p_madw);
+	} else {
+		osm_port_t * p_port;
+		ib_net64_t my_mkey;
+		uint8_t mpb;	/* M_Key protection bits of our SM port */
+		char buf[256];	/* printable sender address for the log */
+
+		/* without our own port object (or its physp) the M_Key
+		   cannot be read, so the request is silently dropped */
+		if(!(p_port = osm_get_port_by_guid(sm->p_subn,
+						   sm->p_subn->sm_port_guid)))
+			goto Exit;
+
+		if (!p_port->p_physp)
+			goto Exit;
+
+		my_mkey = ib_port_info_get_m_key(&p_port->p_physp->port_info);
+		mpb = my_mkey ? ib_port_info_get_mpb(&p_port->p_physp->port_info) : 0;
+
+		if (p_smp->method == IB_MAD_METHOD_GET) {
+			/* M-Key Authentication */
+			if (my_mkey && mpb > 1 && my_mkey != p_smp->m_key) {
+				if (p_smp->mgmt_class == IB_MCLASS_SUBN_DIR)
+					sprint_uint8_arr(buf, sizeof(buf),
+					      p_smp->return_path, p_smp->hop_count + 1);
+				else
+					sprintf(buf, "LID %u",
+						cl_ntoh16(p_madw->mad_addr.addr_type.smi.source_lid));
+				/* note the trailing blank: the two literals are
+				   concatenated into one sentence pair */
+				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F1A: "
+					"SMInfo(GET) sender (%s) authentication failure. "
+					"Ignoring SMInfo\n", buf);
+				goto Exit;
+			}
+			/* If protection bits == 1 but MKEY mismatch, return SM-KEY = 0 */
+			if (my_mkey && mpb == 1 && my_mkey != p_smp->m_key)
+				smi_rcv_process_get_request(sm, p_madw, FALSE);
+			else
+				smi_rcv_process_get_request(sm, p_madw, TRUE);
+		} else {
+			/* M-Key Authentication */
+			if (my_mkey && my_mkey != p_smp->m_key) {
+				if (p_smp->mgmt_class == IB_MCLASS_SUBN_DIR)
+					sprint_uint8_arr(buf, sizeof(buf),
+					      p_smp->return_path, p_smp->hop_count + 1);
+				else
+					sprintf(buf, "LID %u",
+						cl_ntoh16(p_madw->mad_addr.addr_type.smi.source_lid));
+				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F1B: "
+					"SMInfo(SET) sender (%s) authentication failure. "
+					"Ignoring SMInfo\n", buf);
+				goto Exit;
+			}
+			/* This should be a SubnSet request */
+			smi_rcv_process_set_request(sm, p_madw);
+		}
+	}
+
+Exit:
+	OSM_LOG_EXIT(sm->p_log);
+}
diff --git a/opensm/osm_state_mgr.c b/opensm/osm_state_mgr.c
new file mode 100644
index 0000000..e2079ae
--- /dev/null
+++ b/opensm/osm_state_mgr.c
@@ -0,0 +1,1757 @@
+/*
+ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * Copyright (c) 2009 HNR Consulting. All rights reserved.
+ * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_state_mgr_t. + * This file implements the State Manager object. 
+ */
+
+#if HAVE_CONFIG_H
+# include
+#endif /* HAVE_CONFIG_H */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#define FILE_ID OSM_FILE_STATE_MGR_C
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+extern void osm_drop_mgr_process(IN osm_sm_t * sm);
+extern int osm_qos_setup(IN osm_opensm_t * p_osm);
+extern int osm_pkey_mgr_process(IN osm_opensm_t * p_osm);
+extern int osm_mcast_mgr_process(IN osm_sm_t * sm, boolean_t config_all);
+extern int osm_link_mgr_process(IN osm_sm_t * sm, IN uint8_t state);
+extern void osm_guid_mgr_process(IN osm_sm_t * sm);
+
+/**********************************************************************
+ Announce "SUBNET UP" in the log.
+ The plain log line goes out at OSM_LOG_SYS level on the first master
+ sweep and at OSM_LOG_INFO level afterwards; the boxed message notes
+ when sweeping is disabled (sweep_interval of zero).
+**********************************************************************/
+static void state_mgr_up_msg(IN const osm_sm_t * sm)
+{
+	/*
+	 * The SYS-level message is meant to appear only once - when the
+	 * SM moves to Master state and the subnet comes up for the
+	 * first time; later sweeps log at INFO level.
+	 */
+	if (sm->p_subn->first_time_master_sweep)
+		osm_log_v2(sm->p_log, OSM_LOG_SYS, FILE_ID, "SUBNET UP\n");
+	else
+		osm_log_v2(sm->p_log, OSM_LOG_INFO, FILE_ID, "SUBNET UP\n");
+
+	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
+			sm->p_subn->opt.sweep_interval ?
+			"SUBNET UP" : "SUBNET UP (sweep disabled)");
+}
+
+/**********************************************************************
+ qmap callback: forget that a node and its physical ports were
+ discovered. The context argument is not used.
+**********************************************************************/
+static void state_mgr_reset_node_count(IN cl_map_item_t * p_map_item,
+				       IN void *context)
+{
+	osm_node_t *p_nd = (osm_node_t *) p_map_item;
+
+	/* one uint8_t "discovered" flag per entry of the physp table */
+	memset(p_nd->physp_discovered, 0,
+	       sizeof(uint8_t) * p_nd->physp_tbl_size);
+	p_nd->discovery_count = 0;
+}
+
+/**********************************************************************
+ qmap callback: zero the discovery count of a port.
+ The context argument is not used.
+**********************************************************************/
+static void state_mgr_reset_port_count(IN cl_map_item_t * p_map_item,
+				       IN void *context)
+{
+	((osm_port_t *) p_map_item)->discovery_count = 0;
+}
+
+/**********************************************************************
+ qmap callback: flag a switch as needing an update, but only when its
+ max_lid_ho is non-zero. The context argument is not used.
+**********************************************************************/
+static void state_mgr_reset_switch_count(IN cl_map_item_t * p_map_item,
+					 IN void *context)
+{
+	osm_switch_t *p_switch = (osm_switch_t *) p_map_item;
+
+	if (p_switch->max_lid_ho)
+		p_switch->need_update = 1;
+}
+
+/**********************************************************************
+ qmap callback: request SwitchInfo from one switch as part of a light
+ sweep. p_object is the osm_switch_t, context the osm_sm_t.
+ The Get is sent along the directed-route path of the switch's port 0,
+ using the M_Key found in that port's PortInfo. A failure to issue the
+ request is logged (ERR 3304).
+**********************************************************************/
+static void state_mgr_get_sw_info(IN cl_map_item_t * p_object, IN void *context)
+{
+	osm_switch_t *const p_sw = (osm_switch_t *) p_object;
+	osm_sm_t *sm = context;
+	osm_node_t *p_sw_node;
+	osm_physp_t *p_port0;
+	osm_madw_context_t ctx;
+	ib_api_status_t rc;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	p_sw_node = p_sw->p_node;
+	p_port0 = osm_node_get_physp_ptr(p_sw_node, 0);
+
+	/* describe the request for whoever handles the response */
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.si_context.node_guid = osm_node_get_node_guid(p_sw_node);
+	ctx.si_context.set_method = FALSE;
+	ctx.si_context.light_sweep = TRUE;
+	ctx.si_context.lft_top_change = FALSE;
+
+	rc = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_port0),
+			 IB_MAD_ATTR_SWITCH_INFO, 0, FALSE,
+			 ib_port_info_get_m_key(&p_port0->port_info),
+			 0, OSM_MSG_LIGHT_SWEEP_FAIL, &ctx);
+	if (rc != IB_SUCCESS)
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3304: "
+			"Request for SwitchInfo from 0x%" PRIx64 " failed (%s)\n",
+			cl_ntoh64(osm_node_get_node_guid(p_sw_node)),
+			ib_get_err_str(rc));
+
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/**********************************************************************
+ Initiate a remote port info request for the given physical port
+
**********************************************************************/
+static void state_mgr_get_remote_port_info(IN osm_sm_t * sm,
+					   IN osm_physp_t * p_physp)
+{
+	osm_dr_path_t *p_local_path;
+	osm_dr_path_t remote_path;
+	osm_madw_context_t ctx;
+	ib_api_status_t rc;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	/*
+	 * Build the directed route to the remote node: start from a copy
+	 * of the path to p_physp and add one hop out of that port.
+	 */
+	p_local_path = osm_physp_get_dr_path_ptr(p_physp);
+	remote_path = *p_local_path;
+	if (osm_dr_path_extend(&remote_path, osm_physp_get_port_num(p_physp))) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 332D: "
+			"DR path with hop count %d couldn't be extended "
+			"so skipping PortInfo query\n",
+			p_local_path->hop_count);
+		goto Exit;
+	}
+
+	/* describe the request for whoever handles the response */
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.pi_context.port_guid = p_physp->port_guid;
+	ctx.pi_context.node_guid =
+	    osm_node_get_node_guid(osm_physp_get_node_ptr(p_physp));
+	ctx.pi_context.light_sweep = TRUE;
+	ctx.pi_context.set_method = FALSE;
+	ctx.pi_context.active_transition = FALSE;
+	ctx.pi_context.client_rereg = FALSE;
+
+	/*
+	 * Negative logic: if this query fails there is no point in
+	 * escalating to a heavy sweep.
+	 */
+	rc = osm_req_get(sm, &remote_path, IB_MAD_ATTR_PORT_INFO, 0,
+			 TRUE, 0, 0, CL_DISP_MSGID_NONE, &ctx);
+	if (rc != IB_SUCCESS)
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 332E: "
+			"Request for remote PortInfo from 0x%" PRIx64 " failed (%s)\n",
+			cl_ntoh64(p_physp->port_guid), ib_get_err_str(rc));
+
+Exit:
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/**********************************************************************
+ Initiates a thorough sweep of the subnet.
+ Used when there is suspicion that something on the subnet has changed.
+**********************************************************************/
+static ib_api_status_t state_mgr_sweep_hop_0(IN osm_sm_t * sm)
+{
+	ib_api_status_t status;
+	osm_dr_path_t dr_path;
+	osm_bind_handle_t h_bind;
+	uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX];
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	/* an all-zero path array is used to build the 0-hop route below */
+	memset(path_array, 0, sizeof(path_array));
+
+	/*
+	 * First, get the bind handle.
+	 */
+	h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl);
+	if (h_bind != OSM_BIND_INVALID_HANDLE) {
+		OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
+				"INITIATING HEAVY SWEEP");
+		/*
+		 * Start the sweep by clearing the port counts, then
+		 * get our own NodeInfo at 0 hops.
+		 */
+		/* NOTE(review): the tables are modified under the
+		 * non-exclusive acquire - confirm this is intended. */
+		CL_PLOCK_ACQUIRE(sm->p_lock);
+
+		cl_qmap_apply_func(&sm->p_subn->node_guid_tbl,
+				   state_mgr_reset_node_count, sm);
+
+		cl_qmap_apply_func(&sm->p_subn->port_guid_tbl,
+				   state_mgr_reset_port_count, sm);
+
+		cl_qmap_apply_func(&sm->p_subn->sw_guid_tbl,
+				   state_mgr_reset_switch_count, sm);
+
+		/* Set the in_sweep_hop_0 flag in subn to be TRUE.
+		 * This will indicate the sweeping not to continue beyond
+		 * the current node.
+		 * This is relevant for the case of SM on switch, since in the
+		 * switch info we need to signal somehow not to continue
+		 * the sweeping. */
+		sm->p_subn->in_sweep_hop_0 = TRUE;
+
+		CL_PLOCK_RELEASE(sm->p_lock);
+
+		osm_dr_path_init(&dr_path, 0, path_array);
+		CL_PLOCK_ACQUIRE(sm->p_lock);
+		status = osm_req_get(sm, &dr_path, IB_MAD_ATTR_NODE_INFO, 0,
+				     TRUE, 0, 0, CL_DISP_MSGID_NONE, NULL);
+		CL_PLOCK_RELEASE(sm->p_lock);
+		if (status != IB_SUCCESS)
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3305: "
+				"Request for NodeInfo failed (%s)\n",
+				ib_get_err_str(status));
+	} else {
+		/* nothing is bound yet: report it and let the caller retry */
+		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+			"No bound ports. Deferring sweep...\n");
+		status = IB_INVALID_STATE;
+	}
+
+	OSM_LOG_EXIT(sm->p_log);
+	return status;
+}
+
+/**********************************************************************
+ Clear out all existing port lid assignments
+ Every slot of the subnet's port_lid_tbl vector is set to NULL.
+ Always returns IB_SUCCESS.
+**********************************************************************/
+static ib_api_status_t state_mgr_clean_known_lids(IN osm_sm_t * sm)
+{
+	ib_api_status_t status = IB_SUCCESS;
+	cl_ptr_vector_t *p_vec = &(sm->p_subn->port_lid_tbl);
+	uint32_t i;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	/* we need a lock here! */
+	/* NOTE(review): the vector is written under the non-exclusive
+	 * acquire - confirm this is intended. */
+	CL_PLOCK_ACQUIRE(sm->p_lock);
+
+	for (i = 0; i < cl_ptr_vector_get_size(p_vec); i++)
+		cl_ptr_vector_set(p_vec, i, NULL);
+
+	CL_PLOCK_RELEASE(sm->p_lock);
+
+	OSM_LOG_EXIT(sm->p_log);
+	return status;
+}
+
+/**********************************************************************
+ Clear SA cache
+ Under the exclusive subnet lock this removes:
+  - InformInfo and Service records (only when the
+    drop_event_subscriptions option is set),
+  - every multicast membership of every port,
+  - the per-physp alias GUID table,
+  - pending alias GUID work objects,
+  - the assigned GUIDs table,
+  - all alias GUID entries except the base port GUIDs,
+ and finally marks the SA database dirty.
+ Always returns IB_SUCCESS.
+**********************************************************************/
+static ib_api_status_t state_mgr_sa_clean(IN osm_sm_t * sm)
+{
+	ib_api_status_t status = IB_SUCCESS;
+	cl_qmap_t *p_port_guid_tbl;
+	osm_assigned_guids_t *p_assigned_guids, *p_next_assigned_guids;
+	osm_alias_guid_t *p_alias_guid, *p_next_alias_guid;
+	osm_mcm_port_t *mcm_port;
+	osm_subn_t * p_subn;
+	osm_port_t *p_port;
+	osm_infr_t *p_infr;
+	osm_svcr_t *p_svcr;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	p_subn = sm->p_subn;
+
+	/* we need a lock here! */
+	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
+
+	if (p_subn->opt.drop_event_subscriptions) {
+		/* Clean InformInfo records */
+		p_infr = (osm_infr_t *) cl_qlist_remove_head(&p_subn->sa_infr_list);
+		while (p_infr !=
+		       (osm_infr_t *) cl_qlist_end(&p_subn->sa_infr_list)) {
+			osm_infr_delete(p_infr);
+			p_infr = (osm_infr_t *) cl_qlist_remove_head(&p_subn->sa_infr_list);
+		}
+
+		/* For now, treat Service Records in same category as InformInfos */
+		/* Clean Service records */
+		p_svcr = (osm_svcr_t *) cl_qlist_remove_head(&p_subn->sa_sr_list);
+		while (p_svcr !=
+		       (osm_svcr_t *) cl_qlist_end(&p_subn->sa_sr_list)) {
+			osm_svcr_delete(p_svcr);
+			p_svcr = (osm_svcr_t *) cl_qlist_remove_head(&p_subn->sa_sr_list);
+		}
+	}
+
+	/* Clean Multicast member list on each port */
+	p_port_guid_tbl = &p_subn->port_guid_tbl;
+	for (p_port = (osm_port_t *) cl_qmap_head(p_port_guid_tbl);
+	     p_port != (osm_port_t *) cl_qmap_end(p_port_guid_tbl);
+	     p_port = (osm_port_t *) cl_qmap_next(&p_port->map_item)) {
+		/* the loop relies on osm_mgrp_delete_port() taking the
+		   head entry off p_port->mcm_list */
+		while (!cl_is_qlist_empty(&p_port->mcm_list)) {
+			mcm_port = cl_item_obj(cl_qlist_head(&p_port->mcm_list),
+					       mcm_port, list_item);
+			osm_mgrp_delete_port(p_subn, sm->p_log, mcm_port->mgrp,
+					     p_port);
+		}
+		/* Hack - clean alias guid table from physp */
+		/* a port without a physp has no alias table to drop */
+		if (p_port->p_physp) {
+			free(p_port->p_physp->p_guids);
+			p_port->p_physp->p_guids = NULL;
+		}
+	}
+
+	/* Clean Alias Guid work objects */
+	while (cl_qlist_count(&p_subn->alias_guid_list))
+		osm_guid_work_obj_delete((osm_guidinfo_work_obj_t *)
+			cl_qlist_remove_head(&p_subn->alias_guid_list));
+
+	/* Clean Assigned GUIDs table */
+	/* the successor is fetched before the current item is removed */
+	p_next_assigned_guids = (osm_assigned_guids_t *)
+		cl_qmap_head(&p_subn->assigned_guids_tbl);
+	while (p_next_assigned_guids !=
+	       (osm_assigned_guids_t *) cl_qmap_end(&p_subn->assigned_guids_tbl)) {
+		p_assigned_guids = p_next_assigned_guids;
+		p_next_assigned_guids = (osm_assigned_guids_t *)
+			cl_qmap_next(&p_assigned_guids->map_item);
+		cl_qmap_remove_item(&p_subn->assigned_guids_tbl,
+				    &p_assigned_guids->map_item);
+		osm_assigned_guids_delete(&p_assigned_guids);
+	}
+
+	/* Clean Alias GUIDs table */
+	p_next_alias_guid = (osm_alias_guid_t *)
+		cl_qmap_head(&p_subn->alias_port_guid_tbl);
+	while (p_next_alias_guid !=
+	       (osm_alias_guid_t *) cl_qmap_end(&p_subn->alias_port_guid_tbl)) {
+		p_alias_guid = p_next_alias_guid;
+		p_next_alias_guid = (osm_alias_guid_t *)
+			cl_qmap_next(&p_alias_guid->map_item);
+		if (osm_alias_guid_get_alias_guid(p_alias_guid) !=
+		    osm_alias_guid_get_base_guid(p_alias_guid)) {
+			/* Clean if it's not base port GUID */
+			cl_qmap_remove_item(&p_subn->alias_port_guid_tbl,
+					    &p_alias_guid->map_item);
+			osm_alias_guid_delete(&p_alias_guid);
+		}
+	}
+
+	p_subn->p_osm->sa.dirty = TRUE;
+
+	CL_PLOCK_RELEASE(sm->p_lock);
+	OSM_LOG_EXIT(sm->p_log);
+	return status;
+}
+
+/**********************************************************************
+ Notifies the transport layer that the local LID has changed,
+ which gives it a chance to update address vectors, etc.
+ Returns IB_ERROR when no port is bound, otherwise the status of
+ osm_vendor_local_lid_change().
+**********************************************************************/
+static ib_api_status_t state_mgr_notify_lid_change(IN osm_sm_t * sm)
+{
+	ib_api_status_t status;
+	osm_bind_handle_t h_bind;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	/*
+	 * First, get the bind handle.
+	 */
+	h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl);
+	if (h_bind == OSM_BIND_INVALID_HANDLE) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3306: "
+			"No bound ports\n");
+		status = IB_ERROR;
+		goto Exit;
+	}
+
+	/*
+	 * Notify the transport layer that we changed the local LID.
+	 */
+	status = osm_vendor_local_lid_change(h_bind);
+	if (status != IB_SUCCESS)
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3307: "
+			"Vendor LID update failed (%s)\n",
+			ib_get_err_str(status));
+
+Exit:
+	OSM_LOG_EXIT(sm->p_log);
+	return status;
+}
+
+/**********************************************************************
+ Returns true if the SM port is down.
+ The SM's port object must exist in the port_guid table.
+**********************************************************************/
+static boolean_t state_mgr_is_sm_port_down(IN osm_sm_t * sm)
+{
+	/* Every failure path reports the port as down, so start from that. */
+	uint8_t link_state = IB_LINK_DOWN;
+	ib_net64_t sm_guid;
+	osm_port_t *p_sm_port;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	sm_guid = sm->p_subn->sm_port_guid;
+
+	if (sm_guid == 0) {
+		/*
+		 * Our own port GUID is not known yet: assume the port is
+		 * down.
+		 */
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3308: "
+			"SM port GUID unknown\n");
+	} else {
+		CL_ASSERT(sm_guid);
+
+		CL_PLOCK_ACQUIRE(sm->p_lock);
+		p_sm_port = osm_get_port_by_guid(sm->p_subn, sm_guid);
+		if (p_sm_port == NULL) {
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3309: "
+				"SM port with GUID:%016" PRIx64 " is unknown\n",
+				cl_ntoh64(sm_guid));
+		} else {
+			CL_ASSERT(p_sm_port->p_physp);
+
+			/*
+			 * A switch whose port 0 is not an enhanced port 0
+			 * (base SP0) is reported as active; in every other
+			 * case the state stored in the physical port object
+			 * is used.
+			 */
+			if (!p_sm_port->p_node->sw ||
+			    ib_switch_info_is_enhanced_port0(&p_sm_port->p_node->sw->switch_info))
+				link_state =
+				    osm_physp_get_port_state(p_sm_port->p_physp);
+			else
+				link_state = IB_LINK_ACTIVE;
+
+			/* A port that was not discovered counts as down. */
+			if (p_sm_port->discovery_count == 0) {
+				OSM_LOG(sm->p_log, OSM_LOG_ERROR,
+					"ERR 330A: Failed to discover SM port\n");
+				link_state = IB_LINK_DOWN;
+			}
+		}
+		CL_PLOCK_RELEASE(sm->p_lock);
+	}
+
+	OSM_LOG_EXIT(sm->p_log);
+	return (link_state == IB_LINK_DOWN);
+}
+
+/**********************************************************************
+ Sweeps the node 1 hop away.
+ This sets off a "chain reaction" that causes discovery of the subnet.
+ Used when there is suspicion that something on the subnet has changed.
+**********************************************************************/
+static ib_api_status_t state_mgr_sweep_hop_1(IN osm_sm_t * sm)
+{
+	ib_api_status_t status = IB_SUCCESS;
+	osm_madw_context_t context;
+	osm_node_t *p_node;
+	osm_port_t *p_port;
+	osm_dr_path_t hop_1_path;
+	ib_net64_t port_guid;
+	uint8_t port_num;
+	uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX];
+	uint8_t num_ports;
+	osm_physp_t *p_ext_physp;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	/*
+	 * First, get our own port and node objects.
+	 */
+	port_guid = sm->p_subn->sm_port_guid;
+
+	CL_ASSERT(port_guid);
+
+	/* Set the in_sweep_hop_0 flag in subn to be FALSE.
+	 * This will indicate the sweeping to continue beyond
+	 * the current node.
+	 * This is relevant for the case of SM on switch, since in the
+	 * switch info we need to signal that the sweeping should
+	 * continue through the switch. */
+	sm->p_subn->in_sweep_hop_0 = FALSE;
+
+	p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
+	if (!p_port) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3310: "
+			"No SM port object\n");
+		status = IB_ERROR;
+		goto Exit;
+	}
+
+	p_node = p_port->p_node;
+	CL_ASSERT(p_node);
+
+	port_num = ib_node_info_get_local_port_num(&p_node->node_info);
+
+	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+		"Probing hop 1 on local port %u\n", port_num);
+
+	memset(path_array, 0, sizeof(path_array));
+	/* the hop_1 operations depend on the type of our node.
+	 * Currently - legal nodes that can host SM are SW and CA.
+	 * Routers are handled by the same code as CAs. */
+	switch (osm_node_get_type(p_node)) {
+	case IB_NODE_TYPE_CA:
+	case IB_NODE_TYPE_ROUTER:
+		memset(&context, 0, sizeof(context));
+		context.ni_context.node_guid = osm_node_get_node_guid(p_node);
+		context.ni_context.port_num = port_num;
+
+		path_array[1] = port_num;	/* one hop, leaving through the local port */
+
+		osm_dr_path_init(&hop_1_path, 1, path_array);
+		CL_PLOCK_ACQUIRE(sm->p_lock);
+		status = osm_req_get(sm, &hop_1_path, IB_MAD_ATTR_NODE_INFO, 0,
+				     TRUE, 0, 0, CL_DISP_MSGID_NONE, &context);
+		CL_PLOCK_RELEASE(sm->p_lock);
+		if (status != IB_SUCCESS)
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3311: "
+				"Request for NodeInfo failed (%s)\n",
+				ib_get_err_str(status));
+		break;
+
+	case IB_NODE_TYPE_SWITCH:
+		/* Need to go over all the ports of the switch, and send a
+		 * node_info from them. This doesn't include the port 0 of the
+		 * switch, which hosts the SM.
+		 * Note: We'll send another switchInfo on port 0, since if no
+		 * ports are connected, we still want to get some response, and
+		 * have the subnet come up.
+		 * status is overwritten by every request, so the value
+		 * returned is that of the last request issued in this loop.
+		 */
+		num_ports = osm_node_get_num_physp(p_node);
+		for (port_num = 1; port_num < num_ports; port_num++) {
+			/* go through the port only if the port is not DOWN */
+			p_ext_physp = osm_node_get_physp_ptr(p_node, port_num);
+			if (p_ext_physp && ib_port_info_get_port_state
+			    (&(p_ext_physp->port_info)) > IB_LINK_DOWN) {
+				memset(&context, 0, sizeof(context));
+				context.ni_context.node_guid =
+				    osm_node_get_node_guid(p_node);
+				context.ni_context.port_num = port_num;
+
+				path_array[1] = port_num;	/* one hop, leaving through this switch port */
+				osm_dr_path_init(&hop_1_path, 1, path_array);
+				CL_PLOCK_ACQUIRE(sm->p_lock);
+				status = osm_req_get(sm, &hop_1_path,
+						     IB_MAD_ATTR_NODE_INFO, 0,
+						     TRUE, 0, 0,
+						     CL_DISP_MSGID_NONE,
+						     &context);
+				CL_PLOCK_RELEASE(sm->p_lock);
+				if (status != IB_SUCCESS)
+					OSM_LOG(sm->p_log, OSM_LOG_ERROR,
+						"ERR 3312: "
+						"Request for NodeInfo failed (%s)\n",
+						ib_get_err_str(status));
+			}
+		}
+		break;
+
+	default:	/* only logged: status keeps its initial IB_SUCCESS */
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR,
+			"ERR 3313: Unknown node type %d (%s)\n",
+			osm_node_get_type(p_node), p_node->print_desc);
+	}
+
+Exit:
+	OSM_LOG_EXIT(sm->p_log);
+	return status;
+}
+/**********************************************************************
+ Map callback (applied to sm_guid_tbl by state_mgr_light_sweep_start):
+ sends a Get of the SMInfo attribute to the remote SM described by
+ 'item'. 'cxt' is the osm_sm_t. The request is sent over the directed
+ route stored in the remote SM port's physp and is flagged as part of
+ a light sweep. Logs ERR 3340 when the port object cannot be found and
+ ERR 3314 when the request fails.
+**********************************************************************/
+static void query_sm_info(cl_map_item_t * item, void *cxt)
+{
+	osm_madw_context_t context;
+	osm_remote_sm_t *r_sm = cl_item_obj(item, r_sm, map_item);
+	osm_sm_t *sm = cxt;
+	ib_api_status_t ret;
+	osm_port_t *p_port;
+
+	p_port= osm_get_port_by_guid(sm->p_subn, r_sm->smi.guid);
+	if (p_port == NULL) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3340: "
+			"No port object on given sm object\n");
+		return;
+	}
+
+	context.smi_context.port_guid = r_sm->smi.guid;
+	context.smi_context.set_method = FALSE;
+	context.smi_context.light_sweep = TRUE;
+
+	ret = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_port->p_physp),
+			  IB_MAD_ATTR_SM_INFO, 0, FALSE,
+			  ib_port_info_get_m_key(&p_port->p_physp->port_info),
+			  0, CL_DISP_MSGID_NONE, &context);
+	if (ret != IB_SUCCESS)
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3314: "
+			"Failure requesting SMInfo (%s)\n",
+			ib_get_err_str(ret));
+}
+/**********************************************************************
+ Map callback (applied to sw_guid_tbl by
+ osm_reset_switch_state_change_bit): 'obj' is the osm_switch_t,
+ 'context' the osm_sm_t. When the cached SwitchInfo of the switch has
+ its state change bit set, a Set of the SwitchInfo attribute is sent
+ through port 0 of the switch, carrying a copy of the cached
+ SwitchInfo on which ib_switch_info_state_change_set() was applied.
+ Nothing is sent otherwise. A send failure is logged as ERR 332A.
+**********************************************************************/
+static void state_mgr_reset_state_change_bit(IN cl_map_item_t * obj,
+					     IN void *context)
+{
+	osm_madw_context_t mad_context;
+	osm_switch_t *p_sw = (osm_switch_t *) obj;
+	osm_sm_t *sm = context;
+	osm_node_t *p_node;
+	osm_physp_t *p_physp;
+	osm_dr_path_t *p_path;
+	ib_api_status_t status;
+	ib_switch_info_t si;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	CL_ASSERT(p_sw);
+
+	p_node = p_sw->p_node;
+
+	CL_ASSERT(p_node);
+
+	p_physp = osm_node_get_physp_ptr(p_node, 0);
+	p_path = osm_physp_get_dr_path_ptr(p_physp);
+
+	if (!ib_switch_info_get_state_change(&p_sw->switch_info))
+		goto exit;
+
+	si = p_sw->switch_info;	/* work on a copy; the cached SwitchInfo is left untouched */
+
+	ib_switch_info_state_change_set(&si);
+
+	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+		"Resetting PortStateChange on switch GUID 0x%016" PRIx64 "\n",
+		cl_ntoh64(osm_node_get_node_guid(p_node)));
+
+	mad_context.si_context.light_sweep = FALSE;
+	mad_context.si_context.node_guid = osm_node_get_node_guid(p_node);
+	mad_context.si_context.set_method = TRUE;
+	mad_context.si_context.lft_top_change = FALSE;
+
+	status = osm_req_set(sm, p_path, (uint8_t *) &si,
+			     sizeof(si), IB_MAD_ATTR_SWITCH_INFO,
+			     0, FALSE,
+			     ib_port_info_get_m_key(&p_physp->port_info),
+			     0, CL_DISP_MSGID_NONE, &mad_context);
+
+	if (status != IB_SUCCESS)
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 332A: "
+			"Sending SwitchInfo attribute failed (%s)\n",
+			ib_get_err_str(status));
+
+exit:
+	OSM_LOG_EXIT(sm->p_log);
+}
+/**********************************************************************
+ Map callback (applied to node_guid_tbl by osm_update_node_desc and
+ called directly by state_mgr_get_node_desc): 'obj' is the osm_node_t,
+ 'context' the osm_sm_t. Sends a Get of the NodeDescription attribute
+ to the node over the directed route of the first physical port
+ object found on it. Logs ERR 331C when the node has no physical port
+ object and ERR 331B when the request cannot be initiated.
+**********************************************************************/
+static void state_mgr_update_node_desc(IN cl_map_item_t * obj, IN void *context)
+{
+	osm_madw_context_t mad_context;
+	osm_node_t *p_node = (osm_node_t *) obj;
+	osm_sm_t *sm = context;
+	osm_physp_t *p_physp = NULL;
+	unsigned i, num_ports;
+	ib_api_status_t status;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	CL_ASSERT(p_node);
+
+	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+		"Updating NodeDesc for 0x%016" PRIx64 "\n",
+		cl_ntoh64(osm_node_get_node_guid(p_node)));
+
+	/* get a physp to request from:
+	 * the lowest port number with a non-NULL physp is used. */
+	num_ports = osm_node_get_num_physp(p_node);
+	for (i = 0; i < num_ports; i++)
+		if ((p_physp = osm_node_get_physp_ptr(p_node, i)))
+			break;
+
+	if (!p_physp) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 331C: "
+			"Failed to find any valid physical port object.\n");
+		goto exit;
+	}
+
+	mad_context.nd_context.node_guid = osm_node_get_node_guid(p_node);
+
+	status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
+			     IB_MAD_ATTR_NODE_DESC, 0, FALSE,
+			     ib_port_info_get_m_key(&p_physp->port_info),
+			     0, CL_DISP_MSGID_NONE, &mad_context);
+	if (status != IB_SUCCESS)
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR,
+			"ERR 331B: Failure initiating NodeDescription request "
+			"(%s) to 0x%016" PRIx64 "\n", ib_get_err_str(status),
+			cl_ntoh64(osm_node_get_node_guid(p_node)));
+
+exit:
+	OSM_LOG_EXIT(sm->p_log);
+}
+/**********************************************************************
+ Applies state_mgr_reset_state_change_bit to every entry of the
+ switch table while holding osm->lock.
+**********************************************************************/
+void osm_reset_switch_state_change_bit(IN osm_opensm_t *osm)
+{
+	CL_PLOCK_ACQUIRE(&osm->lock);
+	cl_qmap_apply_func(&osm->subn.sw_guid_tbl, state_mgr_reset_state_change_bit,
+			   &osm->sm);
+	CL_PLOCK_RELEASE(&osm->lock);
+}
+/**********************************************************************
+ Applies state_mgr_update_node_desc to every entry of the node table
+ while holding osm->lock.
+**********************************************************************/
+void osm_update_node_desc(IN osm_opensm_t *osm)
+{
+	CL_PLOCK_ACQUIRE(&osm->lock);
+	cl_qmap_apply_func(&osm->subn.node_guid_tbl, state_mgr_update_node_desc,
+			   &osm->sm);
+	CL_PLOCK_RELEASE(&osm->lock);
+}
+
+/**********************************************************************
+ During a light sweep, check each node to see if the node description
+ is valid and if not issue a ND query.
+ A description counts as valid when print_desc is non-NULL and differs
+ from OSM_NODE_DESC_UNKNOWN. Otherwise ERR 3319 is logged and
+ state_mgr_update_node_desc() is called for the node.
+**********************************************************************/
+static void state_mgr_get_node_desc(IN cl_map_item_t * obj, IN void *context)
+{
+	osm_node_t *p_node = (osm_node_t *) obj;
+	osm_sm_t *sm = context;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	CL_ASSERT(p_node);
+
+	if (p_node->print_desc
+	    && strcmp(p_node->print_desc, OSM_NODE_DESC_UNKNOWN))
+		/* if ND is valid, do nothing */
+		goto exit;
+
+	OSM_LOG(sm->p_log, OSM_LOG_ERROR,
+		"ERR 3319: Unknown node description for node GUID "
+		"0x%016" PRIx64 ". Reissuing ND query\n",
+		cl_ntoh64(osm_node_get_node_guid(p_node)));
+
+	state_mgr_update_node_desc(obj, context);
+
+exit:
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/**********************************************************************
+ Initiates a lightweight sweep of the subnet.
+ Used during normal sweeps after the subnet is up.
+ Returns IB_INVALID_STATE when no port is bound, IB_SUCCESS otherwise
+ (failures of the individual requests do not change the status).
+ Steps, each under sm->p_lock:
+  1. state_mgr_get_sw_info is applied to every switch,
+  2. state_mgr_get_node_desc is applied to every node,
+  3. every port numbered 1 and up that is not down but has no remote
+     port is logged (ERR 3315) and handed to
+     state_mgr_get_remote_port_info,
+  4. query_sm_info is applied to every known remote SM.
+**********************************************************************/
+static ib_api_status_t state_mgr_light_sweep_start(IN osm_sm_t * sm)
+{
+	ib_api_status_t status = IB_SUCCESS;
+	osm_bind_handle_t h_bind;
+	cl_qmap_t *p_sw_tbl;
+	cl_map_item_t *p_next;
+	osm_node_t *p_node;
+	osm_physp_t *p_physp;
+	uint8_t port_num;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	p_sw_tbl = &sm->p_subn->sw_guid_tbl;
+
+	/*
+	 * First, get the bind handle.
+	 */
+	h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl);
+	if (h_bind == OSM_BIND_INVALID_HANDLE) {
+		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+			"No bound ports. Deferring sweep...\n");
+		status = IB_INVALID_STATE;
+		goto _exit;
+	}
+
+	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "INITIATING LIGHT SWEEP");
+	CL_PLOCK_ACQUIRE(sm->p_lock);
+	cl_qmap_apply_func(p_sw_tbl, state_mgr_get_sw_info, sm);
+	CL_PLOCK_RELEASE(sm->p_lock);
+
+	CL_PLOCK_ACQUIRE(sm->p_lock);
+	cl_qmap_apply_func(&sm->p_subn->node_guid_tbl, state_mgr_get_node_desc,
+			   sm);
+	CL_PLOCK_RELEASE(sm->p_lock);
+
+	/* now scan the list of physical ports that were not down but have no remote port */
+	CL_PLOCK_ACQUIRE(sm->p_lock);
+	p_next = cl_qmap_head(&sm->p_subn->node_guid_tbl);
+	while (p_next != cl_qmap_end(&sm->p_subn->node_guid_tbl)) {
+		p_node = (osm_node_t *) p_next;
+		p_next = cl_qmap_next(p_next);
+
+		for (port_num = 1; port_num < osm_node_get_num_physp(p_node);
+		     port_num++) {
+			p_physp = osm_node_get_physp_ptr(p_node, port_num);
+			if (p_physp && (osm_physp_get_port_state(p_physp) !=
+					IB_LINK_DOWN)
+			    && !osm_physp_get_remote(p_physp)) {
+				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3315: "
+					"Unknown remote side for node 0x%016"
+					PRIx64
+					" (%s) port %u. Adding to light sweep sampling list\n",
+					cl_ntoh64(osm_node_get_node_guid
+						  (p_node)),
+					p_node->print_desc, port_num);
+
+				osm_dump_dr_path_v2(sm->p_log,
+						    osm_physp_get_dr_path_ptr
+						    (p_physp), FILE_ID, OSM_LOG_ERROR);
+
+				state_mgr_get_remote_port_info(sm, p_physp);
+			}
+		}
+	}
+
+	cl_qmap_apply_func(&sm->p_subn->sm_guid_tbl, query_sm_info, sm);	/* still under the lock taken for the port scan */
+
+	CL_PLOCK_RELEASE(sm->p_lock);
+
+_exit:
+	OSM_LOG_EXIT(sm->p_log);
+	return status;
+}
+
+/**********************************************************************
+ * Go over all the remote SMs (as updated in the sm_guid_tbl).
+ * Find if there is a remote sm that is a master SM.
+ * If there is a remote master SM - return a pointer to it,
+ * else - return NULL.
+ * The first entry found in MASTER state is returned.
+ **********************************************************************/
+static osm_remote_sm_t *state_mgr_exists_other_master_sm(IN osm_sm_t * sm)
+{
+	cl_qmap_t *p_sm_tbl;
+	osm_remote_sm_t *p_sm;
+	osm_remote_sm_t *p_sm_res = NULL;
+	osm_node_t *p_node;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	p_sm_tbl = &sm->p_subn->sm_guid_tbl;
+
+	/* go over all the remote SMs */
+	for (p_sm = (osm_remote_sm_t *) cl_qmap_head(p_sm_tbl);
+	     p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_tbl);
+	     p_sm = (osm_remote_sm_t *) cl_qmap_next(&p_sm->map_item)) {
+		/* If the sm is in MASTER state - return a pointer to it */
+		p_node = osm_get_node_by_guid(sm->p_subn, p_sm->smi.guid);	/* used only for the log message below */
+		if (ib_sminfo_get_state(&p_sm->smi) == IB_SMINFO_STATE_MASTER) {
+			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+				"Found remote master SM with guid:0x%016" PRIx64
+				" (node %s)\n", cl_ntoh64(p_sm->smi.guid),
+				p_node ? p_node->print_desc : "UNKNOWN");
+			p_sm_res = p_sm;
+			goto Exit;
+		}
+	}
+
+Exit:
+	OSM_LOG_EXIT(sm->p_log);
+	return p_sm_res;
+}
+
+/**********************************************************************
+ * Go over all remote SMs (as updated in the sm_guid_tbl).
+ * Find the one with the highest priority and lowest guid.
+ * Compare this SM to the local SM.
If the local SM is higher -
+ * return NULL, if the remote SM is higher - return a pointer to it.
+ **********************************************************************/
+static osm_remote_sm_t *state_mgr_get_highest_sm(IN osm_sm_t * sm)
+{
+	cl_qmap_t *p_tbl;
+	cl_map_item_t *p_item;
+	osm_remote_sm_t *p_candidate;
+	osm_remote_sm_t *p_best = NULL;
+	osm_node_t *p_best_node;
+	uint8_t best_priority;
+	ib_net64_t best_guid;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	p_tbl = &sm->p_subn->sm_guid_tbl;
+
+	/*
+	 * The local SM is the initial reference: a remote SM is selected
+	 * only if it compares greater than the best seen so far.
+	 */
+	best_priority = sm->p_subn->opt.sm_priority;
+	best_guid = sm->p_subn->sm_port_guid;
+
+	/* Walk every remote SM known to the subnet. */
+	p_item = cl_qmap_head(p_tbl);
+	while (p_item != cl_qmap_end(p_tbl)) {
+		p_candidate = (osm_remote_sm_t *) p_item;
+		p_item = cl_qmap_next(p_item);
+
+		/* SMs in NOTACTIVE state never take part in the election. */
+		if (ib_sminfo_get_state(&p_candidate->smi) !=
+		    IB_SMINFO_STATE_NOTACTIVE &&
+		    osm_sm_is_greater_than(ib_sminfo_get_priority(&p_candidate->smi),
+					   p_candidate->smi.guid,
+					   best_priority, best_guid)) {
+			/* New leader: remember it and raise the bar. */
+			p_best = p_candidate;
+			best_priority =
+			    ib_sminfo_get_priority(&p_candidate->smi);
+			best_guid = p_candidate->smi.guid;
+		}
+	}
+
+	if (p_best) {
+		p_best_node = osm_get_node_by_guid(sm->p_subn,
+						   p_best->smi.guid);
+		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+			"Found higher priority SM with guid: %016" PRIx64 " (node %s)\n",
+			cl_ntoh64(p_best->smi.guid),
+			p_best_node ? p_best_node->print_desc : "UNKNOWN");
+	}
+	OSM_LOG_EXIT(sm->p_log);
+	return p_best;
+}
+
+/**********************************************************************
+ * Send SubnSet(SMInfo) SMP with HANDOVER attribute to the
+ * remote_sm indicated.
+ **********************************************************************/ +static void state_mgr_send_handover(IN osm_sm_t * sm, IN osm_remote_sm_t * p_sm) +{ + uint8_t payload[IB_SMP_DATA_SIZE]; + ib_sm_info_t *p_smi = (ib_sm_info_t *) payload; + osm_madw_context_t context; + const osm_port_t *p_port; + ib_api_status_t status; + + OSM_LOG_ENTER(sm->p_log); + + /* + * Send a query of SubnSet(SMInfo) HANDOVER to the remote sm given. + */ + + memset(&context, 0, sizeof(context)); + p_port = osm_get_port_by_guid(sm->p_subn, p_sm->smi.guid); + if (p_port == NULL) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3316: " + "No port object on given remote_sm object\n"); + goto Exit; + } + + /* update the master_guid in the sm_state_mgr object according to */ + /* the guid of the port where the new Master SM should reside. */ + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Handing over mastership. Updating sm_state_mgr master_guid: %016" + PRIx64 " (node %s)\n", cl_ntoh64(p_port->guid), + p_port->p_node ? p_port->p_node->print_desc : "UNKNOWN"); + sm->master_sm_guid = p_port->guid; + + context.smi_context.port_guid = p_port->guid; + context.smi_context.set_method = TRUE; + + memset(payload, 0, sizeof(payload)); + p_smi->guid = sm->p_subn->sm_port_guid; + p_smi->act_count = cl_hton32(sm->p_subn->p_osm->stats.qp0_mads_sent); + p_smi->pri_state = (uint8_t) (sm->p_subn->sm_state | + sm->p_subn->opt.sm_priority << 4); + p_smi->sm_key = sm->p_subn->opt.sm_key; + + CL_PLOCK_ACQUIRE(sm->p_lock); + status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_port->p_physp), + payload, sizeof(payload), IB_MAD_ATTR_SM_INFO, + IB_SMINFO_ATTR_MOD_HANDOVER, FALSE, + ib_port_info_get_m_key(&p_port->p_physp->port_info), + 0, CL_DISP_MSGID_NONE, &context); + CL_PLOCK_RELEASE(sm->p_lock); + + if (status != IB_SUCCESS) + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3317: " + "Failure requesting SMInfo (%s), remote SM at 0x%" + PRIx64 " (node %s)\n", + ib_get_err_str(status), cl_ntoh64(p_port->guid), + p_port->p_node ? 
p_port->p_node->print_desc : "UNKNOWN"); + +Exit: + OSM_LOG_EXIT(sm->p_log); +} + +/********************************************************************** + * Send Trap 64 on all new ports. + **********************************************************************/ +static void state_mgr_report_new_ports(IN osm_sm_t * sm) +{ + ib_gid_t port_gid; + ib_mad_notice_attr_t notice; + ib_api_status_t status; + ib_net64_t port_guid; + cl_map_item_t *p_next; + osm_port_t *p_port; + uint16_t min_lid_ho; + uint16_t max_lid_ho; + + OSM_LOG_ENTER(sm->p_log); + + CL_PLOCK_ACQUIRE(sm->p_lock); + p_next = cl_qmap_head(&sm->p_subn->port_guid_tbl); + while (p_next != cl_qmap_end(&sm->p_subn->port_guid_tbl)) { + p_port = (osm_port_t *) p_next; + p_next = cl_qmap_next(p_next); + + if (!p_port->is_new) + continue; + + port_guid = osm_port_get_guid(p_port); + /* issue a notice - trap 64 (SM_GID_IN_SERVICE_TRAP) */ + + /* details of the notice */ + notice.generic_type = 0x80 | IB_NOTICE_TYPE_SUBN_MGMT; /* is generic subn mgt type */ + ib_notice_set_prod_type_ho(¬ice, 4); /* A Class Manager generator */ + /* endport becomes reachable */ + notice.g_or_v.generic.trap_num = CL_HTON16(SM_GID_IN_SERVICE_TRAP); /* 64 */ + /* The sm_base_lid is saved in network order already. */ + notice.issuer_lid = sm->p_subn->sm_base_lid; + /* following C14-72.1.1 and table 119 p739 */ + /* we need to provide the GID */ + port_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix; + port_gid.unicast.interface_id = port_guid; + memcpy(&(notice.data_details.ntc_64_67.gid), &(port_gid), + sizeof(ib_gid_t)); + + /* According to page 653 - the issuer gid in this case of trap + * is the SM gid, since the SM is the initiator of this trap. 
*/ + notice.issuer_gid.unicast.prefix = + sm->p_subn->opt.subnet_prefix; + notice.issuer_gid.unicast.interface_id = + sm->p_subn->sm_port_guid; + + status = osm_report_notice(sm->p_log, sm->p_subn, ¬ice); + if (status != IB_SUCCESS) + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3318: " + "Error sending trap reports on GUID:0x%016" + PRIx64 " (%s)\n", port_gid.unicast.interface_id, + ib_get_err_str(status)); + osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho); + OSM_LOG(sm->p_log, OSM_LOG_INFO, + "Discovered new port with GUID:0x%016" PRIx64 + " LID range [%u,%u] of node: %s\n", + cl_ntoh64(port_gid.unicast.interface_id), + min_lid_ho, max_lid_ho, + p_port->p_node ? p_port->p_node-> + print_desc : "UNKNOWN"); + + p_port->is_new = 0; + } + CL_PLOCK_RELEASE(sm->p_lock); + + OSM_LOG_EXIT(sm->p_log); +} + +/********************************************************************** + * Make sure that the lid_port_tbl of the subnet has only the ports + * that are recognized, and in the correct lid place. There could be + * errors if we wanted to assign a certain port with lid X, but that + * request didn't reach the port. In this case port_lid_tbl will have + * the port under lid X, though the port isn't updated with this lid. + * We will run a new heavy sweep (since there were errors in the + * initialization), but here we'll clean the database from incorrect + * information. 
+ **********************************************************************/
+static void state_mgr_check_tbl_consistency(IN osm_sm_t * sm)
+{
+	cl_qmap_t *p_port_guid_tbl;
+	osm_port_t *p_port;
+	osm_port_t *p_next_port;
+	cl_ptr_vector_t *p_port_lid_tbl;
+	size_t max_lid, ref_size, curr_size, lid;
+	osm_port_t *p_port_ref, *p_port_stored;
+	cl_ptr_vector_t ref_port_lid_tbl;
+	uint16_t min_lid_ho;
+	uint16_t max_lid_ho;
+	uint16_t lid_ho;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	if (sm->lid_mgr.dirty == FALSE)	/* the check runs only when lid_mgr.dirty is set */
+		goto Exit;
+
+	sm->lid_mgr.dirty = FALSE;
+
+	cl_ptr_vector_construct(&ref_port_lid_tbl);
+	cl_ptr_vector_init(&ref_port_lid_tbl,
+			   cl_ptr_vector_get_size(&sm->p_subn->port_lid_tbl),
+			   OSM_SUBNET_VECTOR_GROW_SIZE);
+
+	p_port_guid_tbl = &sm->p_subn->port_guid_tbl;
+
+	/* Let's go over all the ports according to port_guid_tbl,
+	 * and add the port to a reference port_lid_tbl.
+	 * Each port is stored under every LID of its LID range.
+	 * NOTE(review): lid_ho is a uint16_t, so the inner loop could not
+	 * terminate if max_lid_ho were ever 0xFFFF - confirm that the LID
+	 * range returned for a port can never reach that value. */
+	p_next_port = (osm_port_t *) cl_qmap_head(p_port_guid_tbl);
+	while (p_next_port != (osm_port_t *) cl_qmap_end(p_port_guid_tbl)) {
+		p_port = p_next_port;
+		p_next_port =
+		    (osm_port_t *) cl_qmap_next(&p_next_port->map_item);
+
+		osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
+		for (lid_ho = min_lid_ho; lid_ho <= max_lid_ho; lid_ho++)
+			cl_ptr_vector_set(&ref_port_lid_tbl, lid_ho, p_port);
+	}
+
+	p_port_lid_tbl = &sm->p_subn->port_lid_tbl;
+
+	ref_size = cl_ptr_vector_get_size(&ref_port_lid_tbl);
+	curr_size = cl_ptr_vector_get_size(p_port_lid_tbl);
+	/* They should be the same, but compare it anyway */
+	max_lid = (ref_size > curr_size) ? ref_size : curr_size;
+
+	for (lid = 1; lid < max_lid; lid++) {	/* index 0 is not compared */
+		p_port_ref = NULL;
+		p_port_stored = NULL;
+		cl_ptr_vector_at(p_port_lid_tbl, lid, (void *)&p_port_stored);
+		cl_ptr_vector_at(&ref_port_lid_tbl, lid, (void *)&p_port_ref);
+
+		if (p_port_stored == p_port_ref)
+			/* This is the "good" case - both entries are the
+			 * same for this lid. Nothing to do. */
+			continue;
+
+		if (p_port_ref == NULL) {
+			/* There is an object in the subnet database for this
+			 * lid, but no such object exists in the reference
+			 * port_list_tbl. This can occur if we wanted to assign
+			 * a certain port with some lid (different than the one
+			 * pre-assigned to it), and the port didn't get the
+			 * PortInfo Set request. Due to this, the port is
+			 * updated with its original lid in our database
+			 * rather than the new lid we wanted to give it.
+			 * (p_port_stored cannot be NULL here: it differs
+			 * from p_port_ref, which is NULL.) */
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3322: "
+				"lid %zu is wrongly assigned to port 0x%016"
+				PRIx64 " (\'%s\' port %u) in port_lid_tbl\n",
+				lid,
+				cl_ntoh64(osm_port_get_guid(p_port_stored)),
+				p_port_stored->p_node->print_desc,
+				p_port_stored->p_physp->port_num);
+		} else if (p_port_stored == NULL)
+			/* There is an object in the new database, but no
+			 * object in our subnet database. This is the matching
+			 * case of the prior check - the port still has its
+			 * original lid. */
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3323: "
+				"port 0x%016" PRIx64 " (\'%s\' port %u)"
+				" exists in new port_lid_tbl under lid %zu,"
+				" but missing in subnet port_lid_tbl db\n",
+				cl_ntoh64(osm_port_get_guid(p_port_ref)),
+				p_port_ref->p_node->print_desc,
+				p_port_ref->p_physp->port_num, lid);
+		else {
+			/* if we reached here then p_port_stored != p_port_ref.
+			 * We were trying to set a lid to p_port_stored, but
+			 * it didn't reach it, and p_port_ref also didn't get
+			 * the lid update. */
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3324: "
+				"lid %zu has port 0x%016" PRIx64
+				" (\'%s\' port %u) in new port_lid_tbl db, "
+				"and port 0x%016" PRIx64 " (\'%s\' port %u)"
+				" in subnet port_lid_tbl db\n", lid,
+				cl_ntoh64(osm_port_get_guid(p_port_ref)),
+				p_port_ref->p_node->print_desc,
+				p_port_ref->p_physp->port_num,
+				cl_ntoh64(osm_port_get_guid(p_port_stored)),
+				p_port_stored->p_node->print_desc,
+				p_port_stored->p_physp->port_num);
+		}
+
+		/*
+		 * Clear the lid of the port in order to ignore it
+		 * in routing phase
+		 */
+		if (p_port_stored) {
+			OSM_LOG(sm->p_log, OSM_LOG_INFO, "Clearing Lid for "
+				"port 0x%016" PRIx64 "\n",
+				cl_ntoh64(osm_port_get_guid(p_port_stored)));
+			osm_port_clear_base_lid(p_port_stored);
+			cl_ptr_vector_set(p_port_lid_tbl, lid, NULL);
+		}
+
+		/* Make sure we'll do another heavy sweep. */
+		sm->p_subn->subnet_initialization_error = TRUE;
+	}
+
+	cl_ptr_vector_destroy(&ref_port_lid_tbl);	/* the reference table is local to this check */
+
+Exit:
+	OSM_LOG_EXIT(sm->p_log);
+}
+/**********************************************************************
+ Map-style callback ('item' is an osm_switch_t, 'log' is handed to
+ osm_log_v2): logs ERR 331D when the switch has a new_lft whose first
+ max_lid_ho + 1 bytes differ from lft. Does nothing when new_lft is
+ NULL.
+**********************************************************************/
+static void check_switch_lft(cl_map_item_t * item, void *log)
+{
+	osm_switch_t *sw = (osm_switch_t *) item;
+
+	if (!sw->new_lft)
+		return;
+
+	if (memcmp(sw->lft, sw->new_lft, sw->max_lid_ho + 1))
+		osm_log_v2(log, OSM_LOG_ERROR, FILE_ID, "ERR 331D: "
+			   "LFT of switch 0x%016" PRIx64 " (%s) is not up to date\n",
+			   cl_ntoh64(sw->p_node->node_info.node_guid),
+			   sw->p_node->print_desc);
+}
+/**********************************************************************
+ Blocks until stats->qp0_mads_outstanding is zero or osm_exit_flag is
+ set. With HAVE_LIBPTHREAD the wait uses stats->cond under
+ stats->mutex; otherwise it waits on stats->event without a timeout.
+ Returns the value of osm_exit_flag, so a non-zero result means the
+ wait ended while the exit flag was set.
+**********************************************************************/
+int wait_for_pending_transactions(osm_stats_t * stats)
+{
+#ifdef HAVE_LIBPTHREAD
+	pthread_mutex_lock(&stats->mutex);
+	while (stats->qp0_mads_outstanding && !osm_exit_flag)
+		pthread_cond_wait(&stats->cond, &stats->mutex);
+	pthread_mutex_unlock(&stats->mutex);
+#else
+	while (1) {
+		unsigned count = stats->qp0_mads_outstanding;
+		if (!count || osm_exit_flag)
+			break;
+		cl_event_wait_on(&stats->event, EVENT_NO_TIMEOUT, TRUE);
+	}
+#endif
+	return osm_exit_flag;
+}
+
+static void do_sweep(osm_sm_t * sm)
+{
+	ib_api_status_t status;
+	osm_remote_sm_t *p_remote_sm;
+	unsigned config_parsed = 0;
+
+	if
(sm->p_subn->force_first_time_master_sweep) { + sm->p_subn->force_heavy_sweep = TRUE; + sm->p_subn->coming_out_of_standby = TRUE; + sm->p_subn->first_time_master_sweep = TRUE; + sm->p_subn->force_first_time_master_sweep = FALSE; + } + + /* after subnet initialization error, run heavy sweep */ + if (sm->p_subn->subnet_initialization_error) + sm->p_subn->force_heavy_sweep = TRUE; + + if (sm->p_subn->force_heavy_sweep) { + if (osm_subn_rescan_conf_files(sm->p_subn) < 0) + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 331A: " + "osm_subn_rescan_conf_file failed\n"); + else + config_parsed = 1; + } + + if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER && + sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING) + return; + + if (sm->p_subn->coming_out_of_standby) { + /* + * Need to force re-write of sm_base_lid to all ports + * to do that we want all the ports to be considered + * foreign + */ + state_mgr_clean_known_lids(sm); + + /* + * Need to clean SA cache when state changes to STANDBY + * after handover. + */ + state_mgr_sa_clean(sm); + + /* + * Need to reconfigure LFTs, PKEYs, and QoS on all switches + * when coming out of STANDBY + */ + sm->p_subn->need_update = 1; + } + + sm->master_sm_found = 0; + + /* + * If we already have switches, then try a light sweep. + * Otherwise, this is probably our first discovery pass + * or we are connected in loopback. In both cases do a + * heavy sweep. + * Note: If we are connected in loopback we want a heavy + * sweep, since we will not be getting any traps if there is + * a lost connection. 
+ */ + /* if we are in DISCOVERING state - this means it is either in + * initializing or wake up from STANDBY - run the heavy sweep */ + if (cl_qmap_count(&sm->p_subn->sw_guid_tbl) + && sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING + && sm->p_subn->opt.force_heavy_sweep == FALSE + && sm->p_subn->force_heavy_sweep == FALSE + && sm->p_subn->force_reroute == FALSE + && sm->p_subn->subnet_initialization_error == FALSE + && (state_mgr_light_sweep_start(sm) == IB_SUCCESS)) { + if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) + return; + if (!sm->p_subn->force_heavy_sweep) { + if (sm->p_subn->opt.sa_db_dump && + !osm_sa_db_file_dump(sm->p_subn->p_osm)) + osm_opensm_report_event(sm->p_subn->p_osm, + OSM_EVENT_ID_SA_DB_DUMPED, + NULL); + OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, + "LIGHT SWEEP COMPLETE"); + return; + } + } + + /* + * Unicast cache should be invalidated when subnet re-route is + * requested, and when OpenSM comes out of standby state. + */ + if (sm->p_subn->opt.use_ucast_cache && + (sm->p_subn->force_reroute || sm->p_subn->coming_out_of_standby)) + osm_ucast_cache_invalidate(&sm->ucast_mgr); + + /* + * If we don't need to do a heavy sweep and we want to do a reroute, + * just reroute only. 
+ */ + if (cl_qmap_count(&sm->p_subn->sw_guid_tbl) + && sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING + && sm->p_subn->opt.force_heavy_sweep == FALSE + && sm->p_subn->force_heavy_sweep == FALSE + && sm->p_subn->force_reroute == TRUE + && sm->p_subn->subnet_initialization_error == FALSE) { + /* Reset flag */ + sm->p_subn->force_reroute = FALSE; + + /* Re-program the switches fully */ + sm->p_subn->ignore_existing_lfts = TRUE; + + if (osm_ucast_mgr_process(&sm->ucast_mgr)) { + OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, + "REROUTE FAILED"); + return; + } + osm_qos_setup(sm->p_subn->p_osm); + + /* Reset flag */ + sm->p_subn->ignore_existing_lfts = FALSE; + + if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) + return; + + osm_congestion_control_setup(sm->p_subn->p_osm); + + if (osm_congestion_control_wait_pending_transactions(sm->p_subn->p_osm)) + return; + + if (!sm->p_subn->subnet_initialization_error) { + OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, + "REROUTE COMPLETE"); + osm_opensm_report_event(sm->p_subn->p_osm, + OSM_EVENT_ID_UCAST_ROUTING_DONE, + (void *) UCAST_ROUTING_REROUTE); + return; + } + } + + osm_opensm_report_event(sm->p_subn->p_osm, + OSM_EVENT_ID_HEAVY_SWEEP_START, NULL); + + /* go to heavy sweep */ +repeat_discovery: + + /* First of all - unset all flags */ + sm->p_subn->force_heavy_sweep = FALSE; + sm->p_subn->force_reroute = FALSE; + sm->p_subn->subnet_initialization_error = FALSE; + + /* Reset tracking values in case limiting component got removed + * from fabric. 
*/ + sm->p_subn->min_ca_mtu = IB_MAX_MTU; + sm->p_subn->min_ca_rate = IB_RATE_MAX; + sm->p_subn->min_data_vls = IB_MAX_NUM_VLS - 1; + sm->p_subn->min_sw_data_vls = IB_MAX_NUM_VLS - 1; + + /* rescan configuration updates */ + if (!config_parsed && osm_subn_rescan_conf_files(sm->p_subn) < 0) + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 331A: " + "osm_subn_rescan_conf_file failed\n"); + + if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER) + sm->p_subn->need_update = 1; + + status = state_mgr_sweep_hop_0(sm); + if (status != IB_SUCCESS || + wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) + return; + + if (state_mgr_is_sm_port_down(sm) == TRUE) { + if (sm->p_subn->last_sm_port_state) { + sm->p_subn->last_sm_port_state = 0; + state_mgr_sa_clean(sm); + osm_log_v2(sm->p_log, OSM_LOG_SYS, FILE_ID, + "SM port is down\n"); + OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, + "SM PORT DOWN"); + } + + /* Run the drop manager - we want to clear all records */ + osm_drop_mgr_process(sm); + + /* Move to DISCOVERING state */ + if (sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING) + osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVER); + osm_opensm_report_event(sm->p_subn->p_osm, + OSM_EVENT_ID_STATE_CHANGE, NULL); + return; + } else { + if (!sm->p_subn->last_sm_port_state) { + sm->p_subn->last_sm_port_state = 1; + osm_log_v2(sm->p_log, OSM_LOG_SYS, FILE_ID, + "SM port is up\n"); + OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, + "SM PORT UP"); + } + } + + status = state_mgr_sweep_hop_1(sm); + if (status != IB_SUCCESS || + wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) + return; + + /* discovery completed - check other sm presence */ + if (sm->master_sm_found) { + /* + * Call the sm_state_mgr with signal + * MASTER_OR_HIGHER_SM_DETECTED_DONE + */ + osm_sm_state_mgr_process(sm, + OSM_SM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED); + OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, + "ENTERING STANDBY STATE"); + /* notify master SM about us */ + osm_send_trap144(sm, 0); + 
osm_opensm_report_event(sm->p_subn->p_osm, + OSM_EVENT_ID_STATE_CHANGE, NULL); + return; + } + + /* if new sweep requested - don't bother with the rest */ + if (sm->p_subn->force_heavy_sweep) { + config_parsed = 0; + goto repeat_discovery; + } + + osm_opensm_report_event(sm->p_subn->p_osm, + OSM_EVENT_ID_HEAVY_SWEEP_DONE, NULL); + + OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "HEAVY SWEEP COMPLETE"); + + osm_drop_mgr_process(sm); + + /* If we are MASTER - get the highest remote_sm, and + * see if it is higher than our local sm. + */ + if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER) { + p_remote_sm = state_mgr_get_highest_sm(sm); + if (p_remote_sm != NULL) { + /* report new ports (trap 64) before leaving MASTER */ + state_mgr_report_new_ports(sm); + + /* need to handover the mastership + * to the remote sm, and move to standby */ + state_mgr_send_handover(sm, p_remote_sm); + osm_sm_state_mgr_process(sm, + OSM_SM_SIGNAL_HANDOVER_SENT); + return; + } else { + /* We are the highest sm - check to see if there is + * a remote SM that is in master state. */ + p_remote_sm = state_mgr_exists_other_master_sm(sm); + if (p_remote_sm != NULL) { + /* There is a remote SM that is master. + * need to wait for that SM to relinquish control + * of its portion of the subnet. C14-60.2.1. + * Also - need to start polling on that SM. */ + CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); + sm->polling_sm_guid = p_remote_sm->smi.guid; + CL_PLOCK_RELEASE(sm->p_lock); + osm_sm_state_mgr_process(sm, + OSM_SM_SIGNAL_WAIT_FOR_HANDOVER); + return; + } else if (sm->polling_sm_guid) { + /* Stop polling SM if it's not found */ + osm_sm_state_mgr_process(sm, + OSM_SM_SIGNAL_POLLING_TIMEOUT); + return; + } + } + } + + /* + * If we are not MASTER already - this means that we are + * in discovery state. 
call osm_sm_state_mgr with signal + * DISCOVERY_COMPLETED + */ + if (sm->p_subn->sm_state == IB_SMINFO_STATE_DISCOVERING) + osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVERY_COMPLETED); + + osm_reset_switch_state_change_bit(sm->p_subn->p_osm); + if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) + return; + + osm_pkey_mgr_process(sm->p_subn->p_osm); + + /* try to restore SA DB (this should be before lid_mgr + because we may want to disable clients reregistration + when SA DB is restored) */ + osm_sa_db_file_load(sm->p_subn->p_osm); + + if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) + return; + + OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, + "PKEY setup completed - STARTING SM LID CONFIG"); + + osm_lid_mgr_process_sm(&sm->lid_mgr); + if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) + return; + + OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, + "SM LID ASSIGNMENT COMPLETE - STARTING SUBNET LID CONFIG"); + state_mgr_notify_lid_change(sm); + + osm_lid_mgr_process_subnet(&sm->lid_mgr); + if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) + return; + + /* At this point we need to check the consistency of + * the port_lid_tbl under the subnet. There might be + * errors in it if PortInfo Set requests didn't reach + * their destination. */ + state_mgr_check_tbl_consistency(sm); + + OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, + "LID ASSIGNMENT COMPLETE - STARTING SWITCH TABLE CONFIG"); + + /* + * Proceed with unicast forwarding table configuration; if it fails + * return early to wait for a trap or the next sweep interval. + */ + + if (!sm->ucast_mgr.cache_valid || + osm_ucast_cache_process(&sm->ucast_mgr)) { + if (osm_ucast_mgr_process(&sm->ucast_mgr)) { + osm_ucast_cache_invalidate(&sm->ucast_mgr); + return; + } + } + + osm_qos_setup(sm->p_subn->p_osm); + + if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) + return; + + /* We are done setting all LFTs so clear the ignore existing. 
+ * From now on, as long as we are still master, we want to + * take into account these lfts. */ + sm->p_subn->ignore_existing_lfts = FALSE; + + OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, + "SWITCHES CONFIGURED FOR UNICAST"); + osm_opensm_report_event(sm->p_subn->p_osm, + OSM_EVENT_ID_UCAST_ROUTING_DONE, + (void *) UCAST_ROUTING_HEAVY_SWEEP); + + if (!sm->p_subn->opt.disable_multicast) { + osm_mcast_mgr_process(sm, TRUE); + if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) + return; + OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, + "SWITCHES CONFIGURED FOR MULTICAST"); + } + + osm_guid_mgr_process(sm); + if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) + return; + OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "ALIAS GUIDS CONFIGURED"); + + /* + * The LINK_PORTS state is required since we cannot count on + * the port state change MADs to succeed. This is an artifact + * of the spec defining state change from state X to state X + * as an error. The hardware then is not required to process + * other parameters provided by the Set(PortInfo) Packet. + */ + + osm_link_mgr_process(sm, IB_LINK_NO_CHANGE); + if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) + return; + + OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, + "LINKS PORTS CONFIGURED - SET LINKS TO ARMED STATE"); + + osm_link_mgr_process(sm, IB_LINK_ARMED); + if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) + return; + + OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, + "LINKS ARMED - SET LINKS TO ACTIVE STATE"); + + osm_link_mgr_process(sm, IB_LINK_ACTIVE); + if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) + return; + + /* + * The sweep completed! 
+ */ + + /* Now do GSI configuration */ + + osm_congestion_control_setup(sm->p_subn->p_osm); + + if (osm_congestion_control_wait_pending_transactions(sm->p_subn->p_osm)) + return; + + /* + * Send trap 64 on newly discovered endports + */ + state_mgr_report_new_ports(sm); + + /* check switch lft buffers assignments */ + cl_qmap_apply_func(&sm->p_subn->sw_guid_tbl, check_switch_lft, + sm->p_log); + + /* in any case we zero this flag */ + sm->p_subn->coming_out_of_standby = FALSE; + sm->p_subn->first_time_master_sweep = FALSE; + + /* If there were errors - then the subnet is not really up */ + if (sm->p_subn->subnet_initialization_error == TRUE) { + osm_log_v2(sm->p_log, OSM_LOG_SYS, FILE_ID, + "Errors during initialization\n"); + OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_ERROR, + "ERRORS DURING INITIALIZATION"); + } else { + sm->p_subn->need_update = 0; + osm_dump_all(sm->p_subn->p_osm); + state_mgr_up_msg(sm); + + if ((OSM_LOG_IS_ACTIVE_V2(sm->p_log, OSM_LOG_VERBOSE) || + sm->p_subn->opt.sa_db_dump) && + !osm_sa_db_file_dump(sm->p_subn->p_osm)) + osm_opensm_report_event(sm->p_subn->p_osm, + OSM_EVENT_ID_SA_DB_DUMPED, + NULL); + } + + /* + * Finally signal the subnet up event + */ + cl_event_signal(&sm->subnet_up_event); + + /* if we got a signal to force heavy sweep or errors + * in the middle of the sweep - try another sweep. 
*/
+	if (sm->p_subn->force_heavy_sweep)
+		osm_sm_signal(sm, OSM_SIGNAL_SWEEP);
+
+	/* Write new copies of our persistent guid2mkey and neighbor databases */
+	osm_db_store(sm->p_subn->p_g2m, sm->p_subn->opt.fsync_high_avail_files);
+	osm_db_store(sm->p_subn->p_neighbor,
+		     sm->p_subn->opt.fsync_high_avail_files);
+
+	osm_opensm_report_event(sm->p_subn->p_osm, OSM_EVENT_ID_SUBNET_UP,
+				NULL);
+}
+/*
+ * Handler for OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST (dispatched from
+ * osm_state_mgr_process() below).
+ *
+ * Does nothing unless this SM is in the MASTER state.  When multicast is
+ * not disabled in the options it runs osm_mcast_mgr_process(sm, FALSE) and
+ * then blocks in wait_for_pending_transactions(); the result of that wait
+ * is not examined.
+ */
+static void do_process_mgrp_queue(osm_sm_t * sm)
+{
+	if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER)
+		return;
+	if (!sm->p_subn->opt.disable_multicast) {
+		osm_mcast_mgr_process(sm, FALSE);
+		wait_for_pending_transactions(&sm->p_subn->p_osm->stats);
+	}
+}
+/*
+ * Handler for OSM_SIGNAL_GUID_PROCESS_REQUEST (dispatched from
+ * osm_state_mgr_process() below).
+ *
+ * Runs osm_guid_mgr_process() and waits for the pending transactions.
+ * Unlike do_process_mgrp_queue() there is no SM state check here, and the
+ * result of the wait is not examined.
+ */
+static void do_process_guid_queue(osm_sm_t *sm)
+{
+	osm_guid_mgr_process(sm);
+	wait_for_pending_transactions(&sm->p_subn->p_osm->stats);
+}
+/*
+ * State manager entry point: dispatch one signal.
+ *
+ * sm     - SM object; must be non-NULL (asserted).
+ * signal - OSM_SIGNAL_SWEEP runs do_sweep() unless sweeping is disabled on
+ *          the subnet; OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST and
+ *          OSM_SIGNAL_GUID_PROCESS_REQUEST run the two queue handlers
+ *          above; anything else asserts and logs ERR 3320.
+ */
+void osm_state_mgr_process(IN osm_sm_t * sm, IN osm_signal_t signal)
+{
+	CL_ASSERT(sm);
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Received signal %s in state %s\n",
+		osm_get_sm_signal_str(signal),
+		osm_get_sm_mgr_state_str(sm->p_subn->sm_state));
+
+	switch (signal) {
+	case OSM_SIGNAL_SWEEP:
+		if (!sm->p_subn->sweeping_enabled) {
+			/* sweep requests are dropped (debug log only) while disabled */
+			OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "sweeping disabled - "
+				"ignoring signal %s in state %s\n",
+				osm_get_sm_signal_str(signal),
+				osm_get_sm_mgr_state_str(sm->p_subn->sm_state));
+		} else
+			do_sweep(sm);
+		break;
+	case OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST:
+		do_process_mgrp_queue(sm);
+		break;
+	case OSM_SIGNAL_GUID_PROCESS_REQUEST:
+		do_process_guid_queue(sm);
+		break;
+	default:
+		/*
+		 * Unknown signal.  NOTE(review): the assertion comes before
+		 * the log call; if CL_ASSERT aborts in debug builds the
+		 * ERR 3320 message is never written - confirm this is
+		 * intended.
+		 */
+		CL_ASSERT(FALSE);
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3320: "
+			"Invalid SM signal %u\n", signal);
+		break;
+	}
+
+	OSM_LOG_EXIT(sm->p_log);
+}
diff --git a/opensm/osm_subnet.c b/opensm/osm_subnet.c
new file mode 100644
index 0000000..b8473fc
--- /dev/null
+++ b/opensm/osm_subnet.c
@@ -0,0 +1,3148 @@
+/*
+ * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved. + * Copyright (c) 2009 System Fabric Works, Inc. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * Copyright (c) 2009-2015 ZIH, TU Dresden, Federal Republic of Germany. All rights reserved. + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. + * Copyright (C) 2012-2017 Tokyo Institute of Technology. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_subn_t. + * This object represents an IBA subnet. 
+ * This object is part of the opensm family of objects.
+ */
+
+#if HAVE_CONFIG_H
+# include
+#endif /* HAVE_CONFIG_H */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#define FILE_ID OSM_FILE_SUBNET_C
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+/*
+ * Textual stand-in for a NULL string option: the string parsers in this
+ * file compare against it and map the literal "(null)" back to NULL.
+ */
+static const char null_str[] = "(null)";
+/*
+ * OPT_OFFSET: byte offset of member 'opt' inside osm_subn_opt_t.
+ * ARR_SIZE:   number of elements of a true array (not valid for pointers).
+ */
+#define OPT_OFFSET(opt) offsetof(osm_subn_opt_t, opt)
+#define ARR_SIZE(a) (sizeof(a)/sizeof((a)[0]))
+/*
+ * One row of the option table (opt_tbl).
+ *
+ *   name       - option key string.
+ *   opt_offset - offset of the option's storage inside osm_subn_opt_t
+ *                (built with OPT_OFFSET).
+ *   parse_fn   - parser: receives the subnet, the key, the value string,
+ *                two destination pointers and an optional callback.
+ *   setup_fn   - optional callback (may be NULL); it has the same type as
+ *                parse_fn's last parameter.  Presumably it is what gets
+ *                passed there - the call site is not in this part of the
+ *                file, confirm.
+ *   can_update - 0/1 flag.  Presumably tells whether the option may be
+ *                changed when the configuration is re-read - confirm
+ *                against the code that walks opt_tbl.
+ */
+typedef struct opt_rec {
+	const char *name;
+	unsigned long opt_offset;
+	void (*parse_fn)(osm_subn_t *p_subn, char *p_key, char *p_val_str,
+			 void *p_val1, void *p_val2,
+			 void (*)(osm_subn_t *, void *));
+	void (*setup_fn)(osm_subn_t *p_subn, void *p_val);
+	int can_update;
+} opt_rec_t;
+/*
+ * Source file names indexed by FILE_ID.  The position of a name in this
+ * array must equal the FILE_ID defined in that source file (see the
+ * comments at the end of the table).
+ */
+static const char *module_name_str[] = {
+	"main.c",
+	"osm_console.c",
+	"osm_console_io.c",
+	"osm_db_files.c",
+	"osm_db_pack.c",
+	"osm_drop_mgr.c",
+	"osm_dump.c",
+	"osm_event_plugin.c",
+	"osm_guid_info_rcv.c",
+	"osm_guid_mgr.c",
+	"osm_helper.c",
+	"osm_inform.c",
+	"osm_lid_mgr.c",
+	"osm_lin_fwd_rcv.c",
+	"osm_link_mgr.c",
+	"osm_log.c",
+	"osm_mad_pool.c",
+	"osm_mcast_fwd_rcv.c",
+	"osm_mcast_mgr.c",
+	"osm_mcast_tbl.c",
+	"osm_mcm_port.c",
+	"osm_mesh.c",
+	"osm_mlnx_ext_port_info_rcv.c",
+	"osm_mtree.c",
+	"osm_multicast.c",
+	"osm_node.c",
+	"osm_node_desc_rcv.c",
+	"osm_node_info_rcv.c",
+	"osm_opensm.c",
+	"osm_perfmgr.c",
+	"osm_perfmgr_db.c",
+	"osm_pkey.c",
+	"osm_pkey_mgr.c",
+	"osm_pkey_rcv.c",
+	"osm_port.c",
+	"osm_port_info_rcv.c",
+	"osm_prtn.c",
+	"osm_prtn_config.c",
+	"osm_qos.c",
+	"osm_qos_parser_l.c",
+	"osm_qos_parser_y.c",
+	"osm_qos_policy.c",
+	"osm_remote_sm.c",
+	"osm_req.c",
+	"osm_resp.c",
+	"osm_router.c",
+	"osm_sa.c",
+	"osm_sa_class_port_info.c",
+	"osm_sa_guidinfo_record.c",
+	"osm_sa_informinfo.c",
+	"osm_sa_lft_record.c",
+	"osm_sa_link_record.c",
+	"osm_sa_mad_ctrl.c",
+	"osm_sa_mcmember_record.c",
+	"osm_sa_mft_record.c",
+	"osm_sa_multipath_record.c",
+	"osm_sa_node_record.c",
+	"osm_sa_path_record.c",
+	"osm_sa_pkey_record.c",
+	"osm_sa_portinfo_record.c",
+	"osm_sa_service_record.c",
+	"osm_sa_slvl_record.c",
+	"osm_sa_sminfo_record.c",
+	"osm_sa_sw_info_record.c",
+	"osm_sa_vlarb_record.c",
+	"osm_service.c",
+	"osm_slvl_map_rcv.c",
+	"osm_sm.c",
+	"osm_sminfo_rcv.c",
+	"osm_sm_mad_ctrl.c",
+	"osm_sm_state_mgr.c",
+	"osm_state_mgr.c",
+	"osm_subnet.c",
+	"osm_sw_info_rcv.c",
+	"osm_switch.c",
+	"osm_torus.c",
+	"osm_trap_rcv.c",
+	"osm_ucast_cache.c",
+	"osm_ucast_dnup.c",
+	"osm_ucast_file.c",
+	"osm_ucast_ftree.c",
+	"osm_ucast_lash.c",
+	"osm_ucast_mgr.c",
+	"osm_ucast_updn.c",
+	"osm_vendor_ibumad.c",
+	"osm_vl15intf.c",
+	"osm_vl_arb_rcv.c",
+	"st.c",
+	"osm_ucast_dfsssp.c",
+	"osm_congestion_control.c",
+	"osm_ucast_nue.c",
+	/* Add new module names here ... */
+	/* FILE_ID define in those modules must be identical to index here */
+	/* last FILE_ID is currently 90 */
+};
+/* Number of entries in module_name_str, i.e. one past the last valid index. */
+#define MOD_NAME_STR_UNKNOWN_VAL (ARR_SIZE(module_name_str))
+/*
+ * Look up a source file name in module_name_str (exact strcmp match).
+ *
+ * name    - file name to search for; must not be NULL (passed to strcmp).
+ * file_id - optional output; receives the matching index when non-NULL.
+ *
+ * Returns 0 when found, 1 when not found.
+ *
+ * NOTE(review): the loop index is a uint8_t, so a miss only terminates
+ * the loop while the table has fewer than 256 entries.
+ */
+static int find_module_name(const char *name, uint8_t *file_id)
+{
+	uint8_t i;
+
+	for (i = 0; i < MOD_NAME_STR_UNKNOWN_VAL; i++) {
+		if (strcmp(name, module_name_str[i]) == 0) {
+			if (file_id)
+				*file_id = i;
+			return 0;
+		}
+	}
+	return 1;
+}
+/*
+ * printf-style report helper.  The message is formatted into a 128 byte
+ * stack buffer (longer output is truncated by vsnprintf), printed to
+ * stdout and also handed to cl_log_event() with CL_LOG_INFO.
+ */
+static void log_report(const char *fmt, ...)
+{
+	char buf[128];
+	va_list args;
+	va_start(args, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+	printf("%s", buf);
+	cl_log_event("OpenSM", CL_LOG_INFO, buf, NULL, 0);
+}
+/*
+ * Report one option value that is being loaded: builds
+ * " Loading Cached Option:<name> = <formatted value>\n" in a 128 byte
+ * buffer (truncating if necessary while keeping the trailing newline),
+ * prints it to stdout and passes it to cl_log_event() with CL_LOG_INFO.
+ */
+static void log_config_value(char *name, const char *fmt, ...)
+{
+	char buf[128];
+	va_list args;
+	unsigned n;
+	va_start(args, fmt);
+	n = snprintf(buf, sizeof(buf), " Loading Cached Option:%s = ", name);
+	if (n > sizeof(buf))	/* prefix was truncated: clamp the write offset */
+		n = sizeof(buf);
+	n += vsnprintf(buf + n, sizeof(buf) - n, fmt, args);
+	if (n > sizeof(buf) - 2)	/* keep room for "\n" and the terminator */
+		n = sizeof(buf) - 2;
+	snprintf(buf + n, sizeof(buf) - n, "\n");
+	va_end(args);
+	printf("%s", buf);
+	cl_log_event("OpenSM", CL_LOG_INFO, buf, NULL, 0);
+}
+/*
+ * The opts_setup_*() helpers below share the signature of opt_rec_t's
+ * setup_fn: p_val points at the freshly parsed value, which is applied to
+ * the running OpenSM instance reached through p_subn->p_osm.
+ *
+ * opts_setup_log_flags: p_val is a uint8_t; becomes the log level.
+ */
+static void opts_setup_log_flags(osm_subn_t *p_subn, void *p_val)
+{
+	p_subn->p_osm->log.level = *((uint8_t *) p_val);
+}
+/* opts_setup_force_log_flush: p_val is a boolean_t; sets the log flush flag. */
+static void opts_setup_force_log_flush(osm_subn_t *p_subn, void *p_val)
+{
+	p_subn->p_osm->log.flush = *((boolean_t *) p_val);
+}
+/* opts_setup_accum_log_file: p_val is a boolean_t; sets log.accum_log_file. */
+static void opts_setup_accum_log_file(osm_subn_t *p_subn, void *p_val)
+{
+	p_subn->p_osm->log.accum_log_file = *((boolean_t *) p_val);
+}
+/*
+ * opts_setup_log_max_size: p_val is a uint32_t size in MB; stored in
+ * log.max_size as bytes.  The value is widened to unsigned long before the
+ * shift by 20.
+ */
+static void opts_setup_log_max_size(osm_subn_t *p_subn, void *p_val)
+{
+	uint32_t log_max_size = *((uint32_t *) p_val);
+
+	p_subn->p_osm->log.max_size = (unsigned long)log_max_size << 20; /* convert from MB to bytes */
+}
+/*
+ * opts_setup_sminfo_polling_timeout: p_val is a uint32_t; the SM polling
+ * timer is stopped and started again with the new timeout value.
+ */
+static void opts_setup_sminfo_polling_timeout(osm_subn_t *p_subn, void *p_val)
+{
+	osm_sm_t *p_sm = &p_subn->p_osm->sm;
+	uint32_t sminfo_polling_timeout = *((uint32_t *) p_val);
+
+	cl_timer_stop(&p_sm->polling_timer);
+	cl_timer_start(&p_sm->polling_timer, sminfo_polling_timeout);
+}
+/*
+ * opts_setup_sm_priority: p_val is a uint8_t; forwarded to
+ * osm_set_sm_priority() for the SM object of this OpenSM instance.
+ */
+static void opts_setup_sm_priority(osm_subn_t *p_subn, void *p_val)
+{
+	osm_sm_t *p_sm = &p_subn->p_osm->sm;
+	uint8_t sm_priority = *((uint8_t *) p_val);
+
+	osm_set_sm_priority(p_sm, sm_priority);
+}
+/*
+ * Parse p_val_str as an unsigned number with strtoul() base 0 (so the
+ * "0x" and leading-"0" prefixes select hex and octal).
+ *
+ * val       - receives the parsed value; note that it is written before
+ *             validation, so it is also modified when -1 is returned.
+ * p_val_str - text to parse; must not be NULL.
+ * p_key     - option name, used only in the error reports.
+ * max_value - largest accepted value.
+ *
+ * Returns 0 on success.  Returns -1, after a log_report() message, when
+ * the string is empty, has characters left after the number, or the
+ * number is larger than max_value / overflowed strtoul().
+ */
+static int opts_strtoul(uint32_t *val, IN char *p_val_str,
+			IN char *p_key, uint32_t max_value)
+{
+	char *endptr;
+	unsigned long int tmp_val;
+
+	errno = 0;
+	tmp_val = strtoul(p_val_str, &endptr, 0);
+	*val = tmp_val;
+	if (*p_val_str == '\0' || *endptr != '\0') {
+		log_report("-E- Parsing error in field %s, expected "
+			   "numeric input received: %s\n", p_key, p_val_str);
+		return -1;
+	}
+	if (tmp_val > max_value ||
+	    ((tmp_val ==
ULONG_MAX) && errno == ERANGE)) {
+		log_report("-E- Parsing error in field %s, value out of range\n", p_key);
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * 64-bit variant of opts_strtoul(): parse p_val_str with strtoull()
+ * base 0.
+ *
+ * val       - receives the parsed value (written before validation, so it
+ *             is also modified when -1 is returned).
+ * p_val_str - text to parse; must not be NULL.
+ * p_key     - option name, used only in the error reports.
+ * max_value - largest accepted value.
+ *
+ * Returns 0 on success, -1 (after a log_report() message) for empty
+ * input, trailing characters, or an out-of-range value.
+ */
+static int opts_strtoull(uint64_t *val, IN char *p_val_str,
+			 IN char *p_key, uint64_t max_value)
+{
+	char *endptr;
+	unsigned long long int tmp_val;
+
+	errno = 0;
+	tmp_val = strtoull(p_val_str, &endptr, 0);
+	*val = tmp_val;
+	if (*p_val_str == '\0' || *endptr != '\0') {
+		log_report("-E- Parsing error in field %s, expected "
+			   "numeric input received: %s\n", p_key, p_val_str);
+		return -1;
+	}
+	if (tmp_val > max_value || (tmp_val == ULLONG_MAX && errno == ERANGE)) {
+		log_report("-E- Parsing error in field %s, value out of range\n", p_key);
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Option parser for 64-bit values that are stored in network byte order.
+ *
+ * p_key     - option name (for reporting).
+ * p_val_str - textual value in host notation.
+ * p_v1/p_v2 - two uint64_t destinations that are updated together.
+ * pfn       - optional callback; called with a pointer to the parsed
+ *             host-order value before the destinations are written.
+ *
+ * Nothing happens when parsing fails or when the value equals what *p_v1
+ * already holds.  The stored value is converted with cl_hton64(), the same
+ * conversion that is used for the comparison and by the 32/16-bit
+ * network-order parsers.
+ */
+static void opts_parse_net64(IN osm_subn_t *p_subn, IN char *p_key,
+			     IN char *p_val_str, void *p_v1, void *p_v2,
+			     void (*pfn)(osm_subn_t *, void *))
+{
+	uint64_t *p_val1 = p_v1, *p_val2 = p_v2;
+	uint64_t val;
+
+	if (opts_strtoull(&val, p_val_str, p_key, UINT64_MAX))
+		return;
+
+	if (cl_hton64(val) != *p_val1) {
+		log_config_value(p_key, "0x%016" PRIx64, val);
+		if (pfn)
+			pfn(p_subn, &val);
+		*p_val1 = *p_val2 = cl_hton64(val);
+	}
+}
+
+/*
+ * Option parser for host-order uint32_t values.  p_v1/p_v2 are two
+ * uint32_t destinations updated together; pfn (optional) is called with a
+ * pointer to the new value first.  No-op on parse error or unchanged value.
+ */
+static void opts_parse_uint32(IN osm_subn_t *p_subn, IN char *p_key,
+			      IN char *p_val_str, void *p_v1, void *p_v2,
+			      void (*pfn)(osm_subn_t *, void *))
+{
+	uint32_t *p_val1 = p_v1, *p_val2 = p_v2;
+	uint32_t val;
+
+	if (opts_strtoul(&val, p_val_str, p_key, UINT32_MAX))
+		return;
+
+	if (val != *p_val1) {
+		log_config_value(p_key, "%u", val);
+		if (pfn)
+			pfn(p_subn, &val);
+		*p_val1 = *p_val2 = val;
+	}
+}
+
+/*
+ * Option parser for uint32_t values stored in network byte order.  The
+ * text is parsed in host order; pfn (optional) receives the host-order
+ * value, the destinations receive cl_hton32(value).
+ */
+static void opts_parse_net32(IN osm_subn_t *p_subn, IN char *p_key,
+			     IN char *p_val_str, void *p_v1, void *p_v2,
+			     void (*pfn)(osm_subn_t *, void *))
+{
+	uint32_t *p_val1 = p_v1, *p_val2 = p_v2;
+	uint32_t val;
+
+	if (opts_strtoul(&val, p_val_str, p_key, UINT32_MAX))
+		return;
+
+	if (cl_hton32(val) != *p_val1) {
+		log_config_value(p_key, "%u", val);
+		if (pfn)
+			pfn(p_subn, &val);
+		*p_val1 =
*p_val2 = cl_hton32(val); + } +} + +static void opts_parse_int32(IN osm_subn_t *p_subn, IN char *p_key, + IN char *p_val_str, void *p_v1, void *p_v2, + void (*pfn)(osm_subn_t *, void *)) +{ + int32_t *p_val1 = p_v1, *p_val2 = p_v2; + int32_t val = strtol(p_val_str, NULL, 0); + + if (val != *p_val1) { + log_config_value(p_key, "%d", val); + if (pfn) + pfn(p_subn, &val); + *p_val1 = *p_val2 = val; + } +} + +static void opts_parse_uint16(IN osm_subn_t *p_subn, IN char *p_key, + IN char *p_val_str, void *p_v1, void *p_v2, + void (*pfn)(osm_subn_t *, void *)) +{ + uint16_t *p_val1 = p_v1, *p_val2 = p_v2; + uint32_t tmp_val; + + if (opts_strtoul(&tmp_val, p_val_str, p_key, UINT16_MAX)) + return; + + uint16_t val = (uint16_t) tmp_val; + if (val != *p_val1) { + log_config_value(p_key, "%u", val); + if (pfn) + pfn(p_subn, &val); + *p_val1 = *p_val2 = val; + } +} + +static void opts_parse_net16(IN osm_subn_t *p_subn, IN char *p_key, + IN char *p_val_str, void *p_v1, void *p_v2, + void (*pfn)(osm_subn_t *, void *)) +{ + uint16_t *p_val1 = p_v1, *p_val2 = p_v2; + uint32_t tmp_val; + + if (opts_strtoul(&tmp_val, p_val_str, p_key, UINT16_MAX)) + return; + + uint16_t val = (uint16_t) tmp_val; + if (cl_hton16(val) != *p_val1) { + log_config_value(p_key, "0x%04x", val); + if (pfn) + pfn(p_subn, &val); + *p_val1 = *p_val2 = cl_hton16(val); + } +} + +static void opts_parse_uint8(IN osm_subn_t *p_subn, IN char *p_key, + IN char *p_val_str, void *p_v1, void *p_v2, + void (*pfn)(osm_subn_t *, void *)) +{ + uint8_t *p_val1 = p_v1, *p_val2 = p_v2; + uint32_t tmp_val; + + if (opts_strtoul(&tmp_val, p_val_str, p_key, UINT8_MAX)) + return; + + uint8_t val = (uint8_t) tmp_val; + if (val != *p_val1) { + log_config_value(p_key, "%u", val); + if (pfn) + pfn(p_subn, &val); + *p_val1 = *p_val2 = val; + } +} + +static void opts_parse_boolean(IN osm_subn_t *p_subn, IN char *p_key, + IN char *p_val_str, void *p_v1, void *p_v2, + void (*pfn)(osm_subn_t *, void *)) +{ + boolean_t *p_val1 = p_v1, 
*p_val2 = p_v2;
+	boolean_t val;
+
+	if (!p_val_str)
+		return;
+
+	if (strcmp("TRUE", p_val_str))
+		val = FALSE;
+	else
+		val = TRUE;
+
+	if (val != *p_val1) {
+		log_config_value(p_key, "%s", p_val_str);
+		if (pfn)
+			pfn(p_subn, &val);
+		*p_val1 = *p_val2 = val;
+	}
+}
+
+/*
+ * Option parser for heap-allocated string options.
+ *
+ * p_v1/p_v2 - two 'char *' destinations that are updated together.
+ * pfn       - optional callback; called with the new string (or NULL)
+ *             before the destinations are written.
+ *
+ * A NULL p_val_str, or one equal to the current value, is ignored (a NULL
+ * current value compares as "(null)").  The literal "(null)" sets the
+ * option to NULL; anything else is duplicated with strdup().  The
+ * previous strings are freed; the *p_val1 != *p_val2 test avoids a double
+ * free when both destinations share one pointer.
+ */
+static void opts_parse_charp(IN osm_subn_t *p_subn, IN char *p_key,
+			     IN char *p_val_str, void *p_v1, void *p_v2,
+			     void (*pfn)(osm_subn_t *, void *))
+{
+	char **p_val1 = p_v1, **p_val2 = p_v2;
+	const char *current_str = *p_val1 ? *p_val1 : null_str;
+
+	if (p_val_str && strcmp(p_val_str, current_str)) {
+		char *new;
+		log_config_value(p_key, "%s", p_val_str);
+		/* special case the "(null)" string */
+		new = strcmp(null_str, p_val_str) ? strdup(p_val_str) : NULL;
+		if (pfn)
+			pfn(p_subn, new);
+		if (*p_val1 && *p_val1 != *p_val2)
+			free(*p_val1);
+		if (*p_val2)
+			free(*p_val2);
+		*p_val1 = *p_val2 = new;
+	}
+}
+
+/*
+ * Option parser for an IB_CC_PORT_MASK_DATA_SIZE byte mask written as one
+ * hexadecimal number, with an optional "0x"/"0X" prefix.
+ *
+ * p_v1/p_v2 - two byte arrays of IB_CC_PORT_MASK_DATA_SIZE bytes that are
+ *             updated together.
+ * pfn       - optional callback; called with the parsed byte array before
+ *             the destinations are written.
+ *
+ * The digits are stored most significant byte first and right aligned, so
+ * short input leaves the leading bytes zero.  When more than two digits
+ * per byte are supplied only the first IB_CC_PORT_MASK_DATA_SIZE * 2
+ * digits are used.  A NULL string is ignored, like in the boolean and
+ * string parsers; a non-hex character or an empty digit string is
+ * reported and ignored.
+ */
+static void opts_parse_256bit(IN osm_subn_t *p_subn, IN char *p_key,
+			      IN char *p_val_str, void *p_v1, void *p_v2,
+			      void (*pfn)(osm_subn_t *, void *))
+{
+	uint8_t *p_val1 = p_v1, *p_val2 = p_v2;
+	uint8_t val[IB_CC_PORT_MASK_DATA_SIZE] = { 0 };
+	char tmpbuf[3] = { 0 };
+	uint8_t tmpint;
+	int numdigits = 0;
+	int startindex;
+	char *strptr = p_val_str;
+	char *ptr;
+	int i;
+
+	if (!p_val_str)
+		return;
+
+	/* parse like it's hypothetically a 256 bit integer code
+	 *
+	 * store "big endian"
+	 */
+
+	if (!strncmp(strptr, "0x", 2) || !strncmp(strptr, "0X", 2))
+		strptr+=2;
+
+	for (ptr = strptr; *ptr; ptr++) {
+		/*
+		 * <ctype.h> functions need a value representable as
+		 * unsigned char; a plain (possibly negative) char is
+		 * undefined behaviour.
+		 */
+		if (!isxdigit((unsigned char) *ptr)) {
+			log_report("invalid hex digit in bitmask\n");
+			return;
+		}
+		numdigits++;
+	}
+
+	if (!numdigits) {
+		log_report("invalid length bitmask\n");
+		return;
+	}
+
+	/* max of 2 hex chars per byte */
+	if (numdigits > IB_CC_PORT_MASK_DATA_SIZE * 2)
+		numdigits = IB_CC_PORT_MASK_DATA_SIZE * 2;
+
+	/* index of the first (most significant) byte that receives digits */
+	startindex = IB_CC_PORT_MASK_DATA_SIZE - ((numdigits - 1) / 2) - 1;
+
+	/* an odd digit count means the first byte holds a single digit */
+	if (numdigits % 2) {
+		memcpy(tmpbuf, strptr, 1);
+		strptr += 1;
+	}
+	else {
+		memcpy(tmpbuf, strptr, 2);
+		strptr += 2;
+ } + + tmpint = strtoul(tmpbuf, NULL, 16); + val[startindex] = tmpint; + + for (i = (startindex + 1); i < IB_CC_PORT_MASK_DATA_SIZE; i++) { + memcpy(tmpbuf, strptr, 2); + strptr += 2; + tmpint = strtoul(tmpbuf, NULL, 16); + val[i] = tmpint; + } + + if (memcmp(val, p_val1, IB_CC_PORT_MASK_DATA_SIZE)) { + log_config_value(p_key, "%s", p_val_str); + if (pfn) + pfn(p_subn, val); + memcpy(p_val1, val, IB_CC_PORT_MASK_DATA_SIZE); + memcpy(p_val2, val, IB_CC_PORT_MASK_DATA_SIZE); + } + +} + +static void opts_parse_cct_entry(IN osm_subn_t *p_subn, IN char *p_key, + IN char *p_val_str, void *p_v1, void *p_v2, + void (*pfn)(osm_subn_t *, void *)) +{ + osm_cct_entry_t *p_cct1 = p_v1, *p_cct2 = p_v2; + osm_cct_entry_t cct; + char buf[512] = { 0 }; + char *ptr; + + strncpy(buf, p_val_str, 511); + + if (!(ptr = strchr(buf, ':'))) { + log_report("invalid CCT entry\n"); + return; + } + + *ptr = '\0'; + ptr++; + + cct.shift = strtoul(buf, NULL, 0); + cct.multiplier = strtoul(ptr, NULL, 0); + + if (cct.shift != p_cct1->shift + || cct.multiplier != p_cct1->multiplier) { + log_config_value(p_key, "%s", p_val_str); + if (pfn) + pfn(p_subn, &cct); + p_cct1->shift = p_cct2->shift = cct.shift; + p_cct1->multiplier = p_cct2->multiplier = cct.multiplier; + } +} + +static void opts_parse_cc_cct(IN osm_subn_t *p_subn, IN char *p_key, + IN char *p_val_str, void *p_v1, void *p_v2, + void (*pfn)(osm_subn_t *, void *)) +{ + osm_cct_t *p_val1 = p_v1, *p_val2 = p_v2; + const char *current_str = p_val1->input_str ? p_val1->input_str : null_str; + + if (p_val_str && strcmp(p_val_str, current_str)) { + osm_cct_t newcct; + char *new; + unsigned int len = 0; + char *lasts; + char *tok; + char *ptr; + + /* special case the "(null)" string */ + new = strcmp(null_str, p_val_str) ? 
strdup(p_val_str) : NULL; + + if (!new) { + log_config_value(p_key, "%s", p_val_str); + if (pfn) + pfn(p_subn, NULL); + memset(p_val1->entries, '\0', sizeof(p_val1->entries)); + memset(p_val2->entries, '\0', sizeof(p_val2->entries)); + p_val1->entries_len = p_val2->entries_len = 0; + p_val1->input_str = p_val2->input_str = NULL; + return; + } + + memset(&newcct, '\0', sizeof(newcct)); + + tok = strtok_r(new, ",", &lasts); + while (tok && len < OSM_CCT_ENTRY_MAX) { + + if (!(ptr = strchr(tok, ':'))) { + log_report("invalid CCT entry\n"); + free(new); + return; + } + *ptr = '\0'; + ptr++; + + newcct.entries[len].shift = strtoul(tok, NULL, 0); + newcct.entries[len].multiplier = strtoul(ptr, NULL, 0); + len++; + tok = strtok_r(NULL, ",", &lasts); + } + + free(new); + + newcct.entries_len = len; + newcct.input_str = strdup(p_val_str); + + log_config_value(p_key, "%s", p_val_str); + if (pfn) + pfn(p_subn, &newcct); + if (p_val1->input_str && p_val1->input_str != p_val2->input_str) + free(p_val1->input_str); + if (p_val2->input_str) + free(p_val2->input_str); + memcpy(p_val1->entries, newcct.entries, sizeof(newcct.entries)); + memcpy(p_val2->entries, newcct.entries, sizeof(newcct.entries)); + p_val1->entries_len = p_val2->entries_len = newcct.entries_len; + p_val1->input_str = p_val2->input_str = newcct.input_str; + } +} + +static int parse_ca_cong_common(char *p_val_str, uint8_t *sl, unsigned int *val_offset) { + char *new, *lasts, *sl_str, *val_str; + uint8_t sltmp; + + new = strcmp(null_str, p_val_str) ? 
strdup(p_val_str) : NULL; + if (!new) + return -1; + + sl_str = strtok_r(new, " \t", &lasts); + val_str = strtok_r(NULL, " \t", &lasts); + + if (!val_str) { + log_report("value must be specified in addition to SL\n"); + free(new); + return -1; + } + + sltmp = strtoul(sl_str, NULL, 0); + if (sltmp >= IB_CA_CONG_ENTRY_DATA_SIZE) { + log_report("invalid SL specified\n"); + free(new); + return -1; + } + + *sl = sltmp; + *val_offset = (unsigned int)(val_str - new); + + free(new); + return 0; +} + +static void opts_parse_ccti_timer(IN osm_subn_t *p_subn, IN char *p_key, + IN char *p_val_str, void *p_v1, void *p_v2, + void (*pfn)(osm_subn_t *, void *)) +{ + osm_cacongestion_entry_t *p_val1 = p_v1, *p_val2 = p_v2; + unsigned int val_offset = 0; + uint8_t sl = 0; + + if (parse_ca_cong_common(p_val_str, &sl, &val_offset) < 0) + return; + + opts_parse_net16(p_subn, p_key, p_val_str + val_offset, + &p_val1[sl].ccti_timer, + &p_val2[sl].ccti_timer, + pfn); +} + +static void opts_parse_ccti_increase(IN osm_subn_t *p_subn, IN char *p_key, + IN char *p_val_str, void *p_v1, void *p_v2, + void (*pfn)(osm_subn_t *, void *)) +{ + osm_cacongestion_entry_t *p_val1 = p_v1, *p_val2 = p_v2; + unsigned int val_offset = 0; + uint8_t sl = 0; + + if (parse_ca_cong_common(p_val_str, &sl, &val_offset) < 0) + return; + + opts_parse_uint8(p_subn, p_key, p_val_str + val_offset, + &p_val1[sl].ccti_increase, + &p_val2[sl].ccti_increase, + pfn); +} + +static void opts_parse_trigger_threshold(IN osm_subn_t *p_subn, IN char *p_key, + IN char *p_val_str, void *p_v1, void *p_v2, + void (*pfn)(osm_subn_t *, void *)) +{ + osm_cacongestion_entry_t *p_val1 = p_v1, *p_val2 = p_v2; + unsigned int val_offset = 0; + uint8_t sl = 0; + + if (parse_ca_cong_common(p_val_str, &sl, &val_offset) < 0) + return; + + opts_parse_uint8(p_subn, p_key, p_val_str + val_offset, + &p_val1[sl].trigger_threshold, + &p_val2[sl].trigger_threshold, + pfn); +} + +static void opts_parse_ccti_min(IN osm_subn_t *p_subn, IN char *p_key, + 
IN char *p_val_str, void *p_v1, void *p_v2, + void (*pfn)(osm_subn_t *, void *)) +{ + osm_cacongestion_entry_t *p_val1 = p_v1, *p_val2 = p_v2; + unsigned int val_offset = 0; + uint8_t sl = 0; + + if (parse_ca_cong_common(p_val_str, &sl, &val_offset) < 0) + return; + + opts_parse_uint8(p_subn, p_key, p_val_str + val_offset, + &p_val1[sl].ccti_min, + &p_val2[sl].ccti_min, + pfn); +} + +static const opt_rec_t opt_tbl[] = { + { "guid", OPT_OFFSET(guid), opts_parse_net64, NULL, 0 }, + { "m_key", OPT_OFFSET(m_key), opts_parse_net64, NULL, 1 }, + { "sm_key", OPT_OFFSET(sm_key), opts_parse_net64, NULL, 1 }, + { "sa_key", OPT_OFFSET(sa_key), opts_parse_net64, NULL, 1 }, + { "subnet_prefix", OPT_OFFSET(subnet_prefix), opts_parse_net64, NULL, 0 }, + { "m_key_lease_period", OPT_OFFSET(m_key_lease_period), opts_parse_net16, NULL, 1 }, + { "m_key_protection_level", OPT_OFFSET(m_key_protect_bits), opts_parse_uint8, NULL, 1 }, + { "m_key_lookup", OPT_OFFSET(m_key_lookup), opts_parse_boolean, NULL, 1 }, + { "sweep_interval", OPT_OFFSET(sweep_interval), opts_parse_uint32, NULL, 1 }, + { "max_wire_smps", OPT_OFFSET(max_wire_smps), opts_parse_uint32, NULL, 1 }, + { "max_wire_smps2", OPT_OFFSET(max_wire_smps2), opts_parse_uint32, NULL, 1 }, + { "max_smps_timeout", OPT_OFFSET(max_smps_timeout), opts_parse_uint32, NULL, 1 }, + { "console", OPT_OFFSET(console), opts_parse_charp, NULL, 0 }, + { "console_port", OPT_OFFSET(console_port), opts_parse_uint16, NULL, 0 }, + { "transaction_timeout", OPT_OFFSET(transaction_timeout), opts_parse_uint32, NULL, 0 }, + { "transaction_retries", OPT_OFFSET(transaction_retries), opts_parse_uint32, NULL, 0 }, + { "long_transaction_timeout", OPT_OFFSET(long_transaction_timeout), opts_parse_uint32, NULL, 0 }, + { "max_msg_fifo_timeout", OPT_OFFSET(max_msg_fifo_timeout), opts_parse_uint32, NULL, 1 }, + { "sm_priority", OPT_OFFSET(sm_priority), opts_parse_uint8, opts_setup_sm_priority, 1 }, + { "lmc", OPT_OFFSET(lmc), opts_parse_uint8, NULL, 0 }, + { 
"lmc_esp0", OPT_OFFSET(lmc_esp0), opts_parse_boolean, NULL, 0 }, + { "max_op_vls", OPT_OFFSET(max_op_vls), opts_parse_uint8, NULL, 1 }, + { "force_link_speed", OPT_OFFSET(force_link_speed), opts_parse_uint8, NULL, 1 }, + { "force_link_speed_ext", OPT_OFFSET(force_link_speed_ext), opts_parse_uint8, NULL, 1 }, + { "force_link_width", OPT_OFFSET(force_link_width), opts_parse_uint8, NULL, 1 }, + { "fdr10", OPT_OFFSET(fdr10), opts_parse_uint8, NULL, 1 }, + { "reassign_lids", OPT_OFFSET(reassign_lids), opts_parse_boolean, NULL, 1 }, + { "ignore_other_sm", OPT_OFFSET(ignore_other_sm), opts_parse_boolean, NULL, 1 }, + { "single_thread", OPT_OFFSET(single_thread), opts_parse_boolean, NULL, 0 }, + { "disable_multicast", OPT_OFFSET(disable_multicast), opts_parse_boolean, NULL, 1 }, + { "subnet_timeout", OPT_OFFSET(subnet_timeout), opts_parse_uint8, NULL, 1 }, + { "packet_life_time", OPT_OFFSET(packet_life_time), opts_parse_uint8, NULL, 1 }, + { "vl_stall_count", OPT_OFFSET(vl_stall_count), opts_parse_uint8, NULL, 1 }, + { "leaf_vl_stall_count", OPT_OFFSET(leaf_vl_stall_count), opts_parse_uint8, NULL, 1 }, + { "head_of_queue_lifetime", OPT_OFFSET(head_of_queue_lifetime), opts_parse_uint8, NULL, 1 }, + { "leaf_head_of_queue_lifetime", OPT_OFFSET(leaf_head_of_queue_lifetime), opts_parse_uint8, NULL, 1 }, + { "local_phy_errors_threshold", OPT_OFFSET(local_phy_errors_threshold), opts_parse_uint8, NULL, 1 }, + { "overrun_errors_threshold", OPT_OFFSET(overrun_errors_threshold), opts_parse_uint8, NULL, 1 }, + { "use_mfttop", OPT_OFFSET(use_mfttop), opts_parse_boolean, NULL, 1}, + { "sminfo_polling_timeout", OPT_OFFSET(sminfo_polling_timeout), opts_parse_uint32, opts_setup_sminfo_polling_timeout, 1 }, + { "polling_retry_number", OPT_OFFSET(polling_retry_number), opts_parse_uint32, NULL, 1 }, + { "force_heavy_sweep", OPT_OFFSET(force_heavy_sweep), opts_parse_boolean, NULL, 1 }, + { "port_prof_ignore_file", OPT_OFFSET(port_prof_ignore_file), opts_parse_charp, NULL, 0 }, + { 
"hop_weights_file", OPT_OFFSET(hop_weights_file), opts_parse_charp, NULL, 0 }, + { "dimn_ports_file", OPT_OFFSET(port_search_ordering_file), opts_parse_charp, NULL, 0 }, + { "port_search_ordering_file", OPT_OFFSET(port_search_ordering_file), opts_parse_charp, NULL, 0 }, + { "port_profile_switch_nodes", OPT_OFFSET(port_profile_switch_nodes), opts_parse_boolean, NULL, 1 }, + { "sweep_on_trap", OPT_OFFSET(sweep_on_trap), opts_parse_boolean, NULL, 1 }, + { "routing_engine", OPT_OFFSET(routing_engine_names), opts_parse_charp, NULL, 0 }, + { "avoid_throttled_links", OPT_OFFSET(avoid_throttled_links), opts_parse_boolean, NULL, 0 }, + { "connect_roots", OPT_OFFSET(connect_roots), opts_parse_boolean, NULL, 1 }, + { "use_ucast_cache", OPT_OFFSET(use_ucast_cache), opts_parse_boolean, NULL, 0 }, + { "log_file", OPT_OFFSET(log_file), opts_parse_charp, NULL, 0 }, + { "log_max_size", OPT_OFFSET(log_max_size), opts_parse_uint32, opts_setup_log_max_size, 1 }, + { "log_flags", OPT_OFFSET(log_flags), opts_parse_uint8, opts_setup_log_flags, 1 }, + { "force_log_flush", OPT_OFFSET(force_log_flush), opts_parse_boolean, opts_setup_force_log_flush, 1 }, + { "accum_log_file", OPT_OFFSET(accum_log_file), opts_parse_boolean, opts_setup_accum_log_file, 1 }, + { "partition_config_file", OPT_OFFSET(partition_config_file), opts_parse_charp, NULL, 0 }, + { "no_partition_enforcement", OPT_OFFSET(no_partition_enforcement), opts_parse_boolean, NULL, 1 }, + { "part_enforce", OPT_OFFSET(part_enforce), opts_parse_charp, NULL, 1 }, + { "allow_both_pkeys", OPT_OFFSET(allow_both_pkeys), opts_parse_boolean, NULL, 0 }, + { "keep_pkey_indexes", OPT_OFFSET(keep_pkey_indexes), opts_parse_boolean, NULL, 1 }, + { "sm_assigned_guid", OPT_OFFSET(sm_assigned_guid), opts_parse_uint8, NULL, 1 }, + { "qos", OPT_OFFSET(qos), opts_parse_boolean, NULL, 1 }, + { "qos_policy_file", OPT_OFFSET(qos_policy_file), opts_parse_charp, NULL, 0 }, + { "suppress_sl2vl_mad_status_errors", OPT_OFFSET(suppress_sl2vl_mad_status_errors), 
opts_parse_boolean, NULL, 1 }, + { "dump_files_dir", OPT_OFFSET(dump_files_dir), opts_parse_charp, NULL, 0 }, + { "lid_matrix_dump_file", OPT_OFFSET(lid_matrix_dump_file), opts_parse_charp, NULL, 0 }, + { "lfts_file", OPT_OFFSET(lfts_file), opts_parse_charp, NULL, 0 }, + { "root_guid_file", OPT_OFFSET(root_guid_file), opts_parse_charp, NULL, 0 }, + { "cn_guid_file", OPT_OFFSET(cn_guid_file), opts_parse_charp, NULL, 0 }, + { "io_guid_file", OPT_OFFSET(io_guid_file), opts_parse_charp, NULL, 0 }, + { "port_shifting", OPT_OFFSET(port_shifting), opts_parse_boolean, NULL, 1 }, + { "scatter_ports", OPT_OFFSET(scatter_ports), opts_parse_uint32, NULL, 1 }, + { "max_reverse_hops", OPT_OFFSET(max_reverse_hops), opts_parse_uint16, NULL, 0 }, + { "ids_guid_file", OPT_OFFSET(ids_guid_file), opts_parse_charp, NULL, 0 }, + { "guid_routing_order_file", OPT_OFFSET(guid_routing_order_file), opts_parse_charp, NULL, 0 }, + { "guid_routing_order_no_scatter", OPT_OFFSET(guid_routing_order_no_scatter), opts_parse_boolean, NULL, 0 }, + { "sa_db_file", OPT_OFFSET(sa_db_file), opts_parse_charp, NULL, 0 }, + { "sa_db_dump", OPT_OFFSET(sa_db_dump), opts_parse_boolean, NULL, 1 }, + { "torus_config", OPT_OFFSET(torus_conf_file), opts_parse_charp, NULL, 1 }, + { "do_mesh_analysis", OPT_OFFSET(do_mesh_analysis), opts_parse_boolean, NULL, 1 }, + { "exit_on_fatal", OPT_OFFSET(exit_on_fatal), opts_parse_boolean, NULL, 1 }, + { "honor_guid2lid_file", OPT_OFFSET(honor_guid2lid_file), opts_parse_boolean, NULL, 1 }, + { "daemon", OPT_OFFSET(daemon), opts_parse_boolean, NULL, 0 }, + { "sm_inactive", OPT_OFFSET(sm_inactive), opts_parse_boolean, NULL, 1 }, + { "babbling_port_policy", OPT_OFFSET(babbling_port_policy), opts_parse_boolean, NULL, 1 }, + { "drop_event_subscriptions", OPT_OFFSET(drop_event_subscriptions), opts_parse_boolean, NULL, 1 }, + { "ipoib_mcgroup_creation_validation", OPT_OFFSET(ipoib_mcgroup_creation_validation), opts_parse_boolean, NULL, 1 }, + { "mcgroup_join_validation", 
OPT_OFFSET(mcgroup_join_validation), opts_parse_boolean, NULL, 1 }, + { "use_original_extended_sa_rates_only", OPT_OFFSET(use_original_extended_sa_rates_only), opts_parse_boolean, NULL, 1 }, + { "use_optimized_slvl", OPT_OFFSET(use_optimized_slvl), opts_parse_boolean, NULL, 1 }, + { "fsync_high_avail_files", OPT_OFFSET(fsync_high_avail_files), opts_parse_boolean, NULL, 1 }, +#ifdef ENABLE_OSM_PERF_MGR + { "perfmgr", OPT_OFFSET(perfmgr), opts_parse_boolean, NULL, 0 }, + { "perfmgr_redir", OPT_OFFSET(perfmgr_redir), opts_parse_boolean, NULL, 0 }, + { "perfmgr_sweep_time_s", OPT_OFFSET(perfmgr_sweep_time_s), opts_parse_uint16, NULL, 0 }, + { "perfmgr_max_outstanding_queries", OPT_OFFSET(perfmgr_max_outstanding_queries), opts_parse_uint32, NULL, 0 }, + { "perfmgr_ignore_cas", OPT_OFFSET(perfmgr_ignore_cas), opts_parse_boolean, NULL, 0 }, + { "event_db_dump_file", OPT_OFFSET(event_db_dump_file), opts_parse_charp, NULL, 0 }, + { "perfmgr_rm_nodes", OPT_OFFSET(perfmgr_rm_nodes), opts_parse_boolean, NULL, 0 }, + { "perfmgr_log_errors", OPT_OFFSET(perfmgr_log_errors), opts_parse_boolean, NULL, 0 }, + { "perfmgr_query_cpi", OPT_OFFSET(perfmgr_query_cpi), opts_parse_boolean, NULL, 0 }, + { "perfmgr_xmit_wait_log", OPT_OFFSET(perfmgr_xmit_wait_log), opts_parse_boolean, NULL, 0 }, + { "perfmgr_xmit_wait_threshold", OPT_OFFSET(perfmgr_xmit_wait_threshold), opts_parse_uint32, NULL, 0 }, +#endif /* ENABLE_OSM_PERF_MGR */ + { "event_plugin_name", OPT_OFFSET(event_plugin_name), opts_parse_charp, NULL, 0 }, + { "event_plugin_options", OPT_OFFSET(event_plugin_options), opts_parse_charp, NULL, 0 }, + { "node_name_map_name", OPT_OFFSET(node_name_map_name), opts_parse_charp, NULL, 0 }, + { "qos_max_vls", OPT_OFFSET(qos_options.max_vls), opts_parse_uint32, NULL, 1 }, + { "qos_high_limit", OPT_OFFSET(qos_options.high_limit), opts_parse_int32, NULL, 1 }, + { "qos_vlarb_high", OPT_OFFSET(qos_options.vlarb_high), opts_parse_charp, NULL, 1 }, + { "qos_vlarb_low", 
OPT_OFFSET(qos_options.vlarb_low), opts_parse_charp, NULL, 1 }, + { "qos_sl2vl", OPT_OFFSET(qos_options.sl2vl), opts_parse_charp, NULL, 1 }, + { "qos_ca_max_vls", OPT_OFFSET(qos_ca_options.max_vls), opts_parse_uint32, NULL, 1 }, + { "qos_ca_high_limit", OPT_OFFSET(qos_ca_options.high_limit), opts_parse_int32, NULL, 1 }, + { "qos_ca_vlarb_high", OPT_OFFSET(qos_ca_options.vlarb_high), opts_parse_charp, NULL, 1 }, + { "qos_ca_vlarb_low", OPT_OFFSET(qos_ca_options.vlarb_low), opts_parse_charp, NULL, 1 }, + { "qos_ca_sl2vl", OPT_OFFSET(qos_ca_options.sl2vl), opts_parse_charp, NULL, 1 }, + { "qos_sw0_max_vls", OPT_OFFSET(qos_sw0_options.max_vls), opts_parse_uint32, NULL, 1 }, + { "qos_sw0_high_limit", OPT_OFFSET(qos_sw0_options.high_limit), opts_parse_int32, NULL, 1 }, + { "qos_sw0_vlarb_high", OPT_OFFSET(qos_sw0_options.vlarb_high), opts_parse_charp, NULL, 1 }, + { "qos_sw0_vlarb_low", OPT_OFFSET(qos_sw0_options.vlarb_low), opts_parse_charp, NULL, 1 }, + { "qos_sw0_sl2vl", OPT_OFFSET(qos_sw0_options.sl2vl), opts_parse_charp, NULL, 1 }, + { "qos_swe_max_vls", OPT_OFFSET(qos_swe_options.max_vls), opts_parse_uint32, NULL, 1 }, + { "qos_swe_high_limit", OPT_OFFSET(qos_swe_options.high_limit), opts_parse_int32, NULL, 1 }, + { "qos_swe_vlarb_high", OPT_OFFSET(qos_swe_options.vlarb_high), opts_parse_charp, NULL, 1 }, + { "qos_swe_vlarb_low", OPT_OFFSET(qos_swe_options.vlarb_low), opts_parse_charp, NULL, 1 }, + { "qos_swe_sl2vl", OPT_OFFSET(qos_swe_options.sl2vl), opts_parse_charp, NULL, 1 }, + { "qos_rtr_max_vls", OPT_OFFSET(qos_rtr_options.max_vls), opts_parse_uint32, NULL, 1 }, + { "qos_rtr_high_limit", OPT_OFFSET(qos_rtr_options.high_limit), opts_parse_int32, NULL, 1 }, + { "qos_rtr_vlarb_high", OPT_OFFSET(qos_rtr_options.vlarb_high), opts_parse_charp, NULL, 1 }, + { "qos_rtr_vlarb_low", OPT_OFFSET(qos_rtr_options.vlarb_low), opts_parse_charp, NULL, 1 }, + { "qos_rtr_sl2vl", OPT_OFFSET(qos_rtr_options.sl2vl), opts_parse_charp, NULL, 1 }, + { "congestion_control", 
OPT_OFFSET(congestion_control), opts_parse_boolean, NULL, 1 }, + { "cc_key", OPT_OFFSET(cc_key), opts_parse_net64, NULL, 0}, + { "cc_max_outstanding_mads", OPT_OFFSET(cc_max_outstanding_mads), opts_parse_uint32, NULL, 0 }, + { "cc_sw_cong_setting_control_map", OPT_OFFSET(cc_sw_cong_setting_control_map), opts_parse_net32, NULL, 1}, + { "cc_sw_cong_setting_victim_mask", OPT_OFFSET(cc_sw_cong_setting_victim_mask), opts_parse_256bit, NULL, 1}, + { "cc_sw_cong_setting_credit_mask", OPT_OFFSET(cc_sw_cong_setting_credit_mask), opts_parse_256bit, NULL, 1}, + { "cc_sw_cong_setting_threshold", OPT_OFFSET(cc_sw_cong_setting_threshold), opts_parse_uint8, NULL, 1}, + { "cc_sw_cong_setting_packet_size", OPT_OFFSET(cc_sw_cong_setting_packet_size), opts_parse_uint8, NULL, 1}, + { "cc_sw_cong_setting_credit_starvation_threshold", OPT_OFFSET(cc_sw_cong_setting_credit_starvation_threshold), opts_parse_uint8, NULL, 1}, + { "cc_sw_cong_setting_credit_starvation_return_delay", OPT_OFFSET(cc_sw_cong_setting_credit_starvation_return_delay), opts_parse_cct_entry, NULL, 1}, + { "cc_sw_cong_setting_marking_rate", OPT_OFFSET(cc_sw_cong_setting_marking_rate), opts_parse_net16, NULL, 1}, + { "cc_ca_cong_setting_port_control", OPT_OFFSET(cc_ca_cong_setting_port_control), opts_parse_net16, NULL, 1}, + { "cc_ca_cong_setting_control_map", OPT_OFFSET(cc_ca_cong_setting_control_map), opts_parse_net16, NULL, 1}, + { "cc_ca_cong_setting_ccti_timer", OPT_OFFSET(cc_ca_cong_entries), opts_parse_ccti_timer, NULL, 1}, + { "cc_ca_cong_setting_ccti_increase", OPT_OFFSET(cc_ca_cong_entries), opts_parse_ccti_increase, NULL, 1}, + { "cc_ca_cong_setting_trigger_threshold", OPT_OFFSET(cc_ca_cong_entries), opts_parse_trigger_threshold, NULL, 1}, + { "cc_ca_cong_setting_ccti_min", OPT_OFFSET(cc_ca_cong_entries), opts_parse_ccti_min, NULL, 1}, + { "cc_cct", OPT_OFFSET(cc_cct), opts_parse_cc_cct, NULL, 1}, + { "enable_quirks", OPT_OFFSET(enable_quirks), opts_parse_boolean, NULL, 1 }, + { "no_clients_rereg", 
OPT_OFFSET(no_clients_rereg), opts_parse_boolean, NULL, 1 }, + { "prefix_routes_file", OPT_OFFSET(prefix_routes_file), opts_parse_charp, NULL, 0 }, + { "consolidate_ipv6_snm_req", OPT_OFFSET(consolidate_ipv6_snm_req), opts_parse_boolean, NULL, 1 }, + { "lash_start_vl", OPT_OFFSET(lash_start_vl), opts_parse_uint8, NULL, 1 }, + { "sm_sl", OPT_OFFSET(sm_sl), opts_parse_uint8, NULL, 1 }, + { "nue_max_num_vls", OPT_OFFSET(nue_max_num_vls), opts_parse_uint8, NULL, 1 }, + { "nue_include_switches", OPT_OFFSET(nue_include_switches), opts_parse_boolean, NULL, 0 }, + { "log_prefix", OPT_OFFSET(log_prefix), opts_parse_charp, NULL, 1 }, + { "per_module_logging_file", OPT_OFFSET(per_module_logging_file), opts_parse_charp, NULL, 0 }, + { "quasi_ftree_indexing", OPT_OFFSET(quasi_ftree_indexing), opts_parse_boolean, NULL, 1 }, + {0} +}; + +static int compar_mgids(const void *m1, const void *m2) +{ + return memcmp(m1, m2, sizeof(ib_gid_t)); +} + +static void subn_validate_g2m(osm_subn_t *p_subn) +{ + cl_qlist_t guids; + osm_db_guid_elem_t *p_item; + uint64_t mkey; + boolean_t valid_entry; + + OSM_LOG_ENTER(&(p_subn->p_osm->log)); + cl_qlist_init(&guids); + + if (osm_db_guid2mkey_guids(p_subn->p_g2m, &guids)) { + OSM_LOG(&(p_subn->p_osm->log), OSM_LOG_ERROR, "ERR 7506: " + "could not get mkey guid list\n"); + goto Exit; + } + + while ((p_item = (osm_db_guid_elem_t *) cl_qlist_remove_head(&guids)) + != (osm_db_guid_elem_t *) cl_qlist_end(&guids)) { + valid_entry = TRUE; + + if (p_item->guid == 0) { + OSM_LOG(&(p_subn->p_osm->log), OSM_LOG_ERROR, + "ERR 7507: found invalid zero guid"); + valid_entry = FALSE; + } else if (osm_db_guid2mkey_get(p_subn->p_g2m, p_item->guid, + &mkey)) { + OSM_LOG(&(p_subn->p_osm->log), OSM_LOG_ERROR, + "ERR 7508: could not get mkey for guid:0x%016" + PRIx64 "\n", p_item->guid); + valid_entry = FALSE; + } + + if (valid_entry == FALSE) { + if (osm_db_guid2mkey_delete(p_subn->p_g2m, + p_item->guid)) + OSM_LOG(&(p_subn->p_osm->log), OSM_LOG_ERROR, + "ERR 7509: 
failed to delete entry for " + "guid:0x%016" PRIx64 "\n", + p_item->guid); + } + free(p_item); + } + +Exit: + OSM_LOG_EXIT(&(p_subn->p_osm->log)); +} + +static void subn_validate_neighbor(osm_subn_t *p_subn) +{ + cl_qlist_t entries; + osm_db_neighbor_elem_t *p_item; + boolean_t valid_entry; + uint64_t guid; + uint8_t port; + + OSM_LOG_ENTER(&(p_subn->p_osm->log)); + cl_qlist_init(&entries); + + if (osm_db_neighbor_guids(p_subn->p_neighbor, &entries)) { + OSM_LOG(&(p_subn->p_osm->log), OSM_LOG_ERROR, "ERR 7512: " + "could not get neighbor entry list\n"); + goto Exit; + } + + while ((p_item = + (osm_db_neighbor_elem_t *) cl_qlist_remove_head(&entries)) + != (osm_db_neighbor_elem_t *) cl_qlist_end(&entries)) { + valid_entry = TRUE; + + OSM_LOG(&(p_subn->p_osm->log), OSM_LOG_DEBUG, + "Validating neighbor for guid:0x%016" PRIx64 + ", port %d\n", + p_item->guid, p_item->portnum); + if (p_item->guid == 0) { + OSM_LOG(&(p_subn->p_osm->log), OSM_LOG_ERROR, + "ERR 7513: found invalid zero guid\n"); + valid_entry = FALSE; + } else if (p_item->portnum == 0) { + OSM_LOG(&(p_subn->p_osm->log), OSM_LOG_ERROR, + "ERR 7514: found invalid zero port for " + "guid: 0x%016" PRIx64 "\n", + p_item->guid); + valid_entry = FALSE; + } else if (osm_db_neighbor_get(p_subn->p_neighbor, + p_item->guid, p_item->portnum, + &guid, &port)) { + OSM_LOG(&(p_subn->p_osm->log), OSM_LOG_ERROR, + "ERR 7515: could not find neighbor for " + "guid: 0x%016" PRIx64 ", port %d\n", + p_item->guid, p_item->portnum); + valid_entry = FALSE; + } else if (guid == 0) { + OSM_LOG(&(p_subn->p_osm->log), OSM_LOG_ERROR, + "ERR 7516: found invalid neighbor " + "zero guid for guid: 0x%016" PRIx64 + ", port %d\n", + p_item->guid, p_item->portnum); + valid_entry = FALSE; + } else if (port == 0) { + OSM_LOG(&(p_subn->p_osm->log), OSM_LOG_ERROR, + "ERR 7517: found invalid neighbor " + "zero port for guid: 0x%016" PRIx64 + ", port %d\n", + p_item->guid, p_item->portnum); + valid_entry = FALSE; + } else if 
(osm_db_neighbor_get(p_subn->p_neighbor, + guid, port, &guid, &port) || + guid != p_item->guid || port != p_item->portnum) { + OSM_LOG(&(p_subn->p_osm->log), OSM_LOG_ERROR, + "ERR 7518: neighbor does not point " + "back at us (guid: 0x%016" PRIx64 + ", port %d)\n", + p_item->guid, p_item->portnum); + valid_entry = FALSE; + } + + if (valid_entry == FALSE) { + if (osm_db_neighbor_delete(p_subn->p_neighbor, + p_item->guid, + p_item->portnum)) + OSM_LOG(&(p_subn->p_osm->log), OSM_LOG_ERROR, + "ERR 7519: failed to delete entry for " + "guid:0x%016" PRIx64 " port:%u\n", + p_item->guid, p_item->portnum); + } + free(p_item); + } + +Exit: + OSM_LOG_EXIT(&(p_subn->p_osm->log)); +} + +void osm_subn_construct(IN osm_subn_t * p_subn) +{ + memset(p_subn, 0, sizeof(*p_subn)); + cl_ptr_vector_construct(&p_subn->port_lid_tbl); + cl_qmap_init(&p_subn->sw_guid_tbl); + cl_qmap_init(&p_subn->node_guid_tbl); + cl_qmap_init(&p_subn->port_guid_tbl); + cl_qmap_init(&p_subn->alias_port_guid_tbl); + cl_qmap_init(&p_subn->assigned_guids_tbl); + cl_qmap_init(&p_subn->sm_guid_tbl); + cl_qlist_init(&p_subn->sa_sr_list); + cl_qlist_init(&p_subn->sa_infr_list); + cl_qlist_init(&p_subn->alias_guid_list); + cl_qlist_init(&p_subn->prefix_routes_list); + cl_qmap_init(&p_subn->rtr_guid_tbl); + cl_qmap_init(&p_subn->prtn_pkey_tbl); + cl_fmap_init(&p_subn->mgrp_mgid_tbl, compar_mgids); +} + +static void subn_destroy_qos_options(osm_qos_options_t *opt) +{ + free(opt->vlarb_high); + free(opt->vlarb_low); + free(opt->sl2vl); +} + +static void subn_opt_destroy(IN osm_subn_opt_t * p_opt) +{ + free(p_opt->console); + free(p_opt->port_prof_ignore_file); + free(p_opt->hop_weights_file); + free(p_opt->port_search_ordering_file); + free(p_opt->routing_engine_names); + free(p_opt->log_file); + free(p_opt->partition_config_file); + free(p_opt->qos_policy_file); + free(p_opt->dump_files_dir); + free(p_opt->part_enforce); + free(p_opt->lid_matrix_dump_file); + free(p_opt->lfts_file); + free(p_opt->root_guid_file); + 
free(p_opt->cn_guid_file);
+	free(p_opt->io_guid_file);
+	free(p_opt->ids_guid_file);
+	free(p_opt->guid_routing_order_file);
+	free(p_opt->sa_db_file);
+	free(p_opt->torus_conf_file);
+#ifdef ENABLE_OSM_PERF_MGR
+	free(p_opt->event_db_dump_file);
+#endif /* ENABLE_OSM_PERF_MGR */
+	free(p_opt->event_plugin_name);
+	free(p_opt->event_plugin_options);
+	free(p_opt->node_name_map_name);
+	free(p_opt->prefix_routes_file);
+	free(p_opt->log_prefix);
+	free(p_opt->per_module_logging_file);
+	subn_destroy_qos_options(&p_opt->qos_options);
+	subn_destroy_qos_options(&p_opt->qos_ca_options);
+	subn_destroy_qos_options(&p_opt->qos_sw0_options);
+	subn_destroy_qos_options(&p_opt->qos_swe_options);
+	subn_destroy_qos_options(&p_opt->qos_rtr_options);
+	free(p_opt->cc_cct.input_str);
+}
+/*
+ * Release everything a subnet object holds.
+ *
+ * Walks node_guid_tbl, assigned_guids_tbl, alias_port_guid_tbl,
+ * port_guid_tbl, sw_guid_tbl, sm_guid_tbl and prtn_pkey_tbl and hands each
+ * element to its *_delete() routine (remote-SM records are free()d
+ * directly).  It also drains alias_guid_list, clears mgrp_mgid_tbl,
+ * deletes every non-NULL mboxes[] entry, deletes every sa_infr_list and
+ * sa_sr_list record, destroys port_lid_tbl and the QoS policy, frees the
+ * prefix-route list items and finally releases the option strings and
+ * opt.file_opts.
+ *
+ * p_subn itself is not freed here.
+ */
+void osm_subn_destroy(IN osm_subn_t * p_subn)
+{
+	int i;
+	osm_node_t *p_node, *p_next_node;
+	osm_assigned_guids_t *p_assigned_guids, *p_next_assigned_guids;
+	osm_alias_guid_t *p_alias_guid, *p_next_alias_guid;
+	osm_port_t *p_port, *p_next_port;
+	osm_switch_t *p_sw, *p_next_sw;
+	osm_remote_sm_t *p_rsm, *p_next_rsm;
+	osm_prtn_t *p_prtn, *p_next_prtn;
+	osm_infr_t *p_infr, *p_next_infr;
+	osm_svcr_t *p_svcr, *p_next_svcr;
+
+	/* it might be a good idea to de-allocate all known objects */
+	p_next_node = (osm_node_t *) cl_qmap_head(&p_subn->node_guid_tbl);
+	while (p_next_node !=
+	       (osm_node_t *) cl_qmap_end(&p_subn->node_guid_tbl)) {
+		p_node = p_next_node;
+		/* read the successor first: p_node goes to osm_node_delete() next */
+		p_next_node = (osm_node_t *) cl_qmap_next(&p_node->map_item);
+		osm_node_delete(&p_node);
+	}
+
+	p_next_assigned_guids = (osm_assigned_guids_t *) cl_qmap_head(&p_subn->assigned_guids_tbl);
+	while (p_next_assigned_guids !=
+	       (osm_assigned_guids_t *) cl_qmap_end(&p_subn->assigned_guids_tbl)) {
+		p_assigned_guids = p_next_assigned_guids;
+		p_next_assigned_guids = (osm_assigned_guids_t *) cl_qmap_next(&p_assigned_guids->map_item);
+		osm_assigned_guids_delete(&p_assigned_guids);
+	}
+
+	p_next_alias_guid = (osm_alias_guid_t *) cl_qmap_head(&p_subn->alias_port_guid_tbl);
+	while (p_next_alias_guid !=
+	       (osm_alias_guid_t *) cl_qmap_end(&p_subn->alias_port_guid_tbl)) {
+		p_alias_guid = p_next_alias_guid;
+		p_next_alias_guid = (osm_alias_guid_t *) cl_qmap_next(&p_alias_guid->map_item);
+		osm_alias_guid_delete(&p_alias_guid);
+	}
+	/* alias_guid_list is drained: each item is removed, then deleted */
+	while (cl_qlist_count(&p_subn->alias_guid_list))
+		osm_guid_work_obj_delete((osm_guidinfo_work_obj_t *) cl_qlist_remove_head(&p_subn->alias_guid_list));
+
+	p_next_port = (osm_port_t *) cl_qmap_head(&p_subn->port_guid_tbl);
+	while (p_next_port !=
+	       (osm_port_t *) cl_qmap_end(&p_subn->port_guid_tbl)) {
+		p_port = p_next_port;
+		p_next_port = (osm_port_t *) cl_qmap_next(&p_port->map_item);
+		osm_port_delete(&p_port);
+	}
+
+	p_next_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl);
+	while (p_next_sw != (osm_switch_t *) cl_qmap_end(&p_subn->sw_guid_tbl)) {
+		p_sw = p_next_sw;
+		p_next_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
+		osm_switch_delete(&p_sw);
+	}
+
+	p_next_rsm = (osm_remote_sm_t *) cl_qmap_head(&p_subn->sm_guid_tbl);
+	while (p_next_rsm !=
+	       (osm_remote_sm_t *) cl_qmap_end(&p_subn->sm_guid_tbl)) {
+		p_rsm = p_next_rsm;
+		p_next_rsm = (osm_remote_sm_t *) cl_qmap_next(&p_rsm->map_item);
+		free(p_rsm);	/* remote-SM records are released with plain free() */
+	}
+
+	p_next_prtn = (osm_prtn_t *) cl_qmap_head(&p_subn->prtn_pkey_tbl);
+	while (p_next_prtn !=
+	       (osm_prtn_t *) cl_qmap_end(&p_subn->prtn_pkey_tbl)) {
+		p_prtn = p_next_prtn;
+		p_next_prtn = (osm_prtn_t *) cl_qmap_next(&p_prtn->map_item);
+		osm_prtn_delete(p_subn, &p_prtn);
+	}
+
+	cl_fmap_remove_all(&p_subn->mgrp_mgid_tbl);
+	/* mboxes[] is indexed from 0 up to and including (max_mcast_lid_ho - IB_LID_MCAST_START_HO) */
+	for (i = 0; i <= p_subn->max_mcast_lid_ho - IB_LID_MCAST_START_HO;
+	     i++)
+		if (p_subn->mboxes[i])
+			osm_mgrp_box_delete(p_subn->mboxes[i]);
+
+	p_next_infr = (osm_infr_t *) cl_qlist_head(&p_subn->sa_infr_list);
+	while (p_next_infr !=
+	       (osm_infr_t *) cl_qlist_end(&p_subn->sa_infr_list)) {
+		p_infr = p_next_infr;
+		p_next_infr = (osm_infr_t *) cl_qlist_next(&p_infr->list_item);
+		osm_infr_delete(p_infr);
+	}
+
+	p_next_svcr = (osm_svcr_t *) cl_qlist_head(&p_subn->sa_sr_list);
+	while (p_next_svcr !=
+	       (osm_svcr_t *) cl_qlist_end(&p_subn->sa_sr_list)) {
+		p_svcr = p_next_svcr;
+		p_next_svcr = (osm_svcr_t *) cl_qlist_next(&p_svcr->list_item);
+		osm_svcr_delete(p_svcr);
+	}
+
+	cl_ptr_vector_destroy(&p_subn->port_lid_tbl);
+
+	osm_qos_policy_destroy(p_subn->p_qos_policy);
+
+	while (!cl_is_qlist_empty(&p_subn->prefix_routes_list)) {
+		cl_list_item_t *item = cl_qlist_remove_head(&p_subn->prefix_routes_list);
+		free(item);
+	}
+
+	subn_opt_destroy(&p_subn->opt);
+	free(p_subn->opt.file_opts);
+}
+/*
+ * Initialise a subnet object.
+ *
+ * Records p_osm, sizes the LID pointer vector, copies *p_opt into
+ * p_subn->opt, sets the run-time defaults, then opens, restores and
+ * validates the "guid2mkey" and "neighbors" persistent databases.
+ *
+ * Returns the cl_ptr_vector_*() status if sizing the vector fails,
+ * IB_ERROR if a database domain cannot be initialised, or (only when
+ * __WIN__ is not defined and opt.exit_on_fatal is set) if a database
+ * cannot be restored, and IB_SUCCESS otherwise.
+ *
+ * NOTE(review): the option copy is a plain struct assignment, so pointer
+ * members are shared with *p_opt; osm_subn_destroy() later frees the
+ * strings in p_subn->opt - confirm callers do not free them as well.
+ */
+ib_api_status_t osm_subn_init(IN osm_subn_t * p_subn, IN osm_opensm_t * p_osm,
+			      IN const osm_subn_opt_t * p_opt)
+{
+	cl_status_t status;
+
+	p_subn->p_osm = p_osm;
+
+	status = cl_ptr_vector_init(&p_subn->port_lid_tbl,
+				    OSM_SUBNET_VECTOR_MIN_SIZE,
+				    OSM_SUBNET_VECTOR_GROW_SIZE);
+	if (status != CL_SUCCESS)
+		return status;
+
+	status = cl_ptr_vector_set_capacity(&p_subn->port_lid_tbl,
+					    OSM_SUBNET_VECTOR_CAPACITY);
+	if (status != CL_SUCCESS)
+		return status;
+
+	/*
+	   LID zero is not valid. NULL out this entry for the
+	   convenience of other code.
+	 */
+	cl_ptr_vector_set(&p_subn->port_lid_tbl, 0, NULL);
+
+	p_subn->opt = *p_opt;
+	p_subn->max_ucast_lid_ho = IB_LID_UCAST_END_HO;
+	p_subn->max_mcast_lid_ho = IB_LID_MCAST_END_HO;
+	p_subn->min_ca_mtu = IB_MAX_MTU;
+	p_subn->min_ca_rate = IB_PATH_RECORD_RATE_300_GBS;
+	p_subn->min_data_vls = IB_MAX_NUM_VLS - 1;
+	p_subn->min_sw_data_vls = IB_MAX_NUM_VLS - 1;
+	p_subn->ignore_existing_lfts = TRUE;
+
+	/* we assume master by default - so we only need to set it true if STANDBY */
+	p_subn->coming_out_of_standby = FALSE;
+	p_subn->sweeping_enabled = TRUE;
+	p_subn->last_sm_port_state = 1;
+
+	/* Initialize the guid2mkey database */
+	p_subn->p_g2m = osm_db_domain_init(&(p_osm->db), "guid2mkey");
+	if (!p_subn->p_g2m) {
+		OSM_LOG(&(p_osm->log), OSM_LOG_ERROR, "ERR 7510: "
+			"Error initializing Guid-to-MKey persistent database\n");
+		return IB_ERROR;
+	}
+
+	if (osm_db_restore(p_subn->p_g2m)) {
+#ifndef __WIN__
+		/*
+		 * When Windows is BSODing, it might corrupt files that
+		 * were previously opened for writing, even if the files
+		 * are closed, so we might see corrupted guid2mkey file.
+		 */
+		if (p_subn->opt.exit_on_fatal) {
+			osm_log(&(p_osm->log), OSM_LOG_SYS,
+				"FATAL: Error restoring Guid-to-Mkey "
+				"persistent database\n");
+			return IB_ERROR;
+		} else	/* binds to the OSM_LOG below, which sits outside the #ifndef */
+#endif
+			OSM_LOG(&(p_osm->log), OSM_LOG_ERROR,
+				"ERR 7511: Error restoring Guid-to-Mkey "
+				"persistent database\n");
+	}
+
+	subn_validate_g2m(p_subn);
+
+	/* Initialize the neighbor database */
+	p_subn->p_neighbor = osm_db_domain_init(&(p_osm->db), "neighbors");
+	if (!p_subn->p_neighbor) {
+		OSM_LOG(&(p_osm->log), OSM_LOG_ERROR, "ERR 7520: Error "
+			"initializing neighbor link persistent database\n");
+		return IB_ERROR;
+	}
+
+	if (osm_db_restore(p_subn->p_neighbor)) {
+#ifndef __WIN__
+		/*
+		 * When Windows is BSODing, it might corrupt files that
+		 * were previously opened for writing, even if the files
+		 * are closed, so we might see corrupted neighbors file.
+		 */
+		if (p_subn->opt.exit_on_fatal) {
+			osm_log(&(p_osm->log), OSM_LOG_SYS,
+				"FATAL: Error restoring neighbor link "
+				"persistent database\n");
+			return IB_ERROR;
+		} else	/* binds to the OSM_LOG below, which sits outside the #ifndef */
+#endif
+			OSM_LOG(&(p_osm->log), OSM_LOG_ERROR,
+				"ERR 7521: Error restoring neighbor link "
+				"persistent database\n");
+	}
+
+	subn_validate_neighbor(p_subn);
+
+	return IB_SUCCESS;
+}
+/*
+ * Return the port that osm_get_port_by_lid() finds for
+ * p_mad_addr->dest_lid, or NULL.  A failed lookup is logged as ERR 7504.
+ * NOTE(review): dest_lid is passed through cl_ntoh16() for the log, so it
+ * is presumably kept in network byte order - confirm.
+ */
+osm_port_t *osm_get_port_by_mad_addr(IN osm_log_t * p_log,
+				     IN const osm_subn_t * p_subn,
+				     IN osm_mad_addr_t * p_mad_addr)
+{
+	osm_port_t *port = osm_get_port_by_lid(p_subn, p_mad_addr->dest_lid);
+	if (!port)
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 7504: "
+			"Lid is out of range: %u\n",
+			cl_ntoh16(p_mad_addr->dest_lid));
+
+	return port;
+}
+/*
+ * Build the GID of the port addressed by p_mad_addr: the interface id is
+ * the port GUID of the port's physical port, the prefix is the configured
+ * subnet prefix.
+ *
+ * Returns IB_INVALID_PARAMETER when p_gid is NULL (logged as ERR 7505) or
+ * when no port matches the address, and IB_SUCCESS otherwise.
+ * NOTE(review): p_port->p_physp is dereferenced without a NULL check.
+ */
+ib_api_status_t osm_get_gid_by_mad_addr(IN osm_log_t * p_log,
+					IN const osm_subn_t * p_subn,
+					IN osm_mad_addr_t * p_mad_addr,
+					OUT ib_gid_t * p_gid)
+{
+	const osm_port_t *p_port;
+
+	if (p_gid == NULL) {
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 7505: "
+			"Provided output GID is NULL\n");
+		return IB_INVALID_PARAMETER;
+	}
+
+	p_port = osm_get_port_by_mad_addr(p_log, p_subn, p_mad_addr);
+	if (!p_port)
+		return IB_INVALID_PARAMETER;
+
+	p_gid->unicast.interface_id = p_port->p_physp->port_guid;
+	p_gid->unicast.prefix = p_subn->opt.subnet_prefix;
+
+	return IB_SUCCESS;
+}
+/*
+ * Return the p_physp member of the port addressed by p_mad_addr, or NULL
+ * when no port matches the address.
+ */
+osm_physp_t *osm_get_physp_by_mad_addr(IN osm_log_t * p_log,
+				       IN const osm_subn_t * p_subn,
+				       IN osm_mad_addr_t * p_mad_addr)
+{
+	osm_port_t *p_port;
+
+	p_port = osm_get_port_by_mad_addr(p_log, p_subn, p_mad_addr);
+	if (!p_port)
+		return NULL;
+
+	return p_port->p_physp;
+}
+/*
+ * Look a switch up by GUID in sw_guid_tbl.  A result equal to the map's
+ * end marker is turned into NULL.
+ */
+osm_switch_t *osm_get_switch_by_guid(IN const osm_subn_t * p_subn,
+				     IN ib_net64_t guid)
+{
+	osm_switch_t *p_switch;
+
+	p_switch = (osm_switch_t *) cl_qmap_get(&(p_subn->sw_guid_tbl), guid);
+	if (p_switch == (osm_switch_t *) cl_qmap_end(&(p_subn->sw_guid_tbl)))
+		p_switch = NULL;
+	return p_switch;
+}
+/*
+ * Look a node up by GUID in node_guid_tbl.  A result equal to the map's
+ * end marker is turned into NULL.
+ */
+osm_node_t *osm_get_node_by_guid(IN osm_subn_t const *p_subn, IN ib_net64_t guid)
+{
+	osm_node_t *p_node;
+
+
p_node = (osm_node_t *) cl_qmap_get(&(p_subn->node_guid_tbl), guid);
+	if (p_node == (osm_node_t *) cl_qmap_end(&(p_subn->node_guid_tbl)))
+		p_node = NULL;
+	return p_node;
+}
+
+/*
+ * Look a port up by GUID in port_guid_tbl.  A result equal to the map's
+ * end marker is turned into NULL.
+ */
+osm_port_t *osm_get_port_by_guid(IN osm_subn_t const *p_subn, IN ib_net64_t guid)
+{
+	osm_port_t *p_port;
+
+	p_port = (osm_port_t *) cl_qmap_get(&(p_subn->port_guid_tbl), guid);
+	if (p_port == (osm_port_t *) cl_qmap_end(&(p_subn->port_guid_tbl)))
+		p_port = NULL;
+	return p_port;
+}
+
+/*
+ * Look an alias-GUID record up by GUID in alias_port_guid_tbl.
+ * Returns NULL when the map yields its end marker.
+ */
+osm_alias_guid_t *osm_get_alias_guid_by_guid(IN osm_subn_t const *p_subn,
+					     IN ib_net64_t guid)
+{
+	osm_alias_guid_t *p_alias_guid;
+
+	p_alias_guid = (osm_alias_guid_t *) cl_qmap_get(&(p_subn->alias_port_guid_tbl), guid);
+	if (p_alias_guid == (osm_alias_guid_t *) cl_qmap_end(&(p_subn->alias_port_guid_tbl)))
+		return NULL;
+	return p_alias_guid;
+}
+
+/*
+ * Return the p_base_port of the alias-GUID record registered under guid,
+ * or NULL when no such record exists.
+ */
+osm_port_t *osm_get_port_by_alias_guid(IN osm_subn_t const *p_subn,
+				       IN ib_net64_t guid)
+{
+	osm_alias_guid_t *p_alias_guid;
+
+	p_alias_guid = osm_get_alias_guid_by_guid(p_subn, guid);
+	if (!p_alias_guid)
+		return NULL;
+	return p_alias_guid->p_base_port;
+}
+
+/*
+ * Allocate a zero-filled assigned-GUIDs record with room for num_guids
+ * GUIDs and store port_guid in it.
+ *
+ * The allocation is the structure plus (num_guids - 1) extra ib_net64_t
+ * slots.
+ * NOTE(review): the "- 1" presumably accounts for a one-element trailing
+ * array inside osm_assigned_guids_t - confirm against its declaration.
+ *
+ * num_guids == 0 is treated like 1 (no extra slots).
+ *
+ * Returns the new record, or NULL if the size would overflow size_t or
+ * the allocation fails.  Release it with osm_assigned_guids_delete().
+ */
+osm_assigned_guids_t *osm_assigned_guids_new(IN const ib_net64_t port_guid,
+					     IN const uint32_t num_guids)
+{
+	osm_assigned_guids_t *p_assigned_guids;
+	size_t extra_guids;
+
+	/*
+	 * num_guids is unsigned, so a plain "num_guids - 1" would wrap to
+	 * UINT32_MAX for 0 and request a huge (or, on 32-bit, undersized)
+	 * block.
+	 */
+	extra_guids = num_guids ? (size_t) num_guids - 1 : 0;
+
+	/* Refuse sizes that cannot be represented in size_t. */
+	if (extra_guids >
+	    ((size_t) -1 - sizeof(*p_assigned_guids)) / sizeof(ib_net64_t))
+		return NULL;
+
+	p_assigned_guids = calloc(1, sizeof(*p_assigned_guids) +
+				  sizeof(ib_net64_t) * extra_guids);
+	if (p_assigned_guids)
+		p_assigned_guids->port_guid = port_guid;
+	return p_assigned_guids;
+}
+
+/*
+ * Free an assigned-GUIDs record and reset the caller's pointer to NULL.
+ */
+void osm_assigned_guids_delete(IN OUT osm_assigned_guids_t ** pp_assigned_guids)
+{
+	free(*pp_assigned_guids);
+	*pp_assigned_guids = NULL;
+}
+
+/*
+ * Look an assigned-GUIDs record up by port GUID in assigned_guids_tbl.
+ * Returns NULL when the map yields its end marker.
+ */
+osm_assigned_guids_t *osm_get_assigned_guids_by_guid(IN osm_subn_t const *p_subn,
+						     IN ib_net64_t port_guid)
+{
+	osm_assigned_guids_t *p_assigned_guids;
+
+	p_assigned_guids = (osm_assigned_guids_t *) cl_qmap_get(&(p_subn->assigned_guids_tbl), port_guid);
+	if (p_assigned_guids == (osm_assigned_guids_t *) cl_qmap_end(&(p_subn->assigned_guids_tbl)))
+		return NULL;
+	return p_assigned_guids;
+}
+
+/*
+ * Return the port stored at index lid of port_lid_tbl, or NULL when lid
+ * is not below the vector's current size.
+ * NOTE(review): lid is presumably in host byte order (per the _ho
+ * suffix) - confirm with callers.
+ */
+osm_port_t *osm_get_port_by_lid_ho(IN osm_subn_t const * subn, IN uint16_t lid)
+{
+	if (lid >= cl_ptr_vector_get_size(&subn->port_lid_tbl))
+		return NULL;
+	return cl_ptr_vector_get(&subn->port_lid_tbl, lid);
+}
+
+/*
+ * Look a multicast group up by MGID in mgrp_mgid_tbl.
+ * Returns NULL when the map yields its end marker.
+ */
+osm_mgrp_t *osm_get_mgrp_by_mgid(IN osm_subn_t * subn, IN ib_gid_t * mgid)
+{
+	osm_mgrp_t *found = (osm_mgrp_t *)cl_fmap_get(&subn->mgrp_mgid_tbl, mgid);
+
+	if (found == (osm_mgrp_t *)cl_fmap_end(&subn->mgrp_mgid_tbl))
+		return NULL;
+	return found;
+}
+
+/*
+ * Report whether a device is on the list for which this function answers
+ * "extended port info supported".
+ *
+ * vendid and devid are converted with cl_ntoh32()/cl_ntoh16() before
+ * being compared.  A first group of device IDs is accepted whatever the
+ * vendor ID is; a second group is accepted only for vendor 0x119f (the
+ * Bull devices named below).
+ *
+ * Returns 1 for a listed device, 0 otherwise.
+ */
+int is_mlnx_ext_port_info_supported(ib_net32_t vendid, ib_net16_t devid)
+{
+	const uint16_t dev = cl_ntoh16(devid);
+
+	/* Device IDs accepted regardless of the vendor ID. */
+	if (dev >= 0x1003 && dev <= 0x101b)
+		return 1;
+	switch (dev) {
+	case 0xa2d2:
+	case 0xc738: case 0xc739: case 0xc73a: case 0xc73b:
+	case 0xc839:
+	case 0xcb20:
+	case 0xcf08: case 0xcf09:
+	case 0xd2f0:
+		return 1;
+	default:
+		break;
+	}
+
+	/* Everything below applies to vendor 0x119f only. */
+	if (cl_ntoh32(vendid) != 0x119f)
+		return 0;
+
+	switch (dev) {
+	/* Bull Switch-X */
+	case 0x1b02: case 0x1b50:
+	/* Bull Switch-IB/IB2 */
+	case 0x1ba0:
+	case 0x1bd0: case 0x1bd1: case 0x1bd2:
+	case 0x1bd3: case 0x1bd4: case 0x1bd5:
+	/* Bull Quantum */
+	case 0x1bf0:
+	/* Bull Connect-X3 */
+	case 0x1b33: case 0x1b73:
+	case 0x1b40: case 0x1b41:
+	case 0x1b60: case 0x1b61:
+	/* Bull Connect-IB */
+	case 0x1b83: case 0x1b93: case 0x1b94:
+	/* Bull Connect-X4, Sequana HDR and HDR100 */
+	case 0x1bb4: case 0x1bb5:
+	case 0x1bc4: case 0x1bc5: case 0x1bc6:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+static void subn_init_qos_options(osm_qos_options_t *opt, osm_qos_options_t *f)
+{
+	opt->max_vls = 0;
+	opt->high_limit = -1;
+	if (opt->vlarb_high)
+		free(opt->vlarb_high);
+	opt->vlarb_high = NULL;
+	if (opt->vlarb_low)
+		free(opt->vlarb_low);
+	opt->vlarb_low = NULL;
+	if (opt->sl2vl)
+		free(opt->sl2vl);
+
opt->sl2vl = NULL; + if (f) + memcpy(f, opt, sizeof(*f)); +} + +void osm_subn_set_default_opt(IN osm_subn_opt_t * p_opt) +{ + memset(p_opt, 0, sizeof(osm_subn_opt_t)); + p_opt->guid = 0; + p_opt->m_key = OSM_DEFAULT_M_KEY; + p_opt->sm_key = OSM_DEFAULT_SM_KEY; + p_opt->sa_key = OSM_DEFAULT_SA_KEY; + p_opt->subnet_prefix = IB_DEFAULT_SUBNET_PREFIX; + p_opt->m_key_lease_period = 0; + p_opt->m_key_protect_bits = 0; + p_opt->m_key_lookup = TRUE; + p_opt->sweep_interval = OSM_DEFAULT_SWEEP_INTERVAL_SECS; + p_opt->max_wire_smps = OSM_DEFAULT_SMP_MAX_ON_WIRE; + p_opt->max_wire_smps2 = p_opt->max_wire_smps; + p_opt->console = strdup(OSM_DEFAULT_CONSOLE); + p_opt->console_port = OSM_DEFAULT_CONSOLE_PORT; + p_opt->transaction_timeout = OSM_DEFAULT_TRANS_TIMEOUT_MILLISEC; + p_opt->transaction_retries = OSM_DEFAULT_RETRY_COUNT; + p_opt->long_transaction_timeout = OSM_DEFAULT_LONG_TRANS_TIMEOUT_MILLISEC; + p_opt->max_smps_timeout = 1000 * p_opt->transaction_timeout * + p_opt->transaction_retries; + /* by default we will consider waiting for 50x transaction timeout normal */ + p_opt->max_msg_fifo_timeout = 50 * OSM_DEFAULT_TRANS_TIMEOUT_MILLISEC; + p_opt->sm_priority = OSM_DEFAULT_SM_PRIORITY; + p_opt->lmc = OSM_DEFAULT_LMC; + p_opt->lmc_esp0 = FALSE; + p_opt->max_op_vls = OSM_DEFAULT_MAX_OP_VLS; + p_opt->force_link_speed = IB_LINK_SPEED_SET_LSS; + p_opt->force_link_speed_ext = IB_LINK_SPEED_EXT_SET_LSES; + p_opt->force_link_width = IB_LINK_WIDTH_SET_LWS; + p_opt->fdr10 = 1; + p_opt->reassign_lids = FALSE; + p_opt->ignore_other_sm = FALSE; + p_opt->single_thread = FALSE; + p_opt->disable_multicast = FALSE; + p_opt->force_log_flush = FALSE; + p_opt->subnet_timeout = OSM_DEFAULT_SUBNET_TIMEOUT; + p_opt->packet_life_time = OSM_DEFAULT_SWITCH_PACKET_LIFE; + p_opt->vl_stall_count = OSM_DEFAULT_VL_STALL_COUNT; + p_opt->leaf_vl_stall_count = OSM_DEFAULT_LEAF_VL_STALL_COUNT; + p_opt->head_of_queue_lifetime = OSM_DEFAULT_HEAD_OF_QUEUE_LIFE; + p_opt->leaf_head_of_queue_lifetime = + 
OSM_DEFAULT_LEAF_HEAD_OF_QUEUE_LIFE; + p_opt->local_phy_errors_threshold = OSM_DEFAULT_ERROR_THRESHOLD; + p_opt->overrun_errors_threshold = OSM_DEFAULT_ERROR_THRESHOLD; + p_opt->use_mfttop = TRUE; + p_opt->sminfo_polling_timeout = + OSM_SM_DEFAULT_POLLING_TIMEOUT_MILLISECS; + p_opt->polling_retry_number = OSM_SM_DEFAULT_POLLING_RETRY_NUMBER; + p_opt->force_heavy_sweep = FALSE; + p_opt->log_flags = OSM_LOG_DEFAULT_LEVEL; + p_opt->honor_guid2lid_file = FALSE; + p_opt->daemon = FALSE; + p_opt->sm_inactive = FALSE; + p_opt->babbling_port_policy = FALSE; + p_opt->drop_event_subscriptions = FALSE; + p_opt->ipoib_mcgroup_creation_validation = TRUE; + p_opt->mcgroup_join_validation = TRUE; + p_opt->use_original_extended_sa_rates_only = FALSE; + p_opt->use_optimized_slvl = FALSE; + p_opt->fsync_high_avail_files = TRUE; +#ifdef ENABLE_OSM_PERF_MGR + p_opt->perfmgr = FALSE; + p_opt->perfmgr_redir = TRUE; + p_opt->perfmgr_sweep_time_s = OSM_PERFMGR_DEFAULT_SWEEP_TIME_S; + p_opt->perfmgr_max_outstanding_queries = + OSM_PERFMGR_DEFAULT_MAX_OUTSTANDING_QUERIES; + p_opt->perfmgr_ignore_cas = FALSE; + p_opt->event_db_dump_file = NULL; /* use default */ + p_opt->perfmgr_rm_nodes = TRUE; + p_opt->perfmgr_log_errors = TRUE; + p_opt->perfmgr_query_cpi = TRUE; + p_opt->perfmgr_xmit_wait_log = FALSE; + p_opt->perfmgr_xmit_wait_threshold = OSM_PERFMGR_DEFAULT_XMIT_WAIT_THRESHOLD; +#endif /* ENABLE_OSM_PERF_MGR */ + + p_opt->event_plugin_name = NULL; + p_opt->event_plugin_options = NULL; + p_opt->node_name_map_name = NULL; + + p_opt->dump_files_dir = getenv("OSM_TMP_DIR"); + if (!p_opt->dump_files_dir || !(*p_opt->dump_files_dir)) + p_opt->dump_files_dir = strdup(OSM_DEFAULT_TMP_DIR); + else + p_opt->dump_files_dir = strdup(p_opt->dump_files_dir); + p_opt->log_file = strdup(OSM_DEFAULT_LOG_FILE); + p_opt->log_max_size = 0; + p_opt->partition_config_file = strdup(OSM_DEFAULT_PARTITION_CONFIG_FILE); + p_opt->no_partition_enforcement = FALSE; + p_opt->part_enforce = 
strdup(OSM_PARTITION_ENFORCE_BOTH); + p_opt->allow_both_pkeys = FALSE; + p_opt->keep_pkey_indexes = TRUE; + p_opt->sm_assigned_guid = 0; + p_opt->qos = FALSE; + p_opt->qos_policy_file = strdup(OSM_DEFAULT_QOS_POLICY_FILE); + p_opt->suppress_sl2vl_mad_status_errors = FALSE; + p_opt->accum_log_file = TRUE; + p_opt->port_prof_ignore_file = NULL; + p_opt->hop_weights_file = NULL; + p_opt->port_search_ordering_file = NULL; + p_opt->port_profile_switch_nodes = FALSE; + p_opt->sweep_on_trap = TRUE; + p_opt->use_ucast_cache = FALSE; + p_opt->routing_engine_names = NULL; + p_opt->avoid_throttled_links = FALSE; + p_opt->connect_roots = FALSE; + p_opt->lid_matrix_dump_file = NULL; + p_opt->lfts_file = NULL; + p_opt->root_guid_file = NULL; + p_opt->cn_guid_file = NULL; + p_opt->io_guid_file = NULL; + p_opt->port_shifting = FALSE; + p_opt->scatter_ports = OSM_DEFAULT_SCATTER_PORTS; + p_opt->max_reverse_hops = 0; + p_opt->ids_guid_file = NULL; + p_opt->guid_routing_order_file = NULL; + p_opt->guid_routing_order_no_scatter = FALSE; + p_opt->sa_db_file = NULL; + p_opt->sa_db_dump = FALSE; + p_opt->torus_conf_file = strdup(OSM_DEFAULT_TORUS_CONF_FILE); + p_opt->do_mesh_analysis = FALSE; + p_opt->exit_on_fatal = TRUE; + p_opt->congestion_control = FALSE; + p_opt->cc_key = OSM_DEFAULT_CC_KEY; + p_opt->cc_max_outstanding_mads = OSM_CC_DEFAULT_MAX_OUTSTANDING_QUERIES; + p_opt->enable_quirks = FALSE; + p_opt->no_clients_rereg = FALSE; + p_opt->prefix_routes_file = strdup(OSM_DEFAULT_PREFIX_ROUTES_FILE); + p_opt->consolidate_ipv6_snm_req = FALSE; + p_opt->lash_start_vl = 0; + p_opt->sm_sl = OSM_DEFAULT_SL; + p_opt->nue_max_num_vls = 1; + p_opt->nue_include_switches = FALSE; + p_opt->log_prefix = NULL; + p_opt->per_module_logging_file = strdup(OSM_DEFAULT_PER_MOD_LOGGING_CONF_FILE); + subn_init_qos_options(&p_opt->qos_options, NULL); + subn_init_qos_options(&p_opt->qos_ca_options, NULL); + subn_init_qos_options(&p_opt->qos_sw0_options, NULL); + 
/*
 * Trim a configuration value in place.
 *
 * Leading and trailing whitespace is removed. If what remains starts and
 * ends with the same quote character (either " or '), that pair of quotes
 * is removed as well. A value consisting of a single quote character
 * therefore becomes the empty string.
 *
 * val: writable, NUL-terminated string; it is modified in place.
 *      NULL is tolerated and returned unchanged.
 *
 * Returns a pointer into the original buffer at the first kept character.
 */
static char *clean_val(char *val)
{
	char *end;

	if (val == NULL)
		return val;

	/*
	 * The <ctype.h> classifiers are only defined for values representable
	 * as unsigned char (or EOF); a plain char may be negative.
	 */
	while (isspace((unsigned char)*val))
		val++;
	if (*val == '\0')
		return val;

	/* clean trailing spaces */
	end = val + strlen(val) - 1;
	while (end > val && isspace((unsigned char)*end))
		end--;
	end[1] = '\0';

	/* clean quotes */
	if ((*val == '\"' || *val == '\'') && *end == *val) {
		*end = '\0';
		val++;
	}
	return val;
}
fp) { + if (errno == ENOENT) + return IB_SUCCESS; + + OSM_LOG(log, OSM_LOG_ERROR, "fopen(%s) failed: %s", + p_subn->opt.prefix_routes_file, strerror(errno)); + return IB_ERROR; + } + + while (fgets(buf, sizeof buf, fp) != NULL) { + char *p_prefix, *p_guid, *p_extra, *p_last, *p_end; + uint64_t prefix, guid; + + line++; + if (errors > 10) + break; + + p_prefix = strtok_r(buf, " \t\n", &p_last); + if (! p_prefix) + continue; /* ignore blank lines */ + + if (*p_prefix == '#') + continue; /* ignore comment lines */ + + p_guid = strtok_r(NULL, " \t\n", &p_last); + if (! p_guid) { + OSM_LOG(log, OSM_LOG_ERROR, "%s:%d: missing GUID\n", + p_subn->opt.prefix_routes_file, line); + errors++; + continue; + } + + p_extra = strtok_r(NULL, " \t\n", &p_last); + if (p_extra && *p_extra != '#') { + OSM_LOG(log, OSM_LOG_INFO, "%s:%d: extra tokens ignored\n", + p_subn->opt.prefix_routes_file, line); + } + + if (strcmp(p_prefix, "*") == 0) + prefix = 0; + else { + prefix = strtoull(p_prefix, &p_end, 16); + if (*p_end != '\0') { + OSM_LOG(log, OSM_LOG_ERROR, "%s:%d: illegal prefix: %s\n", + p_subn->opt.prefix_routes_file, line, p_prefix); + errors++; + continue; + } + } + + if (strcmp(p_guid, "*") == 0) + guid = 0; + else { + guid = strtoull(p_guid, &p_end, 16); + if (*p_end != '\0' && *p_end != '#') { + OSM_LOG(log, OSM_LOG_ERROR, "%s:%d: illegal GUID: %s\n", + p_subn->opt.prefix_routes_file, line, p_guid); + errors++; + continue; + } + } + + if (append_prefix_route(p_subn, prefix, guid) != IB_SUCCESS) { + errors++; + break; + } + } + + fclose(fp); + return (errors == 0) ? 
IB_SUCCESS : IB_ERROR; +} + +static ib_api_status_t insert_per_module_debug(IN osm_subn_t * p_subn, + char *mod_name, + osm_log_level_t level) +{ + uint8_t index; + + if (find_module_name(mod_name, &index)) { + OSM_LOG(&p_subn->p_osm->log, OSM_LOG_ERROR, + "Module name %s not found\n", mod_name); + return IB_ERROR; + } + osm_set_log_per_module(&p_subn->p_osm->log, index, level); + return IB_SUCCESS; +} + +static ib_api_status_t parse_per_mod_logging_file(IN osm_subn_t * p_subn) +{ + osm_log_t *log = &p_subn->p_osm->log; + FILE *fp; + char buf[1024]; + int line = 0; + int errors = 0; + + osm_reset_log_per_module(log); + + if (p_subn->opt.per_module_logging_file == NULL) + return IB_SUCCESS; + + fp = fopen(p_subn->opt.per_module_logging_file, "r"); + if (!fp) { + if (errno == ENOENT) + return IB_SUCCESS; + + OSM_LOG(log, OSM_LOG_ERROR, "fopen(%s) failed: %s", + p_subn->opt.per_module_logging_file, strerror(errno)); + return IB_ERROR; + } + + while (fgets(buf, sizeof buf, fp) != NULL) { + char *p_mod_name, *p_level, *p_extra, *p_last; + osm_log_level_t level; + + line++; + if (errors > 10) + break; + + p_mod_name = strtok_r(buf, " =,\t\n", &p_last); + if (!p_mod_name) + continue; /* ignore blank lines */ + + if (*p_mod_name == '#') + continue; /* ignore comment lines */ + + p_level = strtok_r(NULL, " \t\n", &p_last); + if (!p_level) { + OSM_LOG(log, OSM_LOG_ERROR, "%s:%d: missing log level\n", + p_subn->opt.per_module_logging_file, line); + errors++; + continue; + } + p_extra = strtok_r(NULL, " \t\n", &p_last); + if (p_extra && *p_extra != '#') { + OSM_LOG(log, OSM_LOG_INFO, "%s:%d: extra tokens ignored\n", + p_subn->opt.per_module_logging_file, line); + } + + level = strtoul(p_level, NULL, 0); + if (insert_per_module_debug(p_subn, p_mod_name, level) != IB_SUCCESS) { + errors++; + break; + } + } + + fclose(fp); + return (errors == 0) ? 
IB_SUCCESS : IB_ERROR; +} + +static void subn_verify_max_vls(unsigned *max_vls, const char *prefix) +{ + if (!*max_vls || *max_vls > 15) { + if (*max_vls) + log_report(" Invalid Cached Option: %s_max_vls=%u: " + "Using Default = %u\n", + prefix, *max_vls, OSM_DEFAULT_QOS_MAX_VLS); + *max_vls = 0; + } +} + +static void subn_verify_high_limit(int *high_limit, const char *prefix) +{ + if (*high_limit < 0 || *high_limit > 255) { + if (*high_limit > 255) + log_report(" Invalid Cached Option: %s_high_limit=%d: " + "Using Default: %d\n", + prefix, *high_limit, + OSM_DEFAULT_QOS_HIGH_LIMIT); + *high_limit = -1; + } +} + +static void subn_verify_vlarb(char **vlarb, const char *prefix, + const char *suffix) +{ + char *str, *tok, *end, *ptr; + int count = 0; + + if (*vlarb == NULL) + return; + + str = strdup(*vlarb); + + tok = strtok_r(str, ",\n", &ptr); + while (tok) { + char *vl_str, *weight_str; + + vl_str = tok; + weight_str = strchr(tok, ':'); + + if (weight_str) { + long vl, weight; + + *weight_str = '\0'; + weight_str++; + + vl = strtol(vl_str, &end, 0); + + if (*end) + log_report(" Warning: Cached Option " + "%s_vlarb_%s:vl=%s" + " improperly formatted\n", + prefix, suffix, vl_str); + else if (vl < 0 || vl > 14) + log_report(" Warning: Cached Option " + "%s_vlarb_%s:vl=%ld out of range\n", + prefix, suffix, vl); + + weight = strtol(weight_str, &end, 0); + + if (*end) + log_report(" Warning: Cached Option " + "%s_vlarb_%s:weight=%s " + "improperly formatted\n", + prefix, suffix, weight_str); + else if (weight < 0 || weight > 255) + log_report(" Warning: Cached Option " + "%s_vlarb_%s:weight=%ld " + "out of range\n", + prefix, suffix, weight); + } else + log_report(" Warning: Cached Option " + "%s_vlarb_%s:vl:weight=%s " + "improperly formatted\n", + prefix, suffix, tok); + + count++; + tok = strtok_r(NULL, ",\n", &ptr); + } + + if (count > 64) + log_report(" Warning: Cached Option %s_vlarb_%s: > 64 listed:" + " excess vl:weight pairs will be dropped\n", + prefix, 
suffix); + + free(str); +} + +static void subn_verify_sl2vl(char **sl2vl, const char *prefix) +{ + char *str, *tok, *end, *ptr; + int count = 0; + + if (*sl2vl == NULL) + return; + + str = strdup(*sl2vl); + + tok = strtok_r(str, ",\n", &ptr); + while (tok) { + long vl = strtol(tok, &end, 0); + + if (*end) + log_report(" Warning: Cached Option %s_sl2vl:vl=%s " + "improperly formatted\n", prefix, tok); + else if (vl < 0 || vl > 15) + log_report(" Warning: Cached Option %s_sl2vl:vl=%ld " + "out of range\n", prefix, vl); + + count++; + tok = strtok_r(NULL, ",\n", &ptr); + } + + if (count < 16) + log_report(" Warning: Cached Option %s_sl2vl: < 16 VLs " + "listed\n", prefix); + else if (count > 16) + log_report(" Warning: Cached Option %s_sl2vl: > 16 listed: " + "excess VLs will be dropped\n", prefix); + + free(str); +} + +static void subn_verify_qos_set(osm_qos_options_t *set, const char *prefix) +{ + subn_verify_max_vls(&set->max_vls, prefix); + subn_verify_high_limit(&set->high_limit, prefix); + subn_verify_vlarb(&set->vlarb_low, prefix, "low"); + subn_verify_vlarb(&set->vlarb_high, prefix, "high"); + subn_verify_sl2vl(&set->sl2vl, prefix); +} + +int osm_subn_verify_config(IN osm_subn_opt_t * p_opts) +{ + if (p_opts->lmc > 7) { + log_report(" Invalid Cached Option Value:lmc = %u:" + "Using Default:%u\n", p_opts->lmc, OSM_DEFAULT_LMC); + p_opts->lmc = OSM_DEFAULT_LMC; + } + + if (15 < p_opts->sm_priority) { + log_report(" Invalid Cached Option Value:sm_priority = %u:" + "Using Default:%u\n", + p_opts->sm_priority, OSM_DEFAULT_SM_PRIORITY); + p_opts->sm_priority = OSM_DEFAULT_SM_PRIORITY; + } + + if ((IB_LINK_SPEED_SET_LSS < p_opts->force_link_speed) || + (p_opts->force_link_speed > IB_LINK_SPEED_2_5_5_OR_10 && + p_opts->force_link_speed < IB_LINK_SPEED_SET_LSS)) { + log_report(" Invalid Cached Option Value:force_link_speed = %u:" + "Using Default:%u\n", p_opts->force_link_speed, + IB_LINK_SPEED_SET_LSS); + p_opts->force_link_speed = IB_LINK_SPEED_SET_LSS; + } + + if 
((IB_LINK_SPEED_EXT_SET_LSES < p_opts->force_link_speed_ext) || + (p_opts->force_link_speed_ext > IB_LINK_SPEED_EXT_14_25_OR_50 && + p_opts->force_link_speed_ext < IB_LINK_SPEED_EXT_DISABLE)) { + log_report(" Invalid Cached Option Value:force_link_speed_ext = %u:" + "Using Default:%u\n", p_opts->force_link_speed_ext, + IB_LINK_SPEED_EXT_SET_LSES); + p_opts->force_link_speed_ext = IB_LINK_SPEED_EXT_SET_LSES; + } + + if ((IB_LINK_WIDTH_SET_LWS < p_opts->force_link_width) || + (p_opts->force_link_width > IB_LINK_WIDTH_1X_2X_4X_8X_OR_12X && + p_opts->force_link_width < IB_LINK_WIDTH_SET_LWS)) { + log_report(" Invalid Cached Option Value:force_link_width = %u:" + "Using Default:%u\n", p_opts->force_link_width, + IB_LINK_WIDTH_SET_LWS); + p_opts->force_link_width = IB_LINK_WIDTH_SET_LWS; + } + + if (2 < p_opts->fdr10) { + log_report(" Invalid Cached Option Value:fdr10 = %u:" + "Using Default:%u\n", p_opts->fdr10, 1); + p_opts->fdr10 = 1; + } + + if (p_opts->max_wire_smps == 0) + p_opts->max_wire_smps = 0x7FFFFFFF; + else if (p_opts->max_wire_smps > 0x7FFFFFFF) { + log_report(" Invalid Cached Option Value: max_wire_smps = %u," + " Using Default: %u\n", + p_opts->max_wire_smps, OSM_DEFAULT_SMP_MAX_ON_WIRE); + p_opts->max_wire_smps = OSM_DEFAULT_SMP_MAX_ON_WIRE; + } + + if (p_opts->max_wire_smps2 > 0x7FFFFFFF) { + log_report(" Invalid Cached Option Value: max_wire_smps2 = %u," + " Using Default: %u", + p_opts->max_wire_smps2, p_opts->max_wire_smps); + p_opts->max_wire_smps2 = p_opts->max_wire_smps; + } + + if (p_opts->long_transaction_timeout < p_opts->transaction_timeout) { + log_report(" Invalid Cached Option Value: long_transaction_timeout = %u," + " Using transaction_timeout: %u", + p_opts->long_transaction_timeout, p_opts->transaction_timeout); + p_opts->long_transaction_timeout = p_opts->transaction_timeout; + } + + if (strcmp(p_opts->console, OSM_DISABLE_CONSOLE) + && strcmp(p_opts->console, OSM_LOCAL_CONSOLE) +#ifdef ENABLE_OSM_CONSOLE_LOOPBACK + && 
strcmp(p_opts->console, OSM_LOOPBACK_CONSOLE) +#endif +#ifdef ENABLE_OSM_CONSOLE_SOCKET + && strcmp(p_opts->console, OSM_REMOTE_CONSOLE) +#endif + ) { + log_report(" Invalid Cached Option Value:console = %s" + ", Using Default:%s\n", + p_opts->console, OSM_DEFAULT_CONSOLE); + free(p_opts->console); + p_opts->console = strdup(OSM_DEFAULT_CONSOLE); + } + + if (p_opts->no_partition_enforcement == TRUE) { + strcpy(p_opts->part_enforce, OSM_PARTITION_ENFORCE_OFF); + p_opts->part_enforce_enum = OSM_PARTITION_ENFORCE_TYPE_OFF; + } else { + if (strcmp(p_opts->part_enforce, OSM_PARTITION_ENFORCE_BOTH) == 0) + p_opts->part_enforce_enum = OSM_PARTITION_ENFORCE_TYPE_BOTH; + else if (strcmp(p_opts->part_enforce, OSM_PARTITION_ENFORCE_IN) == 0) + p_opts->part_enforce_enum = OSM_PARTITION_ENFORCE_TYPE_IN; + else if (strcmp(p_opts->part_enforce, OSM_PARTITION_ENFORCE_OUT) == 0) + p_opts->part_enforce_enum = OSM_PARTITION_ENFORCE_TYPE_OUT; + else if (strcmp(p_opts->part_enforce, OSM_PARTITION_ENFORCE_OFF) == 0) + p_opts->part_enforce_enum = OSM_PARTITION_ENFORCE_TYPE_OFF; + else { + log_report(" Invalid Cached Option Value:part_enforce = %s" + ", Using Default:%s\n", + p_opts->part_enforce, OSM_PARTITION_ENFORCE_BOTH); + strcpy(p_opts->part_enforce, OSM_PARTITION_ENFORCE_BOTH); + p_opts->part_enforce_enum = OSM_PARTITION_ENFORCE_TYPE_BOTH; + } + } + + if (p_opts->qos) { + subn_verify_qos_set(&p_opts->qos_options, "qos"); + subn_verify_qos_set(&p_opts->qos_ca_options, "qos_ca"); + subn_verify_qos_set(&p_opts->qos_sw0_options, "qos_sw0"); + subn_verify_qos_set(&p_opts->qos_swe_options, "qos_swe"); + subn_verify_qos_set(&p_opts->qos_rtr_options, "qos_rtr"); + } + +#ifdef ENABLE_OSM_PERF_MGR + if (p_opts->perfmgr_sweep_time_s < 1) { + log_report(" Invalid Cached Option Value:perfmgr_sweep_time_s " + "= %u Using Default:%u\n", + p_opts->perfmgr_sweep_time_s, + OSM_PERFMGR_DEFAULT_SWEEP_TIME_S); + p_opts->perfmgr_sweep_time_s = OSM_PERFMGR_DEFAULT_SWEEP_TIME_S; + } + if 
(p_opts->perfmgr_max_outstanding_queries < 1) { + log_report(" Invalid Cached Option Value:" + "perfmgr_max_outstanding_queries = %u" + " Using Default:%u\n", + p_opts->perfmgr_max_outstanding_queries, + OSM_PERFMGR_DEFAULT_MAX_OUTSTANDING_QUERIES); + p_opts->perfmgr_max_outstanding_queries = + OSM_PERFMGR_DEFAULT_MAX_OUTSTANDING_QUERIES; + } +#endif + + if (p_opts->m_key_protect_bits > 3) { + log_report(" Invalid Cached Option Value:" + "m_key_protection_level = %u Setting to %u " + "instead\n", p_opts->m_key_protect_bits, 2); + p_opts->m_key_protect_bits = 2; + } + if (p_opts->m_key_protect_bits && p_opts->m_key_lease_period) { + if (!p_opts->sweep_interval) { + log_report(" Sweep disabled with protected mkey " + "leases in effect; re-enabling sweeping " + "with interval %u\n", + cl_ntoh16(p_opts->m_key_lease_period) - 1); + p_opts->sweep_interval = + cl_ntoh16(p_opts->m_key_lease_period) - 1; + } + if (p_opts->sweep_interval >= + cl_ntoh16(p_opts->m_key_lease_period)) { + log_report(" Sweep interval %u >= mkey lease period " + "%u. 
Setting lease period to %u\n", + p_opts->sweep_interval, + cl_ntoh16(p_opts->m_key_lease_period), + p_opts->sweep_interval + 1); + p_opts->m_key_lease_period = + cl_hton16(p_opts->sweep_interval + 1); + } + } + + return 0; +} + +int osm_subn_parse_conf_file(const char *file_name, osm_subn_opt_t * p_opts) +{ + char line[1024]; + FILE *opts_file; + char *p_key, *p_val, *pound_sign; + const opt_rec_t *r; + void *p_field1, *p_field2; + int token_matched; + + opts_file = fopen(file_name, "r"); + if (!opts_file) { + if (errno == ENOENT) + return 1; + printf("cannot open file \'%s\': %s\n", + file_name, strerror(errno)); + return -1; + } + + printf(" Reading Cached Option File: %s\n", file_name); + + p_opts->config_file = file_name; + if (!p_opts->file_opts && !(p_opts->file_opts = malloc(sizeof(*p_opts)))) { + fclose(opts_file); + return -1; + } + memcpy(p_opts->file_opts, p_opts, sizeof(*p_opts)); + p_opts->file_opts->file_opts = NULL; + + while (fgets(line, 1023, opts_file) != NULL) { + pound_sign = strchr(line,'#'); + token_matched = 0; + /* Truncate any comments. 
*/ + if (pound_sign) + *pound_sign = '\0'; + + /* get the first token */ + p_key = strtok_r(line, " \t\n", &p_val); + if (!p_key) + continue; + + p_val = clean_val(p_val); + + for (r = opt_tbl; r->name; r++) { + if (strcmp(r->name, p_key)) + continue; + + token_matched = 1; + p_field1 = (void *)p_opts->file_opts + r->opt_offset; + p_field2 = (void *)p_opts + r->opt_offset; + /* don't call setup function first time */ + r->parse_fn(NULL, p_key, p_val, p_field1, p_field2, + NULL); + break; + } + + if (!token_matched) + log_report(" Unrecognized token: \"%s\"\n", p_key); + } + fclose(opts_file); + + osm_subn_verify_config(p_opts); + + return 0; +} + +int osm_subn_rescan_conf_files(IN osm_subn_t * p_subn) +{ + char line[1024]; + osm_subn_opt_t *p_opts = &p_subn->opt; + const opt_rec_t *r; + FILE *opts_file; + char *p_key, *p_val, *pound_sign; + void *p_field1, *p_field2; + int token_matched; + + if (!p_opts->config_file) + return 0; + + opts_file = fopen(p_opts->config_file, "r"); + if (!opts_file) { + if (errno == ENOENT) + return 1; + OSM_LOG(&p_subn->p_osm->log, OSM_LOG_ERROR, + "cannot open file \'%s\': %s\n", + p_opts->config_file, strerror(errno)); + return -1; + } + + subn_init_qos_options(&p_opts->qos_options, + &p_opts->file_opts->qos_options); + subn_init_qos_options(&p_opts->qos_ca_options, + &p_opts->file_opts->qos_ca_options); + subn_init_qos_options(&p_opts->qos_sw0_options, + &p_opts->file_opts->qos_sw0_options); + subn_init_qos_options(&p_opts->qos_swe_options, + &p_opts->file_opts->qos_swe_options); + subn_init_qos_options(&p_opts->qos_rtr_options, + &p_opts->file_opts->qos_rtr_options); + + while (fgets(line, 1023, opts_file) != NULL) { + pound_sign = strchr(line,'#'); + token_matched = 0; + + /* Truncate any comments. 
*/ + if (pound_sign) + *pound_sign = '\0'; + + /* get the first token */ + p_key = strtok_r(line, " \t\n", &p_val); + if (!p_key) + continue; + + p_val = clean_val(p_val); + + for (r = opt_tbl; r->name; r++) { + if (strcmp(r->name, p_key)) + continue; + + token_matched = 1; + + if (!r->can_update) + continue; + + p_field1 = (void *)p_opts->file_opts + r->opt_offset; + p_field2 = (void *)p_opts + r->opt_offset; + r->parse_fn(p_subn, p_key, p_val, p_field1, p_field2, + r->setup_fn); + break; + } + if (!token_matched) + log_report(" Unrecognized token: \"%s\"\n", p_key); + } + fclose(opts_file); + + osm_subn_verify_config(p_opts); + + parse_prefix_routes_file(p_subn); + + parse_per_mod_logging_file(p_subn); + + return 0; +} + +void osm_subn_output_conf(FILE *out, IN osm_subn_opt_t * p_opts) +{ + int cacongoutputcount = 0; + int i; + + fprintf(out, + "#\n# DEVICE ATTRIBUTES OPTIONS\n#\n" + "# The port GUID on which the OpenSM is running\n" + "guid 0x%016" PRIx64 "\n\n" + "# M_Key value sent to all ports qualifying all Set(PortInfo)\n" + "m_key 0x%016" PRIx64 "\n\n" + "# The lease period used for the M_Key on this subnet in [sec]\n" + "m_key_lease_period %u\n\n" + "# The protection level used for the M_Key on this subnet\n" + "m_key_protection_level %u\n\n" + "# If TRUE, SM tries to determine the m_key of unknown ports from guid2mkey file\n" + "# If FALSE, SM won't try to determine the m_key of unknown ports.\n" + "# Preconfigured m_key will be used instead\n" + "m_key_lookup %s\n\n" + "# SM_Key value of the SM used for SM authentication\n" + "sm_key 0x%016" PRIx64 "\n\n" + "# SM_Key value to qualify rcv SA queries as 'trusted'\n" + "sa_key 0x%016" PRIx64 "\n\n" + "# Note that for both values above (sm_key and sa_key)\n" + "# OpenSM version 3.2.1 and below used the default value '1'\n" + "# in a host byte order, it is fixed now but you may need to\n" + "# change the values to interoperate with old OpenSM running\n" + "# on a little endian machine.\n\n" + "# Subnet 
prefix used on this subnet\n" + "subnet_prefix 0x%016" PRIx64 "\n\n" + "# The LMC value used on this subnet\n" + "lmc %u\n\n" + "# lmc_esp0 determines whether LMC value used on subnet is used for\n" + "# enhanced switch port 0. If TRUE, LMC value for subnet is used for\n" + "# ESP0. Otherwise, LMC value for ESP0s is 0.\n" + "lmc_esp0 %s\n\n" + "# sm_sl determines SMSL used for SM/SA communication\n" + "sm_sl %u\n\n" + "# The code of maximal time a packet can live in a switch\n" + "# The actual time is 4.096usec * 2^\n" + "# The value 0x14 disables this mechanism\n" + "packet_life_time 0x%02x\n\n" + "# The number of sequential packets dropped that cause the port\n" + "# to enter the VLStalled state. The result of setting this value to\n" + "# zero is undefined.\n" + "vl_stall_count 0x%02x\n\n" + "# The number of sequential packets dropped that cause the port\n" + "# to enter the VLStalled state. This value is for switch ports\n" + "# driving a CA or router port. The result of setting this value\n" + "# to zero is undefined.\n" + "leaf_vl_stall_count 0x%02x\n\n" + "# The code of maximal time a packet can wait at the head of\n" + "# transmission queue.\n" + "# The actual time is 4.096usec * 2^\n" + "# The value 0x14 disables this mechanism\n" + "head_of_queue_lifetime 0x%02x\n\n" + "# The maximal time a packet can wait at the head of queue on\n" + "# switch port connected to a CA or router port\n" + "leaf_head_of_queue_lifetime 0x%02x\n\n" + "# Limit the maximal operational VLs\n" + "max_op_vls %u\n\n" + "# Force PortInfo:LinkSpeedEnabled on switch ports\n" + "# If 0, don't modify PortInfo:LinkSpeedEnabled on switch port\n" + "# Otherwise, use value for PortInfo:LinkSpeedEnabled on switch port\n" + "# Values are (IB Spec 1.2.1, 14.2.5.6 Table 146 \"PortInfo\")\n" + "# 1: 2.5 Gbps\n" + "# 3: 2.5 or 5.0 Gbps\n" + "# 5: 2.5 or 10.0 Gbps\n" + "# 7: 2.5 or 5.0 or 10.0 Gbps\n" + "# 2,4,6,8-14 Reserved\n" + "# Default 15: set to PortInfo:LinkSpeedSupported\n" + 
"force_link_speed %u\n\n" + "# Force PortInfo:LinkSpeedExtEnabled on ports\n" + "# If 0, don't modify PortInfo:LinkSpeedExtEnabled on port\n" + "# Otherwise, use value for PortInfo:LinkSpeedExtEnabled on port\n" + "# Values are (MgtWG RefIDs #4722 and #9366)\n" + "# 1: 14.0625 Gbps\n" + "# 2: 25.78125 Gbps\n" + "# 3: 14.0625 Gbps or 25.78125 Gbps\n" + "# 4: 53.125 Gbps\n" + "# 5: 14.0625 Gbps or 53.125 Gbps\n" + "# 6: 25.78125 Gbps or 53.125 Gbps\n" + "# 7: 14.0625 Gbps, 25.78125 Gbps or 53.125 Gbps\n" + "# 30: Disable extended link speeds\n" + "# Default 31: set to PortInfo:LinkSpeedExtSupported\n" + "force_link_speed_ext %u\n\n" + "# Force PortInfo:LinkWidthEnabled on switch ports\n" + "# If 0, don't modify PortInfo:LinkWidthEnabled on switch port\n" + "# Otherwise, use value for PortInfo:LinkWidthEnabled on switch port\n" + "# Values are (IB Spec 1.2.1, 14.2.5.6 Table 146 \"PortInfo\"\n" + "# augmented by MgtWG RefIDs #9306-9309)\n" + "# 1: 1x\n" + "# 2: 4x\n" + "# 3: 1x or 4x\n" + "# 4: 8x\n" + "# 5: 1x or 8x\n" + "# 6: 4x or 8x\n" + "# 7: 1x or 4x or 8x\n" + "# 8: 12x\n" + "# 9: 1x or 12x\n" + "# 10: 4x or 12x\n" + "# 11: 1x or 4x or 12x\n" + "# 12: 8x or 12x\n" + "# 13: 1x or 8x or 12x\n" + "# 14: 4x or 8x or 12x\n" + "# 15: 1x or 4x or 8x or 12x\n" + "# 16: 2x\n" + "# 17: 1x or 2x\n" + "# 18: 2x or 4x\n" + "# 19: 1x or 2x or 4x\n" + "# 20: 2x or 8x\n" + "# 21: 1x or 2x or 8x\n" + "# 22: 2x or 4x or 8x\n" + "# 23: 1x or 2x or 4x or 8x\n" + "# 24: 2x or 12x\n" + "# 25: 1x or 2x or 12x\n" + "# 26: 2x or 4x or 12x\n" + "# 27: 1x or 2x or 4x or 12x\n" + "# 28: 2x or 8x or 12x\n" + "# 29: 1x or 2x or 8x or 12x\n" + "# 30: 2x or 4x or 8x or 12x\n" + "# 31: 1x or 2x or 4x or 8x or 12x\n" + "# 32-254 Reserved\n" + "# Default 255: set to PortInfo:LinkWidthSupported\n" + "force_link_width %u\n\n" + "# FDR10 on ports on devices that support FDR10\n" + "# Values are:\n" + "# 0: don't use fdr10 (no MLNX ExtendedPortInfo MADs)\n" + "# Default 1: enable fdr10 when 
supported\n" + "# 2: disable fdr10 when supported\n" + "fdr10 %u\n\n" + "# The subnet_timeout code that will be set for all the ports\n" + "# The actual timeout is 4.096usec * 2^\n" + "subnet_timeout %u\n\n" + "# Threshold of local phy errors for sending Trap 129\n" + "local_phy_errors_threshold 0x%02x\n\n" + "# Threshold of credit overrun errors for sending Trap 130\n" + "overrun_errors_threshold 0x%02x\n\n" + "# Use SwitchInfo:MulticastFDBTop if advertised in PortInfo:CapabilityMask\n" + "use_mfttop %s\n\n", + cl_ntoh64(p_opts->guid), + cl_ntoh64(p_opts->m_key), + cl_ntoh16(p_opts->m_key_lease_period), + p_opts->m_key_protect_bits, + p_opts->m_key_lookup ? "TRUE" : "FALSE", + cl_ntoh64(p_opts->sm_key), + cl_ntoh64(p_opts->sa_key), + cl_ntoh64(p_opts->subnet_prefix), + p_opts->lmc, + p_opts->lmc_esp0 ? "TRUE" : "FALSE", + p_opts->sm_sl, + p_opts->packet_life_time, + p_opts->vl_stall_count, + p_opts->leaf_vl_stall_count, + p_opts->head_of_queue_lifetime, + p_opts->leaf_head_of_queue_lifetime, + p_opts->max_op_vls, + p_opts->force_link_speed, + p_opts->force_link_speed_ext, + p_opts->force_link_width, + p_opts->fdr10, + p_opts->subnet_timeout, + p_opts->local_phy_errors_threshold, + p_opts->overrun_errors_threshold, + p_opts->use_mfttop ? "TRUE" : "FALSE"); + + fprintf(out, + "#\n# PARTITIONING OPTIONS\n#\n" + "# Partition configuration file to be used\n" + "partition_config_file %s\n\n" + "# Disable partition enforcement by switches (DEPRECATED)\n" + "# This option is DEPRECATED. 
Please use part_enforce instead\n" + "no_partition_enforcement %s\n\n" + "# Partition enforcement type (for switches)\n" + "# Values are both, out, in and off\n" + "# Default is both (outbound and inbound enforcement)\n" + "part_enforce %s\n\n" + "# Allow both full and limited membership on the same partition\n" + "allow_both_pkeys %s\n\n" + "# Keep current and take into account old pkey indexes\n" + "# during calculation of physical ports pkey tables\n" + "keep_pkey_indexes %s\n\n" + "# SM assigned GUID byte where GUID is formed from OpenFabrics OUI\n" + "# followed by 40 bits xy 00 ab cd ef where xy is the SM assigned GUID byte\n" + "# and ab cd ef is an SM autogenerated 24 bits\n" + "# SM assigned GUID byte should be configured as subnet unique\n" + "sm_assigned_guid 0x%02x\n\n", + p_opts->partition_config_file, + p_opts->no_partition_enforcement ? "TRUE" : "FALSE", + p_opts->part_enforce, + p_opts->allow_both_pkeys ? "TRUE" : "FALSE", + p_opts->keep_pkey_indexes ? "TRUE" : "FALSE", + p_opts->sm_assigned_guid); + + fprintf(out, + "#\n# SWEEP OPTIONS\n#\n" + "# The number of seconds between subnet sweeps (0 disables it)\n" + "sweep_interval %u\n\n" + "# If TRUE cause all lids to be reassigned\n" + "reassign_lids %s\n\n" + "# If TRUE forces every sweep to be a heavy sweep\n" + "force_heavy_sweep %s\n\n" + "# If TRUE every trap 128 and 144 will cause a heavy sweep.\n" + "# NOTE: successive identical traps (>10) are suppressed\n" + "sweep_on_trap %s\n\n", + p_opts->sweep_interval, + p_opts->reassign_lids ? "TRUE" : "FALSE", + p_opts->force_heavy_sweep ? "TRUE" : "FALSE", + p_opts->sweep_on_trap ? "TRUE" : "FALSE"); + + fprintf(out, + "#\n# ROUTING OPTIONS\n#\n" + "# If TRUE count switches as link subscriptions\n" + "port_profile_switch_nodes %s\n\n", + p_opts->port_profile_switch_nodes ? "TRUE" : "FALSE"); + + fprintf(out, + "# Name of file with port guids to be ignored by port profiling\n" + "port_prof_ignore_file %s\n\n", p_opts->port_prof_ignore_file ? 
+ p_opts->port_prof_ignore_file : null_str); + + fprintf(out, + "# The file holding routing weighting factors per output port\n" + "hop_weights_file %s\n\n", + p_opts->hop_weights_file ? p_opts->hop_weights_file : null_str); + + fprintf(out, + "# The file holding non-default port order per switch for routing\n" + "port_search_ordering_file %s\n\n", + p_opts->port_search_ordering_file ? + p_opts->port_search_ordering_file : null_str); + + fprintf(out, + "# Routing engine\n" + "# Multiple routing engines can be specified separated by\n" + "# commas so that specific ordering of routing algorithms will\n" + "# be tried if earlier routing engines fail.\n" + "# Supported engines: minhop, updn, dnup, file, ftree, lash,\n" + "# dor, torus-2QoS, nue, dfsssp, sssp\n" + "routing_engine %s\n\n", p_opts->routing_engine_names ? + p_opts->routing_engine_names : null_str); + + fprintf(out, + "# Routing engines will avoid throttled switch-to-switch links\n" + "# (supported by: nue, dfsssp, sssp; use FALSE if unsure)\n" + "avoid_throttled_links %s\n\n", + p_opts->avoid_throttled_links ? "TRUE" : "FALSE"); + + fprintf(out, + "# Connect roots (use FALSE if unsure)\n" + "connect_roots %s\n\n", + p_opts->connect_roots ? "TRUE" : "FALSE"); + + fprintf(out, + "# Use unicast routing cache (use FALSE if unsure)\n" + "use_ucast_cache %s\n\n", + p_opts->use_ucast_cache ? "TRUE" : "FALSE"); + + fprintf(out, + "# Lid matrix dump file name\n" + "lid_matrix_dump_file %s\n\n", p_opts->lid_matrix_dump_file ? + p_opts->lid_matrix_dump_file : null_str); + + fprintf(out, + "# LFTs file name\nlfts_file %s\n\n", + p_opts->lfts_file ? p_opts->lfts_file : null_str); + + fprintf(out, + "# The file holding the root node guids (for fat-tree or Up/Down)\n" + "# One guid in each line\nroot_guid_file %s\n\n", + p_opts->root_guid_file ? 
p_opts->root_guid_file : null_str); + + fprintf(out, + "# The file holding the fat-tree compute node guids\n" + "# One guid in each line\ncn_guid_file %s\n\n", + p_opts->cn_guid_file ? p_opts->cn_guid_file : null_str); + + fprintf(out, + "# The file holding the fat-tree I/O node guids\n" + "# One guid in each line.\n" + "# If only io_guid file is provided, the rest of nodes\n" + "# are considered as compute nodes.\n" + "io_guid_file %s\n\n", + p_opts->io_guid_file ? p_opts->io_guid_file : null_str); + + fprintf(out, + "# If TRUE enables alternative indexing policy for ftree routing\n" + "# in quasi-ftree topologies that can improve shift-pattern support.\n" + "# The switch indexing starts from root switch and leaf switches\n" + "# are termination points of BFS algorithm\n" + "# If FALSE, the indexing starts from leaf switch (default)\n" + "quasi_ftree_indexing %s\n\n", + p_opts->quasi_ftree_indexing ? "TRUE" : "FALSE"); + + fprintf(out, + "# Number of reverse hops allowed for I/O nodes\n" + "# Used for connectivity between I/O nodes connected to Top Switches\nmax_reverse_hops %d\n\n", + p_opts->max_reverse_hops); + + fprintf(out, + "# The file holding the node ids which will be used by" + " Up/Down algorithm instead\n# of GUIDs (one guid and" + " id in each line)\nids_guid_file %s\n\n", + p_opts->ids_guid_file ? p_opts->ids_guid_file : null_str); + + fprintf(out, + "# The file holding guid routing order guids (for MinHop and Up/Down)\n" + "guid_routing_order_file %s\n\n", + p_opts->guid_routing_order_file ? p_opts->guid_routing_order_file : null_str); + + fprintf(out, + "# Do mesh topology analysis (for LASH algorithm)\n" + "do_mesh_analysis %s\n\n", + p_opts->do_mesh_analysis ? "TRUE" : "FALSE"); + + fprintf(out, + "# Starting VL for LASH algorithm\n" + "lash_start_vl %u\n\n", + p_opts->lash_start_vl); + + fprintf(out, + "# Maximum number of VLs for Nue routing algorithm (default: 1; to enforce\n" + "# deadlock-freedom even if QoS is not enabled). 
Set to 0 if Nue should\n" + "# automatically determine and choose maximum supported by the fabric, or\n" + "# any integer >= 1 (then Nue uses min(max_supported,nue_max_num_vls)\n" + "nue_max_num_vls %u\n\n", + p_opts->nue_max_num_vls); + + fprintf(out, + "# If TRUE, then Nue assumes that switches will send/receive\n" + "# data traffic, too, and hence their paths are included in\n" + "# the deadlock-avoidance calculation (use FALSE if unsure)\n" + "nue_include_switches %s\n\n", + p_opts->nue_include_switches ? "TRUE" : "FALSE"); + + fprintf(out, + "# Port Shifting (use FALSE if unsure)\n" + "port_shifting %s\n\n", + p_opts->port_shifting ? "TRUE" : "FALSE"); + + fprintf(out, + "# Assign ports in a random order instead of round-robin\n" + "# If zero disable (default), otherwise use the value as a random seed\n" + "scatter_ports %d\n\n", + p_opts->scatter_ports); + + fprintf(out, + "# Don't use scatter for ports defined in\n" + "# guid_routing_order file\n" + "guid_routing_order_no_scatter %s\n\n", + p_opts->guid_routing_order_no_scatter ? "TRUE" : "FALSE"); + + fprintf(out, + "# SA database file name\nsa_db_file %s\n\n", + p_opts->sa_db_file ? p_opts->sa_db_file : null_str); + + fprintf(out, + "# If TRUE causes OpenSM to dump SA database at the end of\n" + "# every light sweep, regardless of the verbosity level\n" + "sa_db_dump %s\n\n", + p_opts->sa_db_dump ? "TRUE" : "FALSE"); + + fprintf(out, + "# Torus-2QoS configuration file name\ntorus_config %s\n\n", + p_opts->torus_conf_file ? 
p_opts->torus_conf_file : null_str); + + fprintf(out, + "#\n# HANDOVER - MULTIPLE SMs OPTIONS\n#\n" + "# SM priority used for deciding who is the master\n" + "# Range goes from 0 (lowest priority) to 15 (highest).\n" + "sm_priority %u\n\n" + "# If TRUE other SMs on the subnet should be ignored\n" + "ignore_other_sm %s\n\n" + "# Timeout in [msec] between two polls of active master SM\n" + "sminfo_polling_timeout %u\n\n" + "# Number of failing polls of remote SM that declares it dead\n" + "polling_retry_number %u\n\n" + "# If TRUE honor the guid2lid file when coming out of standby\n" + "# state, if such file exists and is valid\n" + "honor_guid2lid_file %s\n\n", + p_opts->sm_priority, + p_opts->ignore_other_sm ? "TRUE" : "FALSE", + p_opts->sminfo_polling_timeout, + p_opts->polling_retry_number, + p_opts->honor_guid2lid_file ? "TRUE" : "FALSE"); + + fprintf(out, + "#\n# TIMING AND THREADING OPTIONS\n#\n" + "# Maximum number of SMPs sent in parallel\n" + "max_wire_smps %u\n\n" + "# Maximum number of timeout based SMPs allowed to be outstanding\n" + "# A value less than or equal to max_wire_smps disables this mechanism\n" + "max_wire_smps2 %u\n\n" + "# The timeout in [usec] used for sending SMPs above max_wire_smps limit\n" + "# and below max_wire_smps2 limit\n" + "max_smps_timeout %u\n\n" + "# The maximum time in [msec] allowed for a transaction to complete\n" + "transaction_timeout %u\n\n" + "# The maximum number of retries allowed for a transaction to complete\n" + "transaction_retries %u\n\n" + "# The maximum time in [msec] allowed for a \"long\" transacrion to complete\n" + "# Currently, long transaction is only set of optimized SL2VLMappingTable\n" + "long_transaction_timeout %u\n\n" + "# Maximal time in [msec] a message can stay in the incoming message queue.\n" + "# If there is more than one message in the queue and the last message\n" + "# stayed in the queue more than this value, any SA request will be\n" + "# immediately be dropped but BUSY status is not 
currently returned.\n" + "max_msg_fifo_timeout %u\n\n" + "# Use a single thread for handling SA queries\n" + "single_thread %s\n\n", + p_opts->max_wire_smps, + p_opts->max_wire_smps2, + p_opts->max_smps_timeout, + p_opts->transaction_timeout, + p_opts->transaction_retries, + p_opts->long_transaction_timeout, + p_opts->max_msg_fifo_timeout, + p_opts->single_thread ? "TRUE" : "FALSE"); + + fprintf(out, + "#\n# MISC OPTIONS\n#\n" + "# Daemon mode\n" + "daemon %s\n\n" + "# SM Inactive\n" + "sm_inactive %s\n\n" + "# Babbling Port Policy\n" + "babbling_port_policy %s\n\n" + "# Drop event subscriptions (InformInfo and ServiceRecord) on port removal and SM coming out of STANDBY\n" + "drop_event_subscriptions %s\n\n" + "# Validate IPoIB non-broadcast group creation parameters against\n" + "# broadcast group parameters per IETF RFC 4391 (default TRUE)\n" + "ipoib_mcgroup_creation_validation %s\n\n" + "# Validate multicast join parameters against multicast group\n" + "# parameters when MC group already exists\n" + "mcgroup_join_validation %s\n\n" + "# Use original extended SA rates only\n" + "# The original extended SA rates are up through 300 Gbps (12x EDR)\n" + "# Set to TRUE for subnets with old kernels/drivers that don't understand\n" + "# the new SA rates for 2x link width and/or HDR link speed (19-22)\n" + "# default is FALSE\n" + "use_original_extended_sa_rates_only %s\n\n" + "# Use Optimized SLtoVLMapping programming if supported by device\n" + "use_optimized_slvl %s\n\n" + "# Sync in memory files used for high availability with storage\n" + "fsync_high_avail_files %s\n\n", + p_opts->daemon ? "TRUE" : "FALSE", + p_opts->sm_inactive ? "TRUE" : "FALSE", + p_opts->babbling_port_policy ? "TRUE" : "FALSE", + p_opts->drop_event_subscriptions ? "TRUE" : "FALSE", + p_opts->ipoib_mcgroup_creation_validation ? "TRUE" : "FALSE", + p_opts->mcgroup_join_validation ? "TRUE" : "FALSE", + p_opts->use_original_extended_sa_rates_only ? "TRUE" : "FALSE", + p_opts->use_optimized_slvl ? 
"TRUE" : "FALSE", + p_opts->fsync_high_avail_files ? "TRUE" : "FALSE"); + +#ifdef ENABLE_OSM_PERF_MGR + fprintf(out, + "#\n# Performance Manager Options\n#\n" + "# perfmgr enable\n" + "# PerfMgr is enabled if TRUE and disabled if FALSE (default FALSE)\n" + "perfmgr %s\n\n" + "# redirection enable\n" + "# Redirection supported if TRUE and not supported if FALSE (default TRUE)\n" + "perfmgr_redir %s\n\n" + "# sweep time in seconds (default %u seconds)\n" + "perfmgr_sweep_time_s %u\n\n" + "# Max outstanding queries (default %u)\n" + "perfmgr_max_outstanding_queries %u\n\n" + "# Ignore CAs on sweep (default FALSE)\n" + "perfmgr_ignore_cas %s\n\n" + "# Remove missing nodes from DB (default TRUE)\n" + "perfmgr_rm_nodes %s\n\n" + "# Log error counters to opensm.log (default TRUE)\n" + "perfmgr_log_errors %s\n\n" + "# Query PerfMgt Get(ClassPortInfo) for extended capabilities\n" + "# Extended capabilities include 64 bit extended counters\n" + "# and transmit wait support (default TRUE)\n" + "perfmgr_query_cpi %s\n\n" + "# Log xmit_wait errors (default FALSE)\n" + "perfmgr_xmit_wait_log %s\n\n" + "# If logging xmit_wait's; set threshold (default %u)\n" + "perfmgr_xmit_wait_threshold %u\n\n" + , + p_opts->perfmgr ? "TRUE" : "FALSE", + p_opts->perfmgr_redir ? "TRUE" : "FALSE", + OSM_PERFMGR_DEFAULT_SWEEP_TIME_S, + p_opts->perfmgr_sweep_time_s, + OSM_PERFMGR_DEFAULT_MAX_OUTSTANDING_QUERIES, + p_opts->perfmgr_max_outstanding_queries, + p_opts->perfmgr_ignore_cas ? "TRUE" : "FALSE", + p_opts->perfmgr_rm_nodes ? "TRUE" : "FALSE", + p_opts->perfmgr_log_errors ? "TRUE" : "FALSE", + p_opts->perfmgr_query_cpi ? "TRUE" : "FALSE", + p_opts->perfmgr_xmit_wait_log ? "TRUE" : "FALSE", + OSM_PERFMGR_DEFAULT_XMIT_WAIT_THRESHOLD, + p_opts->perfmgr_xmit_wait_threshold); + + fprintf(out, + "#\n# Event DB Options\n#\n" + "# Dump file to dump the events to\n" + "event_db_dump_file %s\n\n", p_opts->event_db_dump_file ? 
+ p_opts->event_db_dump_file : null_str); +#endif /* ENABLE_OSM_PERF_MGR */ + + fprintf(out, + "#\n# Event Plugin Options\n#\n" + "# Event plugin name(s)\n" + "event_plugin_name %s\n\n" + "# Options string that would be passed to the plugin(s)\n" + "event_plugin_options %s\n\n", + p_opts->event_plugin_name ? + p_opts->event_plugin_name : null_str, + p_opts->event_plugin_options ? + p_opts->event_plugin_options : null_str); + + fprintf(out, + "#\n# Node name map for mapping node's to more descriptive node descriptions\n" + "# (man ibnetdiscover for more information)\n#\n" + "node_name_map_name %s\n\n", p_opts->node_name_map_name ? + p_opts->node_name_map_name : null_str); + + fprintf(out, + "#\n# DEBUG FEATURES\n#\n" + "# The log flags used\n" + "log_flags 0x%02x\n\n" + "# Force flush of the log file after each log message\n" + "force_log_flush %s\n\n" + "# Log file to be used\n" + "log_file %s\n\n" + "# Limit the size of the log file in MB. If overrun, log is restarted\n" + "log_max_size %u\n\n" + "# If TRUE will accumulate the log over multiple OpenSM sessions\n" + "accum_log_file %s\n\n" + "# Per module logging configuration file\n" + "# Each line in config file contains \n" + "# where module_name is file name including .c\n" + "# separator is either = , space, or tab\n" + "# log_flags is the same flags as used in the coarse/overall logging\n" + "per_module_logging_file %s\n\n" + "# The directory to hold the file OpenSM dumps\n" + "dump_files_dir %s\n\n" + "# If TRUE enables new high risk options and hardware specific quirks\n" + "enable_quirks %s\n\n" + "# If TRUE disables client reregistration\n" + "no_clients_rereg %s\n\n" + "# If TRUE OpenSM should disable multicast support and\n" + "# no multicast routing is performed if TRUE\n" + "disable_multicast %s\n\n" + "# If TRUE opensm will exit on fatal initialization issues\n" + "exit_on_fatal %s\n\n" "# console [off|local" +#ifdef ENABLE_OSM_CONSOLE_LOOPBACK + "|loopback" +#endif +#ifdef ENABLE_OSM_CONSOLE_SOCKET 
+ "|socket]\n" +#else + "]\n" +#endif + "console %s\n\n" + "# Telnet port for console (default %d)\n" + "console_port %d\n\n", + p_opts->log_flags, + p_opts->force_log_flush ? "TRUE" : "FALSE", + p_opts->log_file, + p_opts->log_max_size, + p_opts->accum_log_file ? "TRUE" : "FALSE", + p_opts->per_module_logging_file ? + p_opts->per_module_logging_file : null_str, + p_opts->dump_files_dir, + p_opts->enable_quirks ? "TRUE" : "FALSE", + p_opts->no_clients_rereg ? "TRUE" : "FALSE", + p_opts->disable_multicast ? "TRUE" : "FALSE", + p_opts->exit_on_fatal ? "TRUE" : "FALSE", + p_opts->console, + OSM_DEFAULT_CONSOLE_PORT, p_opts->console_port); + + fprintf(out, + "#\n# QoS OPTIONS\n#\n" + "# Enable QoS setup\n" + "qos %s\n\n" + "# QoS policy file to be used\n" + "qos_policy_file %s\n\n" + "# Suppress QoS MAD status errors\n" + "suppress_sl2vl_mad_status_errors %s\n\n", + p_opts->qos ? "TRUE" : "FALSE", p_opts->qos_policy_file, + p_opts->suppress_sl2vl_mad_status_errors ? "TRUE" : "FALSE"); + + subn_dump_qos_options(out, + "QoS default options", "qos", + &p_opts->qos_options); + fprintf(out, "\n"); + subn_dump_qos_options(out, + "QoS CA options", "qos_ca", + &p_opts->qos_ca_options); + fprintf(out, "\n"); + subn_dump_qos_options(out, + "QoS Switch Port 0 options", "qos_sw0", + &p_opts->qos_sw0_options); + fprintf(out, "\n"); + subn_dump_qos_options(out, + "QoS Switch external ports options", "qos_swe", + &p_opts->qos_swe_options); + fprintf(out, "\n"); + subn_dump_qos_options(out, + "QoS Router ports options", "qos_rtr", + &p_opts->qos_rtr_options); + fprintf(out, "\n"); + + fprintf(out, + "#\n# Congestion Control OPTIONS (EXPERIMENTAL)\n#\n\n" + "# Enable Congestion Control Configuration\n" + "congestion_control %s\n\n" + "# CCKey to use when configuring congestion control\n" + "# note that this does not configure a new CCkey, only the CCkey to use\n" + "cc_key 0x%016" PRIx64 "\n\n" + "# Congestion Control Max outstanding MAD\n" + "cc_max_outstanding_mads %u\n\n", + 
p_opts->congestion_control ? "TRUE" : "FALSE", + cl_ntoh64(p_opts->cc_key), + p_opts->cc_max_outstanding_mads); + + fprintf(out, + "#\n# Congestion Control SwitchCongestionSetting options\n#\n" + "# Control Map - bitmask indicating which of the following are to be used\n" + "# bit 0 - victim mask\n" + "# bit 1 - credit mask\n" + "# bit 2 - threshold + packet size\n" + "# bit 3 - credit starvation threshold + return delay valid\n" + "# bit 4 - marking rate valid\n" + "cc_sw_cong_setting_control_map 0x%X\n\n", + cl_ntoh32(p_opts->cc_sw_cong_setting_control_map)); + + fprintf(out, + "# Victim Mask - 256 bit mask representing switch ports, mark packets with FECN\n" + "# whether they are the source or victim of congestion\n" + "# bit 0 - port 0 (enhanced port)\n" + "# bit 1 - port 1\n" + "# ...\n" + "# bit 254 - port 254\n" + "# bit 255 - reserved\n" + "cc_sw_cong_setting_victim_mask 0x"); + + for (i = 0; i < IB_CC_PORT_MASK_DATA_SIZE; i++) + fprintf(out, "%02X", p_opts->cc_sw_cong_setting_victim_mask[i]); + fprintf(out, "\n\n"); + + fprintf(out, + "# Credit Mask - 256 bit mask representing switch ports to apply credit starvation\n" + "# bit 0 - port 0 (enhanced port)\n" + "# bit 1 - port 1\n" + "# ...\n" + "# bit 254 - port 254\n" + "# bit 255 - reserved\n" + "cc_sw_cong_setting_credit_mask 0x"); + + for (i = 0; i < IB_CC_PORT_MASK_DATA_SIZE; i++) + fprintf(out, "%02X", p_opts->cc_sw_cong_setting_credit_mask[i]); + fprintf(out, "\n\n"); + + fprintf(out, + "# Threshold - value indicating aggressiveness of congestion marking\n" + "# 0x0 - none, 0x1 - loose, ..., 0xF - aggressive\n" + "cc_sw_cong_setting_threshold 0x%02X\n\n" + "# Packet Size - any packet less than this size will not be marked with a FECN\n" + "# units are in credits\n" + "cc_sw_cong_setting_packet_size %u\n\n" + "# Credit Starvation Threshold - value indicating aggressiveness of credit starvation\n" + "# 0x0 - none, 0x1 - loose, ..., 0xF - aggressive\n" + "cc_sw_cong_setting_credit_starvation_threshold 
0x%02X\n\n" + "# Credit Starvation Return Delay - in CCT entry shift:multiplier format, see IB spec\n" + "cc_sw_cong_setting_credit_starvation_return_delay %u:%u\n\n" + "# Marking Rate - mean number of packets between markings\n" + "cc_sw_cong_setting_marking_rate %u\n\n", + p_opts->cc_sw_cong_setting_threshold, + p_opts->cc_sw_cong_setting_packet_size, + p_opts->cc_sw_cong_setting_credit_starvation_threshold, + p_opts->cc_sw_cong_setting_credit_starvation_return_delay.shift, + p_opts->cc_sw_cong_setting_credit_starvation_return_delay.multiplier, + cl_ntoh16(p_opts->cc_sw_cong_setting_marking_rate)); + + fprintf(out, + "#\n# Congestion Control CA Congestion Setting options\n#\n" + "# Port Control\n" + "# bit 0 = 0, QP based congestion control\n" + "# bit 0 = 1, SL/port based congestion control\n" + "cc_ca_cong_setting_port_control 0x%04X\n\n" + "# Control Map - 16 bit bitmask indicating which SLs should be configured\n" + "cc_ca_cong_setting_control_map 0x%04X\n\n", + cl_ntoh16(p_opts->cc_ca_cong_setting_port_control), + cl_ntoh16(p_opts->cc_ca_cong_setting_control_map)); + + fprintf(out, + "#\n# CA Congestion Setting Entries\n#\n" + "# Each of congestion control settings below configures the CA Congestion\n" + "# Settings for an individual SL. The SL must be specified before the value.\n" + "# These options may be specified multiple times to configure different values\n" + "# for different SLs.\n" + "#\n" + "# ccti timer - when expires decrements 1 from the CCTI\n" + "# ccti increase - number to be added to the table index on receipt of a BECN\n" + "# trigger threshold - when the ccti is equal to this, an event is logged\n" + "# ccti min - the minimum value for the ccti. 
This imposes a minimum rate\n" + "# on the injection rate\n\n"); + + for (i = 0; i < IB_CA_CONG_ENTRY_DATA_SIZE; i++) { + /* Don't output unless one of the settings has been set, there's no need + * to output 16 chunks of this with all defaults of 0 */ + if (p_opts->cc_ca_cong_entries[i].ccti_timer + || p_opts->cc_ca_cong_entries[i].ccti_increase + || p_opts->cc_ca_cong_entries[i].trigger_threshold + || p_opts->cc_ca_cong_entries[i].ccti_min) { + fprintf(out, + "# SL = %u\n" + "cc_ca_cong_setting_ccti_timer %u %u\n" + "cc_ca_cong_setting_ccti_increase %u %u\n" + "cc_ca_cong_setting_trigger_threshold %u %u\n" + "cc_ca_cong_setting_ccti_min %u %u\n\n", + i, + i, + cl_ntoh16(p_opts->cc_ca_cong_entries[i].ccti_timer), + i, + p_opts->cc_ca_cong_entries[i].ccti_increase, + i, + p_opts->cc_ca_cong_entries[i].trigger_threshold, + i, + p_opts->cc_ca_cong_entries[i].ccti_min); + cacongoutputcount++; + } + } + + /* If by chance all the CA Cong Settings are default, output at least 1 chunk + * for illustration */ + if (!cacongoutputcount) + fprintf(out, + "# SL = 0\n" + "cc_ca_cong_setting_ccti_timer 0 %u\n" + "cc_ca_cong_setting_ccti_increase 0 %u\n" + "cc_ca_cong_setting_trigger_threshold 0 %u\n" + "cc_ca_cong_setting_ccti_min 0 %u\n\n", + cl_ntoh16(p_opts->cc_ca_cong_entries[0].ccti_timer), + p_opts->cc_ca_cong_entries[0].ccti_increase, + p_opts->cc_ca_cong_entries[0].trigger_threshold, + p_opts->cc_ca_cong_entries[0].ccti_min); + + fprintf(out, + "#\n# Congestion Control Table\n#\n" + "# Comma separated list of CCT entries representing CCT.\n" + "# Format is shift:multipler,shift_multiplier,shift:multiplier,...\n" + "cc_cct "); + + if (!p_opts->cc_cct.entries_len) { + fprintf(out, "%s\n", null_str); + } + else { + fprintf(out, "%u:%u", + p_opts->cc_cct.entries[0].shift, + p_opts->cc_cct.entries[0].multiplier); + for (i = 1; i < p_opts->cc_cct.entries_len; i++) { + fprintf(out, ",%u:%u", + p_opts->cc_cct.entries[i].shift, + p_opts->cc_cct.entries[i].multiplier); + } + 
fprintf(out, "\n"); + } + fprintf(out, "\n"); + + fprintf(out, + "# Prefix routes file name\n" + "prefix_routes_file %s\n\n", + p_opts->prefix_routes_file); + + fprintf(out, + "#\n# IPv6 Solicited Node Multicast (SNM) Options\n#\n" + "consolidate_ipv6_snm_req %s\n\n", + p_opts->consolidate_ipv6_snm_req ? "TRUE" : "FALSE"); + + fprintf(out, "# Log prefix\nlog_prefix %s\n\n", p_opts->log_prefix); + + /* optional string attributes ... */ + +} + +int osm_subn_write_conf_file(char *file_name, IN osm_subn_opt_t * p_opts) +{ + FILE *opts_file; + + opts_file = fopen(file_name, "w"); + if (!opts_file) { + printf("cannot open file \'%s\' for writing: %s\n", + file_name, strerror(errno)); + return -1; + } + + osm_subn_output_conf(opts_file, p_opts); + + fclose(opts_file); + + return 0; +} diff --git a/opensm/osm_sw_info_rcv.c b/opensm/osm_sw_info_rcv.c new file mode 100644 index 0000000..0cc4d61 --- /dev/null +++ b/opensm/osm_sw_info_rcv.c @@ -0,0 +1,454 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005,2008 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_si_rcv_t. + * This object represents the SwitchInfo Receiver object. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_SW_INFO_RCV_C +#include +#include +#include +#include +#include + +#if 0 +/********************************************************************** + The plock must be held before calling this function. 
+**********************************************************************/
+/*
+ * Issue one Get(LinearForwardingTable) request per block, for blocks
+ * 0..osm_switch_get_max_block_id_in_use(p_sw), over the directed-route
+ * path of the switch's port 0.
+ *
+ * A failed request is logged and the loop moves on to the next block.
+ * This function is compiled out (it sits inside an "#if 0" region).
+ */
+static void si_rcv_get_fwd_tbl(IN osm_sm_t * sm, IN osm_switch_t * p_sw)
+{
+	osm_madw_context_t context;
+	osm_dr_path_t *p_dr_path;
+	osm_physp_t *p_physp;
+	osm_node_t *p_node;
+	uint32_t block_id_ho;
+	uint32_t max_block_id_ho;
+	ib_api_status_t status = IB_SUCCESS;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	CL_ASSERT(p_sw);
+
+	p_node = p_sw->p_node;
+
+	CL_ASSERT(osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH);
+
+	/* Context handed back with each response: a Get, not a Set. */
+	context.lft_context.node_guid = osm_node_get_node_guid(p_node);
+	context.lft_context.set_method = FALSE;
+
+	max_block_id_ho = osm_switch_get_max_block_id_in_use(p_sw);
+
+	p_physp = osm_node_get_physp_ptr(p_node, 0);
+	p_dr_path = osm_physp_get_dr_path_ptr(p_physp);
+
+	for (block_id_ho = 0; block_id_ho <= max_block_id_ho; block_id_ho++) {
+		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+			"Retrieving FT block %u\n", block_id_ho);
+
+		status = osm_req_get(sm, p_dr_path, IB_MAD_ATTR_LIN_FWD_TBL,
+				     cl_hton32(block_id_ho), TRUE, 0,
+				     0, CL_DISP_MSGID_NONE, &context);
+		if (status != IB_SUCCESS)
+			/* continue the loop despite the error */
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3603: "
+				"Failure initiating LinearForwardingTable request (%s)\n",
+				ib_get_err_str(status));
+	}
+
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/**********************************************************************
+ The plock must be held before calling this function.
+**********************************************************************/ +static void si_rcv_get_mcast_fwd_tbl(IN osm_sm_t * sm, IN osm_switch_t * p_sw) +{ + osm_madw_context_t context; + osm_dr_path_t *p_dr_path; + osm_physp_t *p_physp; + osm_node_t *p_node; + osm_mcast_tbl_t *p_tbl; + uint32_t block_id_ho; + uint32_t max_block_id_ho; + uint32_t position; + uint32_t max_position; + uint32_t attr_mod_ho; + ib_api_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(sm->p_log); + + CL_ASSERT(p_sw); + + p_node = p_sw->p_node; + + CL_ASSERT(osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH); + + if (osm_switch_get_mcast_fwd_tbl_size(p_sw) == 0) { + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Multicast not supported by switch 0x%016" PRIx64 "\n", + cl_ntoh64(osm_node_get_node_guid(p_node))); + goto Exit; + } + + context.mft_context.node_guid = osm_node_get_node_guid(p_node); + context.mft_context.set_method = FALSE; + + p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw); + max_block_id_ho = osm_mcast_tbl_get_max_block(p_tbl); + + if (max_block_id_ho > IB_MCAST_MAX_BLOCK_ID) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3609: " + "Out-of-range mcast block size = %u on switch 0x%016" + PRIx64 "\n", max_block_id_ho, + cl_ntoh64(osm_node_get_node_guid(p_node))); + goto Exit; + } + + max_position = osm_mcast_tbl_get_max_position(p_tbl); + + CL_ASSERT(max_position <= IB_MCAST_POSITION_MAX); + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Max MFT block = %u, Max position = %u\n", max_block_id_ho, + max_position); + + p_physp = osm_node_get_physp_ptr(p_node, 0); + p_dr_path = osm_physp_get_dr_path_ptr(p_physp); + + for (block_id_ho = 0; block_id_ho <= max_block_id_ho; block_id_ho++) { + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Retrieving MFT block %u\n", block_id_ho); + + for (position = 0; position <= max_position; position++) { + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Retrieving MFT position %u\n", position); + + attr_mod_ho = + block_id_ho | position << IB_MCAST_POSITION_SHIFT; + status = + osm_req_get(sm, 
p_dr_path, + IB_MAD_ATTR_MCAST_FWD_TBL, + cl_hton32(attr_mod_ho), TRUE, 0, + 0, CL_DISP_MSGID_NONE, &context); + if (status != IB_SUCCESS) + /* continue the loop despite the error */ + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3607: " + "Failure initiating PortInfo request (%s)\n", + ib_get_err_str(status)); + } + } + +Exit: + OSM_LOG_EXIT(sm->p_log); +} +#endif + +/********************************************************************** + Lock must be held on entry to this function. +**********************************************************************/ +static void si_rcv_process_new(IN osm_sm_t * sm, IN osm_node_t * p_node, + IN const osm_madw_t * p_madw) +{ + osm_switch_t *p_sw; + osm_switch_t *p_check; + ib_switch_info_t *p_si; + ib_smp_t *p_smp; + cl_qmap_t *p_sw_guid_tbl; + + CL_ASSERT(sm); + + OSM_LOG_ENTER(sm->p_log); + + CL_ASSERT(p_madw); + + p_sw_guid_tbl = &sm->p_subn->sw_guid_tbl; + p_smp = osm_madw_get_smp_ptr(p_madw); + p_si = ib_smp_get_payload_ptr(p_smp); + + osm_dump_switch_info_v2(sm->p_log, p_si, FILE_ID, OSM_LOG_DEBUG); + + p_sw = osm_switch_new(p_node, p_madw); + if (p_sw == NULL) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3608: " + "Unable to allocate new switch object\n"); + goto Exit; + } + + /* set subnet max mlid to the minimum MulticastFDBCap of all switches */ + if (p_si->mcast_cap && + cl_ntoh16(p_si->mcast_cap) + IB_LID_MCAST_START_HO - 1 < + sm->p_subn->max_mcast_lid_ho) { + sm->p_subn->max_mcast_lid_ho = cl_ntoh16(p_si->mcast_cap) + + IB_LID_MCAST_START_HO - 1; + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Subnet max multicast lid is 0x%X\n", + sm->p_subn->max_mcast_lid_ho); + } + + /* set subnet max unicast lid to the minimum LinearFDBCap of all switches */ + if (cl_ntoh16(p_si->lin_cap) < sm->p_subn->max_ucast_lid_ho) { + sm->p_subn->max_ucast_lid_ho = cl_ntoh16(p_si->lin_cap); + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Subnet max unicast lid is 0x%X\n", + sm->p_subn->max_ucast_lid_ho); + } + + p_check = (osm_switch_t *) 
cl_qmap_insert(p_sw_guid_tbl, + osm_node_get_node_guid + (p_node), &p_sw->map_item); + if (p_check != p_sw) { + /* This shouldn't happen since we hold the lock! */ + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3605: " + "Unable to add new switch object to database\n"); + osm_switch_delete(&p_sw); + goto Exit; + } + + p_node->sw = p_sw; + + /* Update the switch info according to the info we just received. */ + osm_switch_set_switch_info(p_sw, p_si); + +#if 0 + /* Don't bother retrieving the current unicast and multicast tables + from the switches. The current version of SM does + not support silent take-over of an existing multicast + configuration. + + Gathering the multicast tables can also generate large amounts + of extra subnet-init traffic. + + The code to retrieve the tables was fully debugged. */ + + si_rcv_get_fwd_tbl(sm, p_sw); + if (!sm->p_subn->opt.disable_multicast) + si_rcv_get_mcast_fwd_tbl(sm, p_sw); +#endif + +Exit: + OSM_LOG_EXIT(sm->p_log); +} + +/********************************************************************** + Lock must be held on entry to this function. + Return 1 if the caller is expected to send a change_detected event. + this can not be done internally as the event needs the lock... +**********************************************************************/ +static boolean_t si_rcv_process_existing(IN osm_sm_t * sm, + IN osm_node_t * p_node, + IN const osm_madw_t * p_madw) +{ + osm_switch_t *p_sw = p_node->sw; + ib_switch_info_t *p_si; + osm_si_context_t *p_si_context; + ib_smp_t *p_smp; + osm_epi_lft_change_event_t lft_change; + boolean_t is_change_detected = FALSE; + + OSM_LOG_ENTER(sm->p_log); + + CL_ASSERT(p_madw); + + p_smp = osm_madw_get_smp_ptr(p_madw); + p_si = ib_smp_get_payload_ptr(p_smp); + p_si_context = osm_madw_get_si_context_ptr(p_madw); + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Received logical %cetResp()\n", + p_si_context->set_method ? 
'S' : 'G'); + + osm_switch_set_switch_info(p_sw, p_si); + + if (p_si_context->light_sweep == TRUE && !p_si_context->set_method) { + /* If state changed bit is on the mad was returned with an + error - signal a change to the state manager. */ + if (ib_smp_get_status(p_smp) != 0) { + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "GetResp() received with error in light sweep. " + "Commencing heavy sweep\n"); + is_change_detected = TRUE; + } else if (ib_switch_info_get_state_change(p_si)) { + osm_dump_switch_info_v2(sm->p_log, p_si, FILE_ID, OSM_LOG_DEBUG); + is_change_detected = TRUE; + } + } + + if (sm->p_subn->first_time_master_sweep == FALSE && + p_si_context->set_method && p_si_context->lft_top_change) { + lft_change.p_sw = p_sw; + lft_change.flags = LFT_CHANGED_LFT_TOP; + lft_change.lft_top = cl_ntoh16(p_si->lin_top); + lft_change.block_num = 0; + osm_opensm_report_event(sm->p_subn->p_osm, + OSM_EVENT_ID_LFT_CHANGE, + &lft_change); + } + + OSM_LOG_EXIT(sm->p_log); + return is_change_detected; +} + +static void si_rcv_get_sp0_info(IN osm_sm_t * sm, IN osm_node_t * node) +{ + osm_madw_context_t context; + osm_physp_t *physp; + ib_api_status_t status; + int mlnx_epi_supported = 0; + + physp = osm_node_get_physp_ptr(node, 0); + + context.pi_context.node_guid = osm_node_get_node_guid(node); + context.pi_context.port_guid = osm_physp_get_port_guid(physp); + context.pi_context.set_method = FALSE; + context.pi_context.light_sweep = FALSE; + context.pi_context.active_transition = FALSE; + context.pi_context.client_rereg = FALSE; + + status = osm_req_get(sm, osm_physp_get_dr_path_ptr(physp), + IB_MAD_ATTR_PORT_INFO, 0, TRUE, 0, + 0, CL_DISP_MSGID_NONE, &context); + if (status != IB_SUCCESS) + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3611: " + "Failure initiating PortInfo request (%s)\n", + ib_get_err_str(status)); + + if (ib_switch_info_is_enhanced_port0(&node->sw->switch_info) && + sm->p_subn->opt.fdr10) { + mlnx_epi_supported = is_mlnx_ext_port_info_supported( + 
ib_node_info_get_vendor_id(&node->node_info), + node->node_info.device_id); + if (mlnx_epi_supported) { + status = osm_req_get(sm, + osm_physp_get_dr_path_ptr(physp), + IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO, + 0, TRUE, 0, 0, + CL_DISP_MSGID_NONE, &context); + if (status != IB_SUCCESS) + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3616: " + "Failure initiating MLNX ExtPortInfo request (%s)\n", + ib_get_err_str(status)); + } + } + +} + +void osm_si_rcv_process(IN void *context, IN void *data) +{ + osm_sm_t *sm = context; + osm_madw_t *p_madw = data; + ib_switch_info_t *p_si; + ib_smp_t *p_smp; + osm_node_t *p_node; + ib_net64_t node_guid; + osm_si_context_t *p_context; + + CL_ASSERT(sm); + + OSM_LOG_ENTER(sm->p_log); + + CL_ASSERT(p_madw); + + p_smp = osm_madw_get_smp_ptr(p_madw); + p_si = ib_smp_get_payload_ptr(p_smp); + p_context = osm_madw_get_si_context_ptr(p_madw); + node_guid = p_context->node_guid; + + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Switch GUID 0x%016" PRIx64 ", TID 0x%" PRIx64 "\n", + cl_ntoh64(node_guid), cl_ntoh64(p_smp->trans_id)); + + if (ib_smp_get_status(p_smp)) { + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "MAD status 0x%x received\n", + cl_ntoh16(ib_smp_get_status(p_smp))); + goto Exit2; + } + + CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); + + p_node = osm_get_node_by_guid(sm->p_subn, node_guid); + if (!p_node) { + CL_PLOCK_RELEASE(sm->p_lock); + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3606: " + "SwitchInfo received for nonexistent node " + "with GUID 0x%" PRIx64 "\n", cl_ntoh64(node_guid)); + goto Exit; + } + + /* Hack for bad value in Mellanox switch */ + if (cl_ntoh16(p_si->lin_top) > IB_LID_UCAST_END_HO) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3610: " + "\n\t\t\t\tBad LinearFDBTop value = 0x%X " + "on switch 0x%" PRIx64 + "\n\t\t\t\tForcing internal correction to 0x%X\n", + cl_ntoh16(p_si->lin_top), + cl_ntoh64(osm_node_get_node_guid(p_node)), 0); + p_si->lin_top = 0; + } + + /* Acquire the switch object for this switch. 
*/ + if (!p_node->sw) { + si_rcv_process_new(sm, p_node, p_madw); + /* A new switch was found during the sweep so we need + to ignore the current LFT settings. */ + sm->p_subn->ignore_existing_lfts = TRUE; + } else if (si_rcv_process_existing(sm, p_node, p_madw)) + /* we might get back a request for signaling change was detected */ + sm->p_subn->force_heavy_sweep = TRUE; + + if (p_context->light_sweep || p_context->set_method) + goto Exit; + + si_rcv_get_sp0_info(sm, p_node); + +Exit: + CL_PLOCK_RELEASE(sm->p_lock); +Exit2: + OSM_LOG_EXIT(sm->p_log); +} diff --git a/opensm/osm_switch.c b/opensm/osm_switch.c new file mode 100644 index 0000000..cfcf410 --- /dev/null +++ b/opensm/osm_switch.c @@ -0,0 +1,767 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_switch_t. + * This object represents an Infiniband switch. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_SWITCH_C +#include + +struct switch_port_path { + uint8_t port_num; + uint32_t path_count; + int found_sys_guid; + int found_node_guid; + uint32_t forwarded_to; +}; + +cl_status_t osm_switch_set_hops(IN osm_switch_t * p_sw, IN uint16_t lid_ho, + IN uint8_t port_num, IN uint8_t num_hops) +{ + if (!lid_ho || lid_ho > p_sw->max_lid_ho) + return -1; + if (port_num >= p_sw->num_ports) + return -1; + if (!p_sw->hops[lid_ho]) { + p_sw->hops[lid_ho] = malloc(p_sw->num_ports); + if (!p_sw->hops[lid_ho]) + return -1; + memset(p_sw->hops[lid_ho], OSM_NO_PATH, p_sw->num_ports); + } + + p_sw->hops[lid_ho][port_num] = num_hops; + if (p_sw->hops[lid_ho][0] > num_hops) + p_sw->hops[lid_ho][0] = num_hops; + + return 0; +} + +void osm_switch_delete(IN OUT osm_switch_t ** pp_sw) +{ + osm_switch_t *p_sw = *pp_sw; + unsigned i; + + osm_mcast_tbl_destroy(&p_sw->mcast_tbl); + if (p_sw->p_prof) + free(p_sw->p_prof); + if (p_sw->search_ordering_ports) + free(p_sw->search_ordering_ports); + if (p_sw->lft) + free(p_sw->lft); + if (p_sw->new_lft) + free(p_sw->new_lft); + if (p_sw->hops) { + for (i = 0; i < p_sw->num_hops; i++) + if (p_sw->hops[i]) + free(p_sw->hops[i]); + free(p_sw->hops); + } + 
free(*pp_sw); + *pp_sw = NULL; +} + +osm_switch_t *osm_switch_new(IN osm_node_t * p_node, + IN const osm_madw_t * p_madw) +{ + osm_switch_t *p_sw; + ib_switch_info_t *p_si; + ib_smp_t *p_smp; + uint8_t num_ports; + uint32_t port_num; + + CL_ASSERT(p_madw); + CL_ASSERT(p_node); + + p_smp = osm_madw_get_smp_ptr(p_madw); + p_si = ib_smp_get_payload_ptr(p_smp); + num_ports = osm_node_get_num_physp(p_node); + + CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_SWITCH_INFO); + + if (!p_si->lin_cap) /* The switch doesn't support LFT */ + return NULL; + + p_sw = malloc(sizeof(*p_sw)); + if (!p_sw) + return NULL; + + memset(p_sw, 0, sizeof(*p_sw)); + + p_sw->p_node = p_node; + p_sw->switch_info = *p_si; + p_sw->num_ports = num_ports; + p_sw->need_update = 2; + + p_sw->p_prof = malloc(sizeof(*p_sw->p_prof) * num_ports); + if (!p_sw->p_prof) + goto err; + + memset(p_sw->p_prof, 0, sizeof(*p_sw->p_prof) * num_ports); + + osm_mcast_tbl_init(&p_sw->mcast_tbl, osm_node_get_num_physp(p_node), + cl_ntoh16(p_si->mcast_cap)); + + for (port_num = 0; port_num < num_ports; port_num++) + osm_port_prof_construct(&p_sw->p_prof[port_num]); + + return p_sw; + +err: + osm_switch_delete(&p_sw); + return NULL; +} + +boolean_t osm_switch_get_lft_block(IN const osm_switch_t * p_sw, + IN uint16_t block_id, OUT uint8_t * p_block) +{ + uint16_t base_lid_ho = block_id * IB_SMP_DATA_SIZE; + + CL_ASSERT(p_sw); + CL_ASSERT(p_block); + + if (base_lid_ho > p_sw->max_lid_ho) + return FALSE; + + CL_ASSERT(base_lid_ho + IB_SMP_DATA_SIZE - 1 <= IB_LID_UCAST_END_HO); + memcpy(p_block, &(p_sw->new_lft[base_lid_ho]), IB_SMP_DATA_SIZE); + return TRUE; +} + +static struct osm_remote_node * +switch_find_guid_common(IN const osm_switch_t * p_sw, + IN struct osm_remote_guids_count *r, + IN uint8_t port_num, IN int find_sys_guid, + IN int find_node_guid) +{ + struct osm_remote_node *p_remote_guid = NULL; + osm_physp_t *p_physp; + osm_physp_t *p_rem_physp; + osm_node_t *p_rem_node; + uint64_t sys_guid; + uint64_t node_guid; + 
unsigned int i; + + CL_ASSERT(p_sw); + + if (!r) + goto out; + + p_physp = osm_node_get_physp_ptr(p_sw->p_node, port_num); + if (!p_physp) + goto out; + + p_rem_physp = osm_physp_get_remote(p_physp); + p_rem_node = osm_physp_get_node_ptr(p_rem_physp); + sys_guid = p_rem_node->node_info.sys_guid; + node_guid = p_rem_node->node_info.node_guid; + + for (i = 0; i < r->count; i++) { + if ((!find_sys_guid + || r->guids[i].node->node_info.sys_guid == sys_guid) + && (!find_node_guid + || r->guids[i].node->node_info.node_guid == node_guid)) { + p_remote_guid = &r->guids[i]; + break; + } + } + +out: + return p_remote_guid; +} + +static struct osm_remote_node * +switch_find_sys_guid_count(IN const osm_switch_t * p_sw, + IN struct osm_remote_guids_count *r, + IN uint8_t port_num) +{ + return switch_find_guid_common(p_sw, r, port_num, 1, 0); +} + +static struct osm_remote_node * +switch_find_node_guid_count(IN const osm_switch_t * p_sw, + IN struct osm_remote_guids_count *r, + IN uint8_t port_num) +{ + return switch_find_guid_common(p_sw, r, port_num, 0, 1); +} + +uint8_t osm_switch_recommend_path(IN const osm_switch_t * p_sw, + IN osm_port_t * p_port, IN uint16_t lid_ho, + IN unsigned start_from, + IN boolean_t ignore_existing, + IN boolean_t routing_for_lmc, + IN boolean_t dor, + IN boolean_t port_shifting, + IN uint32_t scatter_ports, + IN osm_lft_type_enum lft_enum) +{ + /* + We support an enhanced LMC aware routing mode: + In the case of LMC > 0, we can track the remote side + system and node for all of the lids of the target + and try and avoid routing again through the same + system / node. + + Assume if routing_for_lmc is true that this procedure was + provided the tracking array and counter via p_port->priv, + and we can conduct this algorithm. 
+ */ + uint16_t base_lid; + uint8_t hops; + uint8_t least_hops; + uint8_t port_num; + uint8_t num_ports; + uint32_t least_paths = 0xFFFFFFFF; + unsigned i; + /* + The following will track the least paths if the + route should go through a new system/node + */ + uint32_t least_paths_other_sys = 0xFFFFFFFF; + uint32_t least_paths_other_nodes = 0xFFFFFFFF; + uint32_t least_forwarded_to = 0xFFFFFFFF; + uint32_t check_count; + uint8_t best_port = 0; + /* + These vars track the best port if it connects to + not used system/node. + */ + uint8_t best_port_other_sys = 0; + uint8_t best_port_other_node = 0; + boolean_t port_found = FALSE; + osm_physp_t *p_physp; + osm_physp_t *p_rem_physp; + osm_node_t *p_rem_node; + osm_node_t *p_rem_node_first = NULL; + struct osm_remote_node *p_remote_guid = NULL; + struct osm_remote_node null_remote_node = {NULL, 0, 0}; + struct switch_port_path port_paths[IB_NODE_NUM_PORTS_MAX]; + unsigned int port_paths_total_paths = 0; + unsigned int port_paths_count = 0; + uint8_t scatter_possible_ports[IB_NODE_NUM_PORTS_MAX]; + unsigned int scatter_possible_ports_count = 0; + int found_sys_guid = 0; + int found_node_guid = 0; + + CL_ASSERT(lid_ho > 0); + + if (p_port->p_node->sw) { + if (p_port->p_node->sw == p_sw) + return 0; + base_lid = osm_port_get_base_lid(p_port); + } else { + p_physp = p_port->p_physp; + if (!p_physp || !p_physp->p_remote_physp || + !p_physp->p_remote_physp->p_node->sw) + return OSM_NO_PATH; + + if (p_physp->p_remote_physp->p_node->sw == p_sw) + return p_physp->p_remote_physp->port_num; + base_lid = + osm_node_get_base_lid(p_physp->p_remote_physp->p_node, 0); + } + base_lid = cl_ntoh16(base_lid); + + num_ports = p_sw->num_ports; + + least_hops = osm_switch_get_least_hops(p_sw, base_lid); + if (least_hops == OSM_NO_PATH) + return OSM_NO_PATH; + + /* + First, inquire with the forwarding table for an existing + route. If one is found, honor it unless: + 1. the ignore existing flag is set. + 2. 
the physical port is not a valid one or not healthy + 3. the physical port has a remote port (the link is up) + 4. the port has min-hops to the target (avoid loops) + */ + if (!ignore_existing) { + port_num = osm_switch_get_port_by_lid(p_sw, lid_ho, lft_enum); + + if (port_num != OSM_NO_PATH) { + CL_ASSERT(port_num < num_ports); + + p_physp = + osm_node_get_physp_ptr(p_sw->p_node, port_num); + /* + Don't be too trusting of the current forwarding table! + Verify that the port number is legal and that the + LID is reachable through this port. + */ + if (p_physp && osm_physp_is_healthy(p_physp) && + osm_physp_get_remote(p_physp)) { + hops = + osm_switch_get_hop_count(p_sw, base_lid, + port_num); + /* + If we aren't using pre-defined user routes + function, then we need to make sure that the + current path is the minimum one. In case of + having such a user function - this check will + not be done, and the old routing will be used. + Note: This means that it is the user's job to + clean all data in the forwarding tables that + he wants to be overridden by the minimum + hop function. + */ + if (hops == least_hops) + return port_num; + } + } + } + + /* + This algorithm selects a port based on a static load balanced + selection across equal hop-count ports. + There is lots of room for improved sophistication here, + possibly guided by user configuration info. + */ + + /* + OpenSM routing is "local" - not considering a full lid to lid + path. As such we can not guarantee a path will not loop if we + do not always follow least hops. + So we must abort if not least hops. 
+ */ + + /* port number starts with one and num_ports is 1 + num phys ports */ + for (i = start_from; i < start_from + num_ports; i++) { + port_num = osm_switch_get_dimn_port(p_sw, i % num_ports); + if (!port_num || + osm_switch_get_hop_count(p_sw, base_lid, port_num) != + least_hops) + continue; + + /* let us make sure it is not down or unhealthy */ + p_physp = osm_node_get_physp_ptr(p_sw->p_node, port_num); + if (!p_physp || !osm_physp_is_healthy(p_physp) || + /* + we require all - non sma ports to be linked + to be routed through + */ + !osm_physp_get_remote(p_physp)) + continue; + + /* + We located a least-hop port, possibly one of many. + For this port, check the running total count of + the number of paths through this port. Select + the port routing the least number of paths. + */ + check_count = + osm_port_prof_path_count_get(&p_sw->p_prof[port_num]); + + + if (dor) { + /* Get the Remote Node */ + p_rem_physp = osm_physp_get_remote(p_physp); + p_rem_node = osm_physp_get_node_ptr(p_rem_physp); + /* use the first dimension, but spread traffic + * out among the group of ports representing + * that dimension */ + if (!p_rem_node_first) + p_rem_node_first = p_rem_node; + else if (p_rem_node != p_rem_node_first) + continue; + if (routing_for_lmc) { + struct osm_remote_guids_count *r = p_port->priv; + uint8_t rem_port = osm_physp_get_port_num(p_rem_physp); + unsigned int j; + + for (j = 0; j < r->count; j++) { + p_remote_guid = &r->guids[j]; + if ((p_remote_guid->node == p_rem_node) + && (p_remote_guid->port == rem_port)) + break; + } + if (j == r->count) + p_remote_guid = &null_remote_node; + } + /* + Advanced LMC routing requires tracking of the + best port by the node connected to the other side of + it. + */ + } else if (routing_for_lmc) { + /* Is the sys guid already used ? 
*/ + p_remote_guid = switch_find_sys_guid_count(p_sw, + p_port->priv, + port_num); + + /* If not update the least hops for this case */ + if (!p_remote_guid) { + if (check_count < least_paths_other_sys) { + least_paths_other_sys = check_count; + best_port_other_sys = port_num; + least_forwarded_to = 0; + } + found_sys_guid = 0; + } else { /* same sys found - try node */ + + + /* Else is the node guid already used ? */ + p_remote_guid = switch_find_node_guid_count(p_sw, + p_port->priv, + port_num); + + /* If not update the least hops for this case */ + if (!p_remote_guid + && check_count < least_paths_other_nodes) { + least_paths_other_nodes = check_count; + best_port_other_node = port_num; + least_forwarded_to = 0; + } + /* else prior sys and node guid already used */ + + if (!p_remote_guid) + found_node_guid = 0; + else + found_node_guid = 1; + found_sys_guid = 1; + } /* same sys found */ + } + + port_paths[port_paths_count].port_num = port_num; + port_paths[port_paths_count].path_count = check_count; + if (routing_for_lmc) { + port_paths[port_paths_count].found_sys_guid = found_sys_guid; + port_paths[port_paths_count].found_node_guid = found_node_guid; + } + if (routing_for_lmc && p_remote_guid) + port_paths[port_paths_count].forwarded_to = p_remote_guid->forwarded_to; + else + port_paths[port_paths_count].forwarded_to = 0; + port_paths_total_paths += check_count; + port_paths_count++; + + /* routing for LMC mode */ + /* + the count is min but also lower then the max subscribed + */ + if (check_count < least_paths) { + port_found = TRUE; + best_port = port_num; + least_paths = check_count; + scatter_possible_ports_count = 0; + scatter_possible_ports[scatter_possible_ports_count++] = port_num; + if (routing_for_lmc + && p_remote_guid + && p_remote_guid->forwarded_to < least_forwarded_to) + least_forwarded_to = p_remote_guid->forwarded_to; + } else if (scatter_ports + && check_count == least_paths) { + scatter_possible_ports[scatter_possible_ports_count++] = 
port_num; + } else if (routing_for_lmc + && p_remote_guid + && check_count == least_paths + && p_remote_guid->forwarded_to < least_forwarded_to) { + least_forwarded_to = p_remote_guid->forwarded_to; + best_port = port_num; + } + } + + if (port_found == FALSE) + return OSM_NO_PATH; + + if (port_shifting && port_paths_count) { + /* In the port_paths[] array, we now have all the ports that we + * can route out of. Using some shifting math below, possibly + * select a different one so that lids won't align in LFTs + * + * If lmc > 0, we need to loop through these ports to find the + * least_forwarded_to port, best_port_other_sys, and + * best_port_other_node just like before but through the different + * ordering. + */ + + least_paths = 0xFFFFFFFF; + least_paths_other_sys = 0xFFFFFFFF; + least_paths_other_nodes = 0xFFFFFFFF; + least_forwarded_to = 0xFFFFFFFF; + best_port = 0; + best_port_other_sys = 0; + best_port_other_node = 0; + + for (i = 0; i < port_paths_count; i++) { + unsigned int idx; + + idx = (port_paths_total_paths/port_paths_count + i) % port_paths_count; + + if (routing_for_lmc) { + if (!port_paths[idx].found_sys_guid + && port_paths[idx].path_count < least_paths_other_sys) { + least_paths_other_sys = port_paths[idx].path_count; + best_port_other_sys = port_paths[idx].port_num; + least_forwarded_to = 0; + } + else if (!port_paths[idx].found_node_guid + && port_paths[idx].path_count < least_paths_other_nodes) { + least_paths_other_nodes = port_paths[idx].path_count; + best_port_other_node = port_paths[idx].port_num; + least_forwarded_to = 0; + } + } + + if (port_paths[idx].path_count < least_paths) { + best_port = port_paths[idx].port_num; + least_paths = port_paths[idx].path_count; + if (routing_for_lmc + && (port_paths[idx].found_sys_guid + || port_paths[idx].found_node_guid) + && port_paths[idx].forwarded_to < least_forwarded_to) + least_forwarded_to = port_paths[idx].forwarded_to; + } + else if (routing_for_lmc + && (port_paths[idx].found_sys_guid + || 
port_paths[idx].found_node_guid) + && port_paths[idx].path_count == least_paths + && port_paths[idx].forwarded_to < least_forwarded_to) { + least_forwarded_to = port_paths[idx].forwarded_to; + best_port = port_paths[idx].port_num; + } + + } + } + + /* + if we are in enhanced routing mode and the best port is not + the local port 0 + */ + if (routing_for_lmc && best_port && !scatter_ports) { + /* Select the least hop port of the non used sys first */ + if (best_port_other_sys) + best_port = best_port_other_sys; + else if (best_port_other_node) + best_port = best_port_other_node; + } else if (scatter_ports) { + /* + * There is some danger that this random could "rebalance" the routes + * every time, to combat this there is a global srandom that + * occurs at the start of every sweep. + */ + unsigned int idx = random() % scatter_possible_ports_count; + best_port = scatter_possible_ports[idx]; + } + return best_port; +} + +void osm_switch_clear_hops(IN osm_switch_t * p_sw) +{ + unsigned i; + + for (i = 0; i < p_sw->num_hops; i++) + if (p_sw->hops[i]) + memset(p_sw->hops[i], OSM_NO_PATH, p_sw->num_ports); +} + +static int alloc_lft(IN osm_switch_t * p_sw, uint16_t lids) +{ + uint16_t lft_size; + + /* Ensure LFT is in units of LFT block size */ + lft_size = (lids / IB_SMP_DATA_SIZE + 1) * IB_SMP_DATA_SIZE; + if (lft_size > p_sw->lft_size) { + uint8_t *new_lft = realloc(p_sw->lft, lft_size); + if (!new_lft) + return -1; + memset(new_lft + p_sw->lft_size, OSM_NO_PATH, + lft_size - p_sw->lft_size); + p_sw->lft = new_lft; + p_sw->lft_size = lft_size; + } + + return 0; +} + +int osm_switch_prepare_path_rebuild(IN osm_switch_t * p_sw, IN uint16_t max_lids) +{ + uint8_t **hops; + uint8_t *new_lft; + unsigned i; + + if (alloc_lft(p_sw, max_lids)) + return -1; + + for (i = 0; i < p_sw->num_ports; i++) + osm_port_prof_construct(&p_sw->p_prof[i]); + + osm_switch_clear_hops(p_sw); + + if (!(new_lft = realloc(p_sw->new_lft, p_sw->lft_size))) + return -1; + + p_sw->new_lft = new_lft; 
+ + memset(p_sw->new_lft, OSM_NO_PATH, p_sw->lft_size); + + if (!p_sw->hops) { + hops = malloc((max_lids + 1) * sizeof(hops[0])); + if (!hops) + return -1; + memset(hops, 0, (max_lids + 1) * sizeof(hops[0])); + p_sw->hops = hops; + p_sw->num_hops = max_lids + 1; + } else if (max_lids + 1 > p_sw->num_hops) { + hops = realloc(p_sw->hops, (max_lids + 1) * sizeof(hops[0])); + if (!hops) + return -1; + memset(hops + p_sw->num_hops, 0, + (max_lids + 1 - p_sw->num_hops) * sizeof(hops[0])); + p_sw->hops = hops; + p_sw->num_hops = max_lids + 1; + } + p_sw->max_lid_ho = max_lids; + + return 0; +} + +uint8_t osm_switch_get_port_least_hops(IN const osm_switch_t * p_sw, + IN const osm_port_t * p_port) +{ + uint16_t lid; + + if (p_port->p_node->sw) { + if (p_port->p_node->sw == p_sw) + return 0; + lid = osm_node_get_base_lid(p_port->p_node, 0); + return osm_switch_get_least_hops(p_sw, cl_ntoh16(lid)); + } else { + osm_physp_t *p = p_port->p_physp; + uint8_t hops; + + if (!p || !p->p_remote_physp || !p->p_remote_physp->p_node->sw) + return OSM_NO_PATH; + if (p->p_remote_physp->p_node->sw == p_sw) + return 1; + lid = osm_node_get_base_lid(p->p_remote_physp->p_node, 0); + hops = osm_switch_get_least_hops(p_sw, cl_ntoh16(lid)); + return hops != OSM_NO_PATH ? 
hops + 1 : OSM_NO_PATH; + } +} + +uint8_t osm_switch_recommend_mcast_path(IN osm_switch_t * p_sw, + IN osm_port_t * p_port, + IN uint16_t mlid_ho, + IN boolean_t ignore_existing) +{ + uint16_t base_lid; + uint8_t hops; + uint8_t port_num; + uint8_t num_ports; + uint8_t least_hops; + + CL_ASSERT(mlid_ho >= IB_LID_MCAST_START_HO); + + if (p_port->p_node->sw) { + if (p_port->p_node->sw == p_sw) + return 0; + base_lid = osm_port_get_base_lid(p_port); + } else { + osm_physp_t *p_physp = p_port->p_physp; + if (!p_physp || !p_physp->p_remote_physp || + !p_physp->p_remote_physp->p_node->sw) + return OSM_NO_PATH; + if (p_physp->p_remote_physp->p_node->sw == p_sw) + return p_physp->p_remote_physp->port_num; + base_lid = + osm_node_get_base_lid(p_physp->p_remote_physp->p_node, 0); + } + base_lid = cl_ntoh16(base_lid); + num_ports = p_sw->num_ports; + + /* + If the user wants us to ignore existing multicast routes, + then simply return the shortest hop count path to the + target port. + + Otherwise, return the first port that has a path to the target, + picking from the ports that are already in the multicast group. + */ + if (!ignore_existing) { + for (port_num = 1; port_num < num_ports; port_num++) { + if (!osm_mcast_tbl_is_port + (&p_sw->mcast_tbl, mlid_ho, port_num)) + continue; + /* + Don't be too trusting of the current forwarding table! + Verify that the LID is reachable through this port. + */ + hops = + osm_switch_get_hop_count(p_sw, base_lid, port_num); + if (hops != OSM_NO_PATH) + return port_num; + } + } + + /* + Either no existing mcast paths reach this port or we are + ignoring existing paths. + + Determine the best multicast path to the target. Note that this + algorithm is slightly different from the one used for unicast route + recommendation. In this case (multicast), we must NOT + perform any sort of load balancing. We MUST take the FIRST + port found that has <= the lowest hop count path. 
This prevents + more than one multicast path to the same remote switch which + prevents a multicast loop. Multicast loops are bad since the same + multicast packet will go around and around, inevitably creating + a black hole that will destroy the Earth in a firey conflagration. + */ + least_hops = osm_switch_get_least_hops(p_sw, base_lid); + if (least_hops == OSM_NO_PATH) + return OSM_NO_PATH; + for (port_num = 1; port_num < num_ports; port_num++) + if (osm_switch_get_hop_count(p_sw, base_lid, port_num) == + least_hops) + break; + + CL_ASSERT(port_num < num_ports); + return port_num; +} diff --git a/opensm/osm_torus.c b/opensm/osm_torus.c new file mode 100644 index 0000000..9165c99 --- /dev/null +++ b/opensm/osm_torus.c @@ -0,0 +1,9564 @@ +/* + * Copyright 2009 Sandia Corporation. Under the terms of Contract + * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + * certain rights in this software. + * Copyright (c) 2009-2011 ZIH, TU Dresden, Federal Republic of Germany. All rights reserved. + * Copyright (c) 2010-2012 Mellanox Technologies LTD. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#define FILE_ID OSM_FILE_TORUS_C +#include +#include +#include +#include +#include + +#define TORUS_MAX_DIM 3 +#define PORTGRP_MAX_PORTS 16 +#define SWITCH_MAX_PORTGRPS (1 + 2 * TORUS_MAX_DIM) +#define DEFAULT_MAX_CHANGES 32 + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +typedef ib_net64_t guid_t; + +/* + * An endpoint terminates a link, and is one of three types: + * UNKNOWN - Uninitialized endpoint. + * SRCSINK - generates or consumes traffic, and thus has an associated LID; + * i.e. a CA or router port. + * PASSTHRU - Has no associated LID; i.e. a switch port. + * + * If it is possible to communicate in-band with a switch, it will require + * a port with a GUID in the switch to source/sink that traffic, but there + * will be no attached link. This code assumes there is only one such port. + * + * Here is an endpoint taxonomy: + * + * type == SRCSINK + * link == pointer to a valid struct link + * ==> This endpoint is a CA or router port connected via a link to + * either a switch or another CA/router. Thus: + * n_id ==> identifies the CA/router node GUID + * sw ==> NULL + * port ==> identifies the port on the CA/router this endpoint uses + * pgrp ==> NULL + * + * type == SRCSINK + * link == NULL pointer + * ==> This endpoint is the switch port used for in-band communication + * with the switch itself. 
Thus: + * n_id ==> identifies the node GUID used to talk to the switch + * containing this endpoint + * sw ==> pointer to valid struct switch containing this endpoint + * port ==> identifies the port on the switch this endpoint uses + * pgrp ==> NULL, or pointer to the valid struct port_grp holding + * the port in a t_switch. + * + * type == PASSTHRU + * link == pointer to valid struct link + * ==> This endpoint is a switch port connected via a link to either + * another switch or a CA/router. Thus: + * n_id ==> identifies the node GUID used to talk to the switch + * containing this endpoint - since each switch is assumed + * to have only one in-band communication port, this is a + * convenient unique name for the switch itself. + * sw ==> pointer to valid struct switch containing this endpoint, + * or NULL, in the case of a fabric link that has been + * disconnected after being transferred to a torus link. + * port ==> identifies the port on the switch this endpoint uses. + * Note that in the special case of the coordinate direction + * links, the port value is -1, as those links aren't + * really connected to anything. + * pgrp ==> NULL, or pointer to the valid struct port_grp holding + * the port in a t_switch. + */ +enum endpt_type { UNKNOWN = 0, SRCSINK, PASSTHRU }; +struct torus; +struct t_switch; +struct port_grp; + +struct endpoint { + enum endpt_type type; + int port; + guid_t n_id; /* IBA node GUID */ + void *sw; /* void* can point to either switch type */ + struct link *link; + struct port_grp *pgrp; + void *tmp; + /* + * Note: osm_port is only guaranteed to contain a valid pointer + * when the call stack contains torus_build_lfts() or + * osm_port_relink_endpoint(). + * + * Otherwise, the opensm core could have deleted an osm_port object + * without notifying us, invalidating the pointer we hold. 
+ * + * When presented with a pointer to an osm_port_t, it is generally + * safe and required to cast osm_port_t:priv to struct endpoint, and + * check that the endpoint's osm_port is the same as the original + * osm_port_t pointer. Failure to do so means that invalidated + * pointers will go undetected. + */ + struct osm_port *osm_port; +}; + +struct link { + struct endpoint end[2]; +}; + +/* + * A port group is a collection of endpoints on a switch that share certain + * characteristics. All the endpoints in a port group must have the same + * type. Furthermore, if that type is PASSTHRU, then the connected links: + * 1) are parallel to a given coordinate direction + * 2) share the same two switches as endpoints. + * + * Torus-2QoS uses one master spanning tree for multicast, of which every + * multicast group spanning tree is a subtree. to_stree_root is a pointer + * to the next port_grp on the path to the master spanning tree root. + * to_stree_tip is a pointer to the next port_grp on the path to a master + * spanning tree branch tip. + * + * Each t_switch can have at most one port_grp with a non-NULL to_stree_root. + * Exactly one t_switch in the fabric will have all port_grp objects with + * to_stree_root NULL; it is the master spanning tree root. + * + * A t_switch with all port_grp objects where to_stree_tip is NULL is at a + * master spanning tree branch tip. + */ +struct port_grp { + enum endpt_type type; + size_t port_cnt; /* number of attached ports in group */ + size_t port_grp; /* what switch port_grp we're in */ + unsigned sw_dlid_cnt; /* switch dlids routed through this group */ + unsigned ca_dlid_cnt; /* CA dlids routed through this group */ + struct t_switch *sw; /* what switch we're attached to */ + struct port_grp *to_stree_root; + struct port_grp *to_stree_tip; + struct endpoint **port; +}; + +/* + * A struct t_switch is used to represent a switch as placed in a torus. 
+ * + * A t_switch used to build an N-dimensional torus will have 2N+1 port groups, + * used as follows, assuming 0 <= d < N: + * port_grp[2d] => links leaving in negative direction for coordinate d + * port_grp[2d+1] => links leaving in positive direction for coordinate d + * port_grp[2N] => endpoints local to switch; i.e., hosts on switch + * + * struct link objects referenced by a t_switch are assumed to be oriented: + * traversing a link from link.end[0] to link.end[1] is always in the positive + * coordinate direction. + */ +struct t_switch { + guid_t n_id; /* IBA node GUID */ + int i, j, k; + unsigned port_cnt; /* including management port */ + struct torus *torus; + void *tmp; + /* + * Note: osm_switch is only guaranteed to contain a valid pointer + * when the call stack contains torus_build_lfts(). + * + * Otherwise, the opensm core could have deleted an osm_switch object + * without notifying us, invalidating the pointer we hold. + * + * When presented with a pointer to an osm_switch_t, it is generally + * safe and required to cast osm_switch_t:priv to struct t_switch, and + * check that the switch's osm_switch is the same as the original + * osm_switch_t pointer. Failure to do so means that invalidated + * pointers will go undetected. + */ + struct osm_switch *osm_switch; + + struct port_grp ptgrp[SWITCH_MAX_PORTGRPS]; + struct endpoint **port; +}; + +/* + * We'd like to be able to discover the torus topology in a pile of switch + * links if we can. We'll use a struct f_switch to store raw topology for a + * fabric description, then contruct the torus topology from struct t_switch + * objects as we process the fabric and recover it. + */ +struct f_switch { + guid_t n_id; /* IBA node GUID */ + unsigned port_cnt; /* including management port */ + void *tmp; + /* + * Same rules apply here as for a struct t_switch member osm_switch. 
+ */ + struct osm_switch *osm_switch; + struct endpoint **port; +}; + +struct fabric { + osm_opensm_t *osm; + unsigned ca_cnt; + unsigned link_cnt; + unsigned switch_cnt; + + unsigned link_cnt_max; + unsigned switch_cnt_max; + + struct link **link; + struct f_switch **sw; +}; + +struct coord_dirs { + /* + * These links define the coordinate directions for the torus. + * They are duplicates of links connected to switches. Each of + * these links must connect to a common switch. + * + * In the event that a failed switch was specified as one of these + * link endpoints, our algorithm would not be able to find the + * torus in the fabric. So, we'll allow multiple instances of + * this in the config file to allow improved resiliency. + */ + struct link xm_link, ym_link, zm_link; + struct link xp_link, yp_link, zp_link; + /* + * A torus dimension has coordinate values 0, 1, ..., radix - 1. + * The dateline, where we need to change VLs to avoid credit loops, + * for a torus dimension is always between coordinate values + * radix - 1 and 0. The following specify the dateline location + * relative to the coordinate links shared switch location. + * + * E.g. if the shared switch is at 0,0,0, the following are all + * zero; if the shared switch is at 1,1,1, the following are all + * -1, etc. + * + * Since our SL/VL assignment for a path depends on the position + * of the path endpoints relative to the torus datelines, we need + * this information to keep SL/VL assignment constant in the event + * one of the switches used to specify coordinate directions fails. 
+ */ + int x_dateline, y_dateline, z_dateline; +}; + +struct torus { + osm_opensm_t *osm; + unsigned ca_cnt; + unsigned link_cnt; + unsigned switch_cnt; + unsigned seed_cnt, seed_idx; + unsigned x_sz, y_sz, z_sz; + + unsigned port_order[IB_NODE_NUM_PORTS_MAX+1]; + + unsigned sw_pool_sz; + unsigned link_pool_sz; + unsigned seed_sz; + unsigned portgrp_sz; /* max ports for port groups in this torus */ + + struct fabric *fabric; + struct t_switch **sw_pool; + struct link *link_pool; + + struct coord_dirs *seed; + struct t_switch ****sw; + struct t_switch *master_stree_root; + + unsigned flags; + unsigned max_changes; + int debug; +}; + +/* + * Bits to use in torus.flags + */ +#define X_MESH (1U << 0) +#define Y_MESH (1U << 1) +#define Z_MESH (1U << 2) +#define MSG_DEADLOCK (1U << 29) +#define NOTIFY_CHANGES (1U << 30) + +#define ALL_MESH(flags) \ + ((flags & (X_MESH | Y_MESH | Z_MESH)) == (X_MESH | Y_MESH | Z_MESH)) + + +struct torus_context { + osm_opensm_t *osm; + struct torus *torus; + struct fabric fabric; +}; + +static +void teardown_fabric(struct fabric *f) +{ + unsigned l, p, s; + struct endpoint *port; + struct f_switch *sw; + + if (!f) + return; + + if (f->sw) { + /* + * Need to free switches, and also find/free the endpoints + * we allocated for switch management ports. + */ + for (s = 0; s < f->switch_cnt; s++) { + sw = f->sw[s]; + if (!sw) + continue; + + for (p = 0; p < sw->port_cnt; p++) { + port = sw->port[p]; + if (port && !port->link) + free(port); /* management port */ + } + free(sw); + } + free(f->sw); + } + if (f->link) { + for (l = 0; l < f->link_cnt; l++) + if (f->link[l]) + free(f->link[l]); + + free(f->link); + } + memset(f, 0, sizeof(*f)); +} + +void teardown_torus(struct torus *t) +{ + unsigned p, s; + struct endpoint *port; + struct t_switch *sw; + + if (!t) + return; + + if (t->sw_pool) { + /* + * Need to free switches, and also find/free the endpoints + * we allocated for switch management ports. 
+		 */
+		for (s = 0; s < t->switch_cnt; s++) {
+			sw = t->sw_pool[s];
+			if (!sw)
+				continue;
+
+			for (p = 0; p < sw->port_cnt; p++) {
+				port = sw->port[p];
+				if (port && !port->link)
+					free(port);	/* management port */
+			}
+			free(sw);
+		}
+		free(t->sw_pool);
+	}
+	/* free(NULL) is a no-op, so the remaining pools need no guards. */
+	free(t->link_pool);
+	free(t->sw);
+	free(t->seed);
+	free(t);
+}
+
+/*
+ * Allocate a zeroed torus_context bound to the given opensm instance.
+ * Returns NULL, after logging ERR 4E01, if the allocation fails.
+ */
+static
+struct torus_context *torus_context_create(osm_opensm_t *osm)
+{
+	struct torus_context *ctx = calloc(1, sizeof(*ctx));
+
+	if (!ctx) {
+		OSM_LOG(&osm->log, OSM_LOG_ERROR,
+			"ERR 4E01: calloc: %s\n", strerror(errno));
+		return NULL;
+	}
+	ctx->osm = osm;
+	return ctx;
+}
+
+/*
+ * Release a context made by torus_context_create(): tear down the embedded
+ * fabric, then the torus if one is attached, then the context itself.
+ */
+static
+void torus_context_delete(void *context)
+{
+	struct torus_context *ctx = context;
+	struct torus *torus = ctx->torus;
+
+	teardown_fabric(&ctx->fabric);
+	if (torus)
+		teardown_torus(torus);
+	free(ctx);
+}
+
+/*
+ * Make sure t->seed[] has room for new_seeds entries beyond the seed_cnt
+ * already in use.  When the array must grow it is over-allocated (required
+ * size plus two plus half again) and everything from index seed_cnt onward
+ * is zeroed.  Returns false, leaving the torus untouched, if realloc fails.
+ */
+static
+bool grow_seed_array(struct torus *t, int new_seeds)
+{
+	const unsigned needed = t->seed_cnt + new_seeds;
+	unsigned new_sz;
+	struct coord_dirs *seeds;
+
+	if (needed <= t->seed_sz)
+		return true;
+
+	new_sz = needed + 2 + needed / 2;
+	seeds = realloc(t->seed, new_sz * sizeof(*seeds));
+	if (!seeds)
+		return false;
+
+	memset(&seeds[t->seed_cnt], 0,
+	       (new_sz - t->seed_cnt) * sizeof(*seeds));
+	t->seed = seeds;
+	t->seed_sz = new_sz;
+	return true;
+}
+
+/*
+ * Linear search of the fabric switch table for the switch whose node GUID
+ * is sw_guid.  Returns NULL when the table is absent or holds no match.
+ */
+static
+struct f_switch *find_f_sw(struct fabric *f, guid_t sw_guid)
+{
+	unsigned idx;
+
+	if (!f->sw)
+		return NULL;
+
+	for (idx = 0; idx < f->switch_cnt; idx++)
+		if (f->sw[idx]->n_id == sw_guid)
+			return f->sw[idx];
+
+	return NULL;
+}
+
+/*
+ * Linear search of the fabric link table for the link joining
+ * (guid0, port0) and (guid1, port1), matched in either orientation.
+ */
+static
+struct link *find_f_link(struct fabric *f,
+			 guid_t guid0, int port0, guid_t guid1, int port1)
+{
+	unsigned l;
+	struct link *link;
+
+	if (f->link) {
+		for (l = 0; l < f->link_cnt; l++) {
+			link = f->link[l];
+			if ((link->end[0].n_id == guid0 &&
+			     link->end[0].port == port0 &&
+			     link->end[1].n_id == guid1 &&
+			     link->end[1].port == port1) ||
+			    (link->end[0].n_id == guid1 &&
+			     link->end[0].port == port1 &&
+			     link->end[1].n_id == guid0 &&
+			     link->end[1].port == port0))
+				return link;
+
} + } + return NULL; +} + +static +struct f_switch *alloc_fswitch(struct fabric *f, + guid_t sw_id, unsigned port_cnt) +{ + size_t new_sw_sz; + unsigned cnt_max; + struct f_switch *sw = NULL; + void *ptr; + + if (f->switch_cnt >= f->switch_cnt_max) { + + cnt_max = 16 + 5 * f->switch_cnt_max / 4; + ptr = realloc(f->sw, cnt_max * sizeof(*f->sw)); + if (!ptr) { + OSM_LOG(&f->osm->log, OSM_LOG_ERROR, + "ERR 4E02: realloc: %s\n", strerror(errno)); + goto out; + } + f->sw = ptr; + f->switch_cnt_max = cnt_max; + memset(&f->sw[f->switch_cnt], 0, + (f->switch_cnt_max - f->switch_cnt)*sizeof(*f->sw)); + } + new_sw_sz = sizeof(*sw) + port_cnt * sizeof(*sw->port); + sw = calloc(1, new_sw_sz); + if (!sw) { + OSM_LOG(&f->osm->log, OSM_LOG_ERROR, + "ERR 4E03: calloc: %s\n", strerror(errno)); + goto out; + } + sw->port = (void *)(sw + 1); + sw->n_id = sw_id; + sw->port_cnt = port_cnt; + f->sw[f->switch_cnt++] = sw; +out: + return sw; +} + +static +struct link *alloc_flink(struct fabric *f) +{ + unsigned cnt_max; + struct link *l = NULL; + void *ptr; + + if (f->link_cnt >= f->link_cnt_max) { + + cnt_max = 16 + 5 * f->link_cnt_max / 4; + ptr = realloc(f->link, cnt_max * sizeof(*f->link)); + if (!ptr) { + OSM_LOG(&f->osm->log, OSM_LOG_ERROR, + "ERR 4E04: realloc: %s\n", strerror(errno)); + goto out; + } + f->link = ptr; + f->link_cnt_max = cnt_max; + memset(&f->link[f->link_cnt], 0, + (f->link_cnt_max - f->link_cnt) * sizeof(*f->link)); + } + l = calloc(1, sizeof(*l)); + if (!l) { + OSM_LOG(&f->osm->log, OSM_LOG_ERROR, + "ERR 4E05: calloc: %s\n", strerror(errno)); + goto out; + } + f->link[f->link_cnt++] = l; +out: + return l; +} + +/* + * Caller must ensure osm_port points to a valid port which contains + * a valid osm_physp_t pointer for port 0, the switch management port. 
+ */
+static
+bool build_sw_endpoint(struct fabric *f, osm_port_t *osm_port)
+{
+	const int port_num = osm_physp_get_port_num(osm_port->p_physp);
+	const guid_t node_guid = osm_node_get_node_guid(osm_port->p_node);
+	struct osm_switch *osm_sw = osm_port->p_node->sw;
+	struct f_switch *fsw;
+	struct endpoint *mgmt_ep;
+
+	/* The fabric switch must have been allocated beforehand. */
+	fsw = find_f_sw(f, node_guid);
+	if (!fsw) {
+		OSM_LOG(&f->osm->log, OSM_LOG_ERROR,
+			"ERR 4E06: missing switch w/GUID 0x%04"PRIx64"\n",
+			cl_ntoh64(node_guid));
+		return false;
+	}
+
+	mgmt_ep = fsw->port[port_num];
+	if (mgmt_ep) {
+		/* Reuse an existing endpoint only if it is this switch's. */
+		if (mgmt_ep->n_id != node_guid) {
+			OSM_LOG(&f->osm->log, OSM_LOG_ERROR,
+				"ERR 4E07: switch port %d has id "
+				"0x%04"PRIx64", expected 0x%04"PRIx64"\n",
+				port_num, cl_ntoh64(mgmt_ep->n_id),
+				cl_ntoh64(node_guid));
+			return false;
+		}
+	} else {
+		mgmt_ep = calloc(1, sizeof(*mgmt_ep));
+		if (!mgmt_ep) {
+			OSM_LOG(&f->osm->log, OSM_LOG_ERROR,
+				"ERR 4E08: allocating endpoint: %s\n", strerror(errno));
+			return false;
+		}
+		mgmt_ep->type = SRCSINK;
+		mgmt_ep->port = port_num;
+		mgmt_ep->n_id = node_guid;
+		mgmt_ep->link = NULL;
+		mgmt_ep->sw = fsw;
+
+		fsw->port[port_num] = mgmt_ep;
+	}
+	/*
+	 * Fabric objects are temporary, so the osm_sw/osm_port priv pointers
+	 * are not pointed at them here; that waits for the torus objects.
+	 */
+	fsw->osm_switch = osm_sw;
+	mgmt_ep->osm_port = osm_port;
+
+	return true;
+}
+
+static
+bool build_ca_link(struct fabric *f,
+		   osm_port_t *osm_port_ca, guid_t sw_guid, int sw_port)
+{
+	int ca_port;
+	guid_t ca_guid;
+	struct link *l;
+	struct f_switch *sw;
+	bool success = false;
+
+	ca_port = osm_physp_get_port_num(osm_port_ca->p_physp);
+	ca_guid = osm_node_get_node_guid(osm_port_ca->p_node);
+
+	/*
+	 * The link may already exist.
+ */ + l = find_f_link(f, sw_guid, sw_port, ca_guid, ca_port); + if (l) { + success = true; + goto out; + } + /* + * The switch must already exist. + */ + sw = find_f_sw(f, sw_guid); + if (!sw) { + OSM_LOG(&f->osm->log, OSM_LOG_ERROR, + "ERR 4E09: missing switch w/GUID 0x%04"PRIx64"\n", + cl_ntoh64(sw_guid)); + goto out; + } + l = alloc_flink(f); + if (!l) + goto out; + + l->end[0].type = PASSTHRU; + l->end[0].port = sw_port; + l->end[0].n_id = sw_guid; + l->end[0].sw = sw; + l->end[0].link = l; + + sw->port[sw_port] = &l->end[0]; + + l->end[1].type = SRCSINK; + l->end[1].port = ca_port; + l->end[1].n_id = ca_guid; + l->end[1].sw = NULL; /* Correct for a CA */ + l->end[1].link = l; + + /* + * Fabric objects are temporary, so don't set osm_sw/osm_port priv + * pointers using them. Wait until torus objects get constructed. + */ + l->end[1].osm_port = osm_port_ca; + + ++f->ca_cnt; + success = true; +out: + return success; +} + +static +bool build_link(struct fabric *f, + guid_t sw_guid0, int sw_port0, guid_t sw_guid1, int sw_port1) +{ + struct link *l; + struct f_switch *sw0, *sw1; + bool success = false; + + /* + * The link may already exist. + */ + l = find_f_link(f, sw_guid0, sw_port0, sw_guid1, sw_port1); + if (l) { + success = true; + goto out; + } + /* + * The switches must already exist. 
+ */ + sw0 = find_f_sw(f, sw_guid0); + if (!sw0) { + OSM_LOG(&f->osm->log, OSM_LOG_ERROR, + "ERR 4E0A: missing switch w/GUID 0x%04"PRIx64"\n", + cl_ntoh64(sw_guid0)); + goto out; + } + sw1 = find_f_sw(f, sw_guid1); + if (!sw1) { + OSM_LOG(&f->osm->log, OSM_LOG_ERROR, + "ERR 4E0B: missing switch w/GUID 0x%04"PRIx64"\n", + cl_ntoh64(sw_guid1)); + goto out; + } + l = alloc_flink(f); + if (!l) + goto out; + + l->end[0].type = PASSTHRU; + l->end[0].port = sw_port0; + l->end[0].n_id = sw_guid0; + l->end[0].sw = sw0; + l->end[0].link = l; + + sw0->port[sw_port0] = &l->end[0]; + + l->end[1].type = PASSTHRU; + l->end[1].port = sw_port1; + l->end[1].n_id = sw_guid1; + l->end[1].sw = sw1; + l->end[1].link = l; + + sw1->port[sw_port1] = &l->end[1]; + + success = true; +out: + return success; +} + +static +bool parse_size(unsigned *tsz, unsigned *tflags, unsigned mask, + const char *parse_sep) +{ + char *val, *nextchar; + + val = strtok(NULL, parse_sep); + if (!val) + return false; + *tsz = strtoul(val, &nextchar, 0); + if (*tsz) { + if (*nextchar == 't' || *nextchar == 'T') + *tflags &= ~mask; + else if (*nextchar == 'm' || *nextchar == 'M') + *tflags |= mask; + /* + * A torus of radix two is also a mesh of radix two + * with multiple links between switches in that direction. + * + * Make it so always, otherwise the failure case routing + * logic gets confused. + */ + if (*tsz == 2) + *tflags |= mask; + } + return true; +} + +static +bool parse_torus(struct torus *t, const char *parse_sep) +{ + unsigned i, j, k, cnt; + char *ptr; + bool success = false; + + /* + * There can be only one. Ignore the imposters. + */ + if (t->sw_pool) + goto out; + + if (!parse_size(&t->x_sz, &t->flags, X_MESH, parse_sep)) + goto out; + + if (!parse_size(&t->y_sz, &t->flags, Y_MESH, parse_sep)) + goto out; + + if (!parse_size(&t->z_sz, &t->flags, Z_MESH, parse_sep)) + goto out; + + /* + * Set up a linear array of switch pointers big enough to hold + * all expected switches. 
+ */ + t->sw_pool_sz = t->x_sz * t->y_sz * t->z_sz; + t->sw_pool = calloc(t->sw_pool_sz, sizeof(*t->sw_pool)); + if (!t->sw_pool) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E0C: Torus switch array calloc: %s\n", + strerror(errno)); + goto out; + } + /* + * Set things up so that t->sw[i][j][k] can point to the i,j,k switch. + */ + cnt = t->x_sz * (1 + t->y_sz * (1 + t->z_sz)); + t->sw = malloc(cnt * sizeof(void *)); + if (!t->sw) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E0D: Torus switch array malloc: %s\n", + strerror(errno)); + goto out; + } + ptr = (void *)(t->sw); + + ptr += t->x_sz * sizeof(void *); + for (i = 0; i < t->x_sz; i++) { + t->sw[i] = (void *)ptr; + ptr += t->y_sz * sizeof(void *); + } + for (i = 0; i < t->x_sz; i++) + for (j = 0; j < t->y_sz; j++) { + t->sw[i][j] = (void *)ptr; + ptr += t->z_sz * sizeof(void *); + } + + for (i = 0; i < t->x_sz; i++) + for (j = 0; j < t->y_sz; j++) + for (k = 0; k < t->z_sz; k++) + t->sw[i][j][k] = NULL; + + success = true; +out: + return success; +} + +static +bool parse_unsigned(unsigned *result, const char *parse_sep) +{ + char *val, *nextchar; + + val = strtok(NULL, parse_sep); + if (!val) + return false; + *result = strtoul(val, &nextchar, 0); + return true; +} + +static +bool parse_port_order(struct torus *t, const char *parse_sep) +{ + unsigned i, j, k, n; + + for (i = 0; i < ARRAY_SIZE(t->port_order); i++) { + if (!parse_unsigned(&(t->port_order[i]), parse_sep)) + break; + + for (j = 0; j < i; j++) { + if (t->port_order[j] == t->port_order[i]) { + OSM_LOG(&t->osm->log, OSM_LOG_INFO, + "Ignored duplicate port %u in" + " port_order parsing\n", + t->port_order[j]); + i--; /* Ignore duplicate port number */ + break; + } + } + } + + n = i; + for (j = 0; j < ARRAY_SIZE(t->port_order); j++) { + for (k = 0; k < i; k++) + if (t->port_order[k] == j) + break; + if (k >= i) + t->port_order[n++] = j; + } + + return true; +} + +static +bool parse_guid(struct torus *t, guid_t *guid, const char *parse_sep) +{ + 
char *val; + bool success = false; + + val = strtok(NULL, parse_sep); + if (!val) + goto out; + *guid = strtoull(val, NULL, 0); + *guid = cl_hton64(*guid); + + success = true; +out: + return success; +} + +static +bool parse_dir_link(int c_dir, struct torus *t, const char *parse_sep) +{ + guid_t sw_guid0, sw_guid1; + struct link *l; + bool success = false; + + if (!parse_guid(t, &sw_guid0, parse_sep)) + goto out; + + if (!parse_guid(t, &sw_guid1, parse_sep)) + goto out; + + if (!t) { + success = true; + goto out; + } + + switch (c_dir) { + case -1: + l = &t->seed[t->seed_cnt - 1].xm_link; + break; + case 1: + l = &t->seed[t->seed_cnt - 1].xp_link; + break; + case -2: + l = &t->seed[t->seed_cnt - 1].ym_link; + break; + case 2: + l = &t->seed[t->seed_cnt - 1].yp_link; + break; + case -3: + l = &t->seed[t->seed_cnt - 1].zm_link; + break; + case 3: + l = &t->seed[t->seed_cnt - 1].zp_link; + break; + default: + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E0E: unknown link direction %d\n", c_dir); + goto out; + } + l->end[0].type = PASSTHRU; + l->end[0].port = -1; /* We don't really connect. */ + l->end[0].n_id = sw_guid0; + l->end[0].sw = NULL; /* Fix this up later. */ + l->end[0].link = NULL; /* Fix this up later. */ + + l->end[1].type = PASSTHRU; + l->end[1].port = -1; /* We don't really connect. */ + l->end[1].n_id = sw_guid1; + l->end[1].sw = NULL; /* Fix this up later. */ + l->end[1].link = NULL; /* Fix this up later. 
*/ + + success = true; +out: + return success; +} + +static +bool parse_dir_dateline(int c_dir, struct torus *t, const char *parse_sep) +{ + char *val; + int *dl, max_dl; + bool success = false; + + val = strtok(NULL, parse_sep); + if (!val) + goto out; + + if (!t) { + success = true; + goto out; + } + + switch (c_dir) { + case 1: + dl = &t->seed[t->seed_cnt - 1].x_dateline; + max_dl = t->x_sz; + break; + case 2: + dl = &t->seed[t->seed_cnt - 1].y_dateline; + max_dl = t->y_sz; + break; + case 3: + dl = &t->seed[t->seed_cnt - 1].z_dateline; + max_dl = t->z_sz; + break; + default: + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E0F: unknown dateline direction %d\n", c_dir); + goto out; + } + *dl = strtol(val, NULL, 0); + + if ((*dl < 0 && *dl <= -max_dl) || *dl >= max_dl) + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E10: dateline value for coordinate direction %d " + "must be %d < dl < %d\n", + c_dir, -max_dl, max_dl); + else + success = true; +out: + return success; +} + +static +bool parse_config(const char *fn, struct fabric *f, struct torus *t) +{ + FILE *fp; + unsigned i; + char *keyword; + char *line_buf = NULL; + const char *parse_sep = " \n\t\015"; + size_t line_buf_sz = 0; + size_t line_cntr = 0; + ssize_t llen; + bool kw_success, success = true; + + if (!grow_seed_array(t, 2)) + return false; + + for (i = 0; i < ARRAY_SIZE(t->port_order); i++) + t->port_order[i] = i; + + fp = fopen(fn, "r"); + if (!fp) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E11: Opening %s: %s\n", fn, strerror(errno)); + return false; + } + t->flags |= NOTIFY_CHANGES; + t->portgrp_sz = PORTGRP_MAX_PORTS; + t->max_changes = DEFAULT_MAX_CHANGES; + +next_line: + llen = getline(&line_buf, &line_buf_sz, fp); + if (llen < 0) + goto out; + + ++line_cntr; + + keyword = strtok(line_buf, parse_sep); + if (!keyword) + goto next_line; + + if (strcmp("torus", keyword) == 0) { + kw_success = parse_torus(t, parse_sep); + } else if (strcmp("mesh", keyword) == 0) { + t->flags |= X_MESH | Y_MESH | 
Z_MESH;
+		kw_success = parse_torus(t, parse_sep);
+	} else if (strcmp("port_order", keyword) == 0) {
+		kw_success = parse_port_order(t, parse_sep);
+	} else if (strcmp("next_seed", keyword) == 0) {
+		/*
+		 * Only start a new seed once there is room for it.  Parsing
+		 * continues after a failed keyword, so bumping seed_cnt when
+		 * the array could not grow would let a later *_link or
+		 * *_dateline keyword write t->seed[seed_cnt - 1] past the
+		 * end of the allocation.
+		 */
+		kw_success = grow_seed_array(t, 1);
+		if (kw_success)
+			t->seed_cnt++;
+	} else if (strcmp("portgroup_max_ports", keyword) == 0) {
+		kw_success = parse_unsigned(&t->portgrp_sz, parse_sep);
+	} else if (strcmp("xp_link", keyword) == 0) {
+		/* The first link/dateline keyword implicitly opens seed 0. */
+		if (!t->seed_cnt)
+			t->seed_cnt++;
+		kw_success = parse_dir_link(1, t, parse_sep);
+	} else if (strcmp("xm_link", keyword) == 0) {
+		if (!t->seed_cnt)
+			t->seed_cnt++;
+		kw_success = parse_dir_link(-1, t, parse_sep);
+	} else if (strcmp("x_dateline", keyword) == 0) {
+		if (!t->seed_cnt)
+			t->seed_cnt++;
+		kw_success = parse_dir_dateline(1, t, parse_sep);
+	} else if (strcmp("yp_link", keyword) == 0) {
+		if (!t->seed_cnt)
+			t->seed_cnt++;
+		kw_success = parse_dir_link(2, t, parse_sep);
+	} else if (strcmp("ym_link", keyword) == 0) {
+		if (!t->seed_cnt)
+			t->seed_cnt++;
+		kw_success = parse_dir_link(-2, t, parse_sep);
+	} else if (strcmp("y_dateline", keyword) == 0) {
+		if (!t->seed_cnt)
+			t->seed_cnt++;
+		kw_success = parse_dir_dateline(2, t, parse_sep);
+	} else if (strcmp("zp_link", keyword) == 0) {
+		if (!t->seed_cnt)
+			t->seed_cnt++;
+		kw_success = parse_dir_link(3, t, parse_sep);
+	} else if (strcmp("zm_link", keyword) == 0) {
+		if (!t->seed_cnt)
+			t->seed_cnt++;
+		kw_success = parse_dir_link(-3, t, parse_sep);
+	} else if (strcmp("z_dateline", keyword) == 0) {
+		if (!t->seed_cnt)
+			t->seed_cnt++;
+		kw_success = parse_dir_dateline(3, t, parse_sep);
+	} else if (strcmp("max_changes", keyword) == 0) {
+		kw_success = parse_unsigned(&t->max_changes, parse_sep);
+	} else if (keyword[0] == '#')
+		goto next_line;	/* comment line */
+	else {
+		OSM_LOG(&t->osm->log, OSM_LOG_ERROR,
+			"ERR 4E12: no keyword found: line %u\n",
+			(unsigned)line_cntr);
+		kw_success = false;
+	}
+	if (!kw_success) {
+		OSM_LOG(&t->osm->log, OSM_LOG_ERROR,
+			"ERR 4E13: parsing '%s': line %u\n",
+
keyword, (unsigned)line_cntr); + } + success = success && kw_success; + goto next_line; + +out: + if (line_buf) + free(line_buf); + fclose(fp); + return success; +} + +static +bool capture_fabric(struct fabric *fabric) +{ + osm_subn_t *subnet = &fabric->osm->subn; + osm_switch_t *osm_sw; + osm_physp_t *lphysp, *rphysp; + osm_port_t *lport; + osm_node_t *osm_node; + cl_map_item_t *item; + uint8_t ltype, rtype; + int p, port_cnt; + guid_t sw_guid; + bool success = true; + + OSM_LOG_ENTER(&fabric->osm->log); + + /* + * On OpenSM data structures: + * + * Apparently, every port in a fabric has an associated osm_physp_t, + * but not every port has an associated osm_port_t. Apparently every + * osm_port_t has an associated osm_physp_t. + * + * So, in order to find the inter-switch links we need to walk the + * switch list and examine each port, via its osm_physp_t object. + * + * But, we need to associate our CA and switch management port + * endpoints with the corresponding osm_port_t objects, in order + * to simplify computation of LFT entries and perform SL lookup for + * path records. Since it is apparently difficult to locate the + * osm_port_t that corresponds to a given osm_physp_t, we also + * need to walk the list of ports indexed by GUID to get access + * to the appropriate osm_port_t objects. + * + * Need to allocate our switches before we do anything else. + */ + item = cl_qmap_head(&subnet->sw_guid_tbl); + while (item != cl_qmap_end(&subnet->sw_guid_tbl)) { + + osm_sw = (osm_switch_t *)item; + item = cl_qmap_next(item); + osm_sw->priv = NULL; /* avoid stale pointer dereferencing */ + osm_node = osm_sw->p_node; + + if (osm_node_get_type(osm_node) != IB_NODE_TYPE_SWITCH) + continue; + + port_cnt = osm_node_get_num_physp(osm_node); + sw_guid = osm_node_get_node_guid(osm_node); + + success = alloc_fswitch(fabric, sw_guid, port_cnt); + if (!success) + goto out; + } + /* + * Now build all our endpoints. 
+ */ + item = cl_qmap_head(&subnet->port_guid_tbl); + while (item != cl_qmap_end(&subnet->port_guid_tbl)) { + + lport = (osm_port_t *)item; + item = cl_qmap_next(item); + lport->priv = NULL; /* avoid stale pointer dereferencing */ + + lphysp = lport->p_physp; + if (!(lphysp && osm_physp_is_valid(lphysp))) + continue; + + ltype = osm_node_get_type(lphysp->p_node); + /* + * Switch management port is always port 0. + */ + if (lphysp->port_num == 0 && ltype == IB_NODE_TYPE_SWITCH) { + success = build_sw_endpoint(fabric, lport); + if (!success) + goto out; + continue; + } + rphysp = lphysp->p_remote_physp; + if (!(rphysp && osm_physp_is_valid(rphysp))) + continue; + + rtype = osm_node_get_type(rphysp->p_node); + + if ((ltype != IB_NODE_TYPE_CA && + ltype != IB_NODE_TYPE_ROUTER) || + rtype != IB_NODE_TYPE_SWITCH) + continue; + + success = + build_ca_link(fabric, lport, + osm_node_get_node_guid(rphysp->p_node), + osm_physp_get_port_num(rphysp)); + if (!success) + goto out; + } + /* + * Lastly, build all our interswitch links. 
+ */ + item = cl_qmap_head(&subnet->sw_guid_tbl); + while (item != cl_qmap_end(&subnet->sw_guid_tbl)) { + + osm_sw = (osm_switch_t *)item; + item = cl_qmap_next(item); + + port_cnt = osm_node_get_num_physp(osm_sw->p_node); + for (p = 0; p < port_cnt; p++) { + + lphysp = osm_node_get_physp_ptr(osm_sw->p_node, p); + if (!(lphysp && osm_physp_is_valid(lphysp))) + continue; + + rphysp = lphysp->p_remote_physp; + if (!(rphysp && osm_physp_is_valid(rphysp))) + continue; + + if (lphysp == rphysp) + continue; /* ignore loopbacks */ + + ltype = osm_node_get_type(lphysp->p_node); + rtype = osm_node_get_type(rphysp->p_node); + + if (ltype != IB_NODE_TYPE_SWITCH || + rtype != IB_NODE_TYPE_SWITCH) + continue; + + success = + build_link(fabric, + osm_node_get_node_guid(lphysp->p_node), + osm_physp_get_port_num(lphysp), + osm_node_get_node_guid(rphysp->p_node), + osm_physp_get_port_num(rphysp)); + if (!success) + goto out; + } + } +out: + OSM_LOG_EXIT(&fabric->osm->log); + return success; +} + +/* + * diagnose_fabric() is just intended to report on fabric elements that + * could not be placed into the torus. We want to warn that there were + * non-torus fabric elements, but they will be ignored for routing purposes. + * Having them is not an error, and diagnose_fabric() thus has no return + * value. + */ +static +void diagnose_fabric(struct fabric *f) +{ + struct link *l; + struct endpoint *ep; + unsigned k, p; + + /* + * Report on any links that didn't get transferred to the torus. + */ + for (k = 0; k < f->link_cnt; k++) { + l = f->link[k]; + + if (!(l->end[0].sw && l->end[1].sw)) + continue; + + OSM_LOG(&f->osm->log, OSM_LOG_INFO, + "Found non-torus fabric link:" + " sw GUID 0x%04"PRIx64" port %d <->" + " sw GUID 0x%04"PRIx64" port %d\n", + cl_ntoh64(l->end[0].n_id), l->end[0].port, + cl_ntoh64(l->end[1].n_id), l->end[1].port); + } + /* + * Report on any switches with ports using endpoints that didn't + * get transferred to the torus. 
+ */ + for (k = 0; k < f->switch_cnt; k++) + for (p = 0; p < f->sw[k]->port_cnt; p++) { + + if (!f->sw[k]->port[p]) + continue; + + ep = f->sw[k]->port[p]; + + /* + * We already reported on inter-switch links above. + */ + if (ep->type == PASSTHRU) + continue; + + OSM_LOG(&f->osm->log, OSM_LOG_INFO, + "Found non-torus fabric port:" + " sw GUID 0x%04"PRIx64" port %d\n", + cl_ntoh64(f->sw[k]->n_id), p); + } +} + +static +struct t_switch *alloc_tswitch(struct torus *t, struct f_switch *fsw) +{ + unsigned g; + size_t new_sw_sz; + struct t_switch *sw = NULL; + void *ptr; + + if (!fsw) + goto out; + + if (t->switch_cnt >= t->sw_pool_sz) { + /* + * This should never happen, but occasionally a particularly + * pathological fabric can induce it. So log an error. + */ + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E14: unexpectedly requested too many switch " + "structures!\n"); + goto out; + } + new_sw_sz = sizeof(*sw) + + fsw->port_cnt * sizeof(*sw->port) + + SWITCH_MAX_PORTGRPS * t->portgrp_sz * sizeof(*sw->ptgrp[0].port); + sw = calloc(1, new_sw_sz); + if (!sw) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E15: calloc: %s\n", strerror(errno)); + goto out; + } + sw->port = (void *)(sw + 1); + sw->n_id = fsw->n_id; + sw->port_cnt = fsw->port_cnt; + sw->torus = t; + sw->tmp = fsw; + + ptr = &sw->port[sw->port_cnt]; + + for (g = 0; g < SWITCH_MAX_PORTGRPS; g++) { + sw->ptgrp[g].port_grp = g; + sw->ptgrp[g].sw = sw; + sw->ptgrp[g].port = ptr; + ptr = &sw->ptgrp[g].port[t->portgrp_sz]; + } + t->sw_pool[t->switch_cnt++] = sw; +out: + return sw; +} + +/* + * install_tswitch() expects the switch coordinates i,j,k to be canonicalized + * by caller. 
+ */ +static +bool install_tswitch(struct torus *t, + int i, int j, int k, struct f_switch *fsw) +{ + struct t_switch **sw = &t->sw[i][j][k]; + + if (!*sw) + *sw = alloc_tswitch(t, fsw); + + if (*sw) { + (*sw)->i = i; + (*sw)->j = j; + (*sw)->k = k; + } + return !!*sw; +} + +static +struct link *alloc_tlink(struct torus *t) +{ + if (t->link_cnt >= t->link_pool_sz) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E16: unexpectedly out of pre-allocated link " + "structures!\n"); + return NULL; + } + return &t->link_pool[t->link_cnt++]; +} + +static +int canonicalize(int v, int vmax) +{ + if (v >= 0 && v < vmax) + return v; + + if (v < 0) + v += vmax * (1 - v/vmax); + + return v % vmax; +} + +static +unsigned set_fp_bit(bool present, int i, int j, int k) +{ + return (unsigned)(!present) << (i + 2 * j + 4 * k); +} + +/* + * Returns an 11-bit fingerprint of what switches are absent in a cube of + * neighboring switches. Each bit 0-7 corresponds to a corner of the cube; + * if a bit is set the corresponding switch is absent. + * + * Bits 8-10 distinguish between 2D and 3D cases. If bit 8+d is set, + * for 0 <= d < 3; the d dimension of the desired torus has radix greater + * than 1. Thus, if all bits 8-10 are set, the desired torus is 3D. 
+ */ +static +unsigned fingerprint(struct torus *t, int i, int j, int k) +{ + unsigned fp; + int ip1, jp1, kp1; + int x_sz_gt1, y_sz_gt1, z_sz_gt1; + + x_sz_gt1 = t->x_sz > 1; + y_sz_gt1 = t->y_sz > 1; + z_sz_gt1 = t->z_sz > 1; + + ip1 = canonicalize(i + 1, t->x_sz); + jp1 = canonicalize(j + 1, t->y_sz); + kp1 = canonicalize(k + 1, t->z_sz); + + fp = set_fp_bit(t->sw[i][j][k], 0, 0, 0); + fp |= set_fp_bit(t->sw[ip1][j][k], x_sz_gt1, 0, 0); + fp |= set_fp_bit(t->sw[i][jp1][k], 0, y_sz_gt1, 0); + fp |= set_fp_bit(t->sw[ip1][jp1][k], x_sz_gt1, y_sz_gt1, 0); + fp |= set_fp_bit(t->sw[i][j][kp1], 0, 0, z_sz_gt1); + fp |= set_fp_bit(t->sw[ip1][j][kp1], x_sz_gt1, 0, z_sz_gt1); + fp |= set_fp_bit(t->sw[i][jp1][kp1], 0, y_sz_gt1, z_sz_gt1); + fp |= set_fp_bit(t->sw[ip1][jp1][kp1], x_sz_gt1, y_sz_gt1, z_sz_gt1); + + fp |= x_sz_gt1 << 8; + fp |= y_sz_gt1 << 9; + fp |= z_sz_gt1 << 10; + + return fp; +} + +static +bool connect_tlink(struct port_grp *pg0, struct endpoint *f_ep0, + struct port_grp *pg1, struct endpoint *f_ep1, + struct torus *t) +{ + struct link *l; + bool success = false; + + if (pg0->port_cnt == t->portgrp_sz) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E17: exceeded port group max " + "port count (%d): switch GUID 0x%04"PRIx64"\n", + t->portgrp_sz, cl_ntoh64(pg0->sw->n_id)); + goto out; + } + if (pg1->port_cnt == t->portgrp_sz) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E18: exceeded port group max " + "port count (%d): switch GUID 0x%04"PRIx64"\n", + t->portgrp_sz, cl_ntoh64(pg1->sw->n_id)); + goto out; + } + l = alloc_tlink(t); + if (!l) + goto out; + + l->end[0].type = f_ep0->type; + l->end[0].port = f_ep0->port; + l->end[0].n_id = f_ep0->n_id; + l->end[0].sw = pg0->sw; + l->end[0].link = l; + l->end[0].pgrp = pg0; + pg0->port[pg0->port_cnt++] = &l->end[0]; + pg0->sw->port[f_ep0->port] = &l->end[0]; + + if (f_ep0->osm_port) { + l->end[0].osm_port = f_ep0->osm_port; + l->end[0].osm_port->priv = &l->end[0]; + f_ep0->osm_port = NULL; + } + + 
l->end[1].type = f_ep1->type; + l->end[1].port = f_ep1->port; + l->end[1].n_id = f_ep1->n_id; + l->end[1].sw = pg1->sw; + l->end[1].link = l; + l->end[1].pgrp = pg1; + pg1->port[pg1->port_cnt++] = &l->end[1]; + pg1->sw->port[f_ep1->port] = &l->end[1]; + + if (f_ep1->osm_port) { + l->end[1].osm_port = f_ep1->osm_port; + l->end[1].osm_port->priv = &l->end[1]; + f_ep1->osm_port = NULL; + } + /* + * Disconnect fabric link, so that later we can see if any were + * left unconnected in the torus. + */ + ((struct f_switch *)f_ep0->sw)->port[f_ep0->port] = NULL; + f_ep0->sw = NULL; + f_ep0->port = -1; + + ((struct f_switch *)f_ep1->sw)->port[f_ep1->port] = NULL; + f_ep1->sw = NULL; + f_ep1->port = -1; + + success = true; +out: + return success; +} + +static +bool link_tswitches(struct torus *t, int cdir, + struct t_switch *t_sw0, struct t_switch *t_sw1) +{ + int p; + struct port_grp *pg0, *pg1; + struct f_switch *f_sw0, *f_sw1; + const char *cdir_name = "unknown"; + unsigned port_cnt; + int success = false; + + /* + * If this is a 2D torus, it is possible for this function to be + * called with its two switch arguments being the same switch, in + * which case there are no links to install. + */ + if (t_sw0 == t_sw1 && + ((cdir == 0 && t->x_sz == 1) || + (cdir == 1 && t->y_sz == 1) || + (cdir == 2 && t->z_sz == 1))) { + success = true; + goto out; + } + /* + * Ensure that t_sw1 is in the positive cdir direction wrt. t_sw0. + * ring_next_sw() relies on it. 
+ */ + switch (cdir) { + case 0: + if (t->x_sz > 1 && + canonicalize(t_sw0->i + 1, t->x_sz) != t_sw1->i) { + cdir_name = "x"; + goto cdir_error; + } + break; + case 1: + if (t->y_sz > 1 && + canonicalize(t_sw0->j + 1, t->y_sz) != t_sw1->j) { + cdir_name = "y"; + goto cdir_error; + } + break; + case 2: + if (t->z_sz > 1 && + canonicalize(t_sw0->k + 1, t->z_sz) != t_sw1->k) { + cdir_name = "z"; + goto cdir_error; + } + break; + default: + cdir_error: + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, "ERR 4E19: " + "sw 0x%04"PRIx64" (%d,%d,%d) <--> " + "sw 0x%04"PRIx64" (%d,%d,%d) " + "invalid torus %s link orientation\n", + cl_ntoh64(t_sw0->n_id), t_sw0->i, t_sw0->j, t_sw0->k, + cl_ntoh64(t_sw1->n_id), t_sw1->i, t_sw1->j, t_sw1->k, + cdir_name); + goto out; + } + + f_sw0 = t_sw0->tmp; + f_sw1 = t_sw1->tmp; + + if (!f_sw0 || !f_sw1) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E1A: missing fabric switches!\n" + " switch GUIDs: 0x%04"PRIx64" 0x%04"PRIx64"\n", + cl_ntoh64(t_sw0->n_id), cl_ntoh64(t_sw1->n_id)); + goto out; + } + pg0 = &t_sw0->ptgrp[2*cdir + 1]; + pg0->type = PASSTHRU; + + pg1 = &t_sw1->ptgrp[2*cdir]; + pg1->type = PASSTHRU; + + port_cnt = f_sw0->port_cnt; + /* + * Find all the links between these two switches. 
+ */ + for (p = 0; p < port_cnt; p++) { + struct endpoint *f_ep0 = NULL, *f_ep1 = NULL; + + if (!f_sw0->port[p] || !f_sw0->port[p]->link) + continue; + + if (f_sw0->port[p]->link->end[0].n_id == t_sw0->n_id && + f_sw0->port[p]->link->end[1].n_id == t_sw1->n_id) { + + f_ep0 = &f_sw0->port[p]->link->end[0]; + f_ep1 = &f_sw0->port[p]->link->end[1]; + } else if (f_sw0->port[p]->link->end[1].n_id == t_sw0->n_id && + f_sw0->port[p]->link->end[0].n_id == t_sw1->n_id) { + + f_ep0 = &f_sw0->port[p]->link->end[1]; + f_ep1 = &f_sw0->port[p]->link->end[0]; + } else + continue; + + if (!(f_ep0->type == PASSTHRU && f_ep1->type == PASSTHRU)) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E1B: not interswitch " + "link:\n 0x%04"PRIx64"/%d <-> 0x%04"PRIx64"/%d\n", + cl_ntoh64(f_ep0->n_id), f_ep0->port, + cl_ntoh64(f_ep1->n_id), f_ep1->port); + goto out; + } + /* + * Skip over links that already have been established in the + * torus. + */ + if (!(f_ep0->sw && f_ep1->sw)) + continue; + + if (!connect_tlink(pg0, f_ep0, pg1, f_ep1, t)) + goto out; + } + success = true; +out: + return success; +} + +/* + * Install the source/sink ports of the torus switch at (i,j,k), after + * canonicalizing the coordinates. The fabric switch found via tsw->tmp + * hands its osm_switch to the torus switch; SRCSINK endpoints are moved + * into port group 2 * TORUS_MAX_DIM, and each PASSTHRU link whose far end + * is SRCSINK is re-created as a torus link. + * + * Returns true if the location holds no torus switch or all ports were + * processed; false on a missing fabric switch or osm_switch, a full port + * group, or a far end without an osm_port. + */ +static +bool link_srcsink(struct torus *t, int i, int j, int k) +{ + struct endpoint *f_ep0; + struct endpoint *f_ep1; + struct t_switch *tsw; + struct f_switch *fsw; + struct port_grp *pg; + struct link *fl, *tl; + unsigned p, port_cnt; + bool success = false; + + i = canonicalize(i, t->x_sz); + j = canonicalize(j, t->y_sz); + k = canonicalize(k, t->z_sz); + + tsw = t->sw[i][j][k]; + if (!tsw) + return true; + + fsw = tsw->tmp; + /* + * link_srcsink is supposed to get called once for every switch in + * the fabric. At this point every fsw we encounter must have a + * non-null osm_switch. Otherwise something has gone horribly + * wrong with topology discovery; the most likely reason is that + * the fabric contains a radix-4 torus dimension, but the user gave + * a config that didn't say so, breaking all the checking in + * safe_x_perpendicular and friends. 
+ */ + if (!(fsw && fsw->osm_switch)) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E1C: Invalid topology discovery. " + "Verify torus-2QoS.conf contents.\n"); + return false; + } + + pg = &tsw->ptgrp[2 * TORUS_MAX_DIM]; + pg->type = SRCSINK; + tsw->osm_switch = fsw->osm_switch; + tsw->osm_switch->priv = tsw; + fsw->osm_switch = NULL; + + port_cnt = fsw->port_cnt; + for (p = 0; p < port_cnt; p++) { + + if (!fsw->port[p]) + continue; + + if (fsw->port[p]->type == SRCSINK) { + /* + * If the endpoint is the switch port used for in-band + * communication with the switch itself, move it to + * the torus. + */ + if (pg->port_cnt == t->portgrp_sz) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E1D: exceeded port group max port " + "count (%d): switch GUID 0x%04"PRIx64"\n", + t->portgrp_sz, cl_ntoh64(tsw->n_id)); + goto out; + } + fsw->port[p]->sw = tsw; + fsw->port[p]->pgrp = pg; + tsw->port[p] = fsw->port[p]; + tsw->port[p]->osm_port->priv = tsw->port[p]; + pg->port[pg->port_cnt++] = fsw->port[p]; + fsw->port[p] = NULL; + + } else if (fsw->port[p]->link && + fsw->port[p]->type == PASSTHRU) { + /* + * If the endpoint is a link to a CA, create a new link + * in the torus. Disconnect the fabric link. + */ + + fl = fsw->port[p]->link; + + if (fl->end[0].sw == fsw) { + f_ep0 = &fl->end[0]; + f_ep1 = &fl->end[1]; + } else if (fl->end[1].sw == fsw) { + f_ep1 = &fl->end[0]; + f_ep0 = &fl->end[1]; + } else + continue; + + if (f_ep1->type != SRCSINK) + continue; + + if (pg->port_cnt == t->portgrp_sz) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E1E: exceeded port group max port " + "count (%d): switch GUID 0x%04"PRIx64"\n", + t->portgrp_sz, cl_ntoh64(tsw->n_id)); + goto out; + } + /* + * Switch ports connected to links don't get + * associated with osm_port_t objects; see + * capture_fabric(). So just check CA end. 
+ */ + if (!f_ep1->osm_port) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E1F: NULL osm_port->priv port " + "GUID 0x%04"PRIx64"\n", + cl_ntoh64(f_ep1->n_id)); + goto out; + } + /* + * NOTE(review): a failed alloc_tlink() only skips this port; the + * function can still return true. Confirm this is intended. + */ + tl = alloc_tlink(t); + if (!tl) + continue; + + tl->end[0].type = f_ep0->type; + tl->end[0].port = f_ep0->port; + tl->end[0].n_id = f_ep0->n_id; + tl->end[0].sw = tsw; + tl->end[0].link = tl; + tl->end[0].pgrp = pg; + pg->port[pg->port_cnt++] = &tl->end[0]; + pg->sw->port[f_ep0->port] = &tl->end[0]; + + tl->end[1].type = f_ep1->type; + tl->end[1].port = f_ep1->port; + tl->end[1].n_id = f_ep1->n_id; + tl->end[1].sw = NULL; /* Correct for a CA */ + tl->end[1].link = tl; + tl->end[1].pgrp = NULL; /* Correct for a CA */ + + tl->end[1].osm_port = f_ep1->osm_port; + tl->end[1].osm_port->priv = &tl->end[1]; + f_ep1->osm_port = NULL; + + t->ca_cnt++; + f_ep0->sw = NULL; + f_ep0->port = -1; + fsw->port[p] = NULL; + } + } + success = true; +out: + return success; +} + +/* + * Search for a fabric switch that is linked to fsw0, has a node id + * different from fsw1, and is itself linked to a switch with the node id + * of fsw2. Only PASSTHRU endpoints with a non-NULL sw are followed. + * Returns that switch, or NULL if there is none or an argument is NULL. + */ +static +struct f_switch *ffind_face_corner(struct f_switch *fsw0, + struct f_switch *fsw1, + struct f_switch *fsw2) +{ + int p0, p3; + struct link *l; + struct endpoint *far_end; + struct f_switch *fsw, *fsw3 = NULL; + + if (!(fsw0 && fsw1 && fsw2)) + goto out; + + for (p0 = 0; p0 < fsw0->port_cnt; p0++) { + /* + * Ignore everything except switch links that haven't + * been installed into the torus. + */ + if (!(fsw0->port[p0] && fsw0->port[p0]->sw && + fsw0->port[p0]->type == PASSTHRU)) + continue; + + l = fsw0->port[p0]->link; + + if (l->end[0].n_id == fsw0->n_id) + far_end = &l->end[1]; + else + far_end = &l->end[0]; + + /* + * Ignore CAs + */ + if (!(far_end->type == PASSTHRU && far_end->sw)) + continue; + + fsw3 = far_end->sw; + if (fsw3->n_id == fsw1->n_id) /* existing corner */ + continue; + + for (p3 = 0; p3 < fsw3->port_cnt; p3++) { + /* + * Ignore everything except switch links that haven't + * been installed into the torus. 
+ */ + if (!(fsw3->port[p3] && fsw3->port[p3]->sw && + fsw3->port[p3]->type == PASSTHRU)) + continue; + + l = fsw3->port[p3]->link; + + if (l->end[0].n_id == fsw3->n_id) + far_end = &l->end[1]; + else + far_end = &l->end[0]; + + /* + * Ignore CAs + */ + if (!(far_end->type == PASSTHRU && far_end->sw)) + continue; + + fsw = far_end->sw; + if (fsw->n_id == fsw2->n_id) + goto out; + } + } + fsw3 = NULL; +out: + return fsw3; +} + +/* + * Torus-switch front end for ffind_face_corner(): returns NULL if any + * argument is NULL, otherwise searches using each switch's tmp pointer. + */ +static +struct f_switch *tfind_face_corner(struct t_switch *tsw0, + struct t_switch *tsw1, + struct t_switch *tsw2) +{ + if (!(tsw0 && tsw1 && tsw2)) + return NULL; + + return ffind_face_corner(tsw0->tmp, tsw1->tmp, tsw2->tmp); +} + +/* + * This code can break on any torus with a dimension that has radix four. + * + * What is supposed to happen is that this code will find the + * two faces whose shared edge is the desired perpendicular. + * + * What actually happens is that, while searching, we send two connected + * edges that are collinear in a torus dimension with radix four to + * ffind_face_corner(), which tries to complete a face by finding a + * 4-loop of edges. + * + * In the radix four torus case, it can find a 4-loop which is a ring in a + * dimension with radix four, rather than the desired face. It thus returns + * a switch when it shouldn't, so the wrong edge is returned as the + * perpendicular. + * + * The appropriate instance of safe_N_perpendicular() (where N == x, y, z) + * should be used to determine if it is safe to call + * ffind_3d_perpendicular() or ffind_2d_perpendicular(); these functions + * will return false if there is a possibility of finding a wrong + * perpendicular. + */ +struct f_switch *ffind_3d_perpendicular(struct f_switch *fsw0, + struct f_switch *fsw1, + struct f_switch *fsw2, + struct f_switch *fsw3) +{ + int p1; + struct link *l; + struct endpoint *far_end; + struct f_switch *fsw4 = NULL; + + if (!(fsw0 && fsw1 && fsw2 && fsw3)) + goto out; + + /* + * Look at all the ports on the switch, fsw1, that is the base of + * the perpendicular. 
+ */ + for (p1 = 0; p1 < fsw1->port_cnt; p1++) { + /* + * Ignore everything except switch links that haven't + * been installed into the torus. + */ + if (!(fsw1->port[p1] && fsw1->port[p1]->sw && + fsw1->port[p1]->type == PASSTHRU)) + continue; + + l = fsw1->port[p1]->link; + + if (l->end[0].n_id == fsw1->n_id) + far_end = &l->end[1]; + else + far_end = &l->end[0]; + /* + * Ignore CAs + */ + if (!(far_end->type == PASSTHRU && far_end->sw)) + continue; + + fsw4 = far_end->sw; + if (fsw4->n_id == fsw3->n_id) /* wrong perpendicular */ + continue; + + if (ffind_face_corner(fsw0, fsw1, fsw4) && + ffind_face_corner(fsw2, fsw1, fsw4)) + goto out; + } + fsw4 = NULL; +out: + return fsw4; +} +/* + * 2D variant of ffind_3d_perpendicular(): return a switch linked to fsw1, + * with a node id different from fsw2, for which + * ffind_face_corner(fsw0, fsw1, candidate) succeeds; NULL if none. + * + * NOTE(review): the radix-four caveat documented for + * ffind_3d_perpendicular() presumably applies here too, since the same + * face search is used - confirm. + */ +struct f_switch *ffind_2d_perpendicular(struct f_switch *fsw0, + struct f_switch *fsw1, + struct f_switch *fsw2) +{ + int p1; + struct link *l; + struct endpoint *far_end; + struct f_switch *fsw3 = NULL; + + if (!(fsw0 && fsw1 && fsw2)) + goto out; + + /* + * Look at all the ports on the switch, fsw1, that is the base of + * the perpendicular. + */ + for (p1 = 0; p1 < fsw1->port_cnt; p1++) { + /* + * Ignore everything except switch links that haven't + * been installed into the torus. 
+ */ + if (!(fsw1->port[p1] && fsw1->port[p1]->sw && + fsw1->port[p1]->type == PASSTHRU)) + continue; + + l = fsw1->port[p1]->link; + + if (l->end[0].n_id == fsw1->n_id) + far_end = &l->end[1]; + else + far_end = &l->end[0]; + /* + * Ignore CAs + */ + if (!(far_end->type == PASSTHRU && far_end->sw)) + continue; + + fsw3 = far_end->sw; + if (fsw3->n_id == fsw2->n_id) /* wrong perpendicular */ + continue; + + if (ffind_face_corner(fsw0, fsw1, fsw3)) + goto out; + } + fsw3 = NULL; +out: + return fsw3; +} + +/* + * Torus-switch front end for ffind_3d_perpendicular(): returns NULL if any + * argument is NULL, otherwise searches using each switch's tmp pointer. + */ +static +struct f_switch *tfind_3d_perpendicular(struct t_switch *tsw0, + struct t_switch *tsw1, + struct t_switch *tsw2, + struct t_switch *tsw3) +{ + if (!(tsw0 && tsw1 && tsw2 && tsw3)) + return NULL; + + return ffind_3d_perpendicular(tsw0->tmp, tsw1->tmp, + tsw2->tmp, tsw3->tmp); +} + +/* + * Torus-switch front end for ffind_2d_perpendicular(): returns NULL if any + * argument is NULL, otherwise searches using each switch's tmp pointer. + */ +static +struct f_switch *tfind_2d_perpendicular(struct t_switch *tsw0, + struct t_switch *tsw1, + struct t_switch *tsw2) +{ + if (!(tsw0 && tsw1 && tsw2)) + return NULL; + + return ffind_2d_perpendicular(tsw0->tmp, tsw1->tmp, tsw2->tmp); +} + +/* + * Check the x-direction ring through (i,j,k). Returns true at once unless + * x_sz is 4 and X_MESH is clear. Otherwise returns false when fewer than + * two of the positions i-1, i+1, i+2 hold torus switches, or when any + * link_tswitches() call on an adjacent installed pair fails. + */ +static +bool safe_x_ring(struct torus *t, int i, int j, int k) +{ + int im1, ip1, ip2; + bool success = true; + + /* + * If this x-direction radix-4 ring has at least two links + * already installed into the torus, then this ring does not + * prevent us from looking for y or z direction perpendiculars. + * + * It is easier to check for the appropriate switches being installed + * into the torus than it is to check for the links, so force the + * link installation if the appropriate switches are installed. + * + * Recall that canonicalize(n - 2, 4) == canonicalize(n + 2, 4). 
+ */ + if (t->x_sz != 4 || t->flags & X_MESH) + goto out; + + im1 = canonicalize(i - 1, t->x_sz); + ip1 = canonicalize(i + 1, t->x_sz); + ip2 = canonicalize(i + 2, t->x_sz); + + if (!!t->sw[im1][j][k] + + !!t->sw[ip1][j][k] + !!t->sw[ip2][j][k] < 2) { + success = false; + goto out; + } + if (t->sw[ip2][j][k] && t->sw[im1][j][k]) + success = link_tswitches(t, 0, + t->sw[ip2][j][k], + t->sw[im1][j][k]) + && success; + + if (t->sw[im1][j][k] && t->sw[i][j][k]) + success = link_tswitches(t, 0, + t->sw[im1][j][k], + t->sw[i][j][k]) + && success; + + if (t->sw[i][j][k] && t->sw[ip1][j][k]) + success = link_tswitches(t, 0, + t->sw[i][j][k], + t->sw[ip1][j][k]) + && success; + + if (t->sw[ip1][j][k] && t->sw[ip2][j][k]) + success = link_tswitches(t, 0, + t->sw[ip1][j][k], + t->sw[ip2][j][k]) + && success; +out: + return success; +} + +/* + * y-direction counterpart of safe_x_ring(): the same checks using y_sz, + * Y_MESH and link_tswitches() direction 1. + */ +static +bool safe_y_ring(struct torus *t, int i, int j, int k) +{ + int jm1, jp1, jp2; + bool success = true; + + /* + * If this y-direction radix-4 ring has at least two links + * already installed into the torus, then this ring does not + * prevent us from looking for x or z direction perpendiculars. + * + * It is easier to check for the appropriate switches being installed + * into the torus than it is to check for the links, so force the + * link installation if the appropriate switches are installed. + * + * Recall that canonicalize(n - 2, 4) == canonicalize(n + 2, 4). 
+ */ + if (t->y_sz != 4 || (t->flags & Y_MESH)) + goto out; + + jm1 = canonicalize(j - 1, t->y_sz); + jp1 = canonicalize(j + 1, t->y_sz); + jp2 = canonicalize(j + 2, t->y_sz); + + if (!!t->sw[i][jm1][k] + + !!t->sw[i][jp1][k] + !!t->sw[i][jp2][k] < 2) { + success = false; + goto out; + } + if (t->sw[i][jp2][k] && t->sw[i][jm1][k]) + success = link_tswitches(t, 1, + t->sw[i][jp2][k], + t->sw[i][jm1][k]) + && success; + + if (t->sw[i][jm1][k] && t->sw[i][j][k]) + success = link_tswitches(t, 1, + t->sw[i][jm1][k], + t->sw[i][j][k]) + && success; + + if (t->sw[i][j][k] && t->sw[i][jp1][k]) + success = link_tswitches(t, 1, + t->sw[i][j][k], + t->sw[i][jp1][k]) + && success; + + if (t->sw[i][jp1][k] && t->sw[i][jp2][k]) + success = link_tswitches(t, 1, + t->sw[i][jp1][k], + t->sw[i][jp2][k]) + && success; +out: + return success; +} + +/* + * z-direction counterpart of safe_x_ring(): the same checks using z_sz, + * Z_MESH and link_tswitches() direction 2. + */ +static +bool safe_z_ring(struct torus *t, int i, int j, int k) +{ + int km1, kp1, kp2; + bool success = true; + + /* + * If this z-direction radix-4 ring has at least two links + * already installed into the torus, then this ring does not + * prevent us from looking for x or y direction perpendiculars. + * + * It is easier to check for the appropriate switches being installed + * into the torus than it is to check for the links, so force the + * link installation if the appropriate switches are installed. + * + * Recall that canonicalize(n - 2, 4) == canonicalize(n + 2, 4). 
+ */ + if (t->z_sz != 4 || t->flags & Z_MESH) + goto out; + + km1 = canonicalize(k - 1, t->z_sz); + kp1 = canonicalize(k + 1, t->z_sz); + kp2 = canonicalize(k + 2, t->z_sz); + + if (!!t->sw[i][j][km1] + + !!t->sw[i][j][kp1] + !!t->sw[i][j][kp2] < 2) { + success = false; + goto out; + } + if (t->sw[i][j][kp2] && t->sw[i][j][km1]) + success = link_tswitches(t, 2, + t->sw[i][j][kp2], + t->sw[i][j][km1]) + && success; + + if (t->sw[i][j][km1] && t->sw[i][j][k]) + success = link_tswitches(t, 2, + t->sw[i][j][km1], + t->sw[i][j][k]) + && success; + + if (t->sw[i][j][k] && t->sw[i][j][kp1]) + success = link_tswitches(t, 2, + t->sw[i][j][k], + t->sw[i][j][kp1]) + && success; + + if (t->sw[i][j][kp1] && t->sw[i][j][kp2]) + success = link_tswitches(t, 2, + t->sw[i][j][kp1], + t->sw[i][j][kp2]) + && success; +out: + return success; +} + +/* + * These functions return true when it is safe to call + * tfind_3d_perpendicular()/ffind_3d_perpendicular(). Note that the + * ring checks they run may install links through link_tswitches(), and + * that the second ring is only checked when the first one passes. + */ +static +bool safe_x_perpendicular(struct torus *t, int i, int j, int k) +{ + /* + * If the dimensions perpendicular to the search direction are + * not radix 4 torus dimensions, it is always safe to search for + * a perpendicular. + * + * Here we are checking for enough appropriate links having been + * installed into the torus to prevent an incorrect link from being + * considered as a perpendicular candidate. + */ + return safe_y_ring(t, i, j, k) && safe_z_ring(t, i, j, k); +} + +static +bool safe_y_perpendicular(struct torus *t, int i, int j, int k) +{ + /* + * If the dimensions perpendicular to the search direction are + * not radix 4 torus dimensions, it is always safe to search for + * a perpendicular. + * + * Here we are checking for enough appropriate links having been + * installed into the torus to prevent an incorrect link from being + * considered as a perpendicular candidate. 
+ */ + return safe_x_ring(t, i, j, k) && safe_z_ring(t, i, j, k); +} + +static +bool safe_z_perpendicular(struct torus *t, int i, int j, int k) +{ + /* + * If the dimensions perpendicular to the search direction are + * not radix 4 torus dimensions, it is always safe to search for + * a perpendicular. + * + * Implement this by checking for enough appropriate links having + * been installed into the torus to prevent an incorrect link from + * being considered as a perpendicular candidate. + */ + return safe_x_ring(t, i, j, k) && safe_y_ring(t, i, j, k); +} + +/* + * Templates for determining 2D/3D case fingerprints. Recall that if + * a fingerprint bit is set the corresponding switch is absent from + * the all-switches-present template. + * + * I.e., for the 2D case where the x,y dimensions have a radix greater + * than one, and the z dimension has radix 1, fingerprint bits 4-7 are + * always zero. + * + * For the 2D case where the x,z dimensions have a radix greater than + * one, and the y dimension has radix 1, fingerprint bits 2,3,6,7 are + * always zero. + * + * For the 2D case where the y,z dimensions have a radix greater than + * one, and the x dimension has radix 1, fingerprint bits 1,3,5,7 are + * always zero. + * + * Recall also that bits 8-10 distinguish between 2D and 3D cases. + * If bit 8+d is set, for 0 <= d < 3, dimension d of the desired + * torus has radix greater than 1. + */ + +/* + * 2D case 0x300 + * b0: t->sw[i ][j ][0 ] + * b1: t->sw[i+1][j ][0 ] + * b2: t->sw[i ][j+1][0 ] + * b3: t->sw[i+1][j+1][0 ] + * O . . . . . O + * 2D case 0x500 . . + * b0: t->sw[i ][0 ][k ] . . + * b1: t->sw[i+1][0 ][k ] . . + * b4: t->sw[i ][0 ][k+1] . . + * b5: t->sw[i+1][0 ][k+1] . . + * @ . . . . . O + * 2D case 0x600 + * b0: t->sw[0 ][j ][k ] + * b2: t->sw[0 ][j+1][k ] + * b4: t->sw[0 ][j ][k+1] + * b6: t->sw[0 ][j+1][k+1] + */ + +/* + * 3D case 0x700: O + * . . . + * b0: t->sw[i ][j ][k ] . . . + * b1: t->sw[i+1][j ][k ] . . . + * b2: t->sw[i ][j+1][k ] . . . 
+ * b3: t->sw[i+1][j+1][k ] O . O + * b4: t->sw[i ][j ][k+1] . . O . . + * b5: t->sw[i+1][j ][k+1] . . . . . . + * b6: t->sw[i ][j+1][k+1] . . . . + * b7: t->sw[i+1][j+1][k+1] . . . . . . + * . . O . . + * O . O + * . . . + * . . . + * . . . + * . . . + * @ + */ + +/* + * When t->debug is set, log at OSM_LOG_INFO that case n, evaluated at + * (case_i,case_j,case_k), found no face corner at (crnr_i,crnr_j,crnr_k). + */ +static +void log_no_crnr(struct torus *t, unsigned n, + int case_i, int case_j, int case_k, + int crnr_i, int crnr_j, int crnr_k) +{ + if (t->debug) + OSM_LOG(&t->osm->log, OSM_LOG_INFO, "Case 0x%03x " + "@ %d %d %d: no corner @ %d %d %d\n", + n, case_i, case_j, case_k, crnr_i, crnr_j, crnr_k); +} + +/* + * When t->debug is set, log at OSM_LOG_INFO that case n, evaluated at + * (case_i,case_j,case_k), found no perpendicular at + * (perp_i,perp_j,perp_k). + */ +static +void log_no_perp(struct torus *t, unsigned n, + int case_i, int case_j, int case_k, + int perp_i, int perp_j, int perp_k) +{ + if (t->debug) + OSM_LOG(&t->osm->log, OSM_LOG_INFO, "Case 0x%03x " + "@ %d %d %d: no perpendicular @ %d %d %d\n", + n, case_i, case_j, case_k, perp_i, perp_j, perp_k); +} + +/* + * Handle the 2D cases with a single existing edge, i.e. the cases + * where two of the four switches of a face are missing. + */ + +/* + * 2D case 0x30c + * b0: t->sw[i ][j ][0 ] + * b1: t->sw[i+1][j ][0 ] + * b2: + * b3: + * O O + * 2D case 0x530 + * b0: t->sw[i ][0 ][k ] + * b1: t->sw[i+1][0 ][k ] + * b4: + * b5: + * @ . . . . . 
O + * 2D case 0x650 + * b0: t->sw[0 ][j ][k ] + * b2: t->sw[0 ][j+1][k ] + * b4: + * b6: + */ +/* + * Each of these handlers tries to install one missing switch next to an + * end of the existing edge: the candidate comes from + * tfind_2d_perpendicular() and is only looked up when the matching + * safe_N_perpendicular() check passes. The second end is tried only if + * the first attempt fails; false means neither attempt installed a switch. + */ +static +bool handle_case_0x30c(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jm1 = canonicalize(j - 1, t->y_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + + if (safe_y_perpendicular(t, i, j, k) && + install_tswitch(t, i, jp1, k, + tfind_2d_perpendicular(t->sw[ip1][j][k], + t->sw[i][j][k], + t->sw[i][jm1][k]))) { + return true; + } + log_no_perp(t, 0x30c, i, j, k, i, j, k); + + if (safe_y_perpendicular(t, ip1, j, k) && + install_tswitch(t, ip1, jp1, k, + tfind_2d_perpendicular(t->sw[i][j][k], + t->sw[ip1][j][k], + t->sw[ip1][jm1][k]))) { + return true; + } + log_no_perp(t, 0x30c, i, j, k, ip1, j, k); + return false; +} + +static +bool handle_case_0x530(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int km1 = canonicalize(k - 1, t->z_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_z_perpendicular(t, i, j, k) && + install_tswitch(t, i, j, kp1, + tfind_2d_perpendicular(t->sw[ip1][j][k], + t->sw[i][j][k], + t->sw[i][j][km1]))) { + return true; + } + log_no_perp(t, 0x530, i, j, k, i, j, k); + + if (safe_z_perpendicular(t, ip1, j, k) && + install_tswitch(t, ip1, j, kp1, + tfind_2d_perpendicular(t->sw[i][j][k], + t->sw[ip1][j][k], + t->sw[ip1][j][km1]))) { + return true; + } + log_no_perp(t, 0x530, i, j, k, ip1, j, k); + return false; +} + +static +bool handle_case_0x650(struct torus *t, int i, int j, int k) +{ + int jp1 = canonicalize(j + 1, t->y_sz); + int km1 = canonicalize(k - 1, t->z_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_z_perpendicular(t, i, j, k) && + install_tswitch(t, i, j, kp1, + tfind_2d_perpendicular(t->sw[i][jp1][k], + t->sw[i][j][k], + t->sw[i][j][km1]))) { + return true; + } + log_no_perp(t, 0x650, i, j, k, i, j, k); + + if (safe_z_perpendicular(t, i, jp1, k) && + install_tswitch(t, i, jp1, kp1, + tfind_2d_perpendicular(t->sw[i][j][k], + 
t->sw[i][jp1][k], + t->sw[i][jp1][km1]))) { + return true; + } + log_no_perp(t, 0x650, i, j, k, i, jp1, k); + return false; +} + +/* + * 2D case 0x305 + * b0: + * b1: t->sw[i+1][j ][0 ] + * b2: + * b3: t->sw[i+1][j+1][0 ] + * O O + * 2D case 0x511 . + * b0: . + * b1: t->sw[i+1][0 ][k ] . + * b4: . + * b5: t->sw[i+1][0 ][k+1] . + * @ O + * 2D case 0x611 + * b0: + * b2: t->sw[0 ][j+1][k ] + * b4: + * b6: t->sw[0 ][j+1][k+1] + * + * The handlers try to install the b0 switch first and then the b2 + * switch (b4 in the 0x511 and 0x611 cases). + */ +static +bool handle_case_0x305(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int ip2 = canonicalize(i + 2, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + + if (safe_x_perpendicular(t, ip1, j, k) && + install_tswitch(t, i, j, k, + tfind_2d_perpendicular(t->sw[ip1][jp1][k], + t->sw[ip1][j][k], + t->sw[ip2][j][k]))) { + return true; + } + log_no_perp(t, 0x305, i, j, k, ip1, j, k); + + if (safe_x_perpendicular(t, ip1, jp1, k) && + install_tswitch(t, i, jp1, k, + tfind_2d_perpendicular(t->sw[ip1][j][k], + t->sw[ip1][jp1][k], + t->sw[ip2][jp1][k]))) { + return true; + } + log_no_perp(t, 0x305, i, j, k, ip1, jp1, k); + return false; +} + +static +bool handle_case_0x511(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int ip2 = canonicalize(i + 2, t->x_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_x_perpendicular(t, ip1, j, k) && + install_tswitch(t, i, j, k, + tfind_2d_perpendicular(t->sw[ip1][j][kp1], + t->sw[ip1][j][k], + t->sw[ip2][j][k]))) { + return true; + } + log_no_perp(t, 0x511, i, j, k, ip1, j, k); + + if (safe_x_perpendicular(t, ip1, j, kp1) && + install_tswitch(t, i, j, kp1, + tfind_2d_perpendicular(t->sw[ip1][j][k], + t->sw[ip1][j][kp1], + t->sw[ip2][j][kp1]))) { + return true; + } + log_no_perp(t, 0x511, i, j, k, ip1, j, kp1); + return false; +} + +static +bool handle_case_0x611(struct torus *t, int i, int j, int k) +{ + int jp1 = canonicalize(j + 1, t->y_sz); + int jp2 = canonicalize(j + 2, t->y_sz); + int kp1 = canonicalize(k + 1, 
t->z_sz); + + if (safe_y_perpendicular(t, i, jp1, k) && + install_tswitch(t, i, j, k, + tfind_2d_perpendicular(t->sw[i][jp1][kp1], + t->sw[i][jp1][k], + t->sw[i][jp2][k]))) { + return true; + } + log_no_perp(t, 0x611, i, j, k, i, jp1, k); + + if (safe_y_perpendicular(t, i, jp1, kp1) && + install_tswitch(t, i, j, kp1, + tfind_2d_perpendicular(t->sw[i][jp1][k], + t->sw[i][jp1][kp1], + t->sw[i][jp2][kp1]))) { + return true; + } + log_no_perp(t, 0x611, i, j, k, i, jp1, kp1); + return false; +} + +/* + * 2D case 0x303 + * b0: + * b1: + * b2: t->sw[i ][j+1][0 ] + * b3: t->sw[i+1][j+1][0 ] + * O . . . . . O + * 2D case 0x503 + * b0: + * b1: + * b4: t->sw[i ][0 ][k+1] + * b5: t->sw[i+1][0 ][k+1] + * @ O + * 2D case 0x605 + * b0: + * b2: + * b4: t->sw[0 ][j ][k+1] + * b6: t->sw[0 ][j+1][k+1] + * + * The handlers try to install the b0 switch first and then the b1 + * switch (b2 in the 0x605 case). + */ +static +bool handle_case_0x303(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int jp2 = canonicalize(j + 2, t->y_sz); + + if (safe_y_perpendicular(t, i, jp1, k) && + install_tswitch(t, i, j, k, + tfind_2d_perpendicular(t->sw[ip1][jp1][k], + t->sw[i][jp1][k], + t->sw[i][jp2][k]))) { + return true; + } + log_no_perp(t, 0x303, i, j, k, i, jp1, k); + + if (safe_y_perpendicular(t, ip1, jp1, k) && + install_tswitch(t, ip1, j, k, + tfind_2d_perpendicular(t->sw[i][jp1][k], + t->sw[ip1][jp1][k], + t->sw[ip1][jp2][k]))) { + return true; + } + log_no_perp(t, 0x303, i, j, k, ip1, jp1, k); + return false; +} + +static +bool handle_case_0x503(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + int kp2 = canonicalize(k + 2, t->z_sz); + + if (safe_z_perpendicular(t, i, j, kp1) && + install_tswitch(t, i, j, k, + tfind_2d_perpendicular(t->sw[ip1][j][kp1], + t->sw[i][j][kp1], + t->sw[i][j][kp2]))) { + return true; + } + log_no_perp(t, 0x503, i, j, k, i, j, kp1); + + if (safe_z_perpendicular(t, ip1, j, kp1) && + install_tswitch(t, ip1, j, k, + 
tfind_2d_perpendicular(t->sw[i][j][kp1], + t->sw[ip1][j][kp1], + t->sw[ip1][j][kp2]))) { + return true; + } + log_no_perp(t, 0x503, i, j, k, ip1, j, kp1); + return false; +} + +static +bool handle_case_0x605(struct torus *t, int i, int j, int k) +{ + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + int kp2 = canonicalize(k + 2, t->z_sz); + + if (safe_z_perpendicular(t, i, j, kp1) && + install_tswitch(t, i, j, k, + tfind_2d_perpendicular(t->sw[i][jp1][kp1], + t->sw[i][j][kp1], + t->sw[i][j][kp2]))) { + return true; + } + log_no_perp(t, 0x605, i, j, k, i, j, kp1); + + if (safe_z_perpendicular(t, i, jp1, kp1) && + install_tswitch(t, i, jp1, k, + tfind_2d_perpendicular(t->sw[i][j][kp1], + t->sw[i][jp1][kp1], + t->sw[i][jp1][kp2]))) { + return true; + } + log_no_perp(t, 0x605, i, j, k, i, jp1, kp1); + return false; +} + +/* + * 2D case 0x30a + * b0: t->sw[i ][j ][0 ] + * b1: + * b2: t->sw[i ][j+1][0 ] + * b3: + * O O + * 2D case 0x522 . + * b0: t->sw[i ][0 ][k ] . + * b1: . + * b4: t->sw[i ][0 ][k+1] . + * b5: . 
+ * @ O + * 2D case 0x644 + * b0: t->sw[0 ][j ][k ] + * b2: + * b4: t->sw[0 ][j ][k+1] + * b6: + */ +/* + * The handlers for this group try to install the b1 switch first and then + * the b3 switch (b1 then b5 for 0x522, b2 then b6 for 0x644), each time + * after the matching safe_N_perpendicular() check. + */ +static +bool handle_case_0x30a(struct torus *t, int i, int j, int k) +{ + int im1 = canonicalize(i - 1, t->x_sz); + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + + if (safe_x_perpendicular(t, i, j, k) && + install_tswitch(t, ip1, j, k, + tfind_2d_perpendicular(t->sw[i][jp1][k], + t->sw[i][j][k], + t->sw[im1][j][k]))) { + return true; + } + log_no_perp(t, 0x30a, i, j, k, i, j, k); + + if (safe_x_perpendicular(t, i, jp1, k) && + install_tswitch(t, ip1, jp1, k, + tfind_2d_perpendicular(t->sw[i][j][k], + t->sw[i][jp1][k], + t->sw[im1][jp1][k]))) { + return true; + } + log_no_perp(t, 0x30a, i, j, k, i, jp1, k); + return false; +} + +static +bool handle_case_0x522(struct torus *t, int i, int j, int k) +{ + int im1 = canonicalize(i - 1, t->x_sz); + int ip1 = canonicalize(i + 1, t->x_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_x_perpendicular(t, i, j, k) && + install_tswitch(t, ip1, j, k, + tfind_2d_perpendicular(t->sw[i][j][kp1], + t->sw[i][j][k], + t->sw[im1][j][k]))) { + return true; + } + log_no_perp(t, 0x522, i, j, k, i, j, k); + + if (safe_x_perpendicular(t, i, j, kp1) && + install_tswitch(t, ip1, j, kp1, + tfind_2d_perpendicular(t->sw[i][j][k], + t->sw[i][j][kp1], + t->sw[im1][j][kp1]))) { + return true; + } + log_no_perp(t, 0x522, i, j, k, i, j, kp1); + return false; +} + +static +bool handle_case_0x644(struct torus *t, int i, int j, int k) +{ + int jm1 = canonicalize(j - 1, t->y_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_y_perpendicular(t, i, j, k) && + install_tswitch(t, i, jp1, k, + tfind_2d_perpendicular(t->sw[i][j][kp1], + t->sw[i][j][k], + t->sw[i][jm1][k]))) { + return true; + } + log_no_perp(t, 0x644, i, j, k, i, j, k); + + if (safe_y_perpendicular(t, i, j, kp1) && + install_tswitch(t, i, jp1, kp1, + tfind_2d_perpendicular(t->sw[i][j][k], + 
t->sw[i][j][kp1], + t->sw[i][jm1][kp1]))) { + return true; + } + log_no_perp(t, 0x644, i, j, k, i, j, kp1); + return false; +} + +/* + * Handle the 2D cases where two existing edges meet at a corner. + * + * Three switches of the face are present; the fourth is located with + * tfind_face_corner() and installed. No safe_N_perpendicular() check is + * made on this path. + */ + +/* + * 2D case 0x301 + * b0: + * b1: t->sw[i+1][j ][0 ] + * b2: t->sw[i ][j+1][0 ] + * b3: t->sw[i+1][j+1][0 ] + * O . . . . . O + * 2D case 0x501 . + * b0: . + * b1: t->sw[i+1][0 ][k ] . + * b4: t->sw[i ][0 ][k+1] . + * b5: t->sw[i+1][0 ][k+1] . + * @ O + * 2D case 0x601 + * b0: + * b2: t->sw[0 ][j+1][k ] + * b4: t->sw[0 ][j ][k+1] + * b6: t->sw[0 ][j+1][k+1] + * + * All three handlers install the b0 switch at (i,j,k). + */ +static +bool handle_case_0x301(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + + if (install_tswitch(t, i, j, k, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[ip1][jp1][k], + t->sw[i][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x301, i, j, k, i, j, k); + return false; +} + +static +bool handle_case_0x501(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, j, k, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[ip1][j][kp1], + t->sw[i][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x501, i, j, k, i, j, k); + return false; +} + +static +bool handle_case_0x601(struct torus *t, int i, int j, int k) +{ + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, j, k, + tfind_face_corner(t->sw[i][jp1][k], + t->sw[i][jp1][kp1], + t->sw[i][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x601, i, j, k, i, j, k); + return false; +} + +/* + * 2D case 0x302 + * b0: t->sw[i ][j ][0 ] + * b1: + * b2: t->sw[i ][j+1][0 ] + * b3: t->sw[i+1][j+1][0 ] + * O . . . . . O + * 2D case 0x502 . + * b0: t->sw[i ][0 ][k ] . + * b1: . + * b4: t->sw[i ][0 ][k+1] . + * b5: t->sw[i+1][0 ][k+1] . 
+ * @ O + * 2D case 0x604 + * b0: t->sw[0 ][j ][k ] + * b2: + * b4: t->sw[0 ][j ][k+1] + * b6: t->sw[0 ][j+1][k+1] + */ +/* Install the b1 switch at (i+1,j,k), found via the b0, b2, b3 switches. */ +static +bool handle_case_0x302(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + + if (install_tswitch(t, ip1, j, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][jp1][k], + t->sw[ip1][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x302, i, j, k, ip1, j, k); + return false; +} + +/* Install the b1 switch at (i+1,j,k), found via the b0, b4, b5 switches. */ +static +bool handle_case_0x502(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, j, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][j][kp1], + t->sw[ip1][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x502, i, j, k, ip1, j, k); + return false; +} + +/* Install the b2 switch at (i,j+1,k), found via the b0, b4, b6 switches. */ +static +bool handle_case_0x604(struct torus *t, int i, int j, int k) +{ + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, jp1, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][j][kp1], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x604, i, j, k, i, jp1, k); + return false; +} + + +/* + * 2D case 0x308 + * b0: t->sw[i ][j ][0 ] + * b1: t->sw[i+1][j ][0 ] + * b2: t->sw[i ][j+1][0 ] + * b3: + * O O + * 2D case 0x520 . + * b0: t->sw[i ][0 ][k ] . + * b1: t->sw[i+1][0 ][k ] . + * b4: t->sw[i ][0 ][k+1] . + * b5: . + * @ . . . . . 
O + * 2D case 0x640 + * b0: t->sw[0 ][j ][k ] + * b2: t->sw[0 ][j+1][k ] + * b4: t->sw[0 ][j ][k+1] + * b6: + */ +/* Install the b3 switch at (i+1,j+1,k), found via the b1, b0, b2 switches. */ +static +bool handle_case_0x308(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + + if (install_tswitch(t, ip1, jp1, k, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[i][j][k], + t->sw[i][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x308, i, j, k, ip1, jp1, k); + return false; +} + +/* Install the b5 switch at (i+1,j,k+1), found via the b1, b0, b4 switches. */ +static +bool handle_case_0x520(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, j, kp1, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[i][j][k], + t->sw[i][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x520, i, j, k, ip1, j, kp1); + return false; +} + +/* Install the b6 switch at (i,j+1,k+1), found via the b2, b0, b4 switches. */ +static +bool handle_case_0x640(struct torus *t, int i, int j, int k) +{ + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, jp1, kp1, + tfind_face_corner(t->sw[i][jp1][k], + t->sw[i][j][k], + t->sw[i][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x640, i, j, k, i, jp1, kp1); + return false; +} + +/* + * 2D case 0x304 + * b0: t->sw[i ][j ][0 ] + * b1: t->sw[i+1][j ][0 ] + * b2: + * b3: t->sw[i+1][j+1][0 ] + * O O + * 2D case 0x510 . + * b0: t->sw[i ][0 ][k ] . + * b1: t->sw[i+1][0 ][k ] . + * b4: . + * b5: t->sw[i+1][0 ][k+1] . + * @ . . . . . 
O + * 2D case 0x610 + * b0: t->sw[0 ][j ][k ] + * b2: t->sw[0 ][j+1][k ] + * b4: + * b6: t->sw[0 ][j+1][k+1] + */ +/* Install the b2 switch at (i,j+1,k), found via the b0, b1, b3 switches. */ +static +bool handle_case_0x304(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + + if (install_tswitch(t, i, jp1, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[ip1][j][k], + t->sw[ip1][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x304, i, j, k, i, jp1, k); + return false; +} + +/* Install the b4 switch at (i,j,k+1), found via the b0, b1, b5 switches. */ +static +bool handle_case_0x510(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, j, kp1, + tfind_face_corner(t->sw[i][j][k], + t->sw[ip1][j][k], + t->sw[ip1][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x510, i, j, k, i, j, kp1); + return false; +} + +/* Install the b4 switch at (i,j,k+1), found via the b0, b2, b6 switches. */ +static +bool handle_case_0x610(struct torus *t, int i, int j, int k) +{ + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, j, kp1, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][jp1][k], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x610, i, j, k, i, j, kp1); + return false; +} + +/* + * Handle the 3D cases where two existing edges meet at a corner. + * + * Three switches are present. Each handler installs the switch that + * tfind_3d_perpendicular() finds for the corner where the two edges + * meet, provided the matching safe_N_perpendicular() check passes. + */ + +/* + * 3D case 0x71f: O + * . . + * b0: . . + * b1: . . + * b2: . . 
+ * b3: O O + * b4: O + * b5: t->sw[i+1][j ][k+1] + * b6: t->sw[i ][j+1][k+1] + * b7: t->sw[i+1][j+1][k+1] + * O + * O O + * + * + * + * + * @ + */ +/* + * Install the b3 switch at (i+1,j+1,k): the z perpendicular at b7 to the + * edges b5-b7 and b6-b7. + */ +static +bool handle_case_0x71f(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + int kp2 = canonicalize(k + 2, t->z_sz); + + if (safe_z_perpendicular(t, ip1, jp1, kp1) && + install_tswitch(t, ip1, jp1, k, + tfind_3d_perpendicular(t->sw[ip1][j][kp1], + t->sw[ip1][jp1][kp1], + t->sw[i][jp1][kp1], + t->sw[ip1][jp1][kp2]))) { + return true; + } + log_no_perp(t, 0x71f, i, j, k, ip1, jp1, kp1); + return false; +} + +/* + * 3D case 0x72f: O + * . + * b0: . + * b1: . + * b2: . + * b3: O O + * b4: t->sw[i ][j ][k+1] . O + * b5: . + * b6: t->sw[i ][j+1][k+1] . + * b7: t->sw[i+1][j+1][k+1] . + * O + * O O + * + * + * + * + * @ + * + * Installs the b2 switch at (i,j+1,k): the z perpendicular at b6 to the + * edges b4-b6 and b7-b6. + */ +static +bool handle_case_0x72f(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + int kp2 = canonicalize(k + 2, t->z_sz); + + if (safe_z_perpendicular(t, i, jp1, kp1) && + install_tswitch(t, i, jp1, k, + tfind_3d_perpendicular(t->sw[i][j][kp1], + t->sw[i][jp1][kp1], + t->sw[ip1][jp1][kp1], + t->sw[i][jp1][kp2]))) { + return true; + } + log_no_perp(t, 0x72f, i, j, k, i, jp1, kp1); + return false; +} + +/* + * 3D case 0x737: O + * . . + * b0: . . + * b1: . . + * b2: . . + * b3: t->sw[i+1][j+1][k ] O . 
O + * b4: O + * b5: + * b6: t->sw[i ][j+1][k+1] + * b7: t->sw[i+1][j+1][k+1] + * O + * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x737(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int jp2 = canonicalize(j + 2, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_y_perpendicular(t, ip1, jp1, kp1) && + install_tswitch(t, ip1, j, kp1, + tfind_3d_perpendicular(t->sw[i][jp1][kp1], + t->sw[ip1][jp1][kp1], + t->sw[ip1][jp1][k], + t->sw[ip1][jp2][kp1]))) { + return true; + } + log_no_perp(t, 0x737, i, j, k, ip1, jp1, kp1); + return false; +} + +/* + * 3D case 0x73b: O + * . + * b0: . + * b1: . + * b2: t->sw[i ][j+1][k ] . + * b3: O O + * b4: . O + * b5: . + * b6: t->sw[i ][j+1][k+1] . + * b7: t->sw[i+1][j+1][k+1] . + * . O + * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x73b(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int jp2 = canonicalize(j + 2, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_y_perpendicular(t, i, jp1, kp1) && + install_tswitch(t, i, j, kp1, + tfind_3d_perpendicular(t->sw[i][jp1][k], + t->sw[i][jp1][kp1], + t->sw[ip1][jp1][kp1], + t->sw[i][jp2][kp1]))) { + return true; + } + log_no_perp(t, 0x73b, i, j, k, i, jp1, kp1); + return false; +} + +/* + * 3D case 0x74f: O + * . + * b0: . + * b1: . + * b2: . + * b3: O O + * b4: t->sw[i ][j ][k+1] O . + * b5: t->sw[i+1][j ][k+1] . + * b6: . + * b7: t->sw[i+1][j+1][k+1] . 
+ * O + * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x74f(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + int kp2 = canonicalize(k + 2, t->z_sz); + + if (safe_z_perpendicular(t, ip1, j, kp1) && + install_tswitch(t, ip1, j, k, + tfind_3d_perpendicular(t->sw[i][j][kp1], + t->sw[ip1][j][kp1], + t->sw[ip1][jp1][kp1], + t->sw[ip1][j][kp2]))) { + return true; + } + log_no_perp(t, 0x74f, i, j, k, ip1, j, kp1); + return false; +} + +/* + * 3D case 0x757: O + * . . + * b0: . . + * b1: . . + * b2: . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: O + * b5: t->sw[i+1][j ][k+1] + * b6: + * b7: t->sw[i+1][j+1][k+1] + * O + * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x757(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int ip2 = canonicalize(i + 2, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_x_perpendicular(t, ip1, jp1, kp1) && + install_tswitch(t, i, jp1, kp1, + tfind_3d_perpendicular(t->sw[ip1][j][kp1], + t->sw[ip1][jp1][kp1], + t->sw[ip1][jp1][k], + t->sw[ip2][jp1][kp1]))) { + return true; + } + log_no_perp(t, 0x757, i, j, k, ip1, jp1, kp1); + return false; +} + +/* + * 3D case 0x75d: O + * . + * b0: . + * b1: t->sw[i+1][j ][k ] . + * b2: . + * b3: O O + * b4: O . + * b5: t->sw[i+1][j ][k+1] . + * b6: . + * b7: t->sw[i+1][j+1][k+1] . + * O . 
+ * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x75d(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int ip2 = canonicalize(i + 2, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_x_perpendicular(t, ip1, j, kp1) && + install_tswitch(t, i, j, kp1, + tfind_3d_perpendicular(t->sw[ip1][j][k], + t->sw[ip1][j][kp1], + t->sw[ip1][jp1][kp1], + t->sw[ip2][j][kp1]))) { + return true; + } + log_no_perp(t, 0x75d, i, j, k, ip1, j, kp1); + return false; +} + +/* + * 3D case 0x773: O + * . + * b0: . + * b1: . + * b2: t->sw[i ][j+1][k ] . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: O + * b5: . + * b6: . + * b7: t->sw[i+1][j+1][k+1] . + * . O + * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x773(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int jp2 = canonicalize(j + 2, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_y_perpendicular(t, ip1, jp1, k) && + install_tswitch(t, ip1, j, k, + tfind_3d_perpendicular(t->sw[i][jp1][k], + t->sw[ip1][jp1][k], + t->sw[ip1][jp1][kp1], + t->sw[ip1][jp2][k]))) { + return true; + } + log_no_perp(t, 0x773, i, j, k, ip1, jp1, k); + return false; +} + +/* + * 3D case 0x775: O + * . + * b0: . + * b1: t->sw[i+1][j ][k ] . + * b2: . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: O + * b5: . + * b6: . + * b7: t->sw[i+1][j+1][k+1] . + * O . 
+ * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x775(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int ip2 = canonicalize(i + 2, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_x_perpendicular(t, ip1, jp1, k) && + install_tswitch(t, i, jp1, k, + tfind_3d_perpendicular(t->sw[ip1][j][k], + t->sw[ip1][jp1][k], + t->sw[ip1][jp1][kp1], + t->sw[ip2][jp1][k]))) { + return true; + } + log_no_perp(t, 0x775, i, j, k, ip1, jp1, k); + return false; +} + +/* + * 3D case 0x78f: O + * + * b0: + * b1: + * b2: + * b3: O O + * b4: t->sw[i ][j ][k+1] . O . + * b5: t->sw[i+1][j ][k+1] . . + * b6: t->sw[i ][j+1][k+1] . . + * b7: . . + * O + * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x78f(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + int kp2 = canonicalize(k + 2, t->z_sz); + + if (safe_z_perpendicular(t, i, j, kp1) && + install_tswitch(t, i, j, k, + tfind_3d_perpendicular(t->sw[ip1][j][kp1], + t->sw[i][j][kp1], + t->sw[i][jp1][kp1], + t->sw[i][j][kp2]))) { + return true; + } + log_no_perp(t, 0x78f, i, j, k, i, j, kp1); + return false; +} + +/* + * 3D case 0x7ab: O + * + * b0: + * b1: + * b2: t->sw[i ][j+1][k ] + * b3: O O + * b4: t->sw[i ][j ][k+1] . . O + * b5: . . + * b6: t->sw[i ][j+1][k+1] . . + * b7: . . + * . 
O + * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x7ab(struct torus *t, int i, int j, int k) +{ + int im1 = canonicalize(i - 1, t->x_sz); + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_x_perpendicular(t, i, jp1, kp1) && + install_tswitch(t, ip1, jp1, kp1, + tfind_3d_perpendicular(t->sw[i][j][kp1], + t->sw[i][jp1][kp1], + t->sw[i][jp1][k], + t->sw[im1][jp1][kp1]))) { + return true; + } + log_no_perp(t, 0x7ab, i, j, k, i, jp1, kp1); + return false; +} + +/* + * 3D case 0x7ae: O + * + * b0: t->sw[i ][j ][k ] + * b1: + * b2: + * b3: O O + * b4: t->sw[i ][j ][k+1] . O + * b5: . + * b6: t->sw[i ][j+1][k+1] . + * b7: . + * O + * O . O + * . + * . + * . + * . + * @ + */ +static +bool handle_case_0x7ae(struct torus *t, int i, int j, int k) +{ + int im1 = canonicalize(i - 1, t->x_sz); + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_x_perpendicular(t, i, j, kp1) && + install_tswitch(t, ip1, j, kp1, + tfind_3d_perpendicular(t->sw[i][j][k], + t->sw[i][j][kp1], + t->sw[i][jp1][kp1], + t->sw[im1][j][kp1]))) { + return true; + } + log_no_perp(t, 0x7ae, i, j, k, i, j, kp1); + return false; +} + +/* + * 3D case 0x7b3: O + * + * b0: + * b1: + * b2: t->sw[i ][j+1][k ] + * b3: t->sw[i+1][j+1][k ] O O + * b4: . O + * b5: . . + * b6: t->sw[i ][j+1][k+1] . . + * b7: . . + * . . 
O + * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x7b3(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int jp2 = canonicalize(j + 2, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_y_perpendicular(t, i, jp1, k) && + install_tswitch(t, i, j, k, + tfind_3d_perpendicular(t->sw[i][jp1][kp1], + t->sw[i][jp1][k], + t->sw[ip1][jp1][k], + t->sw[i][jp2][k]))) { + return true; + } + log_no_perp(t, 0x7b3, i, j, k, i, jp1, k); + return false; +} + +/* + * 3D case 0x7ba: O + * + * b0: t->sw[i ][j ][k ] + * b1: + * b2: t->sw[i ][j+1][k ] + * b3: O O + * b4: . O + * b5: . + * b6: t->sw[i ][j+1][k+1] . + * b7: . + * . O + * O O + * . + * . + * . + * . + * @ + */ +static +bool handle_case_0x7ba(struct torus *t, int i, int j, int k) +{ + int im1 = canonicalize(i - 1, t->x_sz); + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_x_perpendicular(t, i, jp1, k) && + install_tswitch(t, ip1, jp1, k, + tfind_3d_perpendicular(t->sw[i][j][k], + t->sw[i][jp1][k], + t->sw[i][jp1][kp1], + t->sw[im1][jp1][k]))) { + return true; + } + log_no_perp(t, 0x7ba, i, j, k, i, jp1, k); + return false; +} + +/* + * 3D case 0x7cd: O + * + * b0: + * b1: t->sw[i+1][j ][k ] + * b2: + * b3: O O + * b4: t->sw[i ][j ][k+1] O . . + * b5: t->sw[i+1][j ][k+1] . . + * b6: . . + * b7: . . + * O . 
+ * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x7cd(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int jm1 = canonicalize(j - 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_y_perpendicular(t, ip1, j, kp1) && + install_tswitch(t, ip1, jp1, kp1, + tfind_3d_perpendicular(t->sw[i][j][kp1], + t->sw[ip1][j][kp1], + t->sw[ip1][j][k], + t->sw[ip1][jm1][kp1]))) { + return true; + } + log_no_perp(t, 0x7cd, i, j, k, ip1, j, kp1); + return false; +} + +/* + * 3D case 0x7ce: O + * + * b0: t->sw[i ][j ][k ] + * b1: + * b2: + * b3: O O + * b4: t->sw[i ][j ][k+1] O . + * b5: t->sw[i+1][j ][k+1] . + * b6: . + * b7: . + * O + * O . O + * . + * . + * . + * . + * @ + */ +static +bool handle_case_0x7ce(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int jm1 = canonicalize(j - 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_y_perpendicular(t, i, j, kp1) && + install_tswitch(t, i, jp1, kp1, + tfind_3d_perpendicular(t->sw[i][j][k], + t->sw[i][j][kp1], + t->sw[ip1][j][kp1], + t->sw[i][jm1][kp1]))) { + return true; + } + log_no_perp(t, 0x7ce, i, j, k, i, j, kp1); + return false; +} + +/* + * 3D case 0x7d5: O + * + * b0: + * b1: t->sw[i+1][j ][k ] + * b2: + * b3: t->sw[i+1][j+1][k ] O O + * b4: O . + * b5: t->sw[i+1][j ][k+1] . . + * b6: . . + * b7: . . + * O . . 
+ * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x7d5(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int ip2 = canonicalize(i + 2, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_x_perpendicular(t, ip1, j, k) && + install_tswitch(t, i, j, k, + tfind_3d_perpendicular(t->sw[ip1][j][kp1], + t->sw[ip1][j][k], + t->sw[ip1][jp1][k], + t->sw[ip2][j][k]))) { + return true; + } + log_no_perp(t, 0x7d5, i, j, k, ip1, j, k); + return false; +} + +/* + * 3D case 0x7dc: O + * + * b0: t->sw[i ][j ][k ] + * b1: t->sw[i+1][j ][k ] + * b2: + * b3: O O + * b4: O . + * b5: t->sw[i+1][j ][k+1] . + * b6: . + * b7: . + * O . + * O O + * . + * . + * . + * . + * @ + */ +static +bool handle_case_0x7dc(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int jm1 = canonicalize(j - 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_y_perpendicular(t, ip1, j, k) && + install_tswitch(t, ip1, jp1, k, + tfind_3d_perpendicular(t->sw[i][j][k], + t->sw[ip1][j][k], + t->sw[ip1][j][kp1], + t->sw[ip1][jm1][k]))) { + return true; + } + log_no_perp(t, 0x7dc, i, j, k, ip1, j, k); + return false; +} + +/* + * 3D case 0x7ea: O + * + * b0: t->sw[i ][j ][k ] + * b1: + * b2: t->sw[i ][j+1][k ] + * b3: O O + * b4: t->sw[i ][j ][k+1] O + * b5: + * b6: + * b7: + * O + * O . O + * . . + * . . + * . . + * . . 
+ * @
+ *
+ * When safe_x_perpendicular() passes for t->sw[i][j][k], tries to
+ * install t->sw[i+1][j][k] with the tfind_3d_perpendicular() result
+ * (which is also handed t->sw[i-1][j][k]).  Returns true on success;
+ * otherwise logs via log_no_perp() and returns false.
+ */
+static
+bool handle_case_0x7ea(struct torus *t, int i, int j, int k)
+{
+	const int iprev = canonicalize(i - 1, t->x_sz);
+	const int inext = canonicalize(i + 1, t->x_sz);
+	const int jnext = canonicalize(j + 1, t->y_sz);
+	const int knext = canonicalize(k + 1, t->z_sz);
+	bool placed;
+
+	placed = safe_x_perpendicular(t, i, j, k) &&
+		install_tswitch(t, inext, j, k,
+			tfind_3d_perpendicular(t->sw[i][j][knext],
+					       t->sw[i][j][k],
+					       t->sw[i][jnext][k],
+					       t->sw[iprev][j][k]));
+	if (!placed)
+		log_no_perp(t, 0x7ea, i, j, k, i, j, k);
+	return placed;
+}
+
+/*
+ * 3D case 0x7ec.  Corner switches this case works from:
+ *   b0: t->sw[i  ][j  ][k  ]
+ *   b1: t->sw[i+1][j  ][k  ]
+ *   b4: t->sw[i  ][j  ][k+1]
+ *
+ * When safe_y_perpendicular() passes for t->sw[i][j][k], tries to
+ * install t->sw[i][j+1][k] with the tfind_3d_perpendicular() result
+ * (which is also handed t->sw[i][j-1][k]).  Returns true on success;
+ * otherwise logs via log_no_perp() and returns false.
+ */
+static
+bool handle_case_0x7ec(struct torus *t, int i, int j, int k)
+{
+	const int inext = canonicalize(i + 1, t->x_sz);
+	const int jnext = canonicalize(j + 1, t->y_sz);
+	const int jprev = canonicalize(j - 1, t->y_sz);
+	const int knext = canonicalize(k + 1, t->z_sz);
+	bool placed;
+
+	placed = safe_y_perpendicular(t, i, j, k) &&
+		install_tswitch(t, i, jnext, k,
+			tfind_3d_perpendicular(t->sw[i][j][knext],
+					       t->sw[i][j][k],
+					       t->sw[inext][j][k],
+					       t->sw[i][jprev][k]));
+	if (!placed)
+		log_no_perp(t, 0x7ec, i, j, k, i, j, k);
+	return placed;
+}
+
+/*
+ * 3D case 0x7f1: O
+ *
+ * b0:
+ * b1: t->sw[i+1][j ][k ]
+ * b2: t->sw[i ][j+1][k ]
+ * b3: t->sw[i+1][j+1][k ] O O
+ * b4: O
+ * b5: . .
+ * b6: . .
+ * b7: . .
+ * . O .
+ * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x7f1(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int km1 = canonicalize(k - 1, t->z_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_z_perpendicular(t, ip1, jp1, k) && + install_tswitch(t, ip1, jp1, kp1, + tfind_3d_perpendicular(t->sw[ip1][j][k], + t->sw[ip1][jp1][k], + t->sw[i][jp1][k], + t->sw[ip1][jp1][km1]))) { + return true; + } + log_no_perp(t, 0x7f1, i, j, k, ip1, jp1, k); + return false; +} + +/* + * 3D case 0x7f2: O + * + * b0: t->sw[i ][j ][k ] + * b1: + * b2: t->sw[i ][j+1][k ] + * b3: t->sw[i+1][j+1][k ] O O + * b4: O + * b5: . + * b6: . + * b7: . + * . O + * O O + * . + * . + * . + * . + * @ + */ +static +bool handle_case_0x7f2(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int km1 = canonicalize(k - 1, t->z_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (safe_z_perpendicular(t, i, jp1, k) && + install_tswitch(t, i, jp1, kp1, + tfind_3d_perpendicular(t->sw[i][j][k], + t->sw[i][jp1][k], + t->sw[ip1][jp1][k], + t->sw[i][jp1][km1]))) { + return true; + } + log_no_perp(t, 0x7f2, i, j, k, i, jp1, k); + return false; +} + +/* + * 3D case 0x7f4: O + * + * b0: t->sw[i ][j ][k ] + * b1: t->sw[i+1][j ][k ] + * b2: + * b3: t->sw[i+1][j+1][k ] O O + * b4: O + * b5: . + * b6: . + * b7: . + * O . + * O O + * . + * . + * . + * . 
+ * @
+ *
+ * When safe_z_perpendicular() passes for t->sw[i+1][j][k], tries to
+ * install t->sw[i+1][j][k+1] with the tfind_3d_perpendicular() result
+ * (which is also handed t->sw[i+1][j][k-1]).  Returns true on success;
+ * otherwise logs via log_no_perp() and returns false.
+ */
+static
+bool handle_case_0x7f4(struct torus *t, int i, int j, int k)
+{
+	const int inext = canonicalize(i + 1, t->x_sz);
+	const int jnext = canonicalize(j + 1, t->y_sz);
+	const int kprev = canonicalize(k - 1, t->z_sz);
+	const int knext = canonicalize(k + 1, t->z_sz);
+	bool placed;
+
+	placed = safe_z_perpendicular(t, inext, j, k) &&
+		install_tswitch(t, inext, j, knext,
+			tfind_3d_perpendicular(t->sw[i][j][k],
+					       t->sw[inext][j][k],
+					       t->sw[inext][jnext][k],
+					       t->sw[inext][j][kprev]));
+	if (!placed)
+		log_no_perp(t, 0x7f4, i, j, k, inext, j, k);
+	return placed;
+}
+
+/*
+ * 3D case 0x7f8.  Corner switches this case works from:
+ *   b0: t->sw[i  ][j  ][k  ]
+ *   b1: t->sw[i+1][j  ][k  ]
+ *   b2: t->sw[i  ][j+1][k  ]
+ *
+ * When safe_z_perpendicular() passes for t->sw[i][j][k], tries to
+ * install t->sw[i][j][k+1] with the tfind_3d_perpendicular() result
+ * (which is also handed t->sw[i][j][k-1]).  Returns true on success;
+ * otherwise logs via log_no_perp() and returns false.
+ */
+static
+bool handle_case_0x7f8(struct torus *t, int i, int j, int k)
+{
+	const int inext = canonicalize(i + 1, t->x_sz);
+	const int jnext = canonicalize(j + 1, t->y_sz);
+	const int kprev = canonicalize(k - 1, t->z_sz);
+	const int knext = canonicalize(k + 1, t->z_sz);
+	bool placed;
+
+	placed = safe_z_perpendicular(t, i, j, k) &&
+		install_tswitch(t, i, j, knext,
+			tfind_3d_perpendicular(t->sw[inext][j][k],
+					       t->sw[i][j][k],
+					       t->sw[i][jnext][k],
+					       t->sw[i][j][kprev]));
+	if (!placed)
+		log_no_perp(t, 0x7f8, i, j, k, i, j, k);
+	return placed;
+}
+
+/*
+ * Handle the cases where three existing edges meet at a corner.
+ */
+
+/*
+ * 3D case 0x717: O
+ * . . .
+ * b0: . . .
+ * b1: . . .
+ * b2: . . .
+ * b3: t->sw[i+1][j+1][k ] O .
O + * b4: O + * b5: t->sw[i+1][j ][k+1] + * b6: t->sw[i ][j+1][k+1] + * b7: t->sw[i+1][j+1][k+1] + * O + * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x717(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, j, kp1, + tfind_face_corner(t->sw[i][jp1][kp1], + t->sw[ip1][jp1][kp1], + t->sw[ip1][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x717, i, j, k, i, j, kp1); + + if (install_tswitch(t, ip1, j, k, + tfind_face_corner(t->sw[ip1][jp1][k], + t->sw[ip1][jp1][kp1], + t->sw[ip1][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x717, i, j, k, ip1, j, k); + + if (install_tswitch(t, i, jp1, k, + tfind_face_corner(t->sw[ip1][jp1][k], + t->sw[ip1][jp1][kp1], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x717, i, j, k, i, jp1, k); + return false; +} + +/* + * 3D case 0x72b: O + * . + * b0: . + * b1: . + * b2: t->sw[i ][j+1][k ] . + * b3: O O + * b4: t->sw[i ][j ][k+1] . . O + * b5: . . + * b6: t->sw[i ][j+1][k+1] . . + * b7: t->sw[i+1][j+1][k+1] . . + * . O + * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x72b(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, j, kp1, + tfind_face_corner(t->sw[i][j][kp1], + t->sw[i][jp1][kp1], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x72b, i, j, k, ip1, j, kp1); + + if (install_tswitch(t, i, j, k, + tfind_face_corner(t->sw[i][jp1][k], + t->sw[i][jp1][kp1], + t->sw[i][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x72b, i, j, k, i, j, k); + + if (install_tswitch(t, ip1, jp1, k, + tfind_face_corner(t->sw[i][jp1][k], + t->sw[i][jp1][kp1], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x72b, i, j, k, ip1, jp1, k); + return false; +} + +/* + * 3D case 0x74d: O + * . 
+ * b0: . + * b1: t->sw[i+1][j ][k ] . + * b2: . + * b3: O O + * b4: t->sw[i ][j ][k+1] O . . + * b5: t->sw[i+1][j ][k+1] . . + * b6: . . + * b7: t->sw[i+1][j+1][k+1] . . + * O . + * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x74d(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, jp1, kp1, + tfind_face_corner(t->sw[i][j][kp1], + t->sw[ip1][j][kp1], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x74d, i, j, k, i, jp1, kp1); + + if (install_tswitch(t, i, j, k, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[ip1][j][kp1], + t->sw[i][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x74d, i, j, k, i, j, k); + + if (install_tswitch(t, ip1, jp1, k, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[ip1][j][kp1], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x74d, i, j, k, ip1, jp1, k); + return false; +} + +/* + * 3D case 0x771: O + * . + * b0: . + * b1: t->sw[i+1][j ][k ] . + * b2: t->sw[i ][j+1][k ] . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: O + * b5: . . + * b6: . . + * b7: t->sw[i+1][j+1][k+1] . . + * . O . 
+ * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x771(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, j, k, + tfind_face_corner(t->sw[i][jp1][k], + t->sw[ip1][jp1][k], + t->sw[ip1][j][k]))) { + return true; + } + log_no_crnr(t, 0x771, i, j, k, i, j, k); + + if (install_tswitch(t, ip1, j, kp1, + tfind_face_corner(t->sw[ip1][jp1][kp1], + t->sw[ip1][jp1][k], + t->sw[ip1][j][k]))) { + return true; + } + log_no_crnr(t, 0x771, i, j, k, ip1, j, kp1); + + if (install_tswitch(t, i, jp1, kp1, + tfind_face_corner(t->sw[ip1][jp1][kp1], + t->sw[ip1][jp1][k], + t->sw[i][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x771, i, j, k, i, jp1, kp1); + return false; +} + +/* + * 3D case 0x78e: O + * + * b0: t->sw[i ][j ][k ] + * b1: + * b2: + * b3: O O + * b4: t->sw[i ][j ][k+1] . O . + * b5: t->sw[i+1][j ][k+1] . . + * b6: t->sw[i ][j+1][k+1] . . + * b7: . . + * O + * O . O + * . + * . + * . + * . + * @ + */ +static +bool handle_case_0x78e(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, jp1, kp1, + tfind_face_corner(t->sw[ip1][j][kp1], + t->sw[i][j][kp1], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x78e, i, j, k, ip1, jp1, kp1); + + if (install_tswitch(t, ip1, j, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][j][kp1], + t->sw[ip1][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x78e, i, j, k, ip1, j, k); + + if (install_tswitch(t, i, jp1, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][j][kp1], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x78e, i, j, k, i, jp1, k); + return false; +} + +/* + * 3D case 0x7b2: O + * + * b0: t->sw[i ][j ][k ] + * b1: + * b2: t->sw[i ][j+1][k ] + * b3: t->sw[i+1][j+1][k ] O O + * b4: . O + * b5: . . 
+ * b6: t->sw[i ][j+1][k+1] . . + * b7: . . + * . . O + * O O + * . + * . + * . + * . + * @ + */ +static +bool handle_case_0x7b2(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, j, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][jp1][k], + t->sw[ip1][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x7b2, i, j, k, ip1, j, k); + + if (install_tswitch(t, ip1, jp1, kp1, + tfind_face_corner(t->sw[i][jp1][kp1], + t->sw[i][jp1][k], + t->sw[ip1][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x7b2, i, j, k, ip1, jp1, kp1); + + if (install_tswitch(t, i, j, kp1, + tfind_face_corner(t->sw[i][jp1][kp1], + t->sw[i][jp1][k], + t->sw[i][j][k]))) { + return true; + } + log_no_crnr(t, 0x7b2, i, j, k, i, j, kp1); + return false; +} + +/* + * 3D case 0x7d4: O + * + * b0: t->sw[i ][j ][k ] + * b1: t->sw[i+1][j ][k ] + * b2: + * b3: t->sw[i+1][j+1][k ] O O + * b4: O . + * b5: t->sw[i+1][j ][k+1] . . + * b6: . . + * b7: . . + * O . . + * O O + * . + * . + * . + * . 
+ * @ + */ +static +bool handle_case_0x7d4(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, jp1, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[ip1][j][k], + t->sw[ip1][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x7d4, i, j, k, i, jp1, k); + + if (install_tswitch(t, i, j, kp1, + tfind_face_corner(t->sw[ip1][j][kp1], + t->sw[ip1][j][k], + t->sw[i][j][k]))) { + return true; + } + log_no_crnr(t, 0x7d4, i, j, k, i, j, kp1); + + if (install_tswitch(t, ip1, jp1, kp1, + tfind_face_corner(t->sw[ip1][j][kp1], + t->sw[ip1][j][k], + t->sw[ip1][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x7d4, i, j, k, ip1, jp1, kp1); + return false; +} + +/* + * 3D case 0x7e8: O + * + * b0: t->sw[i ][j ][k ] + * b1: t->sw[i+1][j ][k ] + * b2: t->sw[i ][j+1][k ] + * b3: O O + * b4: t->sw[i ][j ][k+1] O + * b5: + * b6: + * b7: + * O + * O . O + * . . . + * . . . + * . . . + * . . . + * @ + */ +static +bool handle_case_0x7e8(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, jp1, k, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[i][j][k], + t->sw[i][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x7e8, i, j, k, ip1, jp1, k); + + if (install_tswitch(t, ip1, j, kp1, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[i][j][k], + t->sw[i][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x7e8, i, j, k, ip1, j, kp1); + + if (install_tswitch(t, i, jp1, kp1, + tfind_face_corner(t->sw[i][jp1][k], + t->sw[i][j][k], + t->sw[i][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x7e8, i, j, k, i, jp1, kp1); + return false; +} + +/* + * Handle the cases where four corners on a single face are missing. + */ + +/* + * 3D case 0x70f: O + * . . + * b0: . . + * b1: . . + * b2: . . 
+ * b3: O O + * b4: t->sw[i ][j ][k+1] . O . + * b5: t->sw[i+1][j ][k+1] . . + * b6: t->sw[i ][j+1][k+1] . . + * b7: t->sw[i+1][j+1][k+1] . . + * O + * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x70f(struct torus *t, int i, int j, int k) +{ + if (handle_case_0x71f(t, i, j, k)) + return true; + + if (handle_case_0x72f(t, i, j, k)) + return true; + + if (handle_case_0x74f(t, i, j, k)) + return true; + + return handle_case_0x78f(t, i, j, k); +} + +/* + * 3D case 0x733: O + * . . + * b0: . . + * b1: . . + * b2: t->sw[i ][j+1][k ] . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: . O + * b5: . . + * b6: t->sw[i ][j+1][k+1] . . + * b7: t->sw[i+1][j+1][k+1] . . + * . . O + * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x733(struct torus *t, int i, int j, int k) +{ + if (handle_case_0x737(t, i, j, k)) + return true; + + if (handle_case_0x73b(t, i, j, k)) + return true; + + if (handle_case_0x773(t, i, j, k)) + return true; + + return handle_case_0x7b3(t, i, j, k); +} + +/* + * 3D case 0x755: O + * . . + * b0: . . + * b1: t->sw[i+1][j ][k ] . . + * b2: . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: O . + * b5: t->sw[i+1][j ][k+1] . . + * b6: . . + * b7: t->sw[i+1][j+1][k+1] . . + * O . . + * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x755(struct torus *t, int i, int j, int k) +{ + if (handle_case_0x757(t, i, j, k)) + return true; + + if (handle_case_0x75d(t, i, j, k)) + return true; + + if (handle_case_0x775(t, i, j, k)) + return true; + + return handle_case_0x7d5(t, i, j, k); +} + +/* + * 3D case 0x7aa: O + * + * b0: t->sw[i ][j ][k ] + * b1: + * b2: t->sw[i ][j+1][k ] + * b3: O O + * b4: t->sw[i ][j ][k+1] . . O + * b5: . . + * b6: t->sw[i ][j+1][k+1] . . + * b7: . . + * . O + * O . O + * . . + * . . + * . . + * . . 
+ * @ + */ +static +bool handle_case_0x7aa(struct torus *t, int i, int j, int k) +{ + if (handle_case_0x7ab(t, i, j, k)) + return true; + + if (handle_case_0x7ae(t, i, j, k)) + return true; + + if (handle_case_0x7ba(t, i, j, k)) + return true; + + return handle_case_0x7ea(t, i, j, k); +} + +/* + * 3D case 0x7cc: O + * + * b0: t->sw[i ][j ][k ] + * b1: t->sw[i+1][j ][k ] + * b2: + * b3: O O + * b4: t->sw[i ][j ][k+1] O . . + * b5: t->sw[i+1][j ][k+1] . . + * b6: . . + * b7: . . + * O . + * O . O + * . . + * . . + * . . + * . . + * @ + */ +static +bool handle_case_0x7cc(struct torus *t, int i, int j, int k) +{ + if (handle_case_0x7cd(t, i, j, k)) + return true; + + if (handle_case_0x7ce(t, i, j, k)) + return true; + + if (handle_case_0x7dc(t, i, j, k)) + return true; + + return handle_case_0x7ec(t, i, j, k); +} + +/* + * 3D case 0x7f0: O + * + * b0: t->sw[i ][j ][k ] + * b1: t->sw[i+1][j ][k ] + * b2: t->sw[i ][j+1][k ] + * b3: t->sw[i+1][j+1][k ] O O + * b4: O + * b5: . . + * b6: . . + * b7: . . + * . O . + * O O + * . . + * . . + * . . + * . . + * @ + */ +static +bool handle_case_0x7f0(struct torus *t, int i, int j, int k) +{ + if (handle_case_0x7f1(t, i, j, k)) + return true; + + if (handle_case_0x7f2(t, i, j, k)) + return true; + + if (handle_case_0x7f4(t, i, j, k)) + return true; + + return handle_case_0x7f8(t, i, j, k); +} + +/* + * Handle the cases where three corners on a single face are missing. + */ + + +/* + * 3D case 0x707: O + * . . . + * b0: . . . + * b1: . . . + * b2: . . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: t->sw[i ][j ][k+1] . O . + * b5: t->sw[i+1][j ][k+1] . . + * b6: t->sw[i ][j+1][k+1] . . + * b7: t->sw[i+1][j+1][k+1] . . 
+ * O + * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x707(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, j, k, + tfind_face_corner(t->sw[ip1][jp1][k], + t->sw[ip1][jp1][kp1], + t->sw[ip1][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x707, i, j, k, ip1, j, k); + + if (install_tswitch(t, i, jp1, k, + tfind_face_corner(t->sw[ip1][jp1][k], + t->sw[ip1][jp1][kp1], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x707, i, j, k, i, jp1, k); + return false; +} + +/* + * 3D case 0x70b: O + * . . + * b0: . . + * b1: . . + * b2: t->sw[i ][j+1][k ] . . + * b3: O O + * b4: t->sw[i ][j ][k+1] . . O . + * b5: t->sw[i+1][j ][k+1] . . . + * b6: t->sw[i ][j+1][k+1] . . . + * b7: t->sw[i+1][j+1][k+1] . . . + * . O + * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x70b(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, j, k, + tfind_face_corner(t->sw[i][jp1][k], + t->sw[i][jp1][kp1], + t->sw[i][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x70b, i, j, k, i, j, k); + + if (install_tswitch(t, ip1, jp1, k, + tfind_face_corner(t->sw[i][jp1][k], + t->sw[i][jp1][kp1], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x70b, i, j, k, ip1, jp1, k); + return false; +} + +/* + * 3D case 0x70d: O + * . . + * b0: . . + * b1: t->sw[i+1][j ][k ] . . + * b2: . . + * b3: O O + * b4: t->sw[i ][j ][k+1] . O . . + * b5: t->sw[i+1][j ][k+1] . . . + * b6: t->sw[i ][j+1][k+1] . . . + * b7: t->sw[i+1][j+1][k+1] . . . + * O . 
+ * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x70d(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, j, k, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[ip1][j][kp1], + t->sw[i][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x70d, i, j, k, i, j, k); + + if (install_tswitch(t, ip1, jp1, k, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[ip1][j][kp1], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x70d, i, j, k, ip1, jp1, k); + return false; +} + +/* + * 3D case 0x70e: O + * . . + * b0: t->sw[i ][j ][k ] . . + * b1: . . + * b2: . . + * b3: O O + * b4: t->sw[i ][j ][k+1] . O . + * b5: t->sw[i+1][j ][k+1] . . + * b6: t->sw[i ][j+1][k+1] . . + * b7: t->sw[i+1][j+1][k+1] . . + * O + * O . O + * . + * . + * . + * . + * @ + */ +static +bool handle_case_0x70e(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, j, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][j][kp1], + t->sw[ip1][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x70e, i, j, k, ip1, j, k); + + if (install_tswitch(t, i, jp1, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][j][kp1], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x70e, i, j, k, i, jp1, k); + return false; +} + +/* + * 3D case 0x713: O + * . . . + * b0: . . . + * b1: . . . + * b2: t->sw[i ][j+1][k ] . . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: . O + * b5: t->sw[i+1][j ][k+1] . . + * b6: t->sw[i ][j+1][k+1] . . + * b7: t->sw[i+1][j+1][k+1] . . + * . . 
O + * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x713(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, j, k, + tfind_face_corner(t->sw[ip1][jp1][k], + t->sw[ip1][jp1][kp1], + t->sw[ip1][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x713, i, j, k, ip1, j, k); + + if (install_tswitch(t, i, j, kp1, + tfind_face_corner(t->sw[ip1][j][kp1], + t->sw[ip1][jp1][kp1], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x713, i, j, k, i, j, kp1); + return false; +} + +/* + * 3D case 0x715: O + * . . . + * b0: . . . + * b1: t->sw[i+1][j ][k ] . . . + * b2: . . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: O . + * b5: t->sw[i+1][j ][k+1] . . + * b6: t->sw[i ][j+1][k+1] . . + * b7: t->sw[i+1][j+1][k+1] . . + * O . . + * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x715(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, jp1, k, + tfind_face_corner(t->sw[ip1][jp1][k], + t->sw[ip1][jp1][kp1], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x715, i, j, k, i, jp1, k); + + if (install_tswitch(t, i, j, kp1, + tfind_face_corner(t->sw[ip1][j][kp1], + t->sw[ip1][jp1][kp1], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x715, i, j, k, i, j, kp1); + return false; +} + +/* + * 3D case 0x723: O + * . . + * b0: . . + * b1: . . + * b2: t->sw[i ][j+1][k ] . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: t->sw[i ][j ][k+1] . . O + * b5: . . . + * b6: t->sw[i ][j+1][k+1] . . + * b7: t->sw[i+1][j+1][k+1] . . . + * . . 
O + * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x723(struct torus *t, int i, int j, int k) +{ /* Attempt corner (i, j, k), then corner (ip1, j, kp1): each attempt gives install_tswitch() the tfind_face_corner() result for three corners of one cube face. Returns true once install_tswitch() returns non-zero; otherwise each failure is logged via log_no_crnr() and false is returned. */ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, j, k, + tfind_face_corner(t->sw[i][jp1][k], + t->sw[i][jp1][kp1], + t->sw[i][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x723, i, j, k, i, j, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, ip1, j, kp1, + tfind_face_corner(t->sw[i][j][kp1], + t->sw[i][jp1][kp1], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x723, i, j, k, ip1, j, kp1); + return false; +} + +/* + * 3D case 0x72a: O + * . + * b0: t->sw[i ][j ][k ] . + * b1: . + * b2: t->sw[i ][j+1][k ] . + * b3: O O + * b4: t->sw[i ][j ][k+1] . . O + * b5: . . + * b6: t->sw[i ][j+1][k+1] . . + * b7: t->sw[i+1][j+1][k+1] . . + * . O + * O . O + * . . + * . . + * . . + * . . + * @ + * + * Attempt corner (ip1, jp1, k), then corner (ip1, j, kp1): each attempt gives + * install_tswitch() the tfind_face_corner() result for three corners of + * one cube face. Returns true once install_tswitch() returns non-zero; + * otherwise each failure is logged via log_no_crnr() and false is returned. + */ +static +bool handle_case_0x72a(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, jp1, k, + tfind_face_corner(t->sw[i][jp1][k], + t->sw[i][jp1][kp1], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x72a, i, j, k, ip1, jp1, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, ip1, j, kp1, + tfind_face_corner(t->sw[i][j][kp1], + t->sw[i][jp1][kp1], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x72a, i, j, k, ip1, j, kp1); + return false; +} + +/* + * 3D case 0x731: O + * . . + * b0: . . + * b1: t->sw[i+1][j ][k ] . . + * b2: t->sw[i ][j+1][k ] . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: . O + * b5: . . . + * b6: t->sw[i ][j+1][k+1] . . . + * b7: t->sw[i+1][j+1][k+1] . . . + * . . O . 
+ * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x731(struct torus *t, int i, int j, int k) +{ /* Attempt corner (i, j, k), then corner (ip1, j, kp1): each attempt gives install_tswitch() the tfind_face_corner() result for three corners of one cube face. Returns true once install_tswitch() returns non-zero; otherwise each failure is logged via log_no_crnr() and false is returned. */ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, j, k, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[ip1][jp1][k], + t->sw[i][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x731, i, j, k, i, j, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, ip1, j, kp1, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[ip1][jp1][k], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x731, i, j, k, ip1, j, kp1); + return false; +} + +/* + * 3D case 0x732: O + * . . + * b0: t->sw[i ][j ][k ] . . + * b1: . . + * b2: t->sw[i ][j+1][k ] . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: . O + * b5: . . + * b6: t->sw[i ][j+1][k+1] . . + * b7: t->sw[i+1][j+1][k+1] . . + * . . O + * O O + * . + * . + * . + * . + * @ + * + * Attempt corner (ip1, j, k), then corner (i, j, kp1): each attempt gives + * install_tswitch() the tfind_face_corner() result for three corners of + * one cube face. Returns true once install_tswitch() returns non-zero; + * otherwise each failure is logged via log_no_crnr() and false is returned. + */ +static +bool handle_case_0x732(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, j, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][jp1][k], + t->sw[ip1][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x732, i, j, k, ip1, j, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, i, j, kp1, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][jp1][k], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x732, i, j, k, i, j, kp1); + return false; +} + +/* + * 3D case 0x745: O + * . . + * b0: . . + * b1: t->sw[i+1][j ][k ] . . + * b2: . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: t->sw[i ][j ][k+1] O . . + * b5: t->sw[i+1][j ][k+1] . . . + * b6: . . + * b7: t->sw[i+1][j+1][k+1] . . . + * O . . 
+ * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x745(struct torus *t, int i, int j, int k) +{ /* Attempt corner (i, j, k), then corner (i, jp1, kp1): each attempt gives install_tswitch() the tfind_face_corner() result for three corners of one cube face. Returns true once install_tswitch() returns non-zero; otherwise each failure is logged via log_no_crnr() and false is returned. */ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, j, k, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[ip1][j][kp1], + t->sw[i][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x745, i, j, k, i, j, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, i, jp1, kp1, + tfind_face_corner(t->sw[i][j][kp1], + t->sw[ip1][j][kp1], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x745, i, j, k, i, jp1, kp1); + return false; +} + +/* + * 3D case 0x74c: O + * . + * b0: t->sw[i ][j ][k ] . + * b1: t->sw[i+1][j ][k ] . + * b2: . + * b3: O O + * b4: t->sw[i ][j ][k+1] O . . + * b5: t->sw[i+1][j ][k+1] . . + * b6: . . + * b7: t->sw[i+1][j+1][k+1] . . + * O . + * O . O + * . . + * . . + * . . + * . . + * @ + * + * Attempt corner (ip1, jp1, k), then corner (i, jp1, kp1): each attempt gives + * install_tswitch() the tfind_face_corner() result for three corners of + * one cube face. Returns true once install_tswitch() returns non-zero; + * otherwise each failure is logged via log_no_crnr() and false is returned. + */ +static +bool handle_case_0x74c(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, jp1, k, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[ip1][j][kp1], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x74c, i, j, k, ip1, jp1, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, i, jp1, kp1, + tfind_face_corner(t->sw[i][j][kp1], + t->sw[ip1][j][kp1], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x74c, i, j, k, i, jp1, kp1); + return false; +} + +/* + * 3D case 0x751: O + * . . + * b0: . . + * b1: t->sw[i+1][j ][k ] . . + * b2: t->sw[i ][j+1][k ] . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: O . + * b5: t->sw[i+1][j ][k+1] . . . + * b6: . . . + * b7: t->sw[i+1][j+1][k+1] . . . + * . O . . 
+ * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x751(struct torus *t, int i, int j, int k) +{ /* Attempt corner (i, j, k), then corner (i, jp1, kp1): each attempt gives install_tswitch() the tfind_face_corner() result for three corners of one cube face. Returns true once install_tswitch() returns non-zero; otherwise each failure is logged via log_no_crnr() and false is returned. */ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, j, k, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[ip1][jp1][k], + t->sw[i][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x751, i, j, k, i, j, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, i, jp1, kp1, + tfind_face_corner(t->sw[i][jp1][k], + t->sw[ip1][jp1][k], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x751, i, j, k, i, jp1, kp1); + return false; +} + +/* + * 3D case 0x754: O + * . . + * b0: t->sw[i ][j ][k ] . . + * b1: t->sw[i+1][j ][k ] . . + * b2: . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: O . + * b5: t->sw[i+1][j ][k+1] . . + * b6: . . + * b7: t->sw[i+1][j+1][k+1] . . + * O . . + * O O + * . + * . + * . + * . + * @ + * + * Attempt corner (i, jp1, k), then corner (i, j, kp1): each attempt gives + * install_tswitch() the tfind_face_corner() result for three corners of + * one cube face. Returns true once install_tswitch() returns non-zero; + * otherwise each failure is logged via log_no_crnr() and false is returned. + */ +static +bool handle_case_0x754(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, jp1, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[ip1][j][k], + t->sw[ip1][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x754, i, j, k, i, jp1, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, i, j, kp1, + tfind_face_corner(t->sw[i][j][k], + t->sw[ip1][j][k], + t->sw[ip1][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x754, i, j, k, i, j, kp1); + return false; +} + +/* + * 3D case 0x770: O + * . + * b0: t->sw[i ][j ][k ] . + * b1: t->sw[i+1][j ][k ] . + * b2: t->sw[i ][j+1][k ] . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: O + * b5: . . + * b6: . . + * b7: t->sw[i+1][j+1][k+1] . . + * . O . + * O O + * . . + * . . + * . . + * . . 
+ * @ + */ +static +bool handle_case_0x770(struct torus *t, int i, int j, int k) +{ /* Attempt corner (ip1, j, kp1), then corner (i, jp1, kp1): each attempt gives install_tswitch() the tfind_face_corner() result for three corners of one cube face. Returns true once install_tswitch() returns non-zero; otherwise each failure is logged via log_no_crnr() and false is returned. */ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, j, kp1, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[ip1][jp1][k], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x770, i, j, k, ip1, j, kp1); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, i, jp1, kp1, + tfind_face_corner(t->sw[i][jp1][k], + t->sw[ip1][jp1][k], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x770, i, j, k, i, jp1, kp1); + return false; +} + +/* + * 3D case 0x78a: O + * + * b0: t->sw[i ][j ][k ] + * b1: + * b2: t->sw[i ][j+1][k ] + * b3: O O + * b4: t->sw[i ][j ][k+1] . . O . + * b5: t->sw[i+1][j ][k+1] . . . + * b6: t->sw[i ][j+1][k+1] . . . + * b7: . . . + * . O + * O . O + * . . + * . . + * . . + * . . + * @ + * + * Attempt corner (ip1, j, k), then corner (ip1, jp1, kp1): each attempt gives + * install_tswitch() the tfind_face_corner() result for three corners of + * one cube face. Returns true once install_tswitch() returns non-zero; + * otherwise each failure is logged via log_no_crnr() and false is returned. + */ +static +bool handle_case_0x78a(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, j, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][j][kp1], + t->sw[ip1][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x78a, i, j, k, ip1, j, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, ip1, jp1, kp1, + tfind_face_corner(t->sw[ip1][j][kp1], + t->sw[i][j][kp1], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x78a, i, j, k, ip1, jp1, kp1); + return false; +} + +/* + * 3D case 0x78c: O + * + * b0: t->sw[i ][j ][k ] + * b1: t->sw[i+1][j ][k ] + * b2: + * b3: O O + * b4: t->sw[i ][j ][k+1] . O . . + * b5: t->sw[i+1][j ][k+1] . . . + * b6: t->sw[i ][j+1][k+1] . . . + * b7: . . . + * O . + * O . O + * . . + * . . + * . . + * . . 
+ * @ + */ +static +bool handle_case_0x78c(struct torus *t, int i, int j, int k) +{ /* Attempt corner (i, jp1, k), then corner (ip1, jp1, kp1): each attempt gives install_tswitch() the tfind_face_corner() result for three corners of one cube face. Returns true once install_tswitch() returns non-zero; otherwise each failure is logged via log_no_crnr() and false is returned. */ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, jp1, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][j][kp1], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x78c, i, j, k, i, jp1, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, ip1, jp1, kp1, + tfind_face_corner(t->sw[ip1][j][kp1], + t->sw[i][j][kp1], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x78c, i, j, k, ip1, jp1, kp1); + return false; +} + +/* + * 3D case 0x7a2: O + * + * b0: t->sw[i ][j ][k ] + * b1: + * b2: t->sw[i ][j+1][k ] + * b3: t->sw[i+1][j+1][k ] O O + * b4: t->sw[i ][j ][k+1] . . O + * b5: . . . + * b6: t->sw[i ][j+1][k+1] . . + * b7: . . . + * . . O + * O . O + * . . + * . . + * . . + * . . + * @ + * + * Attempt corner (ip1, j, k), then corner (ip1, jp1, kp1): each attempt gives + * install_tswitch() the tfind_face_corner() result for three corners of + * one cube face. Returns true once install_tswitch() returns non-zero; + * otherwise each failure is logged via log_no_crnr() and false is returned. + */ +static +bool handle_case_0x7a2(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, j, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][jp1][k], + t->sw[ip1][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x7a2, i, j, k, ip1, j, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, ip1, jp1, kp1, + tfind_face_corner(t->sw[i][jp1][kp1], + t->sw[i][jp1][k], + t->sw[ip1][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x7a2, i, j, k, ip1, jp1, kp1); + return false; +} + +/* + * 3D case 0x7a8: O + * + * b0: t->sw[i ][j ][k ] + * b1: t->sw[i+1][j ][k ] + * b2: t->sw[i ][j+1][k ] + * b3: O O + * b4: t->sw[i ][j ][k+1] . . O + * b5: . . + * b6: t->sw[i ][j+1][k+1] . . + * b7: . . + * . O + * O . O + * . . . + * . . . + * . . . + * . . . 
+ * @ + */ +static +bool handle_case_0x7a8(struct torus *t, int i, int j, int k) +{ /* Attempt corner (ip1, jp1, k), then corner (ip1, j, kp1): each attempt gives install_tswitch() the tfind_face_corner() result for three corners of one cube face. Returns true once install_tswitch() returns non-zero; otherwise each failure is logged via log_no_crnr() and false is returned. */ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, jp1, k, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[i][j][k], + t->sw[i][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x7a8, i, j, k, ip1, jp1, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, ip1, j, kp1, + tfind_face_corner(t->sw[i][j][kp1], + t->sw[i][j][k], + t->sw[ip1][j][k]))) { + return true; + } + log_no_crnr(t, 0x7a8, i, j, k, ip1, j, kp1); + return false; +} + +/* + * 3D case 0x7b0: O + * + * b0: t->sw[i ][j ][k ] + * b1: t->sw[i+1][j ][k ] + * b2: t->sw[i ][j+1][k ] + * b3: t->sw[i+1][j+1][k ] O O + * b4: . O + * b5: . . . + * b6: t->sw[i ][j+1][k+1] . . . + * b7: . . . + * . . O . + * O O + * . . + * . . + * . . + * . . + * @ + * + * Attempt corner (i, j, kp1), then corner (ip1, jp1, kp1): each attempt gives + * install_tswitch() the tfind_face_corner() result for three corners of + * one cube face. Returns true once install_tswitch() returns non-zero; + * otherwise each failure is logged via log_no_crnr() and false is returned. + */ +static +bool handle_case_0x7b0(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, j, kp1, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][jp1][k], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x7b0, i, j, k, i, j, kp1); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, ip1, jp1, kp1, + tfind_face_corner(t->sw[i][jp1][kp1], + t->sw[i][jp1][k], + t->sw[ip1][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x7b0, i, j, k, ip1, jp1, kp1); + return false; +} + +/* + * 3D case 0x7c4: O + * + * b0: t->sw[i ][j ][k ] + * b1: t->sw[i+1][j ][k ] + * b2: + * b3: t->sw[i+1][j+1][k ] O O + * b4: t->sw[i ][j ][k+1] O . . + * b5: t->sw[i+1][j ][k+1] . . . + * b6: . . + * b7: . . . + * O . . + * O . O + * . . + * . . + * . . + * . . 
+ * @ + */ +static +bool handle_case_0x7c4(struct torus *t, int i, int j, int k) +{ /* Attempt corner (i, jp1, k), then corner (ip1, jp1, kp1): each attempt gives install_tswitch() the tfind_face_corner() result for three corners of one cube face. Returns true once install_tswitch() returns non-zero; otherwise each failure is logged via log_no_crnr() and false is returned. */ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, jp1, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[ip1][j][k], + t->sw[ip1][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x7c4, i, j, k, i, jp1, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, ip1, jp1, kp1, + tfind_face_corner(t->sw[ip1][j][kp1], + t->sw[ip1][j][k], + t->sw[ip1][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x7c4, i, j, k, ip1, jp1, kp1); + return false; +} + +/* + * 3D case 0x7c8: O + * + * b0: t->sw[i ][j ][k ] + * b1: t->sw[i+1][j ][k ] + * b2: t->sw[i ][j+1][k ] + * b3: O O + * b4: t->sw[i ][j ][k+1] O . . + * b5: t->sw[i+1][j ][k+1] . . + * b6: . . + * b7: . . + * O . + * O . O + * . . . + * . . . + * . . . + * . . . + * @ + * + * Attempt corner (ip1, jp1, k), then corner (i, jp1, kp1): each attempt gives + * install_tswitch() the tfind_face_corner() result for three corners of + * one cube face. Returns true once install_tswitch() returns non-zero; + * otherwise each failure is logged via log_no_crnr() and false is returned. + */ +static +bool handle_case_0x7c8(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, jp1, k, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[i][j][k], + t->sw[i][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x7c8, i, j, k, ip1, jp1, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, i, jp1, kp1, + tfind_face_corner(t->sw[i][j][kp1], + t->sw[i][j][k], + t->sw[i][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x7c8, i, j, k, i, jp1, kp1); + return false; +} + +/* + * 3D case 0x7d0: O + * + * b0: t->sw[i ][j ][k ] + * b1: t->sw[i+1][j ][k ] + * b2: t->sw[i ][j+1][k ] + * b3: t->sw[i+1][j+1][k ] O O + * b4: O . + * b5: t->sw[i+1][j ][k+1] . . . + * b6: . . . + * b7: . . . + * . O . . + * O O + * . . + * . . + * . . + * . . 
+ * @ + */ +static +bool handle_case_0x7d0(struct torus *t, int i, int j, int k) +{ /* Attempt corner (i, j, kp1), then corner (ip1, jp1, kp1): each attempt gives install_tswitch() the tfind_face_corner() result for three corners of one cube face. Returns true once install_tswitch() returns non-zero; otherwise each failure is logged via log_no_crnr() and false is returned. */ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, j, kp1, + tfind_face_corner(t->sw[i][j][k], + t->sw[ip1][j][k], + t->sw[ip1][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x7d0, i, j, k, i, j, kp1); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, ip1, jp1, kp1, + tfind_face_corner(t->sw[ip1][j][kp1], + t->sw[ip1][j][k], + t->sw[ip1][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x7d0, i, j, k, ip1, jp1, kp1); + return false; +} + +/* + * 3D case 0x7e0: O + * + * b0: t->sw[i ][j ][k ] + * b1: t->sw[i+1][j ][k ] + * b2: t->sw[i ][j+1][k ] + * b3: t->sw[i+1][j+1][k ] O O + * b4: t->sw[i ][j ][k+1] O + * b5: . . + * b6: . . + * b7: . . + * . O . + * O . O + * . . . + * . . . + * . . . + * . . . + * @ + * + * Attempt corner (ip1, j, kp1), then corner (i, jp1, kp1): each attempt gives + * install_tswitch() the tfind_face_corner() result for three corners of + * one cube face. Returns true once install_tswitch() returns non-zero; + * otherwise each failure is logged via log_no_crnr() and false is returned. + */ +static +bool handle_case_0x7e0(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, j, kp1, + tfind_face_corner(t->sw[i][j][kp1], + t->sw[i][j][k], + t->sw[ip1][j][k]))) { + return true; + } + log_no_crnr(t, 0x7e0, i, j, k, ip1, j, kp1); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, i, jp1, kp1, + tfind_face_corner(t->sw[i][j][kp1], + t->sw[i][j][k], + t->sw[i][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x7e0, i, j, k, i, jp1, kp1); + return false; +} + +/* + * Handle the cases where two corners on a single edge are missing. + */ + +/* + * 3D case 0x703: O + * . . . + * b0: . . . + * b1: . . . + * b2: t->sw[i ][j+1][k ] . . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: t->sw[i ][j ][k+1] . . O . + * b5: t->sw[i+1][j ][k+1] . . . . + * b6: t->sw[i ][j+1][k+1] . . . + * b7: t->sw[i+1][j+1][k+1] . . . . + * . . 
O + * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x703(struct torus *t, int i, int j, int k) +{ /* Attempt corner (i, j, k), then corner (ip1, j, k): each attempt gives install_tswitch() the tfind_face_corner() result for three corners of one cube face. Returns true once install_tswitch() returns non-zero; otherwise each failure is logged via log_no_crnr() and false is returned. */ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, j, k, + tfind_face_corner(t->sw[i][jp1][k], + t->sw[i][jp1][kp1], + t->sw[i][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x703, i, j, k, i, j, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, ip1, j, k, + tfind_face_corner(t->sw[ip1][jp1][k], + t->sw[ip1][jp1][kp1], + t->sw[ip1][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x703, i, j, k, ip1, j, k); + return false; +} + +/* + * 3D case 0x705: O + * . . . + * b0: . . . + * b1: t->sw[i+1][j ][k ] . . . + * b2: . . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: t->sw[i ][j ][k+1] . O . . + * b5: t->sw[i+1][j ][k+1] . . . . + * b6: t->sw[i ][j+1][k+1] . . . + * b7: t->sw[i+1][j+1][k+1] . . . . + * O . . + * O O + * + * + * + * + * @ + * + * Attempt corner (i, j, k), then corner (i, jp1, k): each attempt gives + * install_tswitch() the tfind_face_corner() result for three corners of + * one cube face. Returns true once install_tswitch() returns non-zero; + * otherwise each failure is logged via log_no_crnr() and false is returned. + */ +static +bool handle_case_0x705(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, j, k, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[ip1][j][kp1], + t->sw[i][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x705, i, j, k, i, j, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, i, jp1, k, + tfind_face_corner(t->sw[ip1][jp1][k], + t->sw[ip1][jp1][kp1], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x705, i, j, k, i, jp1, k); + return false; +} + +/* + * 3D case 0x70a: O + * . . . + * b0: t->sw[i ][j ][k ] . . + * b1: . . + * b2: t->sw[i ][j+1][k ] . . + * b3: O O + * b4: t->sw[i ][j ][k+1] . . O . + * b5: t->sw[i+1][j ][k+1] . . . + * b6: t->sw[i ][j+1][k+1] . . . + * b7: t->sw[i+1][j+1][k+1] . . . + * . O + * O . O + * . . + * . . + * . . + * . . 
+ * @ + */ +static +bool handle_case_0x70a(struct torus *t, int i, int j, int k) +{ /* Attempt corner (ip1, j, k), then corner (ip1, jp1, k): each attempt gives install_tswitch() the tfind_face_corner() result for three corners of one cube face. Returns true once install_tswitch() returns non-zero; otherwise each failure is logged via log_no_crnr() and false is returned. */ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, j, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][j][kp1], + t->sw[ip1][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x70a, i, j, k, ip1, j, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, ip1, jp1, k, + tfind_face_corner(t->sw[i][jp1][k], + t->sw[i][jp1][kp1], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x70a, i, j, k, ip1, jp1, k); + return false; +} + +/* + * 3D case 0x70c: O + * . . + * b0: t->sw[i ][j ][k ] . . + * b1: t->sw[i+1][j ][k ] . . + * b2: . . + * b3: O O + * b4: t->sw[i ][j ][k+1] . O . . + * b5: t->sw[i+1][j ][k+1] . . . + * b6: t->sw[i ][j+1][k+1] . . . + * b7: t->sw[i+1][j+1][k+1] . . . + * O . + * O . O + * . . + * . . + * . . + * . . + * @ + * + * Attempt corner (i, jp1, k), then corner (ip1, jp1, k): each attempt gives + * install_tswitch() the tfind_face_corner() result for three corners of + * one cube face. Returns true once install_tswitch() returns non-zero; + * otherwise each failure is logged via log_no_crnr() and false is returned. + */ +static +bool handle_case_0x70c(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, jp1, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][j][kp1], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x70c, i, j, k, i, jp1, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, ip1, jp1, k, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[ip1][j][kp1], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x70c, i, j, k, ip1, jp1, k); + return false; +} + +/* + * 3D case 0x711: O + * . . . + * b0: . . . + * b1: t->sw[i+1][j ][k ] . . . + * b2: t->sw[i ][j+1][k ] . . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: . O . + * b5: t->sw[i+1][j ][k+1] . . . . + * b6: t->sw[i ][j+1][k+1] . . . . + * b7: t->sw[i+1][j+1][k+1] . . . . + * . . O . . 
+ * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x711(struct torus *t, int i, int j, int k) +{ /* Attempt corner (i, j, k), then corner (i, j, kp1): each attempt gives install_tswitch() the tfind_face_corner() result for three corners of one cube face. Returns true once install_tswitch() returns non-zero; otherwise each failure is logged via log_no_crnr() and false is returned. */ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, j, k, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[ip1][jp1][k], + t->sw[i][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x711, i, j, k, i, j, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, i, j, kp1, + tfind_face_corner(t->sw[ip1][j][kp1], + t->sw[ip1][jp1][kp1], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x711, i, j, k, i, j, kp1); + return false; +} + +/* + * 3D case 0x722: O + * . . + * b0: t->sw[i ][j ][k ] . . + * b1: . . + * b2: t->sw[i ][j+1][k ] . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: t->sw[i ][j ][k+1] . . O + * b5: . . . + * b6: t->sw[i ][j+1][k+1] . . + * b7: t->sw[i+1][j+1][k+1] . . . + * . . O + * O . O + * . . + * . . + * . . + * . . + * @ + * + * Attempt corner (ip1, j, k), then corner (ip1, j, kp1): each attempt gives + * install_tswitch() the tfind_face_corner() result for three corners of + * one cube face. Returns true once install_tswitch() returns non-zero; + * otherwise each failure is logged via log_no_crnr() and false is returned. + */ +static +bool handle_case_0x722(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, j, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][jp1][k], + t->sw[ip1][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x722, i, j, k, ip1, j, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, ip1, j, kp1, + tfind_face_corner(t->sw[i][j][kp1], + t->sw[i][jp1][kp1], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x722, i, j, k, ip1, j, kp1); + return false; +} + +/* + * 3D case 0x730: O + * . . + * b0: t->sw[i ][j ][k ] . . + * b1: t->sw[i+1][j ][k ] . . + * b2: t->sw[i ][j+1][k ] . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: . O + * b5: . . . + * b6: t->sw[i ][j+1][k+1] . . . + * b7: t->sw[i+1][j+1][k+1] . . . + * . . O . + * O O + * . . + * . . + * . . + * . . 
+ * @ + */ +static +bool handle_case_0x730(struct torus *t, int i, int j, int k) +{ /* Attempt corner (i, j, kp1), then corner (ip1, j, kp1): each attempt gives install_tswitch() the tfind_face_corner() result for three corners of one cube face. Returns true once install_tswitch() returns non-zero; otherwise each failure is logged via log_no_crnr() and false is returned. */ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, j, kp1, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][jp1][k], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x730, i, j, k, i, j, kp1); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, ip1, j, kp1, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[ip1][jp1][k], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x730, i, j, k, ip1, j, kp1); + return false; +} + +/* + * 3D case 0x744: O + * . . + * b0: t->sw[i ][j ][k ] . . + * b1: t->sw[i+1][j ][k ] . . + * b2: . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: t->sw[i ][j ][k+1] O . . + * b5: t->sw[i+1][j ][k+1] . . . + * b6: . . + * b7: t->sw[i+1][j+1][k+1] . . . + * O . . + * O . O + * . . + * . . + * . . + * . . + * @ + * + * Attempt corner (i, jp1, k), then corner (i, jp1, kp1): each attempt gives + * install_tswitch() the tfind_face_corner() result for three corners of + * one cube face. Returns true once install_tswitch() returns non-zero; + * otherwise each failure is logged via log_no_crnr() and false is returned. + */ +static +bool handle_case_0x744(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, jp1, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[ip1][j][k], + t->sw[ip1][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x744, i, j, k, i, jp1, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, i, jp1, kp1, + tfind_face_corner(t->sw[i][j][kp1], + t->sw[ip1][j][kp1], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x744, i, j, k, i, jp1, kp1); + return false; +} + +/* + * 3D case 0x750: O + * . . + * b0: t->sw[i ][j ][k ] . . + * b1: t->sw[i+1][j ][k ] . . + * b2: t->sw[i ][j+1][k ] . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: O . + * b5: t->sw[i+1][j ][k+1] . . . + * b6: . . . + * b7: t->sw[i+1][j+1][k+1] . . . + * . O . . + * O O + * . . + * . . + * . . + * . . 
+ * @ + */ +static +bool handle_case_0x750(struct torus *t, int i, int j, int k) +{ /* Attempt corner (i, j, kp1), then corner (i, jp1, kp1): each attempt gives install_tswitch() the tfind_face_corner() result for three corners of one cube face. Returns true once install_tswitch() returns non-zero; otherwise each failure is logged via log_no_crnr() and false is returned. */ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, j, kp1, + tfind_face_corner(t->sw[i][j][k], + t->sw[ip1][j][k], + t->sw[ip1][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x750, i, j, k, i, j, kp1); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, i, jp1, kp1, + tfind_face_corner(t->sw[i][jp1][k], + t->sw[ip1][jp1][k], + t->sw[ip1][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x750, i, j, k, i, jp1, kp1); + return false; +} + +/* + * 3D case 0x788: O + * + * b0: t->sw[i ][j ][k ] + * b1: t->sw[i+1][j ][k ] + * b2: t->sw[i ][j+1][k ] + * b3: O O + * b4: t->sw[i ][j ][k+1] . . O . . + * b5: t->sw[i+1][j ][k+1] . . . . + * b6: t->sw[i ][j+1][k+1] . . . . + * b7: . . . . + * . O . + * O . O + * . . . + * . . . + * . . . + * . . . + * @ + * + * Attempt corner (ip1, jp1, k), then corner (ip1, jp1, kp1): each attempt gives + * install_tswitch() the tfind_face_corner() result for three corners of + * one cube face. Returns true once install_tswitch() returns non-zero; + * otherwise each failure is logged via log_no_crnr() and false is returned. + */ +static +bool handle_case_0x788(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, jp1, k, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[i][j][k], + t->sw[i][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x788, i, j, k, ip1, jp1, k); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, ip1, jp1, kp1, + tfind_face_corner(t->sw[ip1][j][kp1], + t->sw[i][j][kp1], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x788, i, j, k, ip1, jp1, kp1); + return false; +} + +/* + * 3D case 0x7a0: O + * + * b0: t->sw[i ][j ][k ] + * b1: t->sw[i+1][j ][k ] + * b2: t->sw[i ][j+1][k ] + * b3: t->sw[i+1][j+1][k ] O O + * b4: t->sw[i ][j ][k+1] . . O + * b5: . . . . + * b6: t->sw[i ][j+1][k+1] . . . + * b7: . . . . + * . . O . + * O . O + * . . . + * . . . + * . . . + * . . . 
+ * @ + */ +static +bool handle_case_0x7a0(struct torus *t, int i, int j, int k) +{ /* Attempt corner (ip1, j, kp1), then corner (ip1, jp1, kp1): each attempt gives install_tswitch() the tfind_face_corner() result for three corners of one cube face. Returns true once install_tswitch() returns non-zero; otherwise each failure is logged via log_no_crnr() and false is returned. */ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, j, kp1, + tfind_face_corner(t->sw[i][j][kp1], + t->sw[i][j][k], + t->sw[ip1][j][k]))) { + return true; + } + log_no_crnr(t, 0x7a0, i, j, k, ip1, j, kp1); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, ip1, jp1, kp1, + tfind_face_corner(t->sw[i][jp1][kp1], + t->sw[i][jp1][k], + t->sw[ip1][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x7a0, i, j, k, ip1, jp1, kp1); + return false; +} + +/* + * 3D case 0x7c0: O + * + * b0: t->sw[i ][j ][k ] + * b1: t->sw[i+1][j ][k ] + * b2: t->sw[i ][j+1][k ] + * b3: t->sw[i+1][j+1][k ] O O + * b4: t->sw[i ][j ][k+1] O . . + * b5: t->sw[i+1][j ][k+1] . . . . + * b6: . . . + * b7: . . . . + * . O . . + * O . O + * . . . + * . . . + * . . . + * . . . + * @ + * + * Attempt corner (i, jp1, kp1), then corner (ip1, jp1, kp1): each attempt gives + * install_tswitch() the tfind_face_corner() result for three corners of + * one cube face. Returns true once install_tswitch() returns non-zero; + * otherwise each failure is logged via log_no_crnr() and false is returned. + */ +static +bool handle_case_0x7c0(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, jp1, kp1, + tfind_face_corner(t->sw[i][j][kp1], + t->sw[i][j][k], + t->sw[i][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x7c0, i, j, k, i, jp1, kp1); /* first attempt failed; fall through to the second corner */ + + if (install_tswitch(t, ip1, jp1, kp1, + tfind_face_corner(t->sw[ip1][j][kp1], + t->sw[ip1][j][k], + t->sw[ip1][jp1][k]))) { + return true; + } + log_no_crnr(t, 0x7c0, i, j, k, ip1, jp1, kp1); + return false; +} + +/* + * Handle the cases where a single corner is missing. + */ + +/* + * 3D case 0x701: O + * . . . + * b0: . . . + * b1: t->sw[i+1][j ][k ] . . . + * b2: t->sw[i ][j+1][k ] . . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: t->sw[i ][j ][k+1] . . O . . + * b5: t->sw[i+1][j ][k+1] . . . . . . + * b6: t->sw[i ][j+1][k+1] . . . . + * b7: t->sw[i+1][j+1][k+1] . . . . . . + * . . O . . 
+ * O O + * + * + * + * + * @ + */ +static +bool handle_case_0x701(struct torus *t, int i, int j, int k) +{ /* Attempt corner (i, j, k): install_tswitch() is given the tfind_face_corner() result for three corners of one cube face. Returns true if install_tswitch() returns non-zero; otherwise logs via log_no_crnr() and returns false. */ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + + if (install_tswitch(t, i, j, k, + tfind_face_corner(t->sw[i][jp1][k], + t->sw[ip1][jp1][k], + t->sw[ip1][j][k]))) { + return true; + } + log_no_crnr(t, 0x701, i, j, k, i, j, k); + return false; +} + +/* + * 3D case 0x702: O + * . . . + * b0: t->sw[i ][j ][k ] . . . + * b1: . . . + * b2: t->sw[i ][j+1][k ] . . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: t->sw[i ][j ][k+1] . . O . + * b5: t->sw[i+1][j ][k+1] . . . . + * b6: t->sw[i ][j+1][k+1] . . . + * b7: t->sw[i+1][j+1][k+1] . . . . + * . . O + * O . O + * . . + * . . + * . . + * . . + * @ + * + * Attempt corner (ip1, j, k): install_tswitch() is given the + * tfind_face_corner() result for three corners of one cube face. Returns + * true if install_tswitch() returns non-zero; otherwise logs via + * log_no_crnr() and returns false. + */ +static +bool handle_case_0x702(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, j, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][j][kp1], + t->sw[ip1][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x702, i, j, k, ip1, j, k); + return false; +} + +/* + * 3D case 0x704: O + * . . . + * b0: t->sw[i ][j ][k ] . . . + * b1: t->sw[i+1][j ][k ] . . . + * b2: . . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: t->sw[i ][j ][k+1] . O . . + * b5: t->sw[i+1][j ][k+1] . . . . + * b6: t->sw[i ][j+1][k+1] . . . + * b7: t->sw[i+1][j+1][k+1] . . . . + * O . . + * O . O + * . . + * . . + * . . + * . . + * @ + * + * Attempt corner (i, jp1, k): install_tswitch() is given the + * tfind_face_corner() result for three corners of one cube face. Returns + * true if install_tswitch() returns non-zero; otherwise logs via + * log_no_crnr() and returns false. + */ +static +bool handle_case_0x704(struct torus *t, int i, int j, int k) +{ + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, jp1, k, + tfind_face_corner(t->sw[i][j][k], + t->sw[i][j][kp1], + t->sw[i][jp1][kp1]))) { + return true; + } + log_no_crnr(t, 0x704, i, j, k, i, jp1, k); + return false; +} + +/* + * 3D case 0x708: O + * . . + * b0: t->sw[i ][j ][k ] . . + * b1: t->sw[i+1][j ][k ] . . + * b2: t->sw[i ][j+1][k ] . . + * b3: O O + * b4: t->sw[i ][j ][k+1] . . O . . 
+ * b5: t->sw[i+1][j ][k+1] . . . . + * b6: t->sw[i ][j+1][k+1] . . . . + * b7: t->sw[i+1][j+1][k+1] . . . . + * . O . + * O . O + * . . . + * . . . + * . . . + * . . . + * @ + */ +static +bool handle_case_0x708(struct torus *t, int i, int j, int k) +{ /* Attempt corner (ip1, jp1, k): install_tswitch() is given the tfind_face_corner() result for three corners of one cube face. Returns true if install_tswitch() returns non-zero; otherwise logs via log_no_crnr() and returns false. */ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + + if (install_tswitch(t, ip1, jp1, k, + tfind_face_corner(t->sw[i][jp1][k], + t->sw[i][j][k], + t->sw[ip1][j][k]))) { + return true; + } + log_no_crnr(t, 0x708, i, j, k, ip1, jp1, k); + return false; +} + +/* + * 3D case 0x710: O + * . . . + * b0: t->sw[i ][j ][k ] . . . + * b1: t->sw[i+1][j ][k ] . . . + * b2: t->sw[i ][j+1][k ] . . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: . O . + * b5: t->sw[i+1][j ][k+1] . . . . + * b6: t->sw[i ][j+1][k+1] . . . . + * b7: t->sw[i+1][j+1][k+1] . . . . + * . . O . . + * O O + * . . + * . . + * . . + * . . + * @ + * + * Attempt corner (i, j, kp1): install_tswitch() is given the + * tfind_face_corner() result for three corners of one cube face. Returns + * true if install_tswitch() returns non-zero; otherwise logs via + * log_no_crnr() and returns false. + */ +static +bool handle_case_0x710(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, j, kp1, + tfind_face_corner(t->sw[i][j][k], + t->sw[ip1][j][k], + t->sw[ip1][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x710, i, j, k, i, j, kp1); + return false; +} + +/* + * 3D case 0x720: O + * . . + * b0: t->sw[i ][j ][k ] . . + * b1: t->sw[i+1][j ][k ] . . + * b2: t->sw[i ][j+1][k ] . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: t->sw[i ][j ][k+1] . . O + * b5: . . . . + * b6: t->sw[i ][j+1][k+1] . . . + * b7: t->sw[i+1][j+1][k+1] . . . . + * . . O . + * O . O + * . . . + * . . . + * . . . + * . . . 
+ * @ + */ +static +bool handle_case_0x720(struct torus *t, int i, int j, int k) +{ /* Attempt corner (ip1, j, kp1): install_tswitch() is given the tfind_face_corner() result for three corners of one cube face. Returns true if install_tswitch() returns non-zero; otherwise logs via log_no_crnr() and returns false. */ + int ip1 = canonicalize(i + 1, t->x_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, j, kp1, + tfind_face_corner(t->sw[ip1][j][k], + t->sw[i][j][k], + t->sw[i][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x720, i, j, k, ip1, j, kp1); + return false; +} + +/* + * 3D case 0x740: O + * . . + * b0: t->sw[i ][j ][k ] . . + * b1: t->sw[i+1][j ][k ] . . + * b2: t->sw[i ][j+1][k ] . . + * b3: t->sw[i+1][j+1][k ] O . O + * b4: t->sw[i ][j ][k+1] O . . + * b5: t->sw[i+1][j ][k+1] . . . . + * b6: . . . + * b7: t->sw[i+1][j+1][k+1] . . . . + * . O . . + * O . O + * . . . + * . . . + * . . . + * . . . + * @ + * + * Attempt corner (i, jp1, kp1): install_tswitch() is given the + * tfind_face_corner() result for three corners of one cube face. Returns + * true if install_tswitch() returns non-zero; otherwise logs via + * log_no_crnr() and returns false. + */ +static +bool handle_case_0x740(struct torus *t, int i, int j, int k) +{ + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, i, jp1, kp1, + tfind_face_corner(t->sw[i][jp1][k], + t->sw[i][j][k], + t->sw[i][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x740, i, j, k, i, jp1, kp1); + return false; +} + +/* + * 3D case 0x780: O + * + * b0: t->sw[i ][j ][k ] + * b1: t->sw[i+1][j ][k ] + * b2: t->sw[i ][j+1][k ] + * b3: t->sw[i+1][j+1][k ] O O + * b4: t->sw[i ][j ][k+1] . . O . . + * b5: t->sw[i+1][j ][k+1] . . . . . . + * b6: t->sw[i ][j+1][k+1] . . . . + * b7: . . . . . . + * . . O . . + * O . O + * . . . + * . . . + * . . . + * . . . + * @ + * + * Attempt corner (ip1, jp1, kp1): install_tswitch() is given the + * tfind_face_corner() result for three corners of one cube face. Returns + * true if install_tswitch() returns non-zero; otherwise logs via + * log_no_crnr() and returns false. + */ +static +bool handle_case_0x780(struct torus *t, int i, int j, int k) +{ + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + if (install_tswitch(t, ip1, jp1, kp1, + tfind_face_corner(t->sw[i][jp1][kp1], + t->sw[i][j][kp1], + t->sw[ip1][j][kp1]))) { + return true; + } + log_no_crnr(t, 0x780, i, j, k, ip1, jp1, kp1); + return false; +} + +/* + * Make sure links between all known torus/mesh switches are installed. 
+ * + * We don't have to worry about links that wrap on a mesh coordinate, as + * there shouldn't be any; if there are it indicates an input error. + */ +static +void check_tlinks(struct torus *t, int i, int j, int k) +{ + struct t_switch ****sw = t->sw; + int ip1 = canonicalize(i + 1, t->x_sz); + int jp1 = canonicalize(j + 1, t->y_sz); + int kp1 = canonicalize(k + 1, t->z_sz); + + /* + * Don't waste time/code checking return status of link_tswitches() + * here. It is unlikely to fail, and the result of any failure here + * will be caught elsewhere anyway. + */ + if (sw[i][j][k] && sw[ip1][j][k]) + link_tswitches(t, 0, sw[i][j][k], sw[ip1][j][k]); + + if (sw[i][jp1][k] && sw[ip1][jp1][k]) + link_tswitches(t, 0, sw[i][jp1][k], sw[ip1][jp1][k]); + + if (sw[i][j][kp1] && sw[ip1][j][kp1]) + link_tswitches(t, 0, sw[i][j][kp1], sw[ip1][j][kp1]); + + if (sw[i][jp1][kp1] && sw[ip1][jp1][kp1]) + link_tswitches(t, 0, sw[i][jp1][kp1], sw[ip1][jp1][kp1]); + + + if (sw[i][j][k] && sw[i][jp1][k]) + link_tswitches(t, 1, sw[i][j][k], sw[i][jp1][k]); + + if (sw[ip1][j][k] && sw[ip1][jp1][k]) + link_tswitches(t, 1, sw[ip1][j][k], sw[ip1][jp1][k]); + + if (sw[i][j][kp1] && sw[i][jp1][kp1]) + link_tswitches(t, 1, sw[i][j][kp1], sw[i][jp1][kp1]); + + if (sw[ip1][j][kp1] && sw[ip1][jp1][kp1]) + link_tswitches(t, 1, sw[ip1][j][kp1], sw[ip1][jp1][kp1]); + + + if (sw[i][j][k] && sw[i][j][kp1]) + link_tswitches(t, 2, sw[i][j][k], sw[i][j][kp1]); + + if (sw[ip1][j][k] && sw[ip1][j][kp1]) + link_tswitches(t, 2, sw[ip1][j][k], sw[ip1][j][kp1]); + + if (sw[i][jp1][k] && sw[i][jp1][kp1]) + link_tswitches(t, 2, sw[i][jp1][k], sw[i][jp1][kp1]); + + if (sw[ip1][jp1][k] && sw[ip1][jp1][kp1]) + link_tswitches(t, 2, sw[ip1][jp1][k], sw[ip1][jp1][kp1]); +} + +static +void locate_sw(struct torus *t, int i, int j, int k) +{ + unsigned fp; + bool success; + + i = canonicalize(i, t->x_sz); + j = canonicalize(j, t->y_sz); + k = canonicalize(k, t->z_sz); + + /* + * By definition, if a coordinate direction is 
meshed, we don't + * allow it to wrap to zero. + */ + if (t->flags & X_MESH) { + int ip1 = canonicalize(i + 1, t->x_sz); + if (ip1 < i) + goto out; + } + if (t->flags & Y_MESH) { + int jp1 = canonicalize(j + 1, t->y_sz); + if (jp1 < j) + goto out; + } + if (t->flags & Z_MESH) { + int kp1 = canonicalize(k + 1, t->z_sz); + if (kp1 < k) + goto out; + } + /* + * There are various reasons that the links are not installed between + * known torus switches. These include cases where the search for + * new switches only partially succeeds due to missing switches, and + * cases where we haven't processed this position yet, but processing + * of multiple independent neighbor positions has installed switches + * into corners of our case. + * + * In any event, the topology assumptions made in handling the + * fingerprint for this position require that all links be installed + * between installed switches for this position. + */ +again: + check_tlinks(t, i, j, k); + fp = fingerprint(t, i, j, k); + + switch (fp) { + /* + * When all switches are present, we are done. Otherwise, one of + * the cases below will be unsuccessful, and we'll be done also. + * + * Note that check_tlinks() above will ensure all links that are + * present are connected, in the event that all our switches are + * present due to successful case handling in the surrounding + * torus/mesh. + */ + case 0x300: + case 0x500: + case 0x600: + case 0x700: + goto out; + /* + * Ignore the 2D cases where there isn't enough information to uniquely + * locate/place a switch into the cube. 
+ */ + case 0x30f: /* 0 corners available */ + case 0x533: /* 0 corners available */ + case 0x655: /* 0 corners available */ + case 0x30e: /* 1 corner available */ + case 0x532: /* 1 corner available */ + case 0x654: /* 1 corner available */ + case 0x30d: /* 1 corner available */ + case 0x531: /* 1 corner available */ + case 0x651: /* 1 corner available */ + case 0x30b: /* 1 corner available */ + case 0x523: /* 1 corner available */ + case 0x645: /* 1 corner available */ + case 0x307: /* 1 corner available */ + case 0x513: /* 1 corner available */ + case 0x615: /* 1 corner available */ + goto out; + /* + * Handle the 2D cases with a single existing edge. + * + */ + case 0x30c: + success = handle_case_0x30c(t, i, j, k); + break; + case 0x303: + success = handle_case_0x303(t, i, j, k); + break; + case 0x305: + success = handle_case_0x305(t, i, j, k); + break; + case 0x30a: + success = handle_case_0x30a(t, i, j, k); + break; + case 0x503: + success = handle_case_0x503(t, i, j, k); + break; + case 0x511: + success = handle_case_0x511(t, i, j, k); + break; + case 0x522: + success = handle_case_0x522(t, i, j, k); + break; + case 0x530: + success = handle_case_0x530(t, i, j, k); + break; + case 0x605: + success = handle_case_0x605(t, i, j, k); + break; + case 0x611: + success = handle_case_0x611(t, i, j, k); + break; + case 0x644: + success = handle_case_0x644(t, i, j, k); + break; + case 0x650: + success = handle_case_0x650(t, i, j, k); + break; + /* + * Handle the 2D cases where two existing edges meet at a corner. 
+ */ + case 0x301: + success = handle_case_0x301(t, i, j, k); + break; + case 0x302: + success = handle_case_0x302(t, i, j, k); + break; + case 0x304: + success = handle_case_0x304(t, i, j, k); + break; + case 0x308: + success = handle_case_0x308(t, i, j, k); + break; + case 0x501: + success = handle_case_0x501(t, i, j, k); + break; + case 0x502: + success = handle_case_0x502(t, i, j, k); + break; + case 0x520: + success = handle_case_0x520(t, i, j, k); + break; + case 0x510: + success = handle_case_0x510(t, i, j, k); + break; + case 0x601: + success = handle_case_0x601(t, i, j, k); + break; + case 0x604: + success = handle_case_0x604(t, i, j, k); + break; + case 0x610: + success = handle_case_0x610(t, i, j, k); + break; + case 0x640: + success = handle_case_0x640(t, i, j, k); + break; + /* + * Ignore the 3D cases where there isn't enough information to uniquely + * locate/place a switch into the cube. + */ + case 0x7ff: /* 0 corners available */ + case 0x7fe: /* 1 corner available */ + case 0x7fd: /* 1 corner available */ + case 0x7fb: /* 1 corner available */ + case 0x7f7: /* 1 corner available */ + case 0x7ef: /* 1 corner available */ + case 0x7df: /* 1 corner available */ + case 0x7bf: /* 1 corner available */ + case 0x77f: /* 1 corner available */ + case 0x7fc: /* 2 adj corners available */ + case 0x7fa: /* 2 adj corners available */ + case 0x7f5: /* 2 adj corners available */ + case 0x7f3: /* 2 adj corners available */ + case 0x7cf: /* 2 adj corners available */ + case 0x7af: /* 2 adj corners available */ + case 0x75f: /* 2 adj corners available */ + case 0x73f: /* 2 adj corners available */ + case 0x7ee: /* 2 adj corners available */ + case 0x7dd: /* 2 adj corners available */ + case 0x7bb: /* 2 adj corners available */ + case 0x777: /* 2 adj corners available */ + goto out; + /* + * Handle the 3D cases where two existing edges meet at a corner. 
+ * + */ + case 0x71f: + success = handle_case_0x71f(t, i, j, k); + break; + case 0x72f: + success = handle_case_0x72f(t, i, j, k); + break; + case 0x737: + success = handle_case_0x737(t, i, j, k); + break; + case 0x73b: + success = handle_case_0x73b(t, i, j, k); + break; + case 0x74f: + success = handle_case_0x74f(t, i, j, k); + break; + case 0x757: + success = handle_case_0x757(t, i, j, k); + break; + case 0x75d: + success = handle_case_0x75d(t, i, j, k); + break; + case 0x773: + success = handle_case_0x773(t, i, j, k); + break; + case 0x775: + success = handle_case_0x775(t, i, j, k); + break; + case 0x78f: + success = handle_case_0x78f(t, i, j, k); + break; + case 0x7ab: + success = handle_case_0x7ab(t, i, j, k); + break; + case 0x7ae: + success = handle_case_0x7ae(t, i, j, k); + break; + case 0x7b3: + success = handle_case_0x7b3(t, i, j, k); + break; + case 0x7ba: + success = handle_case_0x7ba(t, i, j, k); + break; + case 0x7cd: + success = handle_case_0x7cd(t, i, j, k); + break; + case 0x7ce: + success = handle_case_0x7ce(t, i, j, k); + break; + case 0x7d5: + success = handle_case_0x7d5(t, i, j, k); + break; + case 0x7dc: + success = handle_case_0x7dc(t, i, j, k); + break; + case 0x7ea: + success = handle_case_0x7ea(t, i, j, k); + break; + case 0x7ec: + success = handle_case_0x7ec(t, i, j, k); + break; + case 0x7f1: + success = handle_case_0x7f1(t, i, j, k); + break; + case 0x7f2: + success = handle_case_0x7f2(t, i, j, k); + break; + case 0x7f4: + success = handle_case_0x7f4(t, i, j, k); + break; + case 0x7f8: + success = handle_case_0x7f8(t, i, j, k); + break; + /* + * Handle the cases where three existing edges meet at a corner. 
+ * + */ + case 0x717: + success = handle_case_0x717(t, i, j, k); + break; + case 0x72b: + success = handle_case_0x72b(t, i, j, k); + break; + case 0x74d: + success = handle_case_0x74d(t, i, j, k); + break; + case 0x771: + success = handle_case_0x771(t, i, j, k); + break; + case 0x78e: + success = handle_case_0x78e(t, i, j, k); + break; + case 0x7b2: + success = handle_case_0x7b2(t, i, j, k); + break; + case 0x7d4: + success = handle_case_0x7d4(t, i, j, k); + break; + case 0x7e8: + success = handle_case_0x7e8(t, i, j, k); + break; + /* + * Handle the cases where four corners on a single face are missing. + */ + case 0x70f: + success = handle_case_0x70f(t, i, j, k); + break; + case 0x733: + success = handle_case_0x733(t, i, j, k); + break; + case 0x755: + success = handle_case_0x755(t, i, j, k); + break; + case 0x7aa: + success = handle_case_0x7aa(t, i, j, k); + break; + case 0x7cc: + success = handle_case_0x7cc(t, i, j, k); + break; + case 0x7f0: + success = handle_case_0x7f0(t, i, j, k); + break; + /* + * Handle the cases where three corners on a single face are missing. 
+ */ + case 0x707: + success = handle_case_0x707(t, i, j, k); + break; + case 0x70b: + success = handle_case_0x70b(t, i, j, k); + break; + case 0x70d: + success = handle_case_0x70d(t, i, j, k); + break; + case 0x70e: + success = handle_case_0x70e(t, i, j, k); + break; + case 0x713: + success = handle_case_0x713(t, i, j, k); + break; + case 0x715: + success = handle_case_0x715(t, i, j, k); + break; + case 0x723: + success = handle_case_0x723(t, i, j, k); + break; + case 0x72a: + success = handle_case_0x72a(t, i, j, k); + break; + case 0x731: + success = handle_case_0x731(t, i, j, k); + break; + case 0x732: + success = handle_case_0x732(t, i, j, k); + break; + case 0x745: + success = handle_case_0x745(t, i, j, k); + break; + case 0x74c: + success = handle_case_0x74c(t, i, j, k); + break; + case 0x751: + success = handle_case_0x751(t, i, j, k); + break; + case 0x754: + success = handle_case_0x754(t, i, j, k); + break; + case 0x770: + success = handle_case_0x770(t, i, j, k); + break; + case 0x78a: + success = handle_case_0x78a(t, i, j, k); + break; + case 0x78c: + success = handle_case_0x78c(t, i, j, k); + break; + case 0x7a2: + success = handle_case_0x7a2(t, i, j, k); + break; + case 0x7a8: + success = handle_case_0x7a8(t, i, j, k); + break; + case 0x7b0: + success = handle_case_0x7b0(t, i, j, k); + break; + case 0x7c4: + success = handle_case_0x7c4(t, i, j, k); + break; + case 0x7c8: + success = handle_case_0x7c8(t, i, j, k); + break; + case 0x7d0: + success = handle_case_0x7d0(t, i, j, k); + break; + case 0x7e0: + success = handle_case_0x7e0(t, i, j, k); + break; + /* + * Handle the cases where two corners on a single edge are missing. 
+ */ + case 0x703: + success = handle_case_0x703(t, i, j, k); + break; + case 0x705: + success = handle_case_0x705(t, i, j, k); + break; + case 0x70a: + success = handle_case_0x70a(t, i, j, k); + break; + case 0x70c: + success = handle_case_0x70c(t, i, j, k); + break; + case 0x711: + success = handle_case_0x711(t, i, j, k); + break; + case 0x722: + success = handle_case_0x722(t, i, j, k); + break; + case 0x730: + success = handle_case_0x730(t, i, j, k); + break; + case 0x744: + success = handle_case_0x744(t, i, j, k); + break; + case 0x750: + success = handle_case_0x750(t, i, j, k); + break; + case 0x788: + success = handle_case_0x788(t, i, j, k); + break; + case 0x7a0: + success = handle_case_0x7a0(t, i, j, k); + break; + case 0x7c0: + success = handle_case_0x7c0(t, i, j, k); + break; + /* + * Handle the cases where a single corner is missing. + */ + case 0x701: + success = handle_case_0x701(t, i, j, k); + break; + case 0x702: + success = handle_case_0x702(t, i, j, k); + break; + case 0x704: + success = handle_case_0x704(t, i, j, k); + break; + case 0x708: + success = handle_case_0x708(t, i, j, k); + break; + case 0x710: + success = handle_case_0x710(t, i, j, k); + break; + case 0x720: + success = handle_case_0x720(t, i, j, k); + break; + case 0x740: + success = handle_case_0x740(t, i, j, k); + break; + case 0x780: + success = handle_case_0x780(t, i, j, k); + break; + + default: + /* + * There's lots of unhandled cases still, but it's not clear + * we care. Let debugging show us what they are so we can + * learn if we care. + */ + if (t->debug) + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "Unhandled fingerprint 0x%03x @ %d %d %d\n", + fp, i, j, k); + goto out; + } + /* + * If we successfully handled a case, we may be able to make more + * progress at this position, so try again. Otherwise, even though + * we didn't successfully handle a case, we may have installed a + * switch into the torus/mesh, so try to install links as well. 
+ * Then we'll have another go at the next position. + */ + if (success) { + if (t->debug) + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "Success on fingerprint 0x%03x @ %d %d %d\n", + fp, i, j, k); + goto again; + } else { + check_tlinks(t, i, j, k); + if (t->debug) + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "Failed on fingerprint 0x%03x @ %d %d %d\n", + fp, i, j, k); + } +out: + return; +} + +#define LINK_ERR_STR " direction link required for topology seed configuration since radix == 4! See torus-2QoS.conf(5).\n" +#define LINK_ERR2_STR " direction link required for topology seed configuration! See torus-2QoS.conf(5).\n" +#define SEED_ERR_STR " direction links for topology seed do not share a common switch! See torus-2QoS.conf(5).\n" + +static +bool verify_setup(struct torus *t, struct fabric *f) +{ + struct coord_dirs *o; + struct f_switch *sw; + unsigned p, s, n = 0; + bool success = false; + bool all_sw_present, need_seed = true; + + if (!(t->x_sz && t->y_sz && t->z_sz)) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E20: missing required torus size specification!\n"); + goto out; + } + if (t->osm->subn.min_sw_data_vls < 2) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E48: Too few data VLs to support torus routing " + "without credit loops (have switchport %d need 2)\n", + (int)t->osm->subn.min_sw_data_vls); + goto out; + } + if (t->osm->subn.min_sw_data_vls < 4) + OSM_LOG(&t->osm->log, OSM_LOG_INFO, + "Warning: Too few data VLs to support torus routing " + "with a failed switch without credit loops " + "(have switchport %d need 4)\n", + (int)t->osm->subn.min_sw_data_vls); + if (t->osm->subn.min_sw_data_vls < 8) + OSM_LOG(&t->osm->log, OSM_LOG_INFO, + "Warning: Too few data VLs to support torus routing " + "with two QoS levels (have switchport %d need 8)\n", + (int)t->osm->subn.min_sw_data_vls); + if (t->osm->subn.min_data_vls < 2) + OSM_LOG(&t->osm->log, OSM_LOG_INFO, + "Warning: Too few data VLs to support torus routing " + "with two QoS levels (have endport 
%d need 2)\n", + (int)t->osm->subn.min_data_vls); + /* + * Be sure all the switches in the torus support the port + * ordering that might have been configured. + */ + for (s = 0; s < f->switch_cnt; s++) { + sw = f->sw[s]; + for (p = 0; p < sw->port_cnt; p++) { + if (t->port_order[p] >= sw->port_cnt) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E21: port_order configured using " + "port %u, but only %u ports in " + "switch w/ GUID 0x%04"PRIx64"\n", + t->port_order[p], sw->port_cnt - 1, + cl_ntoh64(sw->n_id)); + goto out; + } + } + } + /* + * Unfortunately, there is a problem with non-unique topology for any + * torus dimension which has radix four. This problem requires extra + * input, in the form of specifying both the positive and negative + * coordinate directions from a common switch, for any torus dimension + * with radix four (see also build_torus()). + * + * Do the checking required to ensure that the required information + * is present, but more than the needed information is not required. + * + * So, verify that we learned the coordinate directions correctly for + * the fabric. The coordinate direction links get an invalid port + * set on their ends when parsed. 
+ */ +again: + all_sw_present = true; + o = &t->seed[n]; + + if (t->x_sz == 4 && !(t->flags & X_MESH)) { + if (o->xp_link.end[0].port >= 0) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E22: Positive x" LINK_ERR_STR); + goto out; + } + if (o->xm_link.end[0].port >= 0) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E23: Negative x" LINK_ERR_STR); + goto out; + } + if (o->xp_link.end[0].n_id != o->xm_link.end[0].n_id) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E24: Positive/negative x" SEED_ERR_STR); + goto out; + } + } + if (t->y_sz == 4 && !(t->flags & Y_MESH)) { + if (o->yp_link.end[0].port >= 0) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E25: Positive y" LINK_ERR_STR); + goto out; + } + if (o->ym_link.end[0].port >= 0) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E26: Negative y" LINK_ERR_STR); + goto out; + } + if (o->yp_link.end[0].n_id != o->ym_link.end[0].n_id) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E27: Positive/negative y" SEED_ERR_STR); + goto out; + } + } + if (t->z_sz == 4 && !(t->flags & Z_MESH)) { + if (o->zp_link.end[0].port >= 0) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E28: Positive z" LINK_ERR_STR); + goto out; + } + if (o->zm_link.end[0].port >= 0) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E29: Negative z" LINK_ERR_STR); + goto out; + } + if (o->zp_link.end[0].n_id != o->zm_link.end[0].n_id) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E2A: Positive/negative z" SEED_ERR_STR); + goto out; + } + } + if (t->x_sz > 1) { + if (o->xp_link.end[0].port >= 0 && + o->xm_link.end[0].port >= 0) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E2B: Positive or negative x" LINK_ERR2_STR); + goto out; + } + if (o->xp_link.end[0].port < 0 && + !find_f_sw(f, o->xp_link.end[0].n_id)) + all_sw_present = false; + + if (o->xp_link.end[1].port < 0 && + !find_f_sw(f, o->xp_link.end[1].n_id)) + all_sw_present = false; + + if (o->xm_link.end[0].port < 0 && + !find_f_sw(f, o->xm_link.end[0].n_id)) + all_sw_present = false; 
+ + if (o->xm_link.end[1].port < 0 && + !find_f_sw(f, o->xm_link.end[1].n_id)) + all_sw_present = false; + } + if (t->z_sz > 1) { + if (o->zp_link.end[0].port >= 0 && + o->zm_link.end[0].port >= 0) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E2C: Positive or negative z" LINK_ERR2_STR); + goto out; + } + if ((o->xp_link.end[0].port < 0 && + o->zp_link.end[0].port < 0 && + o->zp_link.end[0].n_id != o->xp_link.end[0].n_id) || + + (o->xp_link.end[0].port < 0 && + o->zm_link.end[0].port < 0 && + o->zm_link.end[0].n_id != o->xp_link.end[0].n_id) || + + (o->xm_link.end[0].port < 0 && + o->zp_link.end[0].port < 0 && + o->zp_link.end[0].n_id != o->xm_link.end[0].n_id) || + + (o->xm_link.end[0].port < 0 && + o->zm_link.end[0].port < 0 && + o->zm_link.end[0].n_id != o->xm_link.end[0].n_id)) { + + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E2D: x and z" SEED_ERR_STR); + goto out; + } + if (o->zp_link.end[0].port < 0 && + !find_f_sw(f, o->zp_link.end[0].n_id)) + all_sw_present = false; + + if (o->zp_link.end[1].port < 0 && + !find_f_sw(f, o->zp_link.end[1].n_id)) + all_sw_present = false; + + if (o->zm_link.end[0].port < 0 && + !find_f_sw(f, o->zm_link.end[0].n_id)) + all_sw_present = false; + + if (o->zm_link.end[1].port < 0 && + !find_f_sw(f, o->zm_link.end[1].n_id)) + all_sw_present = false; + } + if (t->y_sz > 1) { + if (o->yp_link.end[0].port >= 0 && + o->ym_link.end[0].port >= 0) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E2E: Positive or negative y" LINK_ERR2_STR); + goto out; + } + if ((o->xp_link.end[0].port < 0 && + o->yp_link.end[0].port < 0 && + o->yp_link.end[0].n_id != o->xp_link.end[0].n_id) || + + (o->xp_link.end[0].port < 0 && + o->ym_link.end[0].port < 0 && + o->ym_link.end[0].n_id != o->xp_link.end[0].n_id) || + + (o->xm_link.end[0].port < 0 && + o->yp_link.end[0].port < 0 && + o->yp_link.end[0].n_id != o->xm_link.end[0].n_id) || + + (o->xm_link.end[0].port < 0 && + o->ym_link.end[0].port < 0 && + o->ym_link.end[0].n_id != 
o->xm_link.end[0].n_id)) { + + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E2F: x and y" SEED_ERR_STR); + goto out; + } + if (o->yp_link.end[0].port < 0 && + !find_f_sw(f, o->yp_link.end[0].n_id)) + all_sw_present = false; + + if (o->yp_link.end[1].port < 0 && + !find_f_sw(f, o->yp_link.end[1].n_id)) + all_sw_present = false; + + if (o->ym_link.end[0].port < 0 && + !find_f_sw(f, o->ym_link.end[0].n_id)) + all_sw_present = false; + + if (o->ym_link.end[1].port < 0 && + !find_f_sw(f, o->ym_link.end[1].n_id)) + all_sw_present = false; + } + if (all_sw_present && need_seed) { + t->seed_idx = n; + need_seed = false; + } + if (++n < t->seed_cnt) + goto again; + + if (need_seed) + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E30: Every configured torus seed has at " + "least one switch missing in fabric! See " + "torus-2QoS.conf(5) and TORUS TOPOLOGY DISCOVERY " + "in torus-2QoS(8)\n"); + else + success = true; +out: + return success; +} + +static +bool build_torus(struct fabric *f, struct torus *t) +{ + int i, j, k; + int im1, jm1, km1; + int ip1, jp1, kp1; + unsigned nlink; + struct coord_dirs *o; + struct f_switch *fsw0, *fsw1; + struct t_switch ****sw = t->sw; + bool success = true; + + t->link_pool_sz = f->link_cnt; + t->link_pool = calloc(1, t->link_pool_sz * sizeof(*t->link_pool)); + if (!t->link_pool) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E31: Allocating torus link pool: %s\n", + strerror(errno)); + goto out; + } + t->fabric = f; + + /* + * Get things started by locating the up to seven switches that + * define the torus "seed", coordinate directions, and datelines. 
+ */ + o = &t->seed[t->seed_idx]; + + i = canonicalize(-o->x_dateline, t->x_sz); + j = canonicalize(-o->y_dateline, t->y_sz); + k = canonicalize(-o->z_dateline, t->z_sz); + + if (o->xp_link.end[0].port < 0) { + ip1 = canonicalize(1 - o->x_dateline, t->x_sz); + fsw0 = find_f_sw(f, o->xp_link.end[0].n_id); + fsw1 = find_f_sw(f, o->xp_link.end[1].n_id); + success = + install_tswitch(t, i, j, k, fsw0) && + install_tswitch(t, ip1, j, k, fsw1) && success; + } + if (o->xm_link.end[0].port < 0) { + im1 = canonicalize(-1 - o->x_dateline, t->x_sz); + fsw0 = find_f_sw(f, o->xm_link.end[0].n_id); + fsw1 = find_f_sw(f, o->xm_link.end[1].n_id); + success = + install_tswitch(t, i, j, k, fsw0) && + install_tswitch(t, im1, j, k, fsw1) && success; + } + if (o->yp_link.end[0].port < 0) { + jp1 = canonicalize(1 - o->y_dateline, t->y_sz); + fsw0 = find_f_sw(f, o->yp_link.end[0].n_id); + fsw1 = find_f_sw(f, o->yp_link.end[1].n_id); + success = + install_tswitch(t, i, j, k, fsw0) && + install_tswitch(t, i, jp1, k, fsw1) && success; + } + if (o->ym_link.end[0].port < 0) { + jm1 = canonicalize(-1 - o->y_dateline, t->y_sz); + fsw0 = find_f_sw(f, o->ym_link.end[0].n_id); + fsw1 = find_f_sw(f, o->ym_link.end[1].n_id); + success = + install_tswitch(t, i, j, k, fsw0) && + install_tswitch(t, i, jm1, k, fsw1) && success; + } + if (o->zp_link.end[0].port < 0) { + kp1 = canonicalize(1 - o->z_dateline, t->z_sz); + fsw0 = find_f_sw(f, o->zp_link.end[0].n_id); + fsw1 = find_f_sw(f, o->zp_link.end[1].n_id); + success = + install_tswitch(t, i, j, k, fsw0) && + install_tswitch(t, i, j, kp1, fsw1) && success; + } + if (o->zm_link.end[0].port < 0) { + km1 = canonicalize(-1 - o->z_dateline, t->z_sz); + fsw0 = find_f_sw(f, o->zm_link.end[0].n_id); + fsw1 = find_f_sw(f, o->zm_link.end[1].n_id); + success = + install_tswitch(t, i, j, k, fsw0) && + install_tswitch(t, i, j, km1, fsw1) && success; + } + if (!success) + goto out; + + if (!t->seed_idx) + OSM_LOG(&t->osm->log, OSM_LOG_INFO, + "Using torus seed 
configured as default " + "(seed sw %d,%d,%d GUID 0x%04"PRIx64").\n", + i, j, k, cl_ntoh64(sw[i][j][k]->n_id)); + else + OSM_LOG(&t->osm->log, OSM_LOG_INFO, + "Using torus seed configured as backup #%u " + "(seed sw %d,%d,%d GUID 0x%04"PRIx64").\n", + t->seed_idx, i, j, k, cl_ntoh64(sw[i][j][k]->n_id)); + + /* + * Search the fabric and construct the expected torus topology. + * + * The algorithm is to consider the "cube" formed by eight switch + * locations bounded by the corners i, j, k and i+1, j+1, k+1. + * For each such cube look at the topology of the switches already + * placed in the torus, and deduce which new switches can be placed + * into their proper locations in the torus. Examine each cube + * multiple times, until the number of links moved into the torus + * topology does not change. + */ +again: + nlink = t->link_cnt; + + for (k = 0; k < (int)t->z_sz; k++) + for (j = 0; j < (int)t->y_sz; j++) + for (i = 0; i < (int)t->x_sz; i++) + locate_sw(t, i, j, k); + + if (t->link_cnt != nlink) + goto again; + + /* + * Move all other endpoints into torus/mesh. + */ + for (k = 0; k < (int)t->z_sz; k++) + for (j = 0; j < (int)t->y_sz; j++) + for (i = 0; i < (int)t->x_sz; i++) + if (!link_srcsink(t, i, j, k)) { + success = false; + goto out; + } +out: + return success; +} + +/* + * Returns a count of differences between old and new switches. 
+ */ +static +unsigned tsw_changes(struct t_switch *nsw, struct t_switch *osw) +{ + unsigned p, cnt = 0, port_cnt; + struct endpoint *npt, *opt; + struct endpoint *rnpt, *ropt; + + if (nsw && !osw) { + cnt++; + OSM_LOG(&nsw->torus->osm->log, OSM_LOG_INFO, + "New torus switch %d,%d,%d GUID 0x%04"PRIx64"\n", + nsw->i, nsw->j, nsw->k, cl_ntoh64(nsw->n_id)); + goto out; + } + if (osw && !nsw) { + cnt++; + OSM_LOG(&osw->torus->osm->log, OSM_LOG_INFO, + "Lost torus switch %d,%d,%d GUID 0x%04"PRIx64"\n", + osw->i, osw->j, osw->k, cl_ntoh64(osw->n_id)); + goto out; + } + if (!(nsw && osw)) + goto out; + + if (nsw->n_id != osw->n_id) { + cnt++; + OSM_LOG(&nsw->torus->osm->log, OSM_LOG_INFO, + "Torus switch %d,%d,%d GUID " + "was 0x%04"PRIx64", now 0x%04"PRIx64"\n", + nsw->i, nsw->j, nsw->k, + cl_ntoh64(osw->n_id), cl_ntoh64(nsw->n_id)); + } + + if (nsw->port_cnt != osw->port_cnt) { + cnt++; + OSM_LOG(&nsw->torus->osm->log, OSM_LOG_INFO, + "Torus switch %d,%d,%d GUID 0x%04"PRIx64" " + "had %d ports, now has %d\n", + nsw->i, nsw->j, nsw->k, cl_ntoh64(nsw->n_id), + osw->port_cnt, nsw->port_cnt); + } + port_cnt = nsw->port_cnt; + if (port_cnt > osw->port_cnt) + port_cnt = osw->port_cnt; + + for (p = 0; p < port_cnt; p++) { + npt = nsw->port[p]; + opt = osw->port[p]; + + if (npt && npt->link) { + if (&npt->link->end[0] == npt) + rnpt = &npt->link->end[1]; + else + rnpt = &npt->link->end[0]; + } else + rnpt = NULL; + + if (opt && opt->link) { + if (&opt->link->end[0] == opt) + ropt = &opt->link->end[1]; + else + ropt = &opt->link->end[0]; + } else + ropt = NULL; + + if (rnpt && !ropt) { + ++cnt; + OSM_LOG(&nsw->torus->osm->log, OSM_LOG_INFO, + "Torus switch %d,%d,%d GUID 0x%04"PRIx64"[%d] " + "remote now %s GUID 0x%04"PRIx64"[%d], " + "was missing\n", + nsw->i, nsw->j, nsw->k, cl_ntoh64(nsw->n_id), + p, rnpt->type == PASSTHRU ? 
"sw" : "node", + cl_ntoh64(rnpt->n_id), rnpt->port); + continue; + } + if (ropt && !rnpt) { + ++cnt; + OSM_LOG(&nsw->torus->osm->log, OSM_LOG_INFO, + "Torus switch %d,%d,%d GUID 0x%04"PRIx64"[%d] " + "remote now missing, " + "was %s GUID 0x%04"PRIx64"[%d]\n", + osw->i, osw->j, osw->k, cl_ntoh64(nsw->n_id), + p, ropt->type == PASSTHRU ? "sw" : "node", + cl_ntoh64(ropt->n_id), ropt->port); + continue; + } + if (!(rnpt && ropt)) + continue; + + if (rnpt->n_id != ropt->n_id) { + ++cnt; + OSM_LOG(&nsw->torus->osm->log, OSM_LOG_INFO, + "Torus switch %d,%d,%d GUID 0x%04"PRIx64"[%d] " + "remote now %s GUID 0x%04"PRIx64"[%d], " + "was %s GUID 0x%04"PRIx64"[%d]\n", + nsw->i, nsw->j, nsw->k, cl_ntoh64(nsw->n_id), + p, rnpt->type == PASSTHRU ? "sw" : "node", + cl_ntoh64(rnpt->n_id), rnpt->port, + ropt->type == PASSTHRU ? "sw" : "node", + cl_ntoh64(ropt->n_id), ropt->port); + continue; + } + } +out: + return cnt; +} + +static +void dump_torus(struct torus *t) +{ + unsigned i, j, k; + unsigned x_sz = t->x_sz; + unsigned y_sz = t->y_sz; + unsigned z_sz = t->z_sz; + char path[1024]; + FILE *file; + + snprintf(path, sizeof(path), "%s/%s", t->osm->subn.opt.dump_files_dir, + "opensm-torus.dump"); + file = fopen(path, "w"); + if (!file) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E47: cannot create file \'%s\'\n", path); + return; + } + + for (k = 0; k < z_sz; k++) + for (j = 0; j < y_sz; j++) + for (i = 0; i < x_sz; i++) + if (t->sw[i][j][k]) + fprintf(file, "switch %u,%u,%u GUID 0x%04" + PRIx64 " (%s)\n", + i, j, k, + cl_ntoh64(t->sw[i][j][k]->n_id), + t->sw[i][j][k]->osm_switch->p_node->print_desc); + fclose(file); +} + +static +void report_torus_changes(struct torus *nt, struct torus *ot) +{ + unsigned cnt = 0; + unsigned i, j, k; + unsigned x_sz = nt->x_sz; + unsigned y_sz = nt->y_sz; + unsigned z_sz = nt->z_sz; + unsigned max_changes = nt->max_changes; + + if (OSM_LOG_IS_ACTIVE_V2(&nt->osm->log, OSM_LOG_ROUTING)) + dump_torus(nt); + + if (!ot) + return; + + if (x_sz != 
ot->x_sz) { + cnt++; + OSM_LOG(&nt->osm->log, OSM_LOG_INFO, + "Torus x radix was %d now %d\n", + ot->x_sz, nt->x_sz); + if (x_sz > ot->x_sz) + x_sz = ot->x_sz; + } + if (y_sz != ot->y_sz) { + cnt++; + OSM_LOG(&nt->osm->log, OSM_LOG_INFO, + "Torus y radix was %d now %d\n", + ot->y_sz, nt->y_sz); + if (y_sz > ot->y_sz) + y_sz = ot->y_sz; + } + if (z_sz != ot->z_sz) { + cnt++; + OSM_LOG(&nt->osm->log, OSM_LOG_INFO, + "Torus z radix was %d now %d\n", + ot->z_sz, nt->z_sz); + if (z_sz > ot->z_sz) + z_sz = ot->z_sz; + } + + for (k = 0; k < z_sz; k++) + for (j = 0; j < y_sz; j++) + for (i = 0; i < x_sz; i++) { + cnt += tsw_changes(nt->sw[i][j][k], + ot->sw[i][j][k]); + /* + * Booting a big fabric will cause lots of + * changes as hosts come up, so don't spew. + * We want to log changes to learn more about + * bouncing links, etc, so they can be fixed. + */ + if (cnt > max_changes) { + OSM_LOG(&nt->osm->log, OSM_LOG_INFO, + "Too many torus changes; " + "stopping reporting early\n"); + return; + } + } +} + +static +void rpt_torus_missing(struct torus *t, int i, int j, int k, + struct t_switch *sw, int *missing_z) +{ + uint64_t guid_ho; + + if (!sw) { + /* + * We can have multiple missing switches without deadlock + * if and only if they are adajacent in the Z direction. + */ + if ((t->switch_cnt + 1) < t->sw_pool_sz) { + if (t->sw[i][j][canonicalize(k - 1, t->z_sz)] && + t->sw[i][j][canonicalize(k + 1, t->z_sz)]) + t->flags |= MSG_DEADLOCK; + } + /* + * There can be only one such Z-column of missing switches. 
+ */ + if (*missing_z < 0) + *missing_z = i + j * t->x_sz; + else if (*missing_z != i + j * t->x_sz) + t->flags |= MSG_DEADLOCK; + + OSM_LOG(&t->osm->log, OSM_LOG_INFO, + "Missing torus switch at %d,%d,%d\n", i, j, k); + return; + } + guid_ho = cl_ntoh64(sw->n_id); + + if (!(sw->ptgrp[0].port_cnt || (t->x_sz == 1) || + ((t->flags & X_MESH) && i == 0))) + OSM_LOG(&t->osm->log, OSM_LOG_INFO, + "Missing torus -x link on " + "switch %d,%d,%d GUID 0x%04"PRIx64"\n", + i, j, k, guid_ho); + if (!(sw->ptgrp[1].port_cnt || (t->x_sz == 1) || + ((t->flags & X_MESH) && (i + 1) == t->x_sz))) + OSM_LOG(&t->osm->log, OSM_LOG_INFO, + "Missing torus +x link on " + "switch %d,%d,%d GUID 0x%04"PRIx64"\n", + i, j, k, guid_ho); + if (!(sw->ptgrp[2].port_cnt || (t->y_sz == 1) || + ((t->flags & Y_MESH) && j == 0))) + OSM_LOG(&t->osm->log, OSM_LOG_INFO, + "Missing torus -y link on " + "switch %d,%d,%d GUID 0x%04"PRIx64"\n", + i, j, k, guid_ho); + if (!(sw->ptgrp[3].port_cnt || (t->y_sz == 1) || + ((t->flags & Y_MESH) && (j + 1) == t->y_sz))) + OSM_LOG(&t->osm->log, OSM_LOG_INFO, + "Missing torus +y link on " + "switch %d,%d,%d GUID 0x%04"PRIx64"\n", + i, j, k, guid_ho); + if (!(sw->ptgrp[4].port_cnt || (t->z_sz == 1) || + ((t->flags & Z_MESH) && k == 0))) + OSM_LOG(&t->osm->log, OSM_LOG_INFO, + "Missing torus -z link on " + "switch %d,%d,%d GUID 0x%04"PRIx64"\n", + i, j, k, guid_ho); + if (!(sw->ptgrp[5].port_cnt || (t->z_sz == 1) || + ((t->flags & Z_MESH) && (k + 1) == t->z_sz))) + OSM_LOG(&t->osm->log, OSM_LOG_INFO, + "Missing torus +z link on " + "switch %d,%d,%d GUID 0x%04"PRIx64"\n", + i, j, k, guid_ho); +} + +/* + * Returns true if the torus can be successfully routed, false otherwise. 
+ */ +static +bool routable_torus(struct torus *t, struct fabric *f) +{ /* Logs every missing switch and link, then validates each x, y and z ring; any ERR logged below makes the result false. */ + int i, j, k, tmp = -1; /* tmp: Z-column index (i + j * x_sz) of missing switches recorded by rpt_torus_missing(); -1 until one is seen */ + unsigned b2g_cnt, g2b_cnt; + bool success = true; + + t->flags &= ~MSG_DEADLOCK; /* rpt_torus_missing() below sets it again when it detects a deadlock-prone layout */ + + if (t->link_cnt != f->link_cnt || t->switch_cnt != f->switch_cnt) + OSM_LOG(&t->osm->log, OSM_LOG_INFO, + "Warning: Could not construct torus using all " + "known fabric switches and/or links.\n"); + + for (k = 0; k < (int)t->z_sz; k++) + for (j = 0; j < (int)t->y_sz; j++) + for (i = 0; i < (int)t->x_sz; i++) + rpt_torus_missing(t, i, j, k, + t->sw[i][j][k], &tmp); + /* + * Check for multiple failures that create disjoint regions on a ring. + * + * For every ring, b2g_cnt counts the present switches that have no ports + * in the ring's negative-direction port group, and g2b_cnt those that have + * none in the positive-direction port group. Missing switches are skipped. + * Unequal counts, or more than one such break, make the torus unroutable. + * + * x rings use port groups 0 and 1. + */ + for (k = 0; k < (int)t->z_sz; k++) + for (j = 0; j < (int)t->y_sz; j++) { + b2g_cnt = 0; + g2b_cnt = 0; + for (i = 0; i < (int)t->x_sz; i++) { + + if (!t->sw[i][j][k]) + continue; + + if (!t->sw[i][j][k]->ptgrp[0].port_cnt) + b2g_cnt++; + if (!t->sw[i][j][k]->ptgrp[1].port_cnt) + g2b_cnt++; + } + if (b2g_cnt != g2b_cnt) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E32: strange failures in " + "x ring at y=%d z=%d" + " b2g_cnt %u g2b_cnt %u\n", + j, k, b2g_cnt, g2b_cnt); + success = false; + } + if (b2g_cnt > 1) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E33: disjoint failures in " + "x ring at y=%d z=%d\n", j, k); + success = false; + } + } + /* Same check for the y rings, which use port groups 2 and 3. */ + for (i = 0; i < (int)t->x_sz; i++) + for (k = 0; k < (int)t->z_sz; k++) { + b2g_cnt = 0; + g2b_cnt = 0; + for (j = 0; j < (int)t->y_sz; j++) { + + if (!t->sw[i][j][k]) + continue; + + if (!t->sw[i][j][k]->ptgrp[2].port_cnt) + b2g_cnt++; + if (!t->sw[i][j][k]->ptgrp[3].port_cnt) + g2b_cnt++; + } + if (b2g_cnt != g2b_cnt) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E34: strange failures in " + "y ring at x=%d z=%d" + " b2g_cnt %u g2b_cnt %u\n", + i, k, b2g_cnt, g2b_cnt); + success = false; + } + if (b2g_cnt > 1) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E35: disjoint failures in " + "y ring at x=%d z=%d\n", i, k); + success = false; + } + } + /* Same check for the z rings, which use port groups 4 and 5. */ + for (j = 0; j < (int)t->y_sz; j++) + 
for (i = 0; i < (int)t->x_sz; i++) { + b2g_cnt = 0; + g2b_cnt = 0; + for (k = 0; k < (int)t->z_sz; k++) { + + if (!t->sw[i][j][k]) + continue; + + if (!t->sw[i][j][k]->ptgrp[4].port_cnt) + b2g_cnt++; + if (!t->sw[i][j][k]->ptgrp[5].port_cnt) + g2b_cnt++; + } + if (b2g_cnt != g2b_cnt) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E36: strange failures in " + "z ring at x=%d y=%d" + " b2g_cnt %u g2b_cnt %u\n", + i, j, b2g_cnt, g2b_cnt); + success = false; + } + if (b2g_cnt > 1) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E37: disjoint failures in " + "z ring at x=%d y=%d\n", i, j); + success = false; + } + } + + if (t->flags & MSG_DEADLOCK) { /* flag raised by rpt_torus_missing() during the scan above */ + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E38: missing switch topology " + "==> message deadlock!\n"); + success = false; + } + return success; +} + +/* + * Use this function to re-establish the pointers between a torus endpoint + * and an opensm osm_port_t. + * + * Typically this is only needed when "opensm --ucast-cache" is used, and + * a CA link bounces. When the CA port goes away, the osm_port_t object + * is destroyed, invalidating the endpoint osm_port_t pointer. When the + * link comes back, a new osm_port_t object is created with a NULL priv + * member. Thus, when osm_get_torus_sl() is called it is missing the data + * needed to do its work. Use this function to fix things up. + * + * Returns the endpoint that was relinked to osm_port. Returns NULL when + * the port has no remote node, the remote node has no switch object whose + * priv points back at it, the remote switch port has no endpoint, or + * neither end of that endpoint's link carries the port's node GUID. + */ +static +struct endpoint *osm_port_relink_endpoint(const osm_port_t *osm_port) +{ + guid_t node_guid; + uint8_t port_num, r_port_num; + struct t_switch *sw; + struct endpoint *ep = NULL; + osm_switch_t *osm_sw; + osm_physp_t *osm_physp; + osm_node_t *osm_node, *r_osm_node; + + /* + * We need to find the torus endpoint that has the same GUID as + * the osm_port. Rather than search the entire set of endpoints, + * we'll try to follow pointers. 
+ */ + osm_physp = osm_port->p_physp; + osm_node = osm_port->p_node; + port_num = osm_physp_get_port_num(osm_physp); + node_guid = osm_node_get_node_guid(osm_node); + /* + * Switch management port? + * + * If any of the checks below fails we fall through and try the + * remote end of the link instead. + */ + if (port_num == 0 && + osm_node_get_type(osm_node) == IB_NODE_TYPE_SWITCH) { + + osm_sw = osm_node->sw; + if (osm_sw && osm_sw->priv) { + sw = osm_sw->priv; + if (sw->osm_switch == osm_sw && + sw->port[0]->n_id == node_guid) { /* NOTE(review): sw->port[0] is dereferenced without a NULL check - confirm it is always populated */ + + ep = sw->port[0]; + goto relink_priv; + } + } + } + /* + * CA port? Try other end of link. This should also catch a + * router port if it is connected to a switch. + */ + r_osm_node = osm_node_get_remote_node(osm_node, port_num, &r_port_num); + if (!r_osm_node) + goto out; + + osm_sw = r_osm_node->sw; + if (!osm_sw) + goto out; + + sw = osm_sw->priv; + if (!(sw && sw->osm_switch == osm_sw)) + goto out; + + ep = sw->port[r_port_num]; + if (!(ep && ep->link)) + goto out; /* NOTE(review): ep may be non-NULL here (endpoint without a link) and is then returned without being relinked - confirm callers expect this */ + + if (ep->link->end[0].n_id == node_guid) { + ep = &ep->link->end[0]; + goto relink_priv; + } + if (ep->link->end[1].n_id == node_guid) { + ep = &ep->link->end[1]; + goto relink_priv; + } + ep = NULL; + goto out; + +relink_priv: + /* FIXME: + * Unfortunately, we need to cast away const to rebuild the links + * between the torus endpoint and the osm_port_t. + * + * What is really needed is to check whether pr_rcv_get_path_parms() + * needs its port objects to be const. If so, why, and whether + * anything can be done about it. + */ + ((osm_port_t *)osm_port)->priv = ep; + ep->osm_port = (osm_port_t *)osm_port; +out: + return ep; +} + +/* + * Computing LFT entries and path SL values: + * + * For a pristine torus, we compute LFT entries using XYZ DOR, and select + * which direction to route on a ring (i.e., the 1-D torus for the coordinate + * in question) based on shortest path. We compute the SL to use for the + * path based on whether we crossed a dateline (where a ring coordinate + * wraps to zero) for each coordinate. 
+ * + * When there is a link/switch failure, we want to compute LFT entries + * to route around the failure, without changing the path SL. I.e., we + * want the SL to reach a given destination from a given source to be + * independent of the presence or number of failed components in the fabric. + * + * In order to make this feasible, we will assume that no ring is broken + * into disjoint pieces by multiple failures. + * + * We handle failure by attempting to take the long way around any ring + * with connectivity interrupted by failed components, unless the path + * requires a turn on a failed switch. + * + * For paths that require a turn on a failed switch, we head towards the + * failed switch, then turn when progress is blocked by a failure, using a + * turn allowed under XYZ DOR. However, such a path will also require a turn + * that is not a legal XYZ DOR turn, so we construct the SL2VL mapping tables + * such that XYZ DOR turns use one set of VLs and ZYX DOR turns use a + * separate set of VLs. + * + * Under these rules the algorithm guarantees credit-loop-free routing for a + * single failed switch, without any change in path SL values. We can also + * guarantee credit-loop-free routing for failures of multiple switches, if + * they are adjacent in the last DOR direction. Since we use XYZ-DOR, + * that means failed switches at i,j,k and i,j,k+1 will not cause credit + * loops. + * + * These failure routing rules are intended to prevent paths that cross any + * coordinate dateline twice (over and back), so we don't need to worry about + * any ambiguity over which SL to use for such a case. Also, we cannot have + * a ring deadlock when a ring is broken by failure and we route the long + * way around, so we don't need to worry about the impact of such routing + * on SL choice. + */ + +/* + * Functions to set our SL bit encoding for routing/QoS info. Combine the + * results of these functions with bitwise or to get final SL. 
+ * + * SL bits 0-2 encode whether we "looped" in a given direction + * on the torus on the path from source to destination. + * + * SL bit 3 encodes the QoS level. We only support two QoS levels. + * + * Below we assume TORUS_MAX_DIM == 3 and 0 <= coord_dir < TORUS_MAX_DIM. + */ +static inline +unsigned sl_set_use_loop_vl(bool use_loop_vl, unsigned coord_dir) +{ + return (coord_dir < TORUS_MAX_DIM) + ? ((unsigned)use_loop_vl << coord_dir) : 0; +} + +static inline +unsigned sl_set_qos(unsigned qos) +{ + return (unsigned)(!!qos) << TORUS_MAX_DIM; +} + +/* + * Functions to crack our SL bit encoding for routing/QoS info. + */ +static inline +bool sl_get_use_loop_vl(unsigned sl, unsigned coord_dir) +{ + return (coord_dir < TORUS_MAX_DIM) + ? (sl >> coord_dir) & 0x1 : false; +} + +static inline +unsigned sl_get_qos(unsigned sl) +{ + return (sl >> TORUS_MAX_DIM) & 0x1; +} + +/* + * Functions to encode routing/QoS info into VL bits. Combine the resuts of + * these functions with bitwise or to get final VL. + * + * For interswitch links: + * VL bit 0 encodes whether we need to leave on the "loop" VL. + * + * VL bit 1 encodes whether turn is XYZ DOR or ZYX DOR. A 3d mesh/torus + * has 6 turn types: x-y, y-z, x-z, y-x, z-y, z-x. The first three are + * legal XYZ DOR turns, and the second three are legal ZYX DOR turns. + * Straight-through (x-x, y-y, z-z) paths are legal in both DOR variants, + * so we'll assign them to XYZ DOR VLs. + * + * Note that delivery to switch-local ports (i.e. those that source/sink + * traffic, rather than forwarding it) cannot cause a deadlock, so that + * can also use either XYZ or ZYX DOR. + * + * VL bit 2 encodes QoS level. + * + * For end port links: + * VL bit 0 encodes QoS level. + * + * Note that if VL bit encodings are changed here, the available fabric VL + * verification in verify_setup() needs to be updated as well. 
+ */ +static inline +unsigned vl_set_loop_vl(bool use_loop_vl) +{ + return use_loop_vl; /* occupies VL bit 0 */ +} + +static inline +unsigned vl_set_qos_vl(unsigned qos) +{ + return (qos & 0x1) << 2; /* occupies VL bit 2 */ +} + +static inline +unsigned vl_set_ca_qos_vl(unsigned qos) +{ + return qos & 0x1; /* occupies VL bit 0 on end port links */ +} + +static inline +unsigned vl_set_turn_vl(unsigned in_coord_dir, unsigned out_coord_dir) +{ + unsigned vl = 0; + + if (in_coord_dir != TORUS_MAX_DIM && + out_coord_dir != TORUS_MAX_DIM) + vl = (in_coord_dir > out_coord_dir) + ? 0x1 << 1 : 0; /* VL bit 1 is set only when the input direction index exceeds the output one */ + + return vl; +} + /* + * Returns the VL that SL sl maps to for traffic entering sw on input_pt + * and leaving on output_pt. id and od are the ports' port group indices + * divided by two, or TORUS_MAX_DIM when sw is NULL or the port has no + * endpoint. The interswitch encoding is used only when sw is set and od + * differs from TORUS_MAX_DIM; otherwise only the end port QoS bit is + * produced. Each bit is emitted only if data_vls is large enough. + */ +static +unsigned sl2vl_entry(struct torus *t, struct t_switch *sw, + int input_pt, int output_pt, unsigned sl) +{ + unsigned id, od, vl, data_vls; + + if (sw && sw->port[input_pt]) + id = sw->port[input_pt]->pgrp->port_grp / 2; + else + id = TORUS_MAX_DIM; + + if (sw && sw->port[output_pt]) + od = sw->port[output_pt]->pgrp->port_grp / 2; + else + od = TORUS_MAX_DIM; + + if (sw) + data_vls = t->osm->subn.min_sw_data_vls; + else + data_vls = t->osm->subn.min_data_vls; + + vl = 0; + if (sw && od != TORUS_MAX_DIM) { + if (data_vls >= 2) + vl |= vl_set_loop_vl(sl_get_use_loop_vl(sl, od)); + if (data_vls >= 4) + vl |= vl_set_turn_vl(id, od); + if (data_vls >= 8) + vl |= vl_set_qos_vl(sl_get_qos(sl)); + } else { + if (data_vls >= 2) + vl |= vl_set_ca_qos_vl(sl_get_qos(sl)); + } + return vl; +} + /* + * Fills osm_oport_sl2vl with the VL computed by sl2vl_entry() for each of + * the 16 SLs and the given input/output port pair. sw stays NULL when the + * node has no switch object or that object has no priv. Returns without + * touching the table when priv names a t_switch whose osm_switch is not + * this node's switch. + */ +static +void torus_update_osm_sl2vl(void *context, osm_physp_t *osm_phys_port, + uint8_t iport_num, uint8_t oport_num, + ib_slvl_table_t *osm_oport_sl2vl) +{ + osm_node_t *node = osm_physp_get_node_ptr(osm_phys_port); + struct torus_context *ctx = context; + struct t_switch *sw = NULL; + int sl, vl; + + if (node->sw) { + sw = node->sw->priv; + if (sw && sw->osm_switch != node->sw) { + osm_log_t *log = &ctx->osm->log; + guid_t guid; + + guid = osm_node_get_node_guid(node); + OSM_LOG(log, OSM_LOG_INFO, + "Note: osm_switch (GUID 0x%04"PRIx64") " + "not in torus fabric description\n", + cl_ntoh64(guid)); + return; + } + } + for (sl = 0; sl < 16; sl++) { + vl = 
sl2vl_entry(ctx->torus, sw, iport_num, oport_num, sl); + ib_slvl_table_set(osm_oport_sl2vl, sl, vl); + } +} + /* + * Rewrites the first block_length entries of a VL arbitration block in + * place, but only for a switch port whose port group index is + * 2 * TORUS_MAX_DIM; every other port is left untouched. block_num is + * not used by this function. + */ +static +void torus_update_osm_vlarb(void *context, osm_physp_t *osm_phys_port, + uint8_t port_num, ib_vl_arb_table_t *block, + unsigned block_length, unsigned block_num) +{ + osm_node_t *node = osm_physp_get_node_ptr(osm_phys_port); + struct torus_context *ctx = context; + struct t_switch *sw = NULL; + unsigned i, next; + + if (node->sw) { + sw = node->sw->priv; + if (sw && sw->osm_switch != node->sw) { + osm_log_t *log = &ctx->osm->log; + guid_t guid; + + guid = osm_node_get_node_guid(node); + OSM_LOG(log, OSM_LOG_INFO, + "Note: osm_switch (GUID 0x%04"PRIx64") " + "not in torus fabric description\n", + cl_ntoh64(guid)); + return; + } + } + + /* + * If osm_phys_port is a switch port that connects to a CA, then + * we're using at most VL 0 (for QoS level 0) and VL 1 (for QoS + * level 1). We've been passed the VLarb values for a switch + * external port, so we need to fix them up to avoid unexpected + * results depending on how the switch handles VLarb values for + * unprogrammed VLs. + * + * For inter-switch links torus-2QoS uses VLs 0-3 to implement + * QoS level 0, and VLs 4-7 to implement QoS level 1. + * + * So, leave VL 0 alone, remap VL 4 to VL 1, zero out the rest, + * and compress out the zero entries to the end. + */ + if (!sw || !port_num || !sw->port[port_num] || + sw->port[port_num]->pgrp->port_grp != 2 * TORUS_MAX_DIM) + return; + + next = 0; /* index of the slot that receives the next kept entry */ + for (i = 0; i < block_length; i++) { + switch (block->vl_entry[i].vl) { + case 4: + block->vl_entry[i].vl = 1; + /* fall through */ + case 0: + block->vl_entry[next].vl = block->vl_entry[i].vl; + block->vl_entry[next].weight = block->vl_entry[i].weight; + next++; + /* + * If we didn't update vl_entry[i] in place, + * fall through to zero it out. 
+ * After the increment, next > i means the entry was + * written to slot i itself. + */ + if (next > i) + break; + default: + block->vl_entry[i].vl = 0; + block->vl_entry[i].weight = 0; + break; + } + } +} + +/* + * Computes the path lengths *vl0_len and *vl1_len to get from src + * to dst on a ring with count switches. + * + * *vl0_len is the path length for a direct path; it corresponds to a path + * that should be assigned to use VL0 in a switch. *vl1_len is the path + * length for a path that wraps around the ring, i.e. where the ring index + * goes from count to zero or from zero to count. It corresponds to the path + * that should be assigned to use VL1 in a switch. + */ +static +void get_pathlen(unsigned src, unsigned dst, unsigned count, + unsigned *vl0_len, unsigned *vl1_len) +{ + unsigned s, l; /* s is the smaller ring index, l the larger; they are equal when src == dst */ + + if (dst > src) { + s = src; + l = dst; + } else { + s = dst; + l = src; + } + *vl0_len = l - s; + *vl1_len = s + count - l; +} + +/* + * Returns a positive number if we should take the "positive" ring direction + * to reach dst from src, a negative number if we should take the "negative" + * ring direction, and 0 if src and dst are the same. The choice is strictly + * based on which path is shorter. When both paths have the same length the + * direct (non-wrapping) path is chosen. + */ +static +int ring_dir_idx(unsigned src, unsigned dst, unsigned count) +{ + int r; + unsigned vl0_len, vl1_len; + + if (dst == src) + return 0; + + get_pathlen(src, dst, count, &vl0_len, &vl1_len); + + if (dst > src) + r = vl0_len <= vl1_len ? 1 : -1; + else + r = vl0_len <= vl1_len ? -1 : 1; + + return r; +} + +/* + * Returns true if the VL1 path should be used to reach src from dst on a + * ring, based on which path is shorter. The direct (VL0) path wins ties. + */ +static +bool use_vl1(unsigned src, unsigned dst, unsigned count) +{ + unsigned vl0_len, vl1_len; + + get_pathlen(src, dst, count, &vl0_len, &vl1_len); + + return vl0_len <= vl1_len ? 
false : true; +} + +/* + * Returns the next switch in the ring of switches along coordinate direction + * cdir, in the positive ring direction if rdir is positive, and in the + * negative ring direction if rdir is negative. + * + * Returns NULL if rdir is zero, or there is no next switch. + */ +static +struct t_switch *ring_next_sw(struct t_switch *sw, unsigned cdir, int rdir) +{ + unsigned pt_grp, far_end = 0; + + if (!rdir) + return NULL; + /* + * Recall that links are installed into the torus so that their 1 end + * is in the "positive" coordinate direction relative to their 0 end + * (see link_tswitches() and connect_tlink()). Recall also that for + * interswitch links, all links in a given switch port group have the + * same endpoints, so we just need to look at the first link. + */ + pt_grp = 2 * cdir; /* even port group: negative ring direction */ + if (rdir > 0) { + pt_grp++; /* odd port group: positive ring direction */ + far_end = 1; + } + + if (!sw->ptgrp[pt_grp].port_cnt) + return NULL; /* no links in that direction, so no next switch */ + + return sw->ptgrp[pt_grp].port[0]->link->end[far_end].sw; +} + +/* + * Returns a positive number if we should take the "positive" ring direction + * to reach dsw from ssw, a negative number if we should take the "negative" + * ring direction, and 0 if src and dst are the same, or if dsw is not + * reachable from ssw because the path is interrupted by failure. 
+ */ +static +int ring_dir_path(struct torus *t, unsigned cdir, + struct t_switch *ssw, struct t_switch *dsw) +{ + int d = 0; + struct t_switch *sw; + + switch (cdir) { + case 0: + d = ring_dir_idx(ssw->i, dsw->i, t->x_sz); + break; + case 1: + d = ring_dir_idx(ssw->j, dsw->j, t->y_sz); + break; + case 2: + d = ring_dir_idx(ssw->k, dsw->k, t->z_sz); + break; + default: + break; + } + if (!d) + goto out; + + sw = ssw; + while (sw) { + sw = ring_next_sw(sw, cdir, d); + if (sw == dsw) + goto out; + } + d *= -1; + sw = ssw; + while (sw) { + sw = ring_next_sw(sw, cdir, d); + if (sw == dsw) + goto out; + } + d = 0; +out: + return d; +} + +/* + * Returns true, and sets *pt_grp to the port group index to use for the + * next hop, if it is possible to make progress from ssw to dsw along the + * coordinate direction cdir, taking into account whether there are + * interruptions in the path. + * + * This next hop result can be used without worrying about ring deadlocks - + * if we don't choose the shortest path it is because there is a failure in + * the ring, which removes the possibilility of a ring deadlock on that ring. + */ +static +bool next_hop_path(struct torus *t, unsigned cdir, + struct t_switch *ssw, struct t_switch *dsw, + unsigned *pt_grp) +{ + struct t_switch *tsw = NULL; + bool success = false; + int d; + + /* + * If the path from ssw to dsw turns, this is the switch where the + * turn happens. 
+ */ + switch (cdir) { + case 0: + tsw = t->sw[dsw->i][ssw->j][ssw->k]; + break; + case 1: + tsw = t->sw[ssw->i][dsw->j][ssw->k]; + break; + case 2: + tsw = t->sw[ssw->i][ssw->j][dsw->k]; + break; + default: + goto out; + } + if (tsw) { + d = ring_dir_path(t, cdir, ssw, tsw); + cdir *= 2; + if (d > 0) + *pt_grp = cdir + 1; + else if (d < 0) + *pt_grp = cdir; + else + goto out; + success = true; + } +out: + return success; +} + +/* + * Returns true, and sets *pt_grp to the port group index to use for the + * next hop, if it is possible to make progress from ssw to dsw along the + * coordinate direction cdir. This decision is made strictly on a + * shortest-path basis without regard for path availability. + */ +static +bool next_hop_idx(struct torus *t, unsigned cdir, + struct t_switch *ssw, struct t_switch *dsw, + unsigned *pt_grp) +{ + int d; + unsigned g; + bool success = false; + + switch (cdir) { + case 0: + d = ring_dir_idx(ssw->i, dsw->i, t->x_sz); + break; + case 1: + d = ring_dir_idx(ssw->j, dsw->j, t->y_sz); + break; + case 2: + d = ring_dir_idx(ssw->k, dsw->k, t->z_sz); + break; + default: + goto out; + } + + cdir *= 2; + if (d > 0) + g = cdir + 1; + else if (d < 0) + g = cdir; + else + goto out; + + if (!ssw->ptgrp[g].port_cnt) + goto out; + + *pt_grp = g; + success = true; +out: + return success; +} + +static +void warn_on_routing(const char *msg, + struct t_switch *sw, struct t_switch *dsw) +{ + OSM_LOG(&sw->torus->osm->log, OSM_LOG_ERROR, + "%s from sw 0x%04"PRIx64" (%d,%d,%d) " + "to sw 0x%04"PRIx64" (%d,%d,%d)\n", + msg, cl_ntoh64(sw->n_id), sw->i, sw->j, sw->k, + cl_ntoh64(dsw->n_id), dsw->i, dsw->j, dsw->k); +} + +static +bool next_hop_x(struct torus *t, + struct t_switch *ssw, struct t_switch *dsw, unsigned *pt_grp) +{ + if (t->sw[dsw->i][ssw->j][ssw->k]) + /* + * The next turning switch on this path is available, + * so head towards it by the shortest available path. 
+ */ + return next_hop_path(t, 0, ssw, dsw, pt_grp); + else + /* + * The next turning switch on this path is not + * available, so head towards it in the shortest + * path direction. + */ + return next_hop_idx(t, 0, ssw, dsw, pt_grp); +} + +static +bool next_hop_y(struct torus *t, + struct t_switch *ssw, struct t_switch *dsw, unsigned *pt_grp) +{ + if (t->sw[ssw->i][dsw->j][ssw->k]) + /* + * The next turning switch on this path is available, + * so head towards it by the shortest available path. + */ + return next_hop_path(t, 1, ssw, dsw, pt_grp); + else + /* + * The next turning switch on this path is not + * available, so head towards it in the shortest + * path direction. + */ + return next_hop_idx(t, 1, ssw, dsw, pt_grp); +} + +static +bool next_hop_z(struct torus *t, + struct t_switch *ssw, struct t_switch *dsw, unsigned *pt_grp) +{ + return next_hop_path(t, 2, ssw, dsw, pt_grp); +} + +/* + * Returns the port number on *sw to use to reach *dsw, or -1 if unable to + * route. + */ +static +int lft_port(struct torus *t, + struct t_switch *sw, struct t_switch *dsw, + bool update_port_cnt, bool ca) +{ + unsigned g, p; + struct port_grp *pg; + + /* + * The IBA does not provide a way to preserve path history for + * routing decisions and VL assignment, and the only mechanism to + * provide global fabric knowledge to the routing engine is via + * the four SL bits. This severely constrains the ability to deal + * with missing/dead switches. + * + * Also, if routing a torus with XYZ-DOR, the only way to route + * around a missing/dead switch is to introduce a turn that is + * illegal under XYZ-DOR. + * + * But here's what we can do: + * + * We have a VL bit we use to flag illegal turns, thus putting the + * hop directly after an illegal turn on a separate set of VLs. + * Unfortunately, since there is no path history, the _second_ + * and subsequent hops after an illegal turn use the standard + * XYZ-DOR VL set. This is enough to introduce credit loops in + * many cases. 
+ * + * To minimize the number of cases such illegal turns can introduce + * credit loops, we try to introduce the illegal turn as late in a + * path as possible. + * + * Define a turning switch as a switch where a path turns from one + * coordinate direction onto another. If a turning switch in a path + * is missing, construct the LFT entries so that the path progresses + * as far as possible on the shortest path to the turning switch. + * When progress is not possible, turn onto the next coordinate + * direction. + * + * The next turn after that will be an illegal turn, after which + * point the path will continue to use a standard XYZ-DOR path. + */ + if (dsw->i != sw->i) { + + if (next_hop_x(t, sw, dsw, &g)) + goto done; + /* + * This path has made as much progress in this direction as + * is possible, so turn it now. + */ + if (dsw->j != sw->j && next_hop_y(t, sw, dsw, &g)) + goto done; + + if (dsw->k != sw->k && next_hop_z(t, sw, dsw, &g)) + goto done; + + warn_on_routing("Error: unable to route", sw, dsw); + goto no_route; + } else if (dsw->j != sw->j) { + + if (next_hop_y(t, sw, dsw, &g)) + goto done; + + if (dsw->k != sw->k && next_hop_z(t, sw, dsw, &g)) + goto done; + + warn_on_routing("Error: unable to route", sw, dsw); + goto no_route; + } else { + if (dsw->k == sw->k) + warn_on_routing("Warning: bad routing", sw, dsw); + + if (next_hop_z(t, sw, dsw, &g)) + goto done; + + warn_on_routing("Error: unable to route", sw, dsw); + goto no_route; + } +done: + pg = &sw->ptgrp[g]; + if (!pg->port_cnt) + goto no_route; + + if (update_port_cnt) { + if (ca) + p = pg->ca_dlid_cnt++ % pg->port_cnt; + else + p = pg->sw_dlid_cnt++ % pg->port_cnt; + } else { + /* + * If we're not updating port counts, then we're just running + * routes for SL path checking, and it doesn't matter which + * of several parallel links we use. Use the first one. + */ + p = 0; + } + p = pg->port[p]->port; + + return p; + +no_route: + /* + * We can't get there from here. 
+ */ + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E39: routing on sw 0x%04"PRIx64": sending " + "traffic for dest sw 0x%04"PRIx64" to port %u\n", + cl_ntoh64(sw->n_id), cl_ntoh64(dsw->n_id), OSM_NO_PATH); + return -1; +} + +static +bool get_lid(struct port_grp *pg, unsigned p, + uint16_t *dlid_base, uint8_t *dlid_lmc, bool *ca) +{ + struct endpoint *ep; + osm_port_t *osm_port; + + if (p >= pg->port_cnt) { + OSM_LOG(&pg->sw->torus->osm->log, OSM_LOG_ERROR, + "ERR 4E3A: Port group index %u too large: sw " + "0x%04"PRIx64" pt_grp %u pt_grp_cnt %u\n", + p, cl_ntoh64(pg->sw->n_id), + (unsigned)pg->port_grp, (unsigned)pg->port_cnt); + return false; + } + if (pg->port[p]->type == SRCSINK) { + ep = pg->port[p]; + if (ca) + *ca = false; + } else if (pg->port[p]->type == PASSTHRU && + pg->port[p]->link->end[1].type == SRCSINK) { + /* + * If this port is connected via a link to a CA, then we + * know link->end[0] is the switch end and link->end[1] is + * the CA end; see build_ca_link() and link_srcsink(). 
+ */ + ep = &pg->port[p]->link->end[1]; + if (ca) + *ca = true; + } else { + OSM_LOG(&pg->sw->torus->osm->log, OSM_LOG_ERROR, + "ERR 4E3B: Switch 0x%04"PRIx64" port %d improperly connected\n", + cl_ntoh64(pg->sw->n_id), pg->port[p]->port); + return false; + } + osm_port = ep->osm_port; + if (!(osm_port && osm_port->priv == ep)) { + OSM_LOG(&pg->sw->torus->osm->log, OSM_LOG_ERROR, + "ERR 4E3C: ep->osm_port->priv != ep " + "for sw 0x%04"PRIx64" port %d\n", + cl_ntoh64(((struct t_switch *)(ep->sw))->n_id), ep->port); + return false; + } + *dlid_base = cl_ntoh16(osm_physp_get_base_lid(osm_port->p_physp)); + *dlid_lmc = osm_physp_get_lmc(osm_port->p_physp); + + return true; +} + +static +bool torus_lft(struct torus *t, struct t_switch *sw) +{ + bool success = true; + int dp; + unsigned p, s; + uint16_t l, dlid_base; + uint8_t dlid_lmc; + bool ca; + struct port_grp *pgrp; + struct t_switch *dsw; + osm_switch_t *osm_sw; + uint8_t order[IB_NODE_NUM_PORTS_MAX+1]; + + if (!(sw->osm_switch && sw->osm_switch->priv == sw)) { + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E3D: sw->osm_switch->priv != sw " + "for sw 0x%04"PRIx64"\n", cl_ntoh64(sw->n_id)); + return false; + } + osm_sw = sw->osm_switch; + memset(osm_sw->new_lft, OSM_NO_PATH, osm_sw->lft_size); + + for (s = 0; s < t->switch_cnt; s++) { + + dsw = t->sw_pool[s]; + pgrp = &dsw->ptgrp[2 * TORUS_MAX_DIM]; + + memset(order, IB_INVALID_PORT_NUM, sizeof(order)); + for (p = 0; p < pgrp->port_cnt; p++) + order[pgrp->port[p]->port] = p; + + for (p = 0; p < ARRAY_SIZE(order); p++) { + + uint8_t px = order[t->port_order[p]]; + + if (px == IB_INVALID_PORT_NUM) + continue; + + if (!get_lid(pgrp, px, &dlid_base, &dlid_lmc, &ca)) + return false; + + if (sw->n_id == dsw->n_id) + dp = pgrp->port[px]->port; + else + dp = lft_port(t, sw, dsw, true, ca); + /* + * LMC > 0 doesn't really make sense for torus-2QoS. + * So, just make sure traffic gets delivered if + * non-zero LMC is used. 
+ */ + if (dp >= 0) + for (l = 0; l < (1U << dlid_lmc); l++) + osm_sw->new_lft[dlid_base + l] = dp; + else + success = false; + } + } + return success; +} + +static +osm_mtree_node_t *mcast_stree_branch(struct t_switch *sw, osm_switch_t *osm_sw, + osm_mgrp_box_t *mgb, unsigned depth, + unsigned *port_cnt, unsigned *max_depth) +{ + osm_mtree_node_t *mtn = NULL; + osm_mcast_tbl_t *mcast_tbl, *ds_mcast_tbl; + osm_node_t *ds_node; + struct t_switch *ds_sw; + struct port_grp *ptgrp; + struct link *link; + struct endpoint *port; + unsigned g, p; + unsigned mcast_fwd_ports = 0, mcast_end_ports = 0; + + depth++; + + if (osm_sw->priv != sw) { + OSM_LOG(&sw->torus->osm->log, OSM_LOG_ERROR, + "ERR 4E3E: osm_sw (GUID 0x%04"PRIx64") " + "not in torus fabric description\n", + cl_ntoh64(osm_node_get_node_guid(osm_sw->p_node))); + goto out; + } + if (!osm_switch_supports_mcast(osm_sw)) { + OSM_LOG(&sw->torus->osm->log, OSM_LOG_ERROR, + "ERR 4E3F: osm_sw (GUID 0x%04"PRIx64") " + "does not support multicast\n", + cl_ntoh64(osm_node_get_node_guid(osm_sw->p_node))); + goto out; + } + mtn = osm_mtree_node_new(osm_sw); + if (!mtn) { + OSM_LOG(&sw->torus->osm->log, OSM_LOG_ERROR, + "ERR 4E46: Insufficient memory to build multicast tree\n"); + goto out; + } + mcast_tbl = osm_switch_get_mcast_tbl_ptr(osm_sw); + /* + * Recurse to downstream switches, i.e. those closer to master + * spanning tree branch tips. + * + * Note that if there are multiple ports in this port group, i.e., + * multiple parallel links, we can pick any one of them to use for + * any individual MLID without causing loops. Pick one based on MLID + * for now, until someone turns up evidence we need to be smarter. + * + * Also, it might be we got called in a window between a switch getting + * removed from the fabric, and torus-2QoS getting to rebuild its + * fabric representation. If that were to happen, our next hop + * osm_switch pointer might be stale. Look it up via opensm's fabric + * description to be sure it's not. 
+ */ + for (g = 0; g < 2 * TORUS_MAX_DIM; g++) { + ptgrp = &sw->ptgrp[g]; + if (!ptgrp->to_stree_tip) + continue; + + p = mgb->mlid % ptgrp->port_cnt;/* port # in port group */ + p = ptgrp->port[p]->port; /* now port # in switch */ + + ds_node = osm_node_get_remote_node(osm_sw->p_node, p, NULL); + ds_sw = ptgrp->to_stree_tip->sw; + + if (!(ds_node && ds_node->sw && + ds_sw->osm_switch == ds_node->sw)) { + OSM_LOG(&sw->torus->osm->log, OSM_LOG_ERROR, + "ERR 4E40: stale pointer to osm_sw " + "(GUID 0x%04"PRIx64")\n", cl_ntoh64(ds_sw->n_id)); + continue; + } + mtn->child_array[p] = + mcast_stree_branch(ds_sw, ds_node->sw, mgb, + depth, port_cnt, max_depth); + if (!mtn->child_array[p]) + continue; + + osm_mcast_tbl_set(mcast_tbl, mgb->mlid, p); + mcast_fwd_ports++; + /* + * Since we forward traffic for this multicast group on this + * port, cause the switch on the other end of the link + * to forward traffic back to us. Do it now since have at + * hand the link used; otherwise it'll be hard to figure out + * later, and if we get it wrong we get a MC routing loop. + */ + link = sw->port[p]->link; + ds_mcast_tbl = osm_switch_get_mcast_tbl_ptr(ds_node->sw); + + if (&link->end[0] == sw->port[p]) + osm_mcast_tbl_set(ds_mcast_tbl, mgb->mlid, + link->end[1].port); + else + osm_mcast_tbl_set(ds_mcast_tbl, mgb->mlid, + link->end[0].port); + } + /* + * Add any host ports marked as in mcast group into spanning tree. 
+ */ + ptgrp = &sw->ptgrp[2 * TORUS_MAX_DIM]; + for (p = 0; p < ptgrp->port_cnt; p++) { + port = ptgrp->port[p]; + if (port->tmp) { + port->tmp = NULL; + mtn->child_array[port->port] = OSM_MTREE_LEAF; + osm_mcast_tbl_set(mcast_tbl, mgb->mlid, port->port); + mcast_end_ports++; + } + } + if (!(mcast_end_ports || mcast_fwd_ports)) { + osm_mtree_destroy(mtn); + mtn = NULL; + } else if (depth > *max_depth) + *max_depth = depth; + + *port_cnt += mcast_end_ports; +out: + return mtn; +} + +static +osm_port_t *next_mgrp_box_port(osm_mgrp_box_t *mgb, + cl_list_item_t **list_iterator, + cl_map_item_t **map_iterator) +{ + osm_mgrp_t *mgrp; + osm_mcm_port_t *mcm_port; + osm_port_t *osm_port = NULL; + cl_map_item_t *m_item = *map_iterator; + cl_list_item_t *l_item = *list_iterator; + +next_mgrp: + if (!l_item) + l_item = cl_qlist_head(&mgb->mgrp_list); + if (l_item == cl_qlist_end(&mgb->mgrp_list)) { + l_item = NULL; + goto out; + } + mgrp = cl_item_obj(l_item, mgrp, list_item); + + if (!m_item) + m_item = cl_qmap_head(&mgrp->mcm_port_tbl); + if (m_item == cl_qmap_end(&mgrp->mcm_port_tbl)) { + m_item = NULL; + l_item = cl_qlist_next(l_item); + goto next_mgrp; + } + mcm_port = cl_item_obj(m_item, mcm_port, map_item); + m_item = cl_qmap_next(m_item); + osm_port = mcm_port->port; +out: + *list_iterator = l_item; + *map_iterator = m_item; + return osm_port; +} + +static +ib_api_status_t torus_mcast_stree(void *context, osm_mgrp_box_t *mgb) +{ + struct torus_context *ctx = context; + struct torus *t = ctx->torus; + cl_map_item_t *m_item = NULL; + cl_list_item_t *l_item = NULL; + osm_port_t *osm_port; + osm_switch_t *osm_sw; + struct endpoint *port; + unsigned port_cnt = 0, max_depth = 0; + + osm_purge_mtree(&ctx->osm->sm, mgb); + + /* + * Build a spanning tree for a multicast group by first marking + * the torus endpoints that are participating in the group. + * Then do a depth-first search of the torus master spanning + * tree to build up the spanning tree specific to this group. 
+ * + * Since the torus master spanning tree is constructed specifically + * to guarantee that multicast will not deadlock against unicast + * when they share VLs, we can be sure that any multicast group + * spanning tree constructed this way has the same property. + */ + while ((osm_port = next_mgrp_box_port(mgb, &l_item, &m_item))) { + port = osm_port->priv; + if (!(port && port->osm_port == osm_port)) { + port = osm_port_relink_endpoint(osm_port); + if (!port) { + guid_t id; + id = osm_node_get_node_guid(osm_port->p_node); + OSM_LOG(&ctx->osm->log, OSM_LOG_ERROR, + "ERR 4E41: osm_port (GUID 0x%04"PRIx64") " + "not in torus fabric description\n", + cl_ntoh64(id)); + continue; + } + } + /* + * If this is a CA port, mark the switch port at the + * other end of this port's link. + * + * By definition, a CA port is connected to end[1] of a link, + * and the switch port is end[0]. See build_ca_link() and + * link_srcsink(). + */ + if (port->link) + port = &port->link->end[0]; + port->tmp = osm_port; + } + /* + * It might be we got called in a window between a switch getting + * removed from the fabric, and torus-2QoS getting to rebuild its + * fabric representation. If that were to happen, our + * master_stree_root->osm_switch pointer might be stale. Look up + * the osm_switch by GUID to be sure it's not. + * + * Also, call into mcast_stree_branch with depth = -1, because + * depth at root switch needs to be 0. 
+ */ + osm_sw = (osm_switch_t *)cl_qmap_get(&ctx->osm->subn.sw_guid_tbl, + t->master_stree_root->n_id); + if (!(osm_sw && t->master_stree_root->osm_switch == osm_sw)) { + OSM_LOG(&ctx->osm->log, OSM_LOG_ERROR, + "ERR 4E42: stale pointer to osm_sw (GUID 0x%04"PRIx64")\n", + cl_ntoh64(t->master_stree_root->n_id)); + return IB_ERROR; + } + mgb->root = mcast_stree_branch(t->master_stree_root, osm_sw, + mgb, -1, &port_cnt, &max_depth); + + OSM_LOG(&ctx->osm->log, OSM_LOG_VERBOSE, + "Configured MLID 0x%X for %u ports, max tree depth = %u\n", + mgb->mlid, port_cnt, max_depth); + + return IB_SUCCESS; +} + +static +bool good_xy_ring(struct torus *t, const int x, const int y, const int z) +{ + struct t_switch ****sw = t->sw; + bool good_ring = true; + int x_tst, y_tst; + + for (x_tst = 0; x_tst < t->x_sz && good_ring; x_tst++) + good_ring = sw[x_tst][y][z]; + + for (y_tst = 0; y_tst < t->y_sz && good_ring; y_tst++) + good_ring = sw[x][y_tst][z]; + + return good_ring; +} + +static +struct t_switch *find_plane_mid(struct torus *t, const int z) +{ + int x, dx, xm = t->x_sz / 2; + int y, dy, ym = t->y_sz / 2; + struct t_switch ****sw = t->sw; + + if (good_xy_ring(t, xm, ym, z)) + return sw[xm][ym][z]; + + for (dx = 1, dy = 1; dx <= xm && dy <= ym; dx++, dy++) { + + x = canonicalize(xm - dx, t->x_sz); + y = canonicalize(ym - dy, t->y_sz); + if (good_xy_ring(t, x, y, z)) + return sw[x][y][z]; + + x = canonicalize(xm + dx, t->x_sz); + y = canonicalize(ym + dy, t->y_sz); + if (good_xy_ring(t, x, y, z)) + return sw[x][y][z]; + } + return NULL; +} + +static +struct t_switch *find_stree_root(struct torus *t) +{ + int x, y, z, dz, zm = t->z_sz / 2; + struct t_switch ****sw = t->sw; + struct t_switch *root; + bool good_plane; + + /* + * Look for a switch near the "center" (wrt. the datelines) of the + * torus, as that will be the most optimum spanning tree root. 
Use + * a search that is not exhaustive, on the theory that this routing + * engine isn't useful anyway if too many switches are missing. + * + * Also, want to pick an x-y plane with no missing switches, so that + * the master spanning tree construction algorithm doesn't have to + * deal with needing a turn on a missing switch. + */ + for (dz = 0; dz <= zm; dz++) { + + z = canonicalize(zm - dz, t->z_sz); + good_plane = true; + for (y = 0; y < t->y_sz && good_plane; y++) + for (x = 0; x < t->x_sz && good_plane; x++) + good_plane = sw[x][y][z]; + + if (good_plane) { + root = find_plane_mid(t, z); + if (root) + goto out; + } + if (!dz) + continue; + + z = canonicalize(zm + dz, t->z_sz); + good_plane = true; + for (y = 0; y < t->y_sz && good_plane; y++) + for (x = 0; x < t->x_sz && good_plane; x++) + good_plane = sw[x][y][z]; + + if (good_plane) { + root = find_plane_mid(t, z); + if (root) + goto out; + } + } + /* + * Note that torus-2QoS can route a torus that is missing an entire + * column (switches with x,y constant, for all z values) without + * deadlocks. + * + * if we've reached this point, we must have a column of missing + * switches, as routable_torus() would have returned false for + * any other configuration of missing switches that made it through + * the above. + * + * So any switch in the mid-z plane will do as the root. 
+ */ + root = find_plane_mid(t, zm); +out: + return root; +} + +static +bool sw_in_master_stree(struct t_switch *sw) +{ + int g; + bool connected; + + connected = sw == sw->torus->master_stree_root; + for (g = 0; g < 2 * TORUS_MAX_DIM; g++) + connected = connected || sw->ptgrp[g].to_stree_root; + + return connected; +} + +static +void grow_master_stree_branch(struct t_switch *root, struct t_switch *tip, + unsigned to_root_pg, unsigned to_tip_pg) +{ + root->ptgrp[to_tip_pg].to_stree_tip = &tip->ptgrp[to_root_pg]; + tip->ptgrp[to_root_pg].to_stree_root = &root->ptgrp[to_tip_pg]; +} + +static +void build_master_stree_branch(struct t_switch *branch_root, int cdir) +{ + struct t_switch *sw, *n_sw, *p_sw; + unsigned l, idx, cnt, pg, ng; + + switch (cdir) { + case 0: + idx = branch_root->i; + cnt = branch_root->torus->x_sz; + break; + case 1: + idx = branch_root->j; + cnt = branch_root->torus->y_sz; + break; + case 2: + idx = branch_root->k; + cnt = branch_root->torus->z_sz; + break; + default: + goto out; + } + /* + * This algorithm intends that a spanning tree branch never crosses + * a dateline unless the 1-D ring for which we're building the branch + * is interrupted by failure. We need that guarantee to prevent + * multicast/unicast credit loops. 
+ */ + n_sw = branch_root; /* tip of negative cdir branch */ + ng = 2 * cdir; /* negative cdir port group index */ + p_sw = branch_root; /* tip of positive cdir branch */ + pg = 2 * cdir + 1; /* positive cdir port group index */ + + for (l = idx; n_sw && l >= 1; l--) { + sw = ring_next_sw(n_sw, cdir, -1); + if (sw && !sw_in_master_stree(sw)) { + grow_master_stree_branch(n_sw, sw, pg, ng); + n_sw = sw; + } else + n_sw = NULL; + } + for (l = idx; p_sw && l < (cnt - 1); l++) { + sw = ring_next_sw(p_sw, cdir, 1); + if (sw && !sw_in_master_stree(sw)) { + grow_master_stree_branch(p_sw, sw, ng, pg); + p_sw = sw; + } else + p_sw = NULL; + } + if (n_sw && p_sw) + goto out; + /* + * At least one branch couldn't grow to the dateline for this ring. + * That means it is acceptable to grow the branch by crossing the + * dateline. + */ + for (l = 0; l < cnt; l++) { + if (n_sw) { + sw = ring_next_sw(n_sw, cdir, -1); + if (sw && !sw_in_master_stree(sw)) { + grow_master_stree_branch(n_sw, sw, pg, ng); + n_sw = sw; + } else + n_sw = NULL; + } + if (p_sw) { + sw = ring_next_sw(p_sw, cdir, 1); + if (sw && !sw_in_master_stree(sw)) { + grow_master_stree_branch(p_sw, sw, ng, pg); + p_sw = sw; + } else + p_sw = NULL; + } + if (!(n_sw || p_sw)) + break; + } +out: + return; +} + +static +bool torus_master_stree(struct torus *t) +{ + int i, j, k; + bool success = false; + struct t_switch *stree_root = find_stree_root(t); + + if (stree_root) + build_master_stree_branch(stree_root, 0); + else + goto out; + + k = stree_root->k; + for (i = 0; i < t->x_sz; i++) { + j = stree_root->j; + if (t->sw[i][j][k]) + build_master_stree_branch(t->sw[i][j][k], 1); + + for (j = 0; j < t->y_sz; j++) + if (t->sw[i][j][k]) + build_master_stree_branch(t->sw[i][j][k], 2); + } + t->master_stree_root = stree_root; + /* + * At this point we should have a master spanning tree that contains + * every present switch, for all fabrics that torus-2QoS can route + * without deadlocks. 
Make sure this is the case; otherwise warn + * and return failure so we get bug reports. + */ + success = true; + for (i = 0; i < t->x_sz; i++) + for (j = 0; j < t->y_sz; j++) + for (k = 0; k < t->z_sz; k++) { + struct t_switch *sw = t->sw[i][j][k]; + if (!sw || sw_in_master_stree(sw)) + continue; + + success = false; + OSM_LOG(&t->osm->log, OSM_LOG_ERROR, + "ERR 4E43: sw 0x%04"PRIx64" (%d,%d,%d) not in " + "torus multicast master spanning tree\n", + cl_ntoh64(sw->n_id), i, j, k); + } +out: + return success; +} + +int route_torus(struct torus *t) +{ + int s; + bool success = true; + + for (s = 0; s < (int)t->switch_cnt; s++) + success = torus_lft(t, t->sw_pool[s]) && success; + + success = success && torus_master_stree(t); + + return success ? 0 : -1; +} + +uint8_t torus_path_sl(void *context, uint8_t path_sl_hint, + const ib_net16_t slid, const ib_net16_t dlid) +{ + struct torus_context *ctx = context; + osm_opensm_t *p_osm = ctx->osm; + osm_log_t *log = &p_osm->log; + osm_port_t *osm_sport, *osm_dport; + struct endpoint *sport, *dport; + struct t_switch *ssw, *dsw; + struct torus *t; + guid_t guid; + unsigned sl = 0; + + osm_sport = osm_get_port_by_lid(&p_osm->subn, slid); + if (!osm_sport) + goto out; + + osm_dport = osm_get_port_by_lid(&p_osm->subn, dlid); + if (!osm_dport) + goto out; + + sport = osm_sport->priv; + if (!(sport && sport->osm_port == osm_sport)) { + sport = osm_port_relink_endpoint(osm_sport); + if (!sport) { + guid = osm_node_get_node_guid(osm_sport->p_node); + OSM_LOG(log, OSM_LOG_INFO, + "Note: osm_sport (GUID 0x%04"PRIx64") " + "not in torus fabric description\n", + cl_ntoh64(guid)); + goto out; + } + } + dport = osm_dport->priv; + if (!(dport && dport->osm_port == osm_dport)) { + dport = osm_port_relink_endpoint(osm_dport); + if (!dport) { + guid = osm_node_get_node_guid(osm_dport->p_node); + OSM_LOG(log, OSM_LOG_INFO, + "Note: osm_dport (GUID 0x%04"PRIx64") " + "not in torus fabric description\n", + cl_ntoh64(guid)); + goto out; + } + } + 
/* + * We're only supposed to be called for CA ports, and maybe + * switch management ports. + */ + if (sport->type != SRCSINK) { + guid = osm_node_get_node_guid(osm_sport->p_node); + OSM_LOG(log, OSM_LOG_INFO, + "Error: osm_sport (GUID 0x%04"PRIx64") " + "not a data src/sink port\n", cl_ntoh64(guid)); + goto out; + } + if (dport->type != SRCSINK) { + guid = osm_node_get_node_guid(osm_dport->p_node); + OSM_LOG(log, OSM_LOG_INFO, + "Error: osm_dport (GUID 0x%04"PRIx64") " + "not a data src/sink port\n", cl_ntoh64(guid)); + goto out; + } + /* + * By definition, a CA port is connected to end[1] of a link, and + * the switch port is end[0]. See build_ca_link() and link_srcsink(). + */ + if (sport->link) { + ssw = sport->link->end[0].sw; + } else { + ssw = sport->sw; + } + if (dport->link) + dsw = dport->link->end[0].sw; + else + dsw = dport->sw; + + t = ssw->torus; + + sl = sl_set_use_loop_vl(use_vl1(ssw->i, dsw->i, t->x_sz), 0); + sl |= sl_set_use_loop_vl(use_vl1(ssw->j, dsw->j, t->y_sz), 1); + sl |= sl_set_use_loop_vl(use_vl1(ssw->k, dsw->k, t->z_sz), 2); + sl |= sl_set_qos(sl_get_qos(path_sl_hint)); +out: + return sl; +} + +static +void sum_vlarb_weights(const char *vlarb_str, + unsigned total_weight[IB_MAX_NUM_VLS]) +{ + unsigned i = 0, v, vl = 0; + char *end; + + while (*vlarb_str && i++ < 2 * IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK) { + v = strtoul(vlarb_str, &end, 0); + if (*end) + end++; + vlarb_str = end; + if (i & 0x1) + vl = v & 0xf; + else + total_weight[vl] += v & 0xff; + } +} + +static +int uniform_vlarb_weight_value(unsigned *weight, unsigned count) +{ + int i, v = weight[0]; + + for (i = 1; i < count; i++) { + if (v != weight[i]) + return -1; + } + return v; +} + +static +void check_vlarb_config(const char *vlarb_str, bool is_default, + const char *str, const char *pri, osm_log_t *log) +{ + unsigned total_weight[IB_MAX_NUM_VLS] = {0,}; + + sum_vlarb_weights(vlarb_str, total_weight); + if (!(uniform_vlarb_weight_value(&total_weight[0], 4) >= 0 && + 
uniform_vlarb_weight_value(&total_weight[4], 4) >= 0)) + OSM_LOG(log, OSM_LOG_INFO, + "Warning: torus-2QoS requires same VLarb weights for " + "VLs 0-3; also for VLs 4-7: not true for %s " + "%s_vlarb_%s\n", + (is_default ? "default" : "configured"), str, pri); +} + +/* + * Use this to check the qos_config for switch external ports. + */ +static +void check_qos_swe_config(osm_qos_options_t *opt, + osm_qos_options_t *def, osm_log_t *log) +{ + const char *vlarb_str, *tstr; + bool is_default; + unsigned max_vls; + + max_vls = def->max_vls; + if (opt->max_vls > 0) + max_vls = opt->max_vls; + + if (max_vls > 0 && max_vls < 8) + OSM_LOG(log, OSM_LOG_INFO, + "Warning: full torus-2QoS functionality not available " + "for configured %s_max_vls = %d\n", + (opt->max_vls > 0 ? "qos_swe" : "qos"), opt->max_vls); + + vlarb_str = opt->vlarb_high; + is_default = false; + tstr = "qos_swe"; + if (!vlarb_str) { + vlarb_str = def->vlarb_high; + tstr = "qos"; + } + if (!vlarb_str) { + vlarb_str = OSM_DEFAULT_QOS_VLARB_HIGH; + is_default = true; + } + check_vlarb_config(vlarb_str, is_default, tstr, "high", log); + + vlarb_str = opt->vlarb_low; + is_default = false; + tstr = "qos_swe"; + if (!vlarb_str) { + vlarb_str = def->vlarb_low; + tstr = "qos"; + } + if (!vlarb_str) { + vlarb_str = OSM_DEFAULT_QOS_VLARB_LOW; + is_default = true; + } + check_vlarb_config(vlarb_str, is_default, tstr, "low", log); + + if (opt->sl2vl) + OSM_LOG(log, OSM_LOG_INFO, + "Warning: torus-2QoS must override configured " + "qos_swe_sl2vl to generate deadlock-free routes\n"); +} + +static +void check_ep_vlarb_config(const char *vlarb_str, + bool is_default, bool is_specific, + const char *str, const char *pri, osm_log_t *log) +{ + unsigned i, total_weight[IB_MAX_NUM_VLS] = {0,}; + int val = 0; + + sum_vlarb_weights(vlarb_str, total_weight); + for (i = 2; i < 8; i++) { + val += total_weight[i]; + } + if (!val) + return; + + if (is_specific) + OSM_LOG(log, OSM_LOG_INFO, + "Warning: torus-2QoS recommends 0 VLarb 
weights" + " for VLs 2-7 on endpoint links; not true for " + " configured %s_vlarb_%s\n", str, pri); + else + OSM_LOG(log, OSM_LOG_INFO, + "Warning: torus-2QoS recommends 0 VLarb weights " + "for VLs 2-7 on endpoint links; not true for %s " + "qos_vlarb_%s values used for %s_vlarb_%s\n", + (is_default ? "default" : "configured"), pri, str, pri); +} + +/* + * Use this to check the qos_config for endports + */ +static +void check_qos_ep_config(osm_qos_options_t *opt, osm_qos_options_t *def, + const char *str, osm_log_t *log) +{ + const char *vlarb_str; + bool is_default, is_specific; + unsigned max_vls; + + max_vls = def->max_vls; + if (opt->max_vls > 0) + max_vls = opt->max_vls; + + if (max_vls > 0 && max_vls < 2) + OSM_LOG(log, OSM_LOG_INFO, + "Warning: full torus-2QoS functionality not available " + "for configured %s_max_vls = %d\n", + (opt->max_vls > 0 ? str : "qos"), opt->max_vls); + + vlarb_str = opt->vlarb_high; + is_default = false; + is_specific = true; + if (!vlarb_str) { + vlarb_str = def->vlarb_high; + is_specific = false; + } + if (!vlarb_str) { + vlarb_str = OSM_DEFAULT_QOS_VLARB_HIGH; + is_default = true; + } + check_ep_vlarb_config(vlarb_str, is_default, is_specific, + str, "high", log); + + vlarb_str = opt->vlarb_low; + is_default = false; + is_specific = true; + if (!vlarb_str) { + vlarb_str = def->vlarb_low; + is_specific = false; + } + if (!vlarb_str) { + vlarb_str = OSM_DEFAULT_QOS_VLARB_LOW; + is_default = true; + } + check_ep_vlarb_config(vlarb_str, is_default, is_specific, + str, "low", log); + + if (opt->sl2vl) + OSM_LOG(log, OSM_LOG_INFO, + "Warning: torus-2QoS must override configured " + "%s_sl2vl to generate deadlock-free routes\n", str); +} + +static +int torus_build_lfts(void *context) +{ + int status = -1; + struct torus_context *ctx = context; + struct fabric *fabric; + struct torus *torus; + + if (!ctx->osm->subn.opt.qos) { + OSM_LOG(&ctx->osm->log, OSM_LOG_ERROR, + "ERR 4E44: Routing engine list contains torus-2QoS. 
" + "Enable QoS for correct operation " + "(-Q or 'qos TRUE' in opensm.conf).\n"); + return status; + } + + fabric = &ctx->fabric; + teardown_fabric(fabric); + + torus = calloc(1, sizeof(*torus)); + if (!torus) { + OSM_LOG(&ctx->osm->log, OSM_LOG_ERROR, + "ERR 4E45: allocating torus: %s\n", strerror(errno)); + goto out; + } + torus->osm = ctx->osm; + fabric->osm = ctx->osm; + + if (!parse_config(ctx->osm->subn.opt.torus_conf_file, + fabric, torus)) + goto out; + + if (!capture_fabric(fabric)) + goto out; + + OSM_LOG(&torus->osm->log, OSM_LOG_INFO, + "Found fabric w/ %d links, %d switches, %d CA ports, " + "minimum data VLs: endport %d, switchport %d\n", + (int)fabric->link_cnt, (int)fabric->switch_cnt, + (int)fabric->ca_cnt, (int)ctx->osm->subn.min_data_vls, + (int)ctx->osm->subn.min_sw_data_vls); + + if (!verify_setup(torus, fabric)) + goto out; + + OSM_LOG(&torus->osm->log, OSM_LOG_INFO, + "Looking for %d x %d x %d %s\n", + (int)torus->x_sz, (int)torus->y_sz, (int)torus->z_sz, + (ALL_MESH(torus->flags) ? "mesh" : "torus")); + + if (!build_torus(fabric, torus)) { + OSM_LOG(&torus->osm->log, OSM_LOG_ERROR, "ERR 4E57: " + "build_torus finished with errors\n"); + goto out; + } + + OSM_LOG(&torus->osm->log, OSM_LOG_INFO, + "Built %d x %d x %d %s w/ %d links, %d switches, %d CA ports\n", + (int)torus->x_sz, (int)torus->y_sz, (int)torus->z_sz, + (ALL_MESH(torus->flags) ? "mesh" : "torus"), + (int)torus->link_cnt, (int)torus->switch_cnt, + (int)torus->ca_cnt); + + diagnose_fabric(fabric); + /* + * Since we found some sort of torus fabric, report on any topology + * changes vs. the last torus we found. + */ + if (torus->flags & NOTIFY_CHANGES) + report_torus_changes(torus, ctx->torus); + + if (routable_torus(torus, fabric)) + status = route_torus(torus); + +out: + if (status) { /* bad torus!! 
*/ + if (torus) + teardown_torus(torus); + } else { + osm_subn_opt_t *opt = &torus->osm->subn.opt; + osm_log_t *log = &torus->osm->log; + + if (ctx->torus) + teardown_torus(ctx->torus); + ctx->torus = torus; + + check_qos_swe_config(&opt->qos_swe_options, &opt->qos_options, + log); + + check_qos_ep_config(&opt->qos_ca_options, + &opt->qos_options, "qos_ca", log); + check_qos_ep_config(&opt->qos_sw0_options, + &opt->qos_options, "qos_sw0", log); + check_qos_ep_config(&opt->qos_rtr_options, + &opt->qos_options, "qos_rtr", log); + } + teardown_fabric(fabric); + return status; +}
+
+/*
+ * Register torus-2QoS as routing engine r for this OpenSM instance.
+ *
+ * Creates the engine's private context with torus_context_create() and
+ * fills in r's callback table.  Returns 0 on success, or -1 (leaving r
+ * untouched) if the context could not be created.
+ */
+int osm_ucast_torus2QoS_setup(struct osm_routing_engine *r,
+			      osm_opensm_t *osm)
+{
+	struct torus_context *ctx;
+
+	ctx = torus_context_create(osm);
+	if (!ctx)
+		return -1;
+
+	/* ctx is handed back to every callback below as its context. */
+	r->context = ctx;
+	r->ucast_build_fwd_tables = torus_build_lfts;
+	r->build_lid_matrices = ucast_dummy_build_lid_matrices;
+	r->update_sl2vl = torus_update_osm_sl2vl;
+	r->update_vlarb = torus_update_osm_vlarb;
+	r->path_sl = torus_path_sl;
+	r->mcast_build_stree = torus_mcast_stree;
+	r->destroy = torus_context_delete;
+	return 0;
+}
diff --git a/opensm/osm_trap_rcv.c b/opensm/osm_trap_rcv.c new file mode 100644 index 0000000..527454b --- /dev/null +++ b/opensm/osm_trap_rcv.c @@ -0,0 +1,700 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2007 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009,2010 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_trap_rcv_t. + * This object represents the Trap Receiver object. + * This object is part of the opensm family of objects. 
+ */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_TRAP_RCV_C +#include +#include +#include +#include +#include +#include +#include + +extern void osm_req_get_node_desc(IN osm_sm_t * sm, osm_physp_t *p_physp); + +/********************************************************************** + * + * TRAP HANDLING: + * + * Assuming traps can be caused by bad hardware we should provide + * a mechanism for filtering their propagation into the actual logic + * of OpenSM such that it is not overloaded by them. + * + * We will provide a trap filtering mechanism with "Aging" capability. + * This mechanism will track incoming traps, classify them by their + * source and content and provide back their age. + * + * A timer running in the background will toggle a timer counter + * that should be referenced by the aging algorithm. + * To provide an efficient handling of aging, we also track all traps + * in a sorted list by their aging. + * + * The generic Aging Tracker mechanism is implemented in the + * cl_aging_tracker object. 
+ * + **********************************************************************/ + +static osm_physp_t *get_physp_by_lid_and_num(IN osm_sm_t * sm, + IN ib_net16_t lid, IN uint8_t num) +{ + osm_port_t *p_port = osm_get_port_by_lid(sm->p_subn, lid); + if (!p_port) + return NULL; + + if (osm_node_get_num_physp(p_port->p_node) <= num) + return NULL; + + return osm_node_get_physp_ptr(p_port->p_node, num); +}
+
+/*
+ * Event-wheel callback run when an aged trap registration expires.
+ *
+ * key is the value built by trap_get_key(): bits 48..55 carry the port
+ * number and bits 32..47 the LID (as an ib_net16_t); the low 32 bits
+ * (CRC) are not needed here.  num_regs is unused.  context is the
+ * osm_sm_t.
+ *
+ * If the port can still be found and is currently marked unhealthy, its
+ * health bit is set back to TRUE.  Always returns 0 so that the event is
+ * removed from the tracker.
+ */
+static uint64_t aging_tracker_callback(IN uint64_t key, IN uint32_t num_regs,
+				       IN void *context)
+{
+	osm_sm_t *sm = context;
+	ib_net16_t lid;
+	uint8_t port_num;
+	osm_physp_t *p_physp;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	if (osm_exit_flag) {
+		/* We got an exit flag - do nothing */
+		/* Keep the function-trace log balanced on this path too. */
+		OSM_LOG_EXIT(sm->p_log);
+		return 0;
+	}
+
+	lid = (ib_net16_t) ((key & 0x0000FFFF00000000ULL) >> 32);
+	port_num = (uint8_t) ((key & 0x00FF000000000000ULL) >> 48);
+
+	CL_PLOCK_ACQUIRE(sm->p_lock);
+
+	/* make sure the physp is still valid */
+	p_physp = get_physp_by_lid_and_num(sm, lid, port_num);
+	if (!p_physp) {
+		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+			"Cannot find port num:%u with lid:%u\n",
+			port_num, cl_ntoh16(lid));
+	} else if (!osm_physp_is_healthy(p_physp)) {
+		/* If the health port was false - set it to true */
+		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+			"Clearing health bit of port num:%u with lid:%u\n",
+			port_num, cl_ntoh16(lid));
+
+		/* Clear its health bit */
+		osm_physp_set_health(p_physp, TRUE);
+	}
+
+	CL_PLOCK_RELEASE(sm->p_lock);
+	OSM_LOG_EXIT(sm->p_log);
+
+	/* We want to remove the event from the tracker - so
+	   need to return zero. */
+	return 0;
+}
+
+/********************************************************************** + * CRC calculation for notice identification + **********************************************************************/ + +#define CRC32_POLYNOMIAL 0xEDB88320L + +/* calculate the crc for a given buffer */ +static uint32_t trap_calc_crc32(void *buffer, uint32_t count) +{ + uint32_t temp1, temp2; + uint32_t crc = -1L; + unsigned char *p = (unsigned char *)buffer; + /* precalculated table for faster crc calculation */ + static uint32_t crc_table[256]; + static boolean_t first = TRUE; + int i, j; + + /* if we need to initialize the lookup table */ + if (first) { + /* calc the CRC table */ + for (i = 0; i <= 255; i++) { + crc = i; + for (j = 8; j > 0; j--) + if (crc & 1) + crc = (crc >> 1) ^ CRC32_POLYNOMIAL; + else + crc >>= 1; + crc_table[i] = crc; + } + first = FALSE; + } + + crc = -1L; + /* do the calculation */ + while (count-- != 0) { + temp1 = (crc >> 8) & 0x00FFFFFFL; + temp2 = crc_table[((int)crc ^ *p++) & 0xFF]; + crc = temp1 ^ temp2; + } + return crc; +} + +/* The key is created in the following manner: + port_num lid crc + \______/ \___/ \___/ + 16b 16b 32b +*/ +static uint64_t trap_get_key(IN uint16_t lid, IN uint8_t port_num, + IN ib_mad_notice_attr_t * p_ntci) +{ + uint32_t crc = trap_calc_crc32(p_ntci, sizeof(ib_mad_notice_attr_t)); + return ((uint64_t) port_num << 48) | ((uint64_t) lid << 32) | crc; +} + +static int print_num_received(IN uint32_t num_received) +{ + uint32_t i; + + /* Series is 10, 20, 50, 100, 200, 500, ... 
*/ + i = num_received; + while (i >= 10) { + if (i % 10) + break; + i = i / 10; + } + + if (i == 1 || i == 2 || i == 5) + return 1; + else + return 0; +} + +static int disable_port(osm_sm_t *sm, osm_physp_t *p) +{ + uint8_t payload[IB_SMP_DATA_SIZE]; + osm_madw_context_t context; + ib_port_info_t *pi = (ib_port_info_t *)payload; + osm_physp_t *physp0; + osm_port_t *p_port; + ib_net64_t m_key; + ib_api_status_t status; + + /* select the nearest port to master opensm */ + if (p->p_remote_physp && + p->dr_path.hop_count > p->p_remote_physp->dr_path.hop_count) + p = p->p_remote_physp; + + /* If trap 131, might want to disable peer port if available */ + /* but peer port has been observed not to respond to SM requests */ + + memcpy(payload, &p->port_info, sizeof(ib_port_info_t)); + + /* Set port to disabled/down */ + ib_port_info_set_port_state(pi, IB_LINK_DOWN); + ib_port_info_set_port_phys_state(IB_PORT_PHYS_STATE_DISABLED, pi); + + /* Issue set of PortInfo */ + context.pi_context.node_guid = osm_node_get_node_guid(p->p_node); + context.pi_context.port_guid = osm_physp_get_port_guid(p); + context.pi_context.set_method = TRUE; + context.pi_context.light_sweep = FALSE; + context.pi_context.active_transition = FALSE; + context.pi_context.client_rereg = FALSE; + if (osm_node_get_type(p->p_node) == IB_NODE_TYPE_SWITCH && + osm_physp_get_port_num(p) != 0) { + physp0 = osm_node_get_physp_ptr(p->p_node, 0); + m_key = ib_port_info_get_m_key(&physp0->port_info); + } else + m_key = ib_port_info_get_m_key(&p->port_info); + + if (osm_node_get_type(p->p_node) != IB_NODE_TYPE_SWITCH) { + if (!pi->base_lid) { + p_port = osm_get_port_by_guid(sm->p_subn, + osm_physp_get_port_guid(p)); + if (p_port) + pi->base_lid = p_port->lid; + else { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 3804: Port 0x%" PRIx64 + " not found, port set failed\n", + cl_ntoh64(osm_physp_get_port_guid(p))); + status = IB_ERROR; + goto EXIT; + } + } + pi->master_sm_base_lid = sm->p_subn->sm_base_lid; + } + + status = 
osm_req_set(sm, osm_physp_get_dr_path_ptr(p), + payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO, + cl_hton32(osm_physp_get_port_num(p)), + FALSE, m_key, + 0, CL_DISP_MSGID_NONE, &context); + +EXIT: + return status; +} + +static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci, + ib_net16_t source_lid, ib_net64_t trans_id) +{ + if (!OSM_LOG_IS_ACTIVE_V2(p_log, OSM_LOG_ERROR)) + return; + + if (ib_notice_is_generic(p_ntci)) { + char str[32]; + + if ((p_ntci->g_or_v.generic.trap_num == CL_HTON16(SM_LINK_INTEGRITY_THRESHOLD_TRAP)) || + (p_ntci->g_or_v.generic.trap_num == CL_HTON16(SM_BUFFER_OVERRUN_THRESHOLD_TRAP)) || + (p_ntci->g_or_v.generic.trap_num == CL_HTON16(SM_WATCHDOG_TIMER_EXPIRED_TRAP))) + snprintf(str, sizeof(str), " Port %u", + p_ntci->data_details.ntc_129_131.port_num); + else + str[0] = '\0'; + + OSM_LOG(p_log, OSM_LOG_ERROR, + "Received Generic Notice type:%u " + "num:%u (%s) Producer:%u (%s) " + "from LID:%u%s TID:0x%016" PRIx64 "\n", + ib_notice_get_type(p_ntci), + cl_ntoh16(p_ntci->g_or_v.generic.trap_num), + ib_get_trap_str(p_ntci->g_or_v.generic.trap_num), + cl_ntoh32(ib_notice_get_prod_type(p_ntci)), + ib_get_producer_type_str(ib_notice_get_prod_type(p_ntci)), + cl_hton16(source_lid), str, cl_ntoh64(trans_id)); + if ((p_ntci->g_or_v.generic.trap_num == CL_HTON16(SM_BAD_PKEY_TRAP)) || + (p_ntci->g_or_v.generic.trap_num == CL_HTON16(SM_BAD_QKEY_TRAP))) { + OSM_LOG(p_log, OSM_LOG_ERROR, + "Bad %s_Key:0x%x on SL:%d from " + "LID1:%u QP1:0x%x to " + "LID2:%u QP2:0x%x\n", + (p_ntci->g_or_v.generic.trap_num == CL_HTON16(257)) ? 
"P" : "Q", + cl_ntoh32(p_ntci->data_details.ntc_257_258.key), + cl_ntoh32(p_ntci->data_details.ntc_257_258.qp1) >> 28, + cl_ntoh16(p_ntci->data_details.ntc_257_258.lid1), + cl_ntoh32(p_ntci->data_details.ntc_257_258.qp1) & 0xfff, + cl_ntoh16(p_ntci->data_details.ntc_257_258.lid2), + cl_ntoh32(p_ntci->data_details.ntc_257_258.qp2)); + } + } else + OSM_LOG(p_log, OSM_LOG_ERROR, + "Received Vendor Notice type:%u vend:0x%06X " + "dev:%u from LID:%u TID:0x%016" PRIx64 "\n", + ib_notice_get_type(p_ntci), + cl_ntoh32(ib_notice_get_vend_id(p_ntci)), + cl_ntoh16(p_ntci->g_or_v.vend.dev_id), + cl_ntoh16(source_lid), cl_ntoh64(trans_id)); +} + +static int shutup_noisy_port(osm_sm_t *sm, ib_net16_t lid, uint8_t port, + unsigned num) +{ + osm_physp_t *p = get_physp_by_lid_and_num(sm, lid, port); + if (!p) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: " + "Failed to find physical port by lid:%u num:%u\n", + cl_ntoh16(lid), port); + return -1; + } + + /* When babbling port policy option is enabled and + Threshold for disabling a "babbling" port is exceeded */ + if (sm->p_subn->opt.babbling_port_policy && num >= 250) { + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Disabling noisy physical port 0x%016" PRIx64 + ": lid %u, num %u\n", + cl_ntoh64(osm_physp_get_port_guid(p)), + cl_ntoh16(lid), port); + if (disable_port(sm, p)) + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: " + "Failed to disable noisy physical port 0x%016" + PRIx64 ": lid %u, num %u\n", + cl_ntoh64(osm_physp_get_port_guid(p)), + cl_ntoh16(lid), port); + else + return 1; + } + + /* check if the current state of the p_physp is healthy. If + it is - then this is a first change of state. Run a heavy sweep. 
*/
+	if (osm_physp_is_healthy(p)) {
+		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+			"Marking unhealthy physical port by lid:%u num:%u\n",
+			cl_ntoh16(lid), port);
+		osm_physp_set_health(p, FALSE);
+		return 2;
+	}
+	return 0;
+}
+/*
+ * Handle one received Trap MAD.
+ *
+ * Steps, in the order the code performs them:
+ *  - return early if osm_exit_flag is set or the method is not
+ *    IB_MAD_METHOD_TRAP;
+ *  - copy the notice out of the SMP data area and answer the sender through
+ *    osm_resp_send();
+ *  - for SMI class traps, count repetitions of the same trap in
+ *    sm->trap_aging_tracker and stop processing the noisy ones;
+ *  - handle traps 144 (node description) and 145 (SystemImageGUID);
+ *  - if sweep_on_trap is set, signal a sweep;
+ *  - fill in the IssuerGID and forward the notice with osm_report_notice().
+ *
+ * sm     - SM object; its log, subnet, lock and trap aging tracker are used.
+ * p_madw - MAD wrapper holding the received trap; asserted to be non-NULL.
+ */
+static void trap_rcv_process_request(IN osm_sm_t * sm,
+				     IN const osm_madw_t * p_madw)
+{
+	uint8_t payload[sizeof(ib_mad_notice_attr_t)];
+	ib_smp_t *p_smp;
+	ib_mad_notice_attr_t *p_ntci = (ib_mad_notice_attr_t *) payload;	/* notice view of payload[] */
+	ib_api_status_t status;
+	osm_madw_t tmp_madw;	/* we need a copy to last after repress */
+	uint64_t trap_key;
+	uint32_t num_received;
+	osm_physp_t *p_physp;
+	osm_port_t *p_port;
+	ib_net16_t source_lid = 0;
+	boolean_t is_gsi = TRUE;
+	uint8_t port_num = 0;
+	boolean_t physp_change_trap = FALSE;
+	uint64_t event_wheel_timeout = OSM_DEFAULT_TRAP_SUPPRESSION_TIMEOUT;
+	boolean_t run_heavy_sweep = FALSE;
+	char buf[1024];
+	osm_dr_path_t *p_path;
+	unsigned n;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	CL_ASSERT(p_madw);
+
+	if (osm_exit_flag)
+		/*
+		   We got an exit flag - do nothing
+		   Otherwise we start a sweep on the trap 144 caused by
+		   cleaning up SM Cap bit...
+		 */
+		goto Exit2;
+
+	/* update the is_gsi flag according to the mgmt_class field */
+	if (p_madw->p_mad->mgmt_class == IB_MCLASS_SUBN_LID ||
+	    p_madw->p_mad->mgmt_class == IB_MCLASS_SUBN_DIR)
+		is_gsi = FALSE;
+
+	/* No real need to grab the lock for this function. */
+	memset(payload, 0, sizeof(payload));
+	memset(&tmp_madw, 0, sizeof(tmp_madw));
+
+	p_smp = osm_madw_get_smp_ptr(p_madw);
+
+	if (p_smp->method != IB_MAD_METHOD_TRAP) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3801: "
+			"Unsupported method 0x%X\n", p_smp->method);
+		goto Exit2;	/* lock is not held yet, so skip the release at Exit */
+	}
+
+	/*
+	 * The NOTICE Attribute is part of the SMP CLASS attributes
+	 * As such the actual attribute data resides inside the SMP
+	 * payload.
+	 */
+
+	memcpy(payload, &p_smp->data, IB_SMP_DATA_SIZE);
+	memcpy(&tmp_madw, p_madw, sizeof(tmp_madw));
+
+	if (is_gsi == FALSE) {
+		/* We are in smi flow */
+		/*
+		 * When we receive a TRAP with dlid = 0 - it means it
+		 * came from our own node. So we need to fix it.
+		 */
+
+		if (p_madw->mad_addr.addr_type.smi.source_lid == 0) {
+			/* Check if the sm_base_lid is 0. If yes - this means
+			   that the local lid wasn't configured yet. Don't send
+			   a response to the trap. */
+			if (sm->p_subn->sm_base_lid == 0) {
+				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+					"Received SLID=0 Trap with local LID=0. Ignoring MAD\n");
+				goto Exit2;
+			}
+			OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
+				"Received SLID=0 Trap. Using local LID:%u instead\n",
+				cl_ntoh16(sm->p_subn->sm_base_lid));
+			tmp_madw.mad_addr.addr_type.smi.source_lid =
+			    sm->p_subn->sm_base_lid;
+		}
+
+		source_lid = tmp_madw.mad_addr.addr_type.smi.source_lid;
+
+		/* Print some info about the incoming Trap */
+		log_trap_info(sm->p_log, p_ntci, source_lid, p_smp->trans_id);
+	}
+
+	osm_dump_notice_v2(sm->p_log, p_ntci, FILE_ID, OSM_LOG_VERBOSE);
+	CL_PLOCK_ACQUIRE(sm->p_lock);	/* released at the Exit label */
+	p_physp = osm_get_physp_by_mad_addr(sm->p_log, sm->p_subn,
+					    &tmp_madw.mad_addr);
+	if (p_physp)
+		p_smp->m_key = ib_port_info_get_m_key(&p_physp->port_info);
+	else
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3809: "
+			"Failed to find source physical port for trap\n");
+
+	status = osm_resp_send(sm, &tmp_madw, 0, payload);
+	if (status != IB_SUCCESS) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3802: "
+			"Error sending response (%s)\n",
+			ib_get_err_str(status));
+		goto Exit;
+	}
+
+	/*
+	 * We would like to filter out recurring Traps so we track them by
+	 * their source lid and content. If the same trap was already
+	 * received within the aging time window more than 10 times,
+	 * we simply ignore it. This is done only if we are in smi mode
+	 */
+
+	if (is_gsi == FALSE) {
+		if (ib_notice_is_generic(p_ntci) &&
+		    (p_ntci->g_or_v.generic.trap_num == CL_HTON16(SM_LINK_INTEGRITY_THRESHOLD_TRAP) ||
+		     p_ntci->g_or_v.generic.trap_num == CL_HTON16(SM_BUFFER_OVERRUN_THRESHOLD_TRAP) ||
+		     p_ntci->g_or_v.generic.trap_num == CL_HTON16(SM_WATCHDOG_TIMER_EXPIRED_TRAP))) {
+			/* If this is a trap 129, 130, or 131 - then this is a
+			 * trap signaling a change on a physical port.
+			 * Mark the physp_change_trap flag as TRUE.
+			 */
+			physp_change_trap = TRUE;
+			/* The source_lid should be based on the source_lid from the trap */
+			source_lid = p_ntci->data_details.ntc_129_131.lid;
+			port_num = p_ntci->data_details.ntc_129_131.port_num;
+		}
+
+		/* try to find it in the aging tracker */
+		trap_key = trap_get_key(source_lid, port_num, p_ntci);
+		num_received = cl_event_wheel_num_regs(&sm->trap_aging_tracker,
+						       trap_key);
+
+		/* Now we know how many times it provided this trap */
+		if (num_received >= 10) {
+			if (print_num_received(num_received))
+				OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+					"Received trap %u times consecutively\n",
+					num_received);
+			/*
+			 * If the trap provides info about a bad port
+			 * we mark it as unhealthy.
+			 */
+			if (physp_change_trap == TRUE) {
+				int ret = shutup_noisy_port(sm, source_lid,
+							    port_num,
+							    num_received);
+				if (ret == 1) /* port disabled */
+					goto Exit;
+				else if (ret == 2) /* unhealthy - run sweep */
+					run_heavy_sweep = TRUE;
+				/* in any case increase timeout interval */
+				event_wheel_timeout =
+				    OSM_DEFAULT_UNHEALTHY_TIMEOUT;
+			}
+		}
+
+		/* restart the aging anyway */
+		/* If physp_change_trap is TRUE - then use a callback to unset
+		   the healthy bit. If not - no need to use a callback. */
+		if (physp_change_trap == TRUE)
+			cl_event_wheel_reg(&sm->trap_aging_tracker, trap_key,
+					   cl_get_time_stamp() + event_wheel_timeout,
+					   aging_tracker_callback, sm);
+		else
+			cl_event_wheel_reg(&sm->trap_aging_tracker, trap_key,
+					   cl_get_time_stamp() + event_wheel_timeout,
+					   NULL, NULL);
+
+		/* If was already registered do nothing more */
+		if (num_received >= 10 && run_heavy_sweep == FALSE) {
+			if (print_num_received(num_received))
+				OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+					"Ignoring noisy traps.\n");
+			goto Exit;
+		}
+	}
+
+	/* Check for node description update. IB Spec v1.2.1 pg 823 */
+	if (!ib_notice_is_generic(p_ntci))
+		goto check_sweep;
+	if (cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == SM_LOCAL_CHANGES_TRAP &&
+	    p_ntci->data_details.ntc_144.local_changes & TRAP_144_MASK_OTHER_LOCAL_CHANGES &&
+	    p_ntci->data_details.ntc_144.change_flgs & TRAP_144_MASK_NODE_DESCRIPTION_CHANGE) {
+		OSM_LOG(sm->p_log, OSM_LOG_INFO, "Trap 144 Node description update\n");
+
+		if (p_physp) {
+			osm_req_get_node_desc(sm, p_physp);
+			/* nothing but the description changed: skip the sweep */
+			if (!(p_ntci->data_details.ntc_144.change_flgs & ~TRAP_144_MASK_NODE_DESCRIPTION_CHANGE) &&
+			    p_ntci->data_details.ntc_144.new_cap_mask == p_physp->port_info.capability_mask)
+				goto check_report;
+		} else
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR,
+				"ERR 3812: No physical port found for "
+				"trap 144: \"node description update\"\n");
+		goto check_sweep;
+	} else if (cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == SM_SYS_IMG_GUID_CHANGED_TRAP) {
+		if (p_physp) {
+			CL_PLOCK_RELEASE(sm->p_lock);	/* drop the lock taken above ... */
+			CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);	/* ... and retake it (exclusive, going by the macro name) before writing node_info */
+			p_physp = osm_get_physp_by_mad_addr(sm->p_log,	/* looked up again: the lock was dropped in between */
+							    sm->p_subn,
+							    &tmp_madw.mad_addr);
+			if (p_physp) {
+				/* this assumes that trap 145 content is not broken? */
+				p_physp->p_node->node_info.sys_guid =
+					p_ntci->data_details.ntc_145.new_sys_guid;
+			}
+			CL_PLOCK_RELEASE(sm->p_lock);
+			CL_PLOCK_ACQUIRE(sm->p_lock);	/* back to the lock mode the Exit path expects */
+		} else
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR,
+				"ERR 3813: No physical port found for "
+				"trap 145: \"SystemImageGUID update\"\n");
+		goto check_report;
+	}
+
+check_sweep:
+	if (osm_log_is_active_v2(sm->p_log, OSM_LOG_INFO, FILE_ID)) {
+		if (ib_notice_is_generic(p_ntci) &&
+		    cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == SM_LINK_STATE_CHANGED_TRAP) {
+			p_path = (p_physp) ?
+			    osm_physp_get_dr_path_ptr(p_physp) : NULL;
+			if (p_path) {
+				n = sprintf(buf, "SM class trap %u: ",
+					    cl_ntoh16(p_ntci->g_or_v.generic.trap_num));
+				n += snprintf(buf + n, sizeof(buf) - n,
+					      "Directed Path Dump of %u hop path: "
+					      "Path = ", p_path->hop_count);
+
+				osm_dump_dr_path_as_buf(sizeof(buf) - n, p_path,
+							buf + n);
+
+				osm_log_v2(sm->p_log, OSM_LOG_INFO, FILE_ID,
+					   "%s\n", buf);
+			}
+		}
+	}
+
+	/* do a sweep if we received a trap */
+	if (sm->p_subn->opt.sweep_on_trap) {
+		/* if this is trap number 128 or run_heavy_sweep is TRUE -
+		   update the force_heavy_sweep flag of the subnet.
+		   Sweep also on traps 144 - these traps signal a change of
+		   certain port capabilities.
+		   TODO: In the future this can be changed to just getting
+		   PortInfo on this port instead of sweeping the entire subnet. */
+		if (ib_notice_is_generic(p_ntci) &&
+		    (cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == SM_LINK_STATE_CHANGED_TRAP ||
+		     cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == SM_LOCAL_CHANGES_TRAP ||
+		     run_heavy_sweep)) {
+			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+				"Forcing heavy sweep. Received trap:%u\n",
+				cl_ntoh16(p_ntci->g_or_v.generic.trap_num));
+
+			sm->p_subn->force_heavy_sweep = TRUE;
+		}
+		osm_sm_signal(sm, OSM_SIGNAL_SWEEP);
+	}
+
+	/* If we reached here due to trap 129/130/131 - do not need to do
+	   the notice report. Just goto exit. We know this is the case
+	   if physp_change_trap is TRUE. */
+	if (physp_change_trap == TRUE)
+		goto Exit;
+
+check_report:
+	/* We are going to report the notice - so need to fix the IssuerGID
+	   accordingly. See IBA 1.2 p.739 or IBA 1.1 p.653 for details. */
+	if (is_gsi) {
+		if (!tmp_madw.mad_addr.addr_type.gsi.global_route) {
+			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3806: "
+				"Received gsi trap with global_route FALSE. "
+				"Cannot update issuer_gid!\n");
+			goto Exit;
+		}
+		memcpy(&p_ntci->issuer_gid,
+		       &tmp_madw.mad_addr.addr_type.gsi.grh_info.src_gid,
+		       sizeof(ib_gid_t));
+	} else {
+		/* Need to use the IssuerLID */
+		p_port = osm_get_port_by_lid(sm->p_subn, source_lid);
+		if (!p_port) {
+			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
+				"Cannot find port corresponding to lid:%u\n",
+				cl_ntoh16(source_lid));
+
+			goto Exit;
+		}
+
+		p_ntci->issuer_gid.unicast.prefix =
+		    sm->p_subn->opt.subnet_prefix;
+		p_ntci->issuer_gid.unicast.interface_id = p_port->guid;
+	}
+
+	/* we need a lock here as the InformInfo DB must be stable */
+	status = osm_report_notice(sm->p_log, sm->p_subn, p_ntci);
+	if (status != IB_SUCCESS) {
+		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3803: "
+			"Error sending trap reports (%s)\n",
+			ib_get_err_str(status));
+		goto Exit;
+	}
+
+Exit:
+	CL_PLOCK_RELEASE(sm->p_lock);
+Exit2:	/* entry point for paths that never took sm->p_lock */
+	OSM_LOG_EXIT(sm->p_log);
+}
+/*
+ * Receiver entry point for Trap MADs.
+ *
+ * context - the osm_sm_t this receiver belongs to.
+ * data    - the osm_madw_t carrying the received MAD; asserted non-NULL.
+ *
+ * Asserts that the MAD is not a response and passes it on to
+ * trap_rcv_process_request().
+ */
+void osm_trap_rcv_process(IN void *context, IN void *data)
+{
+	osm_sm_t *sm = context;
+	osm_madw_t *p_madw = data;
+	ib_smp_t __attribute__((unused)) *p_smp;	/* read only by CL_ASSERT below */
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	CL_ASSERT(p_madw);
+
+	p_smp = osm_madw_get_smp_ptr(p_madw);
+
+	/* Only Trap requests get here */
+	CL_ASSERT(!ib_smp_is_response(p_smp));
+	trap_rcv_process_request(sm, p_madw);
+
+	OSM_LOG_EXIT(sm->p_log);
+}
diff --git a/opensm/osm_ucast_cache.c b/opensm/osm_ucast_cache.c
new file mode 100644
index 0000000..3caf318
--- /dev/null
+++ b/opensm/osm_ucast_cache.c
@@ -0,0 +1,1059 @@
+/*
+ * Copyright (c) 2008-2009 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2008-2009 Mellanox Technologies LTD.
All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ */
+
+/*
+ * Abstract:
+ *    Implementation of OpenSM Cached Unicast Routing
+ *
+ * Environment:
+ *    Linux User Mode
+ *
+ */
+
+/*
+ * NOTE(review): the header names that belong after the include directives
+ * below are missing in this copy of the patch. They are left untouched here
+ * and have to be restored from the original source.
+ */
+#if HAVE_CONFIG_H
+# include
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#define FILE_ID OSM_FILE_UCAST_CACHE_C
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * One port of a cached switch.
+ * ports[0] is special: it describes the switch itself (see cache_sw_new()).
+ */
+typedef struct cache_port {
+	boolean_t is_leaf;	/* port: remote side is a CA/RTR; ports[0]: switch is a leaf */
+	uint16_t remote_lid_ho;	/* port: LID (host order) behind the cached link, 0 if none; ports[0]: switch base LID */
+} cache_port_t;
+
+/*
+ * Cache entry for one switch, stored in p_mgr->cache_sw_tbl under the
+ * switch base LID (host order).
+ */
+typedef struct cache_switch {
+	cl_map_item_t map_item;	/* must stay first: entries are cast to/from map items */
+	boolean_t dropped;	/* TRUE when the whole switch went missing */
+	uint16_t max_lid_ho;	/* routing data saved from the dropped switch ... */
+	uint16_t num_hops;	/* ... number of rows in hops[] */
+	uint8_t **hops;		/* ... LID matrices, one separately allocated row per entry */
+	uint8_t *lft;		/* ... linear forwarding table */
+	uint8_t num_ports;	/* number of entries in ports[] */
+	cache_port_t ports[0];	/* allocated together with the structure */
+} cache_switch_t;
+
+/* Return the base LID (host order) of the cached switch. */
+static uint16_t cache_sw_get_base_lid_ho(cache_switch_t * p_sw)
+{
+	return p_sw->ports[0].remote_lid_ho;
+}
+
+/* Return TRUE if the cached switch has been marked as a leaf switch. */
+static boolean_t cache_sw_is_leaf(cache_switch_t * p_sw)
+{
+	return p_sw->ports[0].is_leaf;
+}
+
+/* Mark the cached switch as a leaf switch. */
+static void cache_sw_set_leaf(cache_switch_t * p_sw)
+{
+	p_sw->ports[0].is_leaf = TRUE;
+}
+
+/*
+ * Allocate a zeroed cache entry for the switch with base LID lid_ho and
+ * room for num_ports ports.
+ *
+ * Returns the new entry, or NULL when the allocation fails.
+ */
+static cache_switch_t *cache_sw_new(uint16_t lid_ho, unsigned num_ports)
+{
+	cache_switch_t *p_cache_sw;
+	/* ports[0] is always written below, so never allocate less than one */
+	unsigned alloc_ports = num_ports ? num_ports : 1;
+
+	/* calloc() returns zeroed memory: every port starts as "not cached" */
+	p_cache_sw = calloc(1, sizeof(*p_cache_sw) +
+			    alloc_ports * sizeof(cache_port_t));
+	if (!p_cache_sw)
+		return NULL;
+
+	p_cache_sw->num_ports = num_ports;
+
+	/* port[0] fields represent this switch details - lid and type */
+	p_cache_sw->ports[0].remote_lid_ho = lid_ho;
+	p_cache_sw->ports[0].is_leaf = FALSE;
+
+	return p_cache_sw;
+}
+
+/*
+ * Free a cache entry together with the routing data it still owns.
+ * A NULL entry is accepted and ignored.
+ */
+static void cache_sw_destroy(cache_switch_t * p_sw)
+{
+	unsigned i;
+
+	if (!p_sw)
+		return;
+
+	/* free(NULL) is a no-op, so the members need no individual checks */
+	free(p_sw->lft);
+	if (p_sw->hops) {
+		for (i = 0; i < p_sw->num_hops; i++)
+			free(p_sw->hops[i]);
+		free(p_sw->hops);
+	}
+	free(p_sw);
+}
+
+/*
+ * Look up the cache entry of the switch with base LID lid_ho.
+ * Returns NULL when the switch is not cached.
+ */
+static cache_switch_t *cache_get_sw(osm_ucast_mgr_t * p_mgr, uint16_t lid_ho)
+{
+	cache_switch_t *p_cache_sw = (cache_switch_t *)
+	    cl_qmap_get(&p_mgr->cache_sw_tbl, lid_ho);
+	if (p_cache_sw == (cache_switch_t *)
+	    cl_qmap_end(&p_mgr->cache_sw_tbl))
+		p_cache_sw = NULL;
+
+	return p_cache_sw;
+}
+
+/*
+ * Record in the cache that the link behind switch port p went missing.
+ *
+ * p_mgr         - unicast manager owning the cache.
+ * p             - switch port that lost its link; p->p_node->sw is
+ *                 dereferenced when a new cache entry has to be created.
+ * remote_lid_ho - LID (host order) of the node that was behind the link.
+ * is_ca         - TRUE when that node is a CA/RTR, FALSE for a switch.
+ *
+ * Nothing is cached when one of the LIDs is 0 or p is port 0. On allocation
+ * failure or a port number beyond the cached port count the whole cache is
+ * invalidated.
+ */
+static void cache_add_sw_link(osm_ucast_mgr_t * p_mgr, osm_physp_t *p,
+			      uint16_t remote_lid_ho, boolean_t is_ca)
+{
+	cache_switch_t *p_cache_sw;
+	uint16_t lid_ho = cl_ntoh16(osm_node_get_base_lid(p->p_node, 0));
+
+	OSM_LOG_ENTER(p_mgr->p_log);
+
+	/*
+	 * The caller caches both ends of a switch-to-switch link with two
+	 * calls. If the first one invalidated the cache, the second one must
+	 * not insert an entry into the flushed table: the invalidation
+	 * routine does nothing while cache_valid is FALSE, so such an entry
+	 * would stay behind. Same guard as in cache_remove_port().
+	 */
+	if (!p_mgr->cache_valid)
+		goto Exit;
+
+	if (!lid_ho || !remote_lid_ho || !p->port_num)
+		goto Exit;
+
+	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+		"Caching switch port: lid %u [port %u] -> lid %u (%s)\n",
+		lid_ho, p->port_num, remote_lid_ho, (is_ca) ? "CA/RTR" : "SW");
+
+	p_cache_sw = cache_get_sw(p_mgr, lid_ho);
+	if (!p_cache_sw) {
+		p_cache_sw = cache_sw_new(lid_ho, p->p_node->sw->num_ports);
+		if (!p_cache_sw) {
+			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+				"ERR AD01: Out of memory - cache is invalid\n");
+			osm_ucast_cache_invalidate(p_mgr);
+			goto Exit;
+		}
+		cl_qmap_insert(&p_mgr->cache_sw_tbl, lid_ho,
+			       &p_cache_sw->map_item);
+	}
+
+	if (p->port_num >= p_cache_sw->num_ports) {
+		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+			"ERR AD02: Wrong switch? - cache is invalid\n");
+		osm_ucast_cache_invalidate(p_mgr);
+		goto Exit;
+	}
+
+	/* a switch that loses a link to a CA/RTR is a leaf switch */
+	if (is_ca)
+		cache_sw_set_leaf(p_cache_sw);
+
+	if (p_cache_sw->ports[p->port_num].remote_lid_ho == 0) {
+		/* cache this link only if it hasn't been already cached */
+		p_cache_sw->ports[p->port_num].remote_lid_ho = remote_lid_ho;
+		p_cache_sw->ports[p->port_num].is_leaf = is_ca;
+	}
+Exit:
+	OSM_LOG_EXIT(p_mgr->p_log);
+}
+
+/*
+ * Remove and free every cached switch that has no cached link left on any
+ * of its ports 1..num_ports-1. Does nothing when the cache is invalid.
+ */
+static void cache_cleanup_switches(osm_ucast_mgr_t * p_mgr)
+{
+	cache_switch_t *p_sw;
+	cache_switch_t *p_next_sw;
+	unsigned port_num;
+	boolean_t found_port;
+
+	if (!p_mgr->cache_valid)
+		return;
+
+	p_next_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl);
+	while (p_next_sw !=
+	       (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl)) {
+		p_sw = p_next_sw;
+		/* fetch the successor first: p_sw may be freed below */
+		p_next_sw = (cache_switch_t *) cl_qmap_next(&p_sw->map_item);
+
+		found_port = FALSE;
+		for (port_num = 1; port_num < p_sw->num_ports; port_num++)
+			if (p_sw->ports[port_num].remote_lid_ho) {
+				/* one cached link is enough to keep the entry */
+				found_port = TRUE;
+				break;
+			}
+
+		if (!found_port) {
+			cl_qmap_remove_item(&p_mgr->cache_sw_tbl,
+					    &p_sw->map_item);
+			cache_sw_destroy(p_sw);
+		}
+	}
+}
+
+/*
+ * Invalidate the cache when a newly reported link between p_physp_1 and
+ * p_physp_2 means that an existing connection has moved.
+ *
+ * If neither port had a remote port, this is a plain new link and nothing
+ * is done. Otherwise the cache is invalidated when the previous remote of
+ * either port is itself still linked to something.
+ */
+static void
+cache_check_link_change(osm_ucast_mgr_t * p_mgr,
+			osm_physp_t * p_physp_1, osm_physp_t * p_physp_2)
+{
+	OSM_LOG_ENTER(p_mgr->p_log);
+	CL_ASSERT(p_physp_1 && p_physp_2);
+
+	if (!p_mgr->cache_valid)
+		goto Exit;
+
+	if (!p_physp_1->p_remote_physp && !p_physp_2->p_remote_physp)
+		/* both ports were down - new link */
+		goto Exit;
+
+	/* unicast cache cannot tolerate any link location change */
+
+	if ((p_physp_1->p_remote_physp &&
+	     p_physp_1->p_remote_physp->p_remote_physp) ||
+	    (p_physp_2->p_remote_physp &&
+	     p_physp_2->p_remote_physp->p_remote_physp)) {
+		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+			"Link location change discovered\n");
+		osm_ucast_cache_invalidate(p_mgr);
+		goto Exit;
+	}
+Exit:
+	OSM_LOG_EXIT(p_mgr->p_log);
+}
+
+/*
+ * Take a link that has reappeared out of the cache.
+ *
+ * The link is described by the switch LID and port number plus the LID and
+ * kind (is_ca) of the node behind it. Any mismatch with what was cached
+ * invalidates the whole cache.
+ */
+static void cache_remove_port(osm_ucast_mgr_t * p_mgr, uint16_t lid_ho,
+			      uint8_t port_num, uint16_t remote_lid_ho, +
boolean_t is_ca) +{ + cache_switch_t *p_cache_sw; + + OSM_LOG_ENTER(p_mgr->p_log); + + if (!p_mgr->cache_valid) + goto Exit; + + p_cache_sw = cache_get_sw(p_mgr, lid_ho); + if (!p_cache_sw) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Found uncached switch/link (lid %u, port %u)\n", + lid_ho, port_num); + osm_ucast_cache_invalidate(p_mgr); + goto Exit; + } + + if (port_num >= p_cache_sw->num_ports || + !p_cache_sw->ports[port_num].remote_lid_ho) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Found uncached switch link (lid %u, port %u)\n", + lid_ho, port_num); + osm_ucast_cache_invalidate(p_mgr); + goto Exit; + } + + if (p_cache_sw->ports[port_num].remote_lid_ho != remote_lid_ho) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Remote lid change on switch lid %u, port %u " + "(was %u, now %u)\n", lid_ho, port_num, + p_cache_sw->ports[port_num].remote_lid_ho, + remote_lid_ho); + osm_ucast_cache_invalidate(p_mgr); + goto Exit; + } + + if ((p_cache_sw->ports[port_num].is_leaf && !is_ca) || + (!p_cache_sw->ports[port_num].is_leaf && is_ca)) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Remote node type change on switch lid %u, port %u\n", + lid_ho, port_num); + osm_ucast_cache_invalidate(p_mgr); + goto Exit; + } + + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "New link from lid %u, port %u to lid %u - " + "found in cache\n", lid_ho, port_num, remote_lid_ho); + + /* the new link was cached - clean it from the cache */ + + p_cache_sw->ports[port_num].remote_lid_ho = 0; + p_cache_sw->ports[port_num].is_leaf = FALSE; +Exit: + OSM_LOG_EXIT(p_mgr->p_log); +} /* cache_remove_port() */ + +static void +cache_restore_ucast_info(osm_ucast_mgr_t * p_mgr, + cache_switch_t * p_cache_sw, osm_switch_t * p_sw) +{ + if (!p_mgr->cache_valid) + return; + + /* when seting unicast info, the cached port + should have all the required info */ + CL_ASSERT(p_cache_sw->max_lid_ho && p_cache_sw->lft && + p_cache_sw->num_hops && p_cache_sw->hops); + + p_sw->max_lid_ho = p_cache_sw->max_lid_ho; + + if 
(p_sw->new_lft) + free(p_sw->new_lft); + p_sw->new_lft = p_cache_sw->lft; + p_cache_sw->lft = NULL; + + p_sw->num_hops = p_cache_sw->num_hops; + p_cache_sw->num_hops = 0; + if (p_sw->hops) + free(p_sw->hops); + p_sw->hops = p_cache_sw->hops; + p_cache_sw->hops = NULL; + + p_sw->need_update = 2; +} + +static void ucast_cache_dump(osm_ucast_mgr_t * p_mgr) +{ + cache_switch_t *p_sw; + unsigned i; + + OSM_LOG_ENTER(p_mgr->p_log); + + if (!OSM_LOG_IS_ACTIVE_V2(p_mgr->p_log, OSM_LOG_DEBUG)) + goto Exit; + + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Dumping missing nodes/links as logged by unicast cache:\n"); + for (p_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl); + p_sw != (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl); + p_sw = (cache_switch_t *) cl_qmap_next(&p_sw->map_item)) { + + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "\t Switch lid %u %s%s\n", + cache_sw_get_base_lid_ho(p_sw), + (cache_sw_is_leaf(p_sw)) ? "[leaf switch] " : "", + (p_sw->dropped) ? "[whole switch missing]" : ""); + + for (i = 1; i < p_sw->num_ports; i++) + if (p_sw->ports[i].remote_lid_ho > 0) + OSM_LOG(p_mgr->p_log, + OSM_LOG_DEBUG, + "\t - port %u -> lid %u %s\n", + i, p_sw->ports[i].remote_lid_ho, + (p_sw->ports[i].is_leaf) ? 
+ "[remote node is leaf]" : ""); + } +Exit: + OSM_LOG_EXIT(p_mgr->p_log); +} + +void osm_ucast_cache_invalidate(osm_ucast_mgr_t * p_mgr) +{ + cache_switch_t *p_sw; + cache_switch_t *p_next_sw; + + OSM_LOG_ENTER(p_mgr->p_log); + + if (!p_mgr->cache_valid) + goto Exit; + + p_mgr->cache_valid = FALSE; + + p_next_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl); + while (p_next_sw != + (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl)) { + p_sw = p_next_sw; + p_next_sw = (cache_switch_t *) cl_qmap_next(&p_sw->map_item); + cache_sw_destroy(p_sw); + } + cl_qmap_remove_all(&p_mgr->cache_sw_tbl); + + OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE, "Unicast Cache invalidated\n"); +Exit: + OSM_LOG_EXIT(p_mgr->p_log); +} + +static void ucast_cache_validate(osm_ucast_mgr_t * p_mgr) +{ + cache_switch_t *p_cache_sw; + cache_switch_t *p_remote_cache_sw; + unsigned port_num; + unsigned max_ports; + uint8_t remote_node_type; + uint16_t lid_ho; + uint16_t remote_lid_ho; + osm_switch_t *p_sw; + osm_switch_t *p_remote_sw; + osm_node_t *p_node; + osm_physp_t *p_physp; + osm_physp_t *p_remote_physp; + osm_port_t *p_remote_port; + cl_qmap_t *p_sw_tbl; + + OSM_LOG_ENTER(p_mgr->p_log); + if (!p_mgr->cache_valid) + goto Exit; + + /* If there are no switches in the subnet, we are done */ + p_sw_tbl = &p_mgr->p_subn->sw_guid_tbl; + if (cl_qmap_count(p_sw_tbl) == 0) { + osm_ucast_cache_invalidate(p_mgr); + goto Exit; + } + + /* + * Scan all the physical switch ports in the subnet. + * If the port need_update flag is on, check whether + * it's just some node/port reset or a cached topology + * change. Otherwise the cache is invalid. 
+ */ + for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl); + p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl); + p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) { + + p_node = p_sw->p_node; + + lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0)); + p_cache_sw = cache_get_sw(p_mgr, lid_ho); + + max_ports = osm_node_get_num_physp(p_node); + + /* skip port 0 */ + for (port_num = 1; port_num < max_ports; port_num++) { + + p_physp = osm_node_get_physp_ptr(p_node, port_num); + + if (!p_physp || !p_physp->p_remote_physp || + !osm_physp_link_exists(p_physp, + p_physp->p_remote_physp)) + /* no valid link */ + continue; + + /* + * While scanning all the physical ports in the subnet, + * mark corresponding leaf switches in the cache. + */ + if (p_cache_sw && + !p_cache_sw->dropped && + !cache_sw_is_leaf(p_cache_sw) && + p_physp->p_remote_physp->p_node && + osm_node_get_type(p_physp->p_remote_physp-> + p_node) != IB_NODE_TYPE_SWITCH) + cache_sw_set_leaf(p_cache_sw); + + if (!p_physp->need_update) + continue; + + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Checking switch lid %u, port %u\n", + lid_ho, port_num); + + p_remote_physp = osm_physp_get_remote(p_physp); + remote_node_type = + osm_node_get_type(p_remote_physp->p_node); + + if (remote_node_type == IB_NODE_TYPE_SWITCH) + remote_lid_ho = + cl_ntoh16(osm_node_get_base_lid + (p_remote_physp->p_node, 0)); + else + remote_lid_ho = + cl_ntoh16(osm_node_get_base_lid + (p_remote_physp->p_node, + osm_physp_get_port_num + (p_remote_physp))); + + if (!p_cache_sw || + port_num >= p_cache_sw->num_ports || + !p_cache_sw->ports[port_num].remote_lid_ho) { + /* + * There is some uncached change on the port. + * In general, the reasons might be as follows: + * - switch reset + * - port reset (or port down/up) + * - quick connection location change + * - new link (or new switch) + * + * First two reasons allow cache usage, while + * the last two reasons should invalidate cache. 
+ * + * In case of quick connection location change, + * cache would have been invalidated by + * osm_ucast_cache_check_new_link() function. + * + * In case of new link between two known nodes, + * cache also would have been invalidated by + * osm_ucast_cache_check_new_link() function. + * + * Another reason is cached link between two + * known switches went back. In this case the + * osm_ucast_cache_check_new_link() function would + * clear both sides of the link from the cache + * during the discovery process, so effectively + * this would be equivalent to port reset. + * + * So three possible reasons remain: + * - switch reset + * - port reset (or port down/up) + * - link of a new switch + * + * To validate cache, we need to check only the + * third reason - link of a new node/switch: + * - If this is the local switch that is new, + * then it should have (p_sw->need_update == 2). + * - If the remote node is switch and it's new, + * then it also should have + * (p_sw->need_update == 2). + * - If the remote node is CA/RTR and it's new, + * then its port should have is_new flag on. + */ + if (p_sw->need_update == 2) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "New switch found (lid %u)\n", + lid_ho); + osm_ucast_cache_invalidate(p_mgr); + goto Exit; + } + + if (remote_node_type == IB_NODE_TYPE_SWITCH) { + + p_remote_sw = + p_remote_physp->p_node->sw; + if (p_remote_sw->need_update == 2) { + /* this could also be case of + switch coming back with an + additional link that it + didn't have before */ + OSM_LOG(p_mgr->p_log, + OSM_LOG_DEBUG, + "New switch/link found (lid %u)\n", + remote_lid_ho); + osm_ucast_cache_invalidate + (p_mgr); + goto Exit; + } + } else { + /* + * Remote node is CA/RTR. + * Get p_port of the remote node and + * check its p_port->is_new flag. 
+ */ + p_remote_port = + osm_get_port_by_guid(p_mgr->p_subn, + osm_physp_get_port_guid + (p_remote_physp)); + if (!p_remote_port) { + OSM_LOG(p_mgr->p_log, + OSM_LOG_ERROR, + "ERR AD04: No port was found for " + "port GUID 0x%" PRIx64 "\n", + cl_ntoh64(osm_physp_get_port_guid + (p_remote_physp))); + osm_ucast_cache_invalidate + (p_mgr); + goto Exit; + } + if (p_remote_port->is_new) { + OSM_LOG(p_mgr->p_log, + OSM_LOG_DEBUG, + "New CA/RTR found (lid %u)\n", + remote_lid_ho); + osm_ucast_cache_invalidate + (p_mgr); + goto Exit; + } + } + } else { + /* + * The change on the port is cached. + * In general, the reasons might be as follows: + * - link between two known nodes went back + * - one or more nodes went back, causing all + * the links to reappear + * + * If it was link that went back, then this case + * would have been taken care of during the + * discovery by osm_ucast_cache_check_new_link(), + * so it's some node that went back. + */ + if ((p_cache_sw->ports[port_num].is_leaf && + remote_node_type == IB_NODE_TYPE_SWITCH) || + (!p_cache_sw->ports[port_num].is_leaf && + remote_node_type != IB_NODE_TYPE_SWITCH)) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Remote node type change on switch lid %u, port %u\n", + lid_ho, port_num); + osm_ucast_cache_invalidate(p_mgr); + goto Exit; + } + + if (p_cache_sw->ports[port_num].remote_lid_ho != + remote_lid_ho) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Remote lid change on switch lid %u, port %u" + "(was %u, now %u)\n", + lid_ho, port_num, + p_cache_sw->ports[port_num]. + remote_lid_ho, remote_lid_ho); + osm_ucast_cache_invalidate(p_mgr); + goto Exit; + } + + /* + * We don't care who is the node that has + * reappeared in the subnet (local or remote). + * What's important that the cached link matches + * the real fabrics link. + * Just clean it from cache. 
+ */ + + p_cache_sw->ports[port_num].remote_lid_ho = 0; + p_cache_sw->ports[port_num].is_leaf = FALSE; + if (p_cache_sw->dropped) { + cache_restore_ucast_info(p_mgr, + p_cache_sw, + p_sw); + p_cache_sw->dropped = FALSE; + } + + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Restored link from cache: lid %u, port %u to lid %u\n", + lid_ho, port_num, remote_lid_ho); + } + } + } + + /* Remove all the cached switches that + have all their ports restored */ + cache_cleanup_switches(p_mgr); + + /* + * Done scanning all the physical switch ports in the subnet. + * Now we need to check the other side: + * Scan all the cached switches and their ports: + * - If the cached switch is missing in the subnet + * (dropped flag is on), check that it's a leaf switch. + * If it's not a leaf, the cache is invalid, because + * cache can tolerate only leaf switch removal. + * - If the cached switch exists in fabric, check all + * its cached ports. These cached ports represent + * missing link in the fabric. + * The missing links that can be tolerated are: + * + link to missing CA/RTR + * + link to missing leaf switch + */ + for (p_cache_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl); + p_cache_sw != (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl); + p_cache_sw = + (cache_switch_t *) cl_qmap_next(&p_cache_sw->map_item)) { + + if (p_cache_sw->dropped) { + if (!cache_sw_is_leaf(p_cache_sw)) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Missing non-leaf switch (lid %u)\n", + cache_sw_get_base_lid_ho(p_cache_sw)); + osm_ucast_cache_invalidate(p_mgr); + goto Exit; + } + + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Missing leaf switch (lid %u) - " + "continuing validation\n", + cache_sw_get_base_lid_ho(p_cache_sw)); + continue; + } + + for (port_num = 1; port_num < p_cache_sw->num_ports; port_num++) { + if (!p_cache_sw->ports[port_num].remote_lid_ho) + continue; + + if (p_cache_sw->ports[port_num].is_leaf) { + CL_ASSERT(cache_sw_is_leaf(p_cache_sw)); + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 
+ "Switch lid %u, port %u: missing link to CA/RTR - " + "continuing validation\n", + cache_sw_get_base_lid_ho(p_cache_sw), + port_num); + continue; + } + + p_remote_cache_sw = cache_get_sw(p_mgr, + p_cache_sw-> + ports[port_num]. + remote_lid_ho); + + if (!p_remote_cache_sw || !p_remote_cache_sw->dropped) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Switch lid %u, port %u: missing link to existing switch\n", + cache_sw_get_base_lid_ho(p_cache_sw), + port_num); + osm_ucast_cache_invalidate(p_mgr); + goto Exit; + } + + if (!cache_sw_is_leaf(p_remote_cache_sw)) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Switch lid %u, port %u: missing link to non-leaf switch\n", + cache_sw_get_base_lid_ho(p_cache_sw), + port_num); + osm_ucast_cache_invalidate(p_mgr); + goto Exit; + } + + /* + * At this point we know that the missing link is to + * a leaf switch. However, one case deserves a special + * treatment. If there was a link between two leaf + * switches, then missing leaf switch might break + * routing. It is possible that there are routes + * that use leaf switches to get from switch to switch + * and not just to get to the CAs behind the leaf switch. 
+ */ + if (cache_sw_is_leaf(p_cache_sw) && + cache_sw_is_leaf(p_remote_cache_sw)) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Switch lid %u, port %u: missing leaf-2-leaf link\n", + cache_sw_get_base_lid_ho(p_cache_sw), + port_num); + osm_ucast_cache_invalidate(p_mgr); + goto Exit; + } + + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Switch lid %u, port %u: missing remote leaf switch - " + "continuing validation\n", + cache_sw_get_base_lid_ho(p_cache_sw), + port_num); + } + } + + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Unicast cache is valid\n"); + ucast_cache_dump(p_mgr); +Exit: + OSM_LOG_EXIT(p_mgr->p_log); +} /* osm_ucast_cache_validate() */ + +void osm_ucast_cache_check_new_link(osm_ucast_mgr_t * p_mgr, + osm_node_t * p_node_1, uint8_t port_num_1, + osm_node_t * p_node_2, uint8_t port_num_2) +{ + uint16_t lid_ho_1; + uint16_t lid_ho_2; + + OSM_LOG_ENTER(p_mgr->p_log); + + if (!p_mgr->cache_valid) + goto Exit; + + cache_check_link_change(p_mgr, + osm_node_get_physp_ptr(p_node_1, port_num_1), + osm_node_get_physp_ptr(p_node_2, port_num_2)); + + if (!p_mgr->cache_valid) + goto Exit; + + if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH && + osm_node_get_type(p_node_2) != IB_NODE_TYPE_SWITCH) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Found CA-2-CA link\n"); + osm_ucast_cache_invalidate(p_mgr); + goto Exit; + } + + /* for code simplicity, we want the first node to be switch */ + if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH) { + osm_node_t *tmp_node = p_node_1; + uint8_t tmp_port_num = port_num_1; + p_node_1 = p_node_2; + port_num_1 = port_num_2; + p_node_2 = tmp_node; + port_num_2 = tmp_port_num; + } + + lid_ho_1 = cl_ntoh16(osm_node_get_base_lid(p_node_1, 0)); + + if (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH) + lid_ho_2 = cl_ntoh16(osm_node_get_base_lid(p_node_2, 0)); + else + lid_ho_2 = + cl_ntoh16(osm_node_get_base_lid(p_node_2, port_num_2)); + + if (!lid_ho_1 || !lid_ho_2) { + /* + * No lid assigned, which means that one of the nodes is new. 
+		 * Need to wait for lid manager to process this node.
+		 * The switches and their links will be checked later when
+		 * the whole cache validity will be verified.
+		 */
+		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+			"Link port %u <-> %u reveals new node - cache will "
+			"be validated later\n", port_num_1, port_num_2);
+		goto Exit;
+	}
+
+	cache_remove_port(p_mgr, lid_ho_1, port_num_1, lid_ho_2,
+			  (osm_node_get_type(p_node_2) !=
+			   IB_NODE_TYPE_SWITCH));
+
+	/* if node_2 is a switch, the link should be cleaned from its cache */
+
+	if (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH)
+		cache_remove_port(p_mgr, lid_ho_2,
+				  port_num_2, lid_ho_1, FALSE);
+
+Exit:
+	OSM_LOG_EXIT(p_mgr->p_log);
+}				/* osm_ucast_cache_check_new_link() */
+
+/*
+ * Remember in the cache that the link between p_physp1 and p_physp2 has
+ * been lost.
+ *
+ * A link between two non-switch nodes, or a switch end without switch
+ * data, invalidates the cache. A link whose switch end has already lost
+ * its port 0 is skipped. Otherwise the link is cached on every switch end.
+ * Does nothing when the cache is invalid.
+ */
+void osm_ucast_cache_add_link(osm_ucast_mgr_t * p_mgr,
+			      osm_physp_t * p_physp1, osm_physp_t * p_physp2)
+{
+	osm_node_t *p_sw_node = p_physp1->p_node;
+	osm_node_t *p_far_node = p_physp2->p_node;
+	uint16_t sw_lid_ho;
+	uint16_t far_lid_ho;
+
+	OSM_LOG_ENTER(p_mgr->p_log);
+
+	if (!p_mgr->cache_valid)
+		goto Exit;
+
+	/* at least one end has to be a switch */
+	if (!(osm_node_get_type(p_sw_node) == IB_NODE_TYPE_SWITCH ||
+	      osm_node_get_type(p_far_node) == IB_NODE_TYPE_SWITCH)) {
+		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Dropping CA-2-CA link\n");
+		osm_ucast_cache_invalidate(p_mgr);
+		goto Exit;
+	}
+
+	if ((osm_node_get_type(p_sw_node) == IB_NODE_TYPE_SWITCH &&
+	     !osm_node_get_physp_ptr(p_sw_node, 0)) ||
+	    (osm_node_get_type(p_far_node) == IB_NODE_TYPE_SWITCH &&
+	     !osm_node_get_physp_ptr(p_far_node, 0))) {
+		/* a switch end without port 0 was dropped and cached
+		   earlier - nothing left to do for this link */
+		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+			"Port %u <-> port %u: port0 on one of the nodes "
+			"has already been dropped and cached\n",
+			p_physp1->port_num, p_physp2->port_num);
+		goto Exit;
+	}
+
+	/* From here on the first port/node pair is the switch side;
+	   exchange the two ends if they came in the other way round. */
+	if (osm_node_get_type(p_sw_node) != IB_NODE_TYPE_SWITCH) {
+		osm_physp_t *p_swap = p_physp2;
+		p_physp2 = p_physp1;
+		p_physp1 = p_swap;
+		p_sw_node = p_physp1->p_node;
+		p_far_node = p_physp2->p_node;
+	}
+
+	if (!p_sw_node->sw) {
+		/* switch node without switch data - do not trust the cache */
+		osm_ucast_cache_invalidate(p_mgr);
+		goto Exit;
+	}
+
+	sw_lid_ho = cl_ntoh16(osm_node_get_base_lid(p_sw_node, 0));
+
+	if (osm_node_get_type(p_far_node) != IB_NODE_TYPE_SWITCH) {
+		/* link to a CA/RTR is gone - only the switch side is cached */
+		far_lid_ho = cl_ntoh16(osm_physp_get_base_lid(p_physp2));
+		cache_add_sw_link(p_mgr, p_physp1, far_lid_ho, TRUE);
+		goto Exit;
+	}
+
+	if (!p_far_node->sw) {
+		/* switch node without switch data - do not trust the cache */
+		osm_ucast_cache_invalidate(p_mgr);
+		goto Exit;
+	}
+
+	far_lid_ho = cl_ntoh16(osm_node_get_base_lid(p_far_node, 0));
+
+	/* switch-to-switch link is gone - both ends are cached */
+	cache_add_sw_link(p_mgr, p_physp1, far_lid_ho, FALSE);
+	cache_add_sw_link(p_mgr, p_physp2, sw_lid_ho, FALSE);
+
+Exit:
+	OSM_LOG_EXIT(p_mgr->p_log);
+}				/* osm_ucast_cache_add_link() */
+
+/*
+ * Remember in the cache that node p_node has been dropped from the subnet.
+ *
+ * CA/RTR: every linked port is cached as a lost link of the switch it was
+ * attached to.
+ * Switch: all its links are cached, the entry is flagged as dropped and
+ * the switch's LID matrices, forwarding table and max LID are moved into
+ * the entry. A switch without LID, without switch data, without LID
+ * matrices, or one that did not turn out to be a leaf invalidates the
+ * cache.
+ * Does nothing when the cache is invalid.
+ */
+void osm_ucast_cache_add_node(osm_ucast_mgr_t * p_mgr, osm_node_t * p_node)
+{
+	uint16_t sw_lid_ho;
+	uint8_t num_physp;
+	uint8_t i;
+	osm_physp_t *p_port;
+	cache_switch_t *p_cached;
+	osm_switch_t *p_sw;
+
+	OSM_LOG_ENTER(p_mgr->p_log);
+
+	if (!p_mgr->cache_valid)
+		goto Exit;
+
+	if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH) {
+		/* CA/RTR is going away: cache each of its links, seen
+		   from the switch it hangs on */
+		num_physp = osm_node_get_num_physp(p_node);
+		for (i = 1; i < num_physp; i++) {
+
+			p_port = osm_node_get_physp_ptr(p_node, i);
+			if (p_port && p_port->p_remote_physp) {
+				CL_ASSERT(osm_node_get_type
+					  (p_port->p_remote_physp->p_node) ==
+					  IB_NODE_TYPE_SWITCH);
+
+				osm_ucast_cache_add_link(p_mgr,
+							 p_port->p_remote_physp,
+							 p_port);
+			}
+		}
+		goto Exit;
+	}
+
+	sw_lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0));
+
+	if (!sw_lid_ho) {
+		OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE,
+			"Skip caching. Switch dropped before "
+			"it gets a valid lid.\n");
+		osm_ucast_cache_invalidate(p_mgr);
+		goto Exit;
+	}
+
+	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+		"Caching dropped switch lid %u\n", sw_lid_ho);
+
+	if (!p_node->sw) {
+		/* no switch data to save - give up on the cache */
+		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+			"ERR AD03: no switch info for node lid %u - "
+			"clearing cache\n", sw_lid_ho);
+		osm_ucast_cache_invalidate(p_mgr);
+		goto Exit;
+	}
+
+	/* put every link of this switch into the cache */
+	num_physp = osm_node_get_num_physp(p_node);
+	for (i = 1; i < num_physp; i++) {
+
+		p_port = osm_node_get_physp_ptr(p_node, i);
+		if (p_port && p_port->p_remote_physp)
+			osm_ucast_cache_add_link(p_mgr, p_port,
+						 p_port->p_remote_physp);
+	}
+
+	/*
+	 * Caching a link to a CA/RTR marks the cached switch as leaf.
+	 * If no such link was seen the dropped switch is not a leaf,
+	 * which is something the cache cannot cope with.
+	 */
+
+	p_cached = cache_get_sw(p_mgr, sw_lid_ho);
+
+	/* there is no entry at all when the switch had no linked ports */
+	if (!p_cached || !cache_sw_is_leaf(p_cached)) {
+		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+			"Dropped non-leaf switch (lid %u)\n", sw_lid_ho);
+		osm_ucast_cache_invalidate(p_mgr);
+		goto Exit;
+	}
+
+	p_cached->dropped = TRUE;
+
+	p_sw = p_node->sw;
+	if (!p_sw->num_hops || !p_sw->hops) {
+		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+			"No LID matrices for switch lid %u\n", sw_lid_ho);
+		osm_ucast_cache_invalidate(p_mgr);
+		goto Exit;
+	}
+
+	/* hand the lid matrices over to the cache entry */
+
+	p_cached->num_hops = p_sw->num_hops;
+	p_sw->num_hops = 0;
+	p_cached->hops = p_sw->hops;
+	p_sw->hops = NULL;
+
+	/* hand the linear forwarding table over as well */
+
+	if (!p_sw->new_lft) {
+		/* there is no LFT buffer: take the switch's own LFT */
+		p_cached->lft = p_sw->lft;
+		p_sw->lft = NULL;
+		p_sw->lft_size = 0;
+	} else {
+		/* prefer the LFT buffer - it is more up to date
+		   than the switch's LFT */
+		p_cached->lft = p_sw->new_lft;
+		p_sw->new_lft = NULL;
+	}
+	p_cached->max_lid_ho = p_sw->max_lid_ho;
+Exit:
+	OSM_LOG_EXIT(p_mgr->p_log);
+}				/* osm_ucast_cache_add_node() */
+
+int osm_ucast_cache_process(osm_ucast_mgr_t * p_mgr)
+{
+	cl_qmap_t *tbl = &p_mgr->p_subn->sw_guid_tbl;
+	cl_map_item_t *item;
+	osm_switch_t *p_sw;
+	uint16_t lft_size;
+
+	if (!p_mgr->p_subn->opt.use_ucast_cache)
+		return 1;
+
+	ucast_cache_validate(p_mgr);
+	if (!p_mgr->cache_valid)
+		return 1;
+
+
OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, + "Configuring switch tables using cached routing\n"); + + for (item = cl_qmap_head(tbl); item != cl_qmap_end(tbl); + item = cl_qmap_next(item)) { + p_sw = (osm_switch_t *) item; + CL_ASSERT(p_sw->new_lft); + if (!p_sw->lft) { + lft_size = (p_sw->max_lid_ho / IB_SMP_DATA_SIZE + 1) + * IB_SMP_DATA_SIZE; + p_sw->lft = malloc(lft_size); + if (!p_sw->lft) + return IB_INSUFFICIENT_MEMORY; + p_sw->lft_size = lft_size; + memset(p_sw->lft, OSM_NO_PATH, p_sw->lft_size); + } + + } + + osm_ucast_mgr_set_fwd_tables(p_mgr); + + return 0; +} diff --git a/opensm/osm_ucast_dfsssp.c b/opensm/osm_ucast_dfsssp.c new file mode 100644 index 0000000..068acfa --- /dev/null +++ b/opensm/osm_ucast_dfsssp.c @@ -0,0 +1,2780 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009-2015 ZIH, TU Dresden, Federal Republic of Germany. All rights reserved. + * Copyright (C) 2012-2017 Tokyo Institute of Technology. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of OpenSM (deadlock-free) single-source-shortest-path routing + * (with dijkstra algorithm) + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_UCAST_DFSSSP_C +#include +#include +#include +#include +#include + +/* "infinity" for dijkstra */ +#define INF 0x7FFFFFFF + +enum { + UNDISCOVERED = 0, + DISCOVERED +}; + +enum { + UNKNOWN = 0, + GRAY, + BLACK, +}; + +typedef struct link { + uint64_t guid; /* guid of the neighbor behind the link */ + uint32_t from; /* base_index in the adjazenz list (start of the link) */ + uint8_t from_port; /* port on the base_side (needed for weight update to identify the correct link for multigraphs) */ + uint32_t to; /* index of the neighbor in the adjazenz list (end of the link) */ + uint8_t to_port; /* port on the side of the neighbor (needed for the LFT) */ + uint64_t weight; /* link weight */ + struct link *next; +} link_t; + +typedef struct vertex { + /* informations of the fabric */ + uint64_t guid; + uint16_t lid; /* for lft filling */ + uint32_t num_hca; /* numbers of Hca/LIDs on the switch, for weight calculation */ + link_t *links; + uint8_t hops; + /* for dijkstra routing */ + link_t *used_link; /* link between the vertex discovered before and this vertex */ + uint64_t distance; /* distance from source to this vertex */ + uint8_t state; + /* for the d-ary heap */ + size_t heap_index; + /* for LFT 
writing and debug */ + osm_switch_t *sw; /* selfpointer */ + boolean_t dropped; /* indicate dropped switches (w/ ucast cache) */ +} vertex_t; + +typedef struct vltable { + uint64_t num_lids; /* size of the lids array */ + uint16_t *lids; /* sorted array of all lids in the subnet */ + uint8_t *vls; /* matrix form assignment lid X lid -> virtual lane */ +} vltable_t; + +typedef struct cdg_link { + struct cdg_node *node; + uint32_t num_pairs; /* number of src->dest pairs incremented in path adding step */ + uint32_t max_len; /* length of the srcdest array */ + uint32_t removed; /* number of pairs removed in path deletion step */ + uint32_t *srcdest_pairs; + struct cdg_link *next; +} cdg_link_t; + +/* struct for a node of a binary tree with additional parent pointer */ +typedef struct cdg_node { + uint64_t channelID; /* unique key consist of src lid + port + dest lid + port */ + cdg_link_t *linklist; /* edges to adjazent nodes */ + uint8_t status; /* node status in cycle search to avoid recursive function */ + uint8_t visited; /* needed to traverse the binary tree */ + struct cdg_node *pre; /* to save the path in cycle detection algorithm */ + struct cdg_node *left, *right, *parent; +} cdg_node_t; + +typedef struct dfsssp_context { + osm_routing_engine_type_t routing_type; + osm_ucast_mgr_t *p_mgr; + vertex_t *adj_list; + uint32_t adj_list_size; + vltable_t *srcdest2vl_table; + uint8_t *vl_split_count; +} dfsssp_context_t; + +/**************** set initial values for structs ********************** + **********************************************************************/ +static inline void set_default_link(link_t * link) +{ + link->guid = 0; + link->from = 0; + link->from_port = 0; + link->to = 0; + link->to_port = 0; + link->weight = 0; + link->next = NULL; +} + +static inline void set_default_vertex(vertex_t * vertex) +{ + vertex->guid = 0; + vertex->lid = 0; + vertex->num_hca = 0; + vertex->links = NULL; + vertex->hops = 0; + vertex->used_link = NULL; + 
vertex->distance = 0; + vertex->state = UNDISCOVERED; + vertex->heap_index = 0; + vertex->sw = NULL; + vertex->dropped = FALSE; +} + +static inline void set_default_cdg_node(cdg_node_t * node) +{ + node->channelID = 0; + node->linklist = NULL; + node->status = UNKNOWN; + node->visited = 0; + node->pre = NULL; + node->left = NULL; + node->right = NULL; + node->parent = NULL; +} + +/********************************************************************** + **********************************************************************/ + +/************ helper functions to save src/dest X vl combination ****** + **********************************************************************/ +/* compare function of two lids for stdlib qsort */ +static int cmp_lids(const void *l1, const void *l2) +{ + ib_net16_t lid1 = *((ib_net16_t *) l1), lid2 = *((ib_net16_t *) l2); + + if (lid1 < lid2) + return -1; + else if (lid1 > lid2) + return 1; + else + return 0; +} + +/* use stdlib to sort the lid array */ +static inline void vltable_sort_lids(vltable_t * vltable) +{ + qsort(vltable->lids, vltable->num_lids, sizeof(ib_net16_t), cmp_lids); +} + +/* use stdlib to get index of key in lid array; + return -1 if lid isn't found in lids array +*/ +static inline int64_t vltable_get_lidindex(ib_net16_t * key, vltable_t * vltable) +{ + ib_net16_t *found_lid = NULL; + + found_lid = + (ib_net16_t *) bsearch(key, vltable->lids, vltable->num_lids, + sizeof(ib_net16_t), cmp_lids); + if (found_lid) + return found_lid - vltable->lids; + else + return -1; +} + +/* get virtual lane from src lid X dest lid combination; + return -1 for invalid lids +*/ +static int32_t vltable_get_vl(vltable_t * vltable, ib_net16_t slid, ib_net16_t dlid) +{ + int64_t ind1 = vltable_get_lidindex(&slid, vltable); + int64_t ind2 = vltable_get_lidindex(&dlid, vltable); + + if (ind1 > -1 && ind2 > -1) + return (int32_t) (vltable-> + vls[ind1 + ind2 * vltable->num_lids]); + else + return -1; +} + +/* set a virtual lane in the matrix */ 
+static inline void vltable_insert(vltable_t * vltable, ib_net16_t slid, + ib_net16_t dlid, uint8_t vl) +{ + int64_t ind1 = vltable_get_lidindex(&slid, vltable); + int64_t ind2 = vltable_get_lidindex(&dlid, vltable); + + if (ind1 > -1 && ind2 > -1) + vltable->vls[ind1 + ind2 * vltable->num_lids] = vl; +} + +/* change a number of lanes from lane xy to lane yz */ +static void vltable_change_vl(vltable_t * vltable, uint8_t from, uint8_t to, + uint64_t count) +{ + uint64_t set = 0, stop = 0; + uint64_t ind1 = 0, ind2 = 0; + + for (ind1 = 0; ind1 < vltable->num_lids; ind1++) { + for (ind2 = 0; ind2 < vltable->num_lids; ind2++) { + if (set == count) { + stop = 1; + break; + } + if (ind1 != ind2) { + if (vltable-> + vls[ind1 + ind2 * vltable->num_lids] == + from) { + vltable->vls[ind1 + + ind2 * vltable->num_lids] = + to; + set++; + } + } + } + if (stop) + break; + } +} + +static void vltable_print(osm_ucast_mgr_t * p_mgr, vltable_t * vltable) +{ + uint64_t ind1 = 0, ind2 = 0; + + for (ind1 = 0; ind1 < vltable->num_lids; ind1++) { + for (ind2 = 0; ind2 < vltable->num_lids; ind2++) { + if (ind1 != ind2) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + " route from src_lid=%" PRIu16 + " to dest_lid=%" PRIu16 " on vl=%" PRIu8 + "\n", cl_ntoh16(vltable->lids[ind1]), + cl_ntoh16(vltable->lids[ind2]), + vltable->vls[ind1 + + ind2 * vltable->num_lids]); + } + } + } +} + +static void vltable_dealloc(vltable_t ** vltable) +{ + if (*vltable) { + if ((*vltable)->lids) + free((*vltable)->lids); + if ((*vltable)->vls) + free((*vltable)->vls); + free(*vltable); + *vltable = NULL; + } +} + +static int vltable_alloc(vltable_t ** vltable, uint64_t size) +{ + /* allocate VL table and indexing array */ + *vltable = (vltable_t *) malloc(sizeof(vltable_t)); + if (!(*vltable)) + goto ERROR; + (*vltable)->num_lids = size; + (*vltable)->lids = (ib_net16_t *) malloc(size * sizeof(ib_net16_t)); + if (!((*vltable)->lids)) + goto ERROR; + (*vltable)->vls = (uint8_t *) malloc(size * size * 
sizeof(uint8_t)); + if (!((*vltable)->vls)) + goto ERROR; + memset((*vltable)->vls, OSM_DEFAULT_SL, size * size); + + return 0; + +ERROR: + vltable_dealloc(vltable); + + return 1; +} + +/********************************************************************** + **********************************************************************/ + +/************ helper functions to save/manage the channel dep. graph ** + **********************************************************************/ +/* update the srcdest array; + realloc array (double the size) if size is not large enough +*/ +static void set_next_srcdest_pair(cdg_link_t * link, uint32_t srcdest) +{ + uint32_t new_size = 0, start_size = 2; + uint32_t *tmp = NULL, *tmp2 = NULL; + + if (link->num_pairs == 0) { + link->srcdest_pairs = + (uint32_t *) malloc(start_size * sizeof(uint32_t)); + link->srcdest_pairs[link->num_pairs] = srcdest; + link->max_len = start_size; + link->removed = 0; + } else if (link->num_pairs == link->max_len) { + new_size = link->max_len << 1; + tmp = (uint32_t *) malloc(new_size * sizeof(uint32_t)); + tmp = + memcpy(tmp, link->srcdest_pairs, + link->max_len * sizeof(uint32_t)); + tmp2 = link->srcdest_pairs; + link->srcdest_pairs = tmp; + link->srcdest_pairs[link->num_pairs] = srcdest; + free(tmp2); + link->max_len = new_size; + } else { + link->srcdest_pairs[link->num_pairs] = srcdest; + } + link->num_pairs++; +} + +static inline uint32_t get_next_srcdest_pair(cdg_link_t * link, uint32_t index) +{ + return link->srcdest_pairs[index]; +} + +/* traverse binary tree to find a node */ +static cdg_node_t *cdg_search(cdg_node_t * root, uint64_t channelID) +{ + while (root) { + if (channelID < root->channelID) + root = root->left; + else if (channelID > root->channelID) + root = root->right; + else if (channelID == root->channelID) + return root; + } + return NULL; +} + +/* insert new node into the binary tree */ +static void cdg_insert(cdg_node_t ** root, cdg_node_t * new_node) +{ + cdg_node_t *current = 
*root; + + if (!current) { + current = new_node; + *root = current; + return; + } + + while (current) { + if (new_node->channelID < current->channelID) { + if (current->left) { + current = current->left; + } else { + current->left = new_node; + new_node->parent = current; + break; + } + } else if (new_node->channelID > current->channelID) { + if (current->right) { + current = current->right; + } else { + current->right = new_node; + new_node->parent = current; + break; + } + } else if (new_node->channelID == current->channelID) { + /* not really possible, maybe programming error */ + break; + } + } +} + +static void cdg_node_dealloc(cdg_node_t * node) +{ + cdg_link_t *link = node->linklist, *tmp = NULL; + + /* dealloc linklist */ + while (link) { + tmp = link; + link = link->next; + + if (tmp->num_pairs) + free(tmp->srcdest_pairs); + free(tmp); + } + /* dealloc node */ + free(node); +} + +static void cdg_dealloc(cdg_node_t ** root) +{ + cdg_node_t *current = *root; + + while (current) { + if (current->left) { + current = current->left; + } else if (current->right) { + current = current->right; + } else { + if (current->parent == NULL) { + cdg_node_dealloc(current); + *root = NULL; + break; + } + if (current->parent->left == current) { + current = current->parent; + cdg_node_dealloc(current->left); + current->left = NULL; + } else if (current->parent->right == current) { + current = current->parent; + cdg_node_dealloc(current->right); + current->right = NULL; + } + } + } +} + +/* search for a edge in the cdg which should be removed to break a cycle */ +static cdg_link_t *get_weakest_link_in_cycle(cdg_node_t * cycle) +{ + cdg_node_t *current = cycle, *node_with_weakest_link = NULL; + cdg_link_t *link = NULL, *weakest_link = NULL; + + link = current->linklist; + while (link) { + if (link->node->status == GRAY) { + weakest_link = link; + node_with_weakest_link = current; + current = link->node; + break; + } + link = link->next; + } + + while (1) { + current->status = 
UNKNOWN; + link = current->linklist; + while (link) { + if (link->node->status == GRAY) { + if ((link->num_pairs - link->removed) < + (weakest_link->num_pairs - + weakest_link->removed)) { + weakest_link = link; + node_with_weakest_link = current; + } + current = link->node; + break; + } + link = link->next; + } + /* if complete cycle is traversed */ + if (current == cycle) { + current->status = UNKNOWN; + break; + } + } + + if (node_with_weakest_link->linklist == weakest_link) { + node_with_weakest_link->linklist = weakest_link->next; + } else { + link = node_with_weakest_link->linklist; + while (link) { + if (link->next == weakest_link) { + link->next = weakest_link->next; + break; + } + link = link->next; + } + } + + return weakest_link; +} + +/* search for nodes in the cdg not yet reached in the cycle search process; + (some nodes are unreachable, e.g. a node is a source or the cdg has not connected parts) +*/ +static cdg_node_t *get_next_cdg_node(cdg_node_t * root) +{ + cdg_node_t *current = root, *res = NULL; + + while (current) { + current->visited = 1; + if (current->status == UNKNOWN) { + res = current; + break; + } + if (current->left && !current->left->visited) { + current = current->left; + } else if (current->right && !current->right->visited) { + current = current->right; + } else { + if (current->left) + current->left->visited = 0; + if (current->right) + current->right->visited = 0; + if (current->parent == NULL) + break; + else + current = current->parent; + } + } + + /* Clean up */ + while (current) { + current->visited = 0; + if (current->left) + current->left->visited = 0; + if (current->right) + current->right->visited = 0; + current = current->parent; + } + + return res; +} + +/* make a DFS on the cdg to check for a cycle */ +static cdg_node_t *search_cycle_in_channel_dep_graph(cdg_node_t * cdg, + cdg_node_t * start_node) +{ + cdg_node_t *cycle = NULL; + cdg_node_t *current = start_node, *next_node = NULL, *tmp = NULL; + cdg_link_t *link = 
NULL; + + while (current) { + current->status = GRAY; + link = current->linklist; + next_node = NULL; + while (link) { + if (link->node->status == UNKNOWN) { + next_node = link->node; + break; + } + if (link->node->status == GRAY) { + cycle = link->node; + goto Exit; + } + link = link->next; + } + if (next_node) { + next_node->pre = current; + current = next_node; + } else { + /* found a sink in the graph, go to last node */ + current->status = BLACK; + + /* srcdest_pairs of this node aren't relevant, free the allocated memory */ + link = current->linklist; + while (link) { + if (link->num_pairs) + free(link->srcdest_pairs); + link->srcdest_pairs = NULL; + link->num_pairs = 0; + link->removed = 0; + link = link->next; + } + + if (current->pre) { + tmp = current; + current = current->pre; + tmp->pre = NULL; + } else { + /* search for other subgraphs in cdg */ + current = get_next_cdg_node(cdg); + if (!current) + break; /* all relevant nodes traversed, no more cycles found */ + } + } + } + +Exit: + return cycle; +} + +/* calculate the path from source to destination port; + new channels are added directly to the cdg +*/ +static int update_channel_dep_graph(cdg_node_t ** cdg_root, + osm_port_t * src_port, uint16_t slid, + osm_port_t * dest_port, uint16_t dlid) +{ + osm_node_t *local_node = NULL, *remote_node = NULL; + uint16_t local_lid = 0, remote_lid = 0; + uint32_t srcdest = 0; + uint8_t local_port = 0, remote_port = 0; + uint64_t channelID = 0; + + cdg_node_t *channel_head = NULL, *channel = NULL, *last_channel = NULL; + cdg_link_t *linklist = NULL; + + /* set the identifier for the src/dest pair to save this on each edge of the cdg */ + srcdest = (((uint32_t) slid) << 16) + ((uint32_t) dlid); + + channel_head = (cdg_node_t *) malloc(sizeof(cdg_node_t)); + if (!channel_head) + goto ERROR; + set_default_cdg_node(channel_head); + last_channel = channel_head; + + /* if src is a Hca, then the channel from Hca to switch would be a source in the graph + sources can't be 
part of a cycle -> skip this channel + */ + remote_node = + osm_node_get_remote_node(src_port->p_node, + src_port->p_physp->port_num, &remote_port); + + while (remote_node && remote_node->sw) { + local_node = remote_node; + local_port = local_node->sw->new_lft[dlid]; + /* sanity check: local_port must be set or routing is broken */ + if (local_port == OSM_NO_PATH) + goto ERROR; + local_lid = cl_ntoh16(osm_node_get_base_lid(local_node, 0)); + /* each port belonging to a switch has lmc==0 -> get_base_lid is fine + (local/remote port in this function are always part of a switch) + */ + + remote_node = + osm_node_get_remote_node(local_node, local_port, + &remote_port); + /* if remote_node is a Hca, then the last channel from switch to Hca would be a sink in the cdg -> skip */ + if (!remote_node || !remote_node->sw) + break; + remote_lid = cl_ntoh16(osm_node_get_base_lid(remote_node, 0)); + + channelID = + (((uint64_t) local_lid) << 48) + + (((uint64_t) local_port) << 32) + + (((uint64_t) remote_lid) << 16) + ((uint64_t) remote_port); + channel = cdg_search(*cdg_root, channelID); + if (channel) { + /* check whether last channel has connection to this channel, i.e. 
subpath already exists in cdg */ + linklist = last_channel->linklist; + while (linklist && linklist->node != channel + && linklist->next) + linklist = linklist->next; + /* if there is no connection, add one */ + if (linklist) { + if (linklist->node == channel) { + set_next_srcdest_pair(linklist, + srcdest); + } else { + linklist->next = + (cdg_link_t *) + malloc(sizeof(cdg_link_t)); + if (!linklist->next) + goto ERROR; + linklist = linklist->next; + linklist->node = channel; + linklist->num_pairs = 0; + linklist->srcdest_pairs = NULL; + set_next_srcdest_pair(linklist, + srcdest); + linklist->next = NULL; + } + } else { + /* either this is the first channel of the path, or the last channel was a new channel, or last channel was a sink */ + last_channel->linklist = + (cdg_link_t *) malloc(sizeof(cdg_link_t)); + if (!last_channel->linklist) + goto ERROR; + last_channel->linklist->node = channel; + last_channel->linklist->num_pairs = 0; + last_channel->linklist->srcdest_pairs = NULL; + set_next_srcdest_pair(last_channel->linklist, + srcdest); + last_channel->linklist->next = NULL; + } + } else { + /* create new channel */ + channel = (cdg_node_t *) malloc(sizeof(cdg_node_t)); + if (!channel) + goto ERROR; + set_default_cdg_node(channel); + channel->channelID = channelID; + cdg_insert(cdg_root, channel); + + /* go to end of link list of last channel */ + linklist = last_channel->linklist; + while (linklist && linklist->next) + linklist = linklist->next; + if (linklist) { + /* update last link of an existing channel */ + linklist->next = + (cdg_link_t *) malloc(sizeof(cdg_link_t)); + if (!linklist->next) + goto ERROR; + linklist = linklist->next; + linklist->node = channel; + linklist->num_pairs = 0; + linklist->srcdest_pairs = NULL; + set_next_srcdest_pair(linklist, srcdest); + linklist->next = NULL; + } else { + /* either this is the first channel of the path, or the last channel was a new channel, or last channel was a sink */ + last_channel->linklist = + (cdg_link_t 
*) malloc(sizeof(cdg_link_t)); + if (!last_channel->linklist) + goto ERROR; + last_channel->linklist->node = channel; + last_channel->linklist->num_pairs = 0; + last_channel->linklist->srcdest_pairs = NULL; + set_next_srcdest_pair(last_channel->linklist, + srcdest); + last_channel->linklist->next = NULL; + } + } + last_channel = channel; + } + + if (channel_head->linklist) { + if (channel_head->linklist->srcdest_pairs) + free(channel_head->linklist->srcdest_pairs); + free(channel_head->linklist); + } + free(channel_head); + + return 0; + +ERROR: + /* cleanup data and exit */ + if (channel_head) { + if (channel_head->linklist) + free(channel_head->linklist); + free(channel_head); + } + + return 1; +} + +/* calculate the path from source to destination port; + the links in the cdg representing this path are decremented to simulate the removal +*/ +static int remove_path_from_cdg(cdg_node_t ** cdg_root, osm_port_t * src_port, + uint16_t slid, osm_port_t * dest_port, + uint16_t dlid) +{ + osm_node_t *local_node = NULL, *remote_node = NULL; + uint16_t local_lid = 0, remote_lid = 0; + uint8_t local_port = 0, remote_port = 0; + uint64_t channelID = 0; + + cdg_node_t *channel_head = NULL, *channel = NULL, *last_channel = NULL; + cdg_link_t *linklist = NULL; + + channel_head = (cdg_node_t *) malloc(sizeof(cdg_node_t)); + if (!channel_head) + goto ERROR; + set_default_cdg_node(channel_head); + last_channel = channel_head; + + /* if src is a Hca, then the channel from Hca to switch would be a source in the graph + sources can't be part of a cycle -> skip this channel + */ + remote_node = + osm_node_get_remote_node(src_port->p_node, + src_port->p_physp->port_num, &remote_port); + + while (remote_node && remote_node->sw) { + local_node = remote_node; + local_port = local_node->sw->new_lft[dlid]; + /* sanity check: local_port must be set or routing is broken */ + if (local_port == OSM_NO_PATH) + goto ERROR; + local_lid = cl_ntoh16(osm_node_get_base_lid(local_node, 0)); + + 
remote_node = + osm_node_get_remote_node(local_node, local_port, + &remote_port); + /* if remote_node is a Hca, then the last channel from switch to Hca would be a sink in the cdg -> skip */ + if (!remote_node || !remote_node->sw) + break; + remote_lid = cl_ntoh16(osm_node_get_base_lid(remote_node, 0)); + + channelID = + (((uint64_t) local_lid) << 48) + + (((uint64_t) local_port) << 32) + + (((uint64_t) remote_lid) << 16) + ((uint64_t) remote_port); + channel = cdg_search(*cdg_root, channelID); + if (channel) { + /* check whether last channel has connection to this channel, i.e. subpath already exists in cdg */ + linklist = last_channel->linklist; + while (linklist && linklist->node != channel + && linklist->next) + linklist = linklist->next; + /* remove the srcdest from the link */ + if (linklist) { + if (linklist->node == channel) { + linklist->removed++; + } else { + /* may happen if the link is missing (thru cycle detect algorithm) */ + } + } else { + /* may happen if the link is missing (thru cycle detect algorithm or last_channel==channel_head (dummy channel)) */ + } + } else { + /* must be an error, channels for the path are added before, so a missing channel would be a corrupt data structure */ + goto ERROR; + } + last_channel = channel; + } + + if (channel_head->linklist) + free(channel_head->linklist); + free(channel_head); + + return 0; + +ERROR: + /* cleanup data and exit */ + if (channel_head) { + if (channel_head->linklist) + free(channel_head->linklist); + free(channel_head); + } + + return 1; +} + +/********************************************************************** + **********************************************************************/ + +/************ helper functions to generate an ordered list of ports *** + ************ (functions copied from osm_ucast_mgr.c and modified) **** + **********************************************************************/ +static void add_sw_endports_to_order_list(osm_switch_t * sw, + osm_ucast_mgr_t * m, + 
cl_qmap_t * guid_tbl, + boolean_t add_guids) +{ + osm_port_t *port; + ib_net64_t port_guid; + uint64_t sw_guid; + osm_physp_t *p; + int i; + boolean_t found; + + for (i = 1; i < sw->num_ports; i++) { + p = osm_node_get_physp_ptr(sw->p_node, i); + /* only ports whose remote side is not a switch are of interest */ + if (p && p->p_remote_physp && !p->p_remote_physp->p_node->sw) { + port_guid = p->p_remote_physp->port_guid; + /* check if link is healthy, otherwise ignore CA */ + /* NOTE(review): the unhealthy link is only reported here, there is + no 'continue' - confirm whether the CA should really be skipped + */ + if (!osm_link_is_healthy(p)) { + sw_guid = + cl_ntoh64(osm_node_get_node_guid + (sw->p_node)); + OSM_LOG(m->p_log, OSM_LOG_INFO, + "WRN AD40: ignoring CA due to unhealthy" + " link from switch 0x%016" PRIx64 + " port %" PRIu8 " to CA 0x%016" PRIx64 + "\n", sw_guid, i, cl_ntoh64(port_guid)); + } + port = osm_get_port_by_guid(m->p_subn, port_guid); + if (!port) + continue; + /* a non-empty guid_tbl acts as a filter: + add_guids selects ports found in the table, !add_guids the others + */ + if (!cl_is_qmap_empty(guid_tbl)) { + found = (cl_qmap_get(guid_tbl, port_guid) + != cl_qmap_end(guid_tbl)); + if ((add_guids && !found) + || (!add_guids && found)) + continue; + } + if (!cl_is_item_in_qlist(&m->port_order_list, + &port->list_item)) + cl_qlist_insert_tail(&m->port_order_list, + &port->list_item); + else + OSM_LOG(m->p_log, OSM_LOG_INFO, + "WRN AD37: guid 0x%016" PRIx64 + " already in list\n", port_guid); + } + } +} + +/* append the port with the given guid (host byte order) to the port order list; + a guid without a matching port is logged and skipped, + a port which is already part of the list is logged and not added twice +*/ +static void add_guid_to_order_list(uint64_t guid, osm_ucast_mgr_t * m) +{ + osm_port_t *port = osm_get_port_by_guid(m->p_subn, cl_hton64(guid)); + + if (!port) { + OSM_LOG(m->p_log, OSM_LOG_DEBUG, + "port guid not found: 0x%016" PRIx64 "\n", guid); + /* nothing to insert: without this return &port->list_item + would be derived from a NULL pointer and linked into the list + */ + return; + } + + if (!cl_is_item_in_qlist(&m->port_order_list, &port->list_item)) + cl_qlist_insert_tail(&m->port_order_list, &port->list_item); + else + OSM_LOG(m->p_log, OSM_LOG_INFO, + "WRN AD38: guid 0x%016" PRIx64 " already in list\n", + guid); +} + +/* compare function of #Hca attached to a switch for stdlib qsort */ +static int cmp_num_hca(const void * l1, const void * l2) +{ + vertex_t *sw1 = *((vertex_t **) l1); + vertex_t *sw2 = *((vertex_t **) l2); + uint32_t num_hca1 = 0, num_hca2 = 0; + + if (sw1) + num_hca1 =
sw1->num_hca;
+	if (sw2)
+		num_hca2 = sw2->num_hca;
+
+	if (num_hca1 > num_hca2)
+		return -1;
+	else if (num_hca1 < num_hca2)
+		return 1;
+	else
+		return 0;
+}
+
+/* sort the array of switch vertices in place with qsort from the C
+   library; cmp_num_hca is the comparison callback
+*/
+static inline void sw_list_sort_by_num_hca(vertex_t ** sw_list,
+					   uint32_t sw_list_size)
+{
+	qsort(sw_list, sw_list_size, sizeof(*sw_list), cmp_num_hca);
+}
+
+/**********************************************************************
+ **********************************************************************/
+
+/************ helper functions to manage a map of CN and I/O guids ****
+ **********************************************************************/
+/* insert one guid into the cl_qmap handed in as cxt; the key is the guid
+   after cl_hton64, the third parameter is not used;
+   returns -1 if the map item cannot be allocated, otherwise 0
+*/
+static int add_guid_to_map(void * cxt, uint64_t guid, char * p)
+{
+	cl_qmap_t *guid_map = cxt;
+	name_map_item_t *new_item = malloc(sizeof(*new_item));
+
+	if (new_item == NULL)
+		return -1;
+
+	new_item->guid = cl_hton64(guid);	/* internal: network byte order */
+	new_item->name = NULL;	/* name isn't needed */
+
+	/* cl_qmap_insert handed back a different item -> ours is released */
+	if ((name_map_item_t *) cl_qmap_insert(guid_map, new_item->guid,
+					       &new_item->item) != new_item)
+		free(new_item);
+
+	return 0;
+}
+
+/* free every item of the guid map and empty the map afterwards */
+static void destroy_guid_map(cl_qmap_t * guid_tbl)
+{
+	name_map_item_t *p_cur = (name_map_item_t *) cl_qmap_head(guid_tbl);
+	name_map_item_t *p_following = NULL;
+
+	/* the successor is fetched before the current item is freed */
+	for (; p_cur != (name_map_item_t *) cl_qmap_end(guid_tbl);
+	     p_cur = p_following) {
+		p_following = (name_map_item_t *) cl_qmap_next(&p_cur->item);
+		free(p_cur);
+	}
+	cl_qmap_remove_all(guid_tbl);
+}
+
+/**********************************************************************
+ **********************************************************************/
+
+/* write every switch vertex of the graph and all of its links to the log
+   (debug level)
+*/
+static void dfsssp_print_graph(osm_ucast_mgr_t * p_mgr, vertex_t * adj_list,
+			       uint32_t size)
+{
+	uint32_t idx = 0, link_no = 0;
+	const vertex_t *vertex = NULL;
+	const link_t *edge = NULL;
+
+	/* index 0 is for the source in dijkstra -> ignore */
+	for (idx = 1; idx < size; idx++) {
+		vertex = &adj_list[idx];
+
+		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+			"adj_list[%" PRIu32 "]:\n", idx);
+		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+			" guid = 0x%" PRIx64 " lid = %" PRIu16 " (%s)\n",
+			vertex->guid, vertex->lid,
+			vertex->sw->p_node->print_desc);
+		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+			" num_hca = %" PRIu32 "\n", vertex->num_hca);
+
+		/* links are numbered starting with 1 in the output */
+		link_no = 1;
+		edge = vertex->links;
+		while (edge != NULL) {
+			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+				" link[%" PRIu32 "]:\n", link_no);
+			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+				" to guid = 0x%" PRIx64 " (%s) port %" PRIu8 "\n",
+				edge->guid,
+				adj_list[edge->to].sw->p_node->print_desc,
+				edge->to_port);
+			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+				" weight on this link = %" PRIu64 "\n",
+				edge->weight);
+
+			edge = edge->next;
+			link_no++;
+		}
+	}
+}
+
+/* predefine, to use this in next function */
+static void dfsssp_context_destroy(void *context);
+static int dijkstra(osm_ucast_mgr_t * p_mgr, cl_heap_t * p_heap,
+		    vertex_t * adj_list, uint32_t adj_list_size,
+		    osm_port_t * port, uint16_t lid);
+
+/* traverse subnet to gather information about the connected switches;
+   the adjacency list built here is stored in the dfsssp context;
+   returns 0 on success and -1 on failure
+*/
+static int dfsssp_build_graph(void *context)
+{
+	dfsssp_context_t *dfsssp_ctx = (dfsssp_context_t *) context;
+	osm_ucast_mgr_t *p_mgr = (osm_ucast_mgr_t *) (dfsssp_ctx->p_mgr);
+	boolean_t has_fdr10 = (1 == p_mgr->p_subn->opt.fdr10) ?
TRUE : FALSE;
+	cl_qmap_t *port_tbl = &p_mgr->p_subn->port_guid_tbl;	/* 1 management port per switch + 1 or 2 ports for each Hca */
+	osm_port_t *p_port = NULL;
+	cl_qmap_t *sw_tbl = &p_mgr->p_subn->sw_guid_tbl;
+	cl_map_item_t *item = NULL;
+	osm_switch_t *sw = NULL;
+	osm_node_t *remote_node = NULL;
+	uint8_t port = 0, remote_port = 0;
+	uint32_t i = 0, j = 0, err = 0, undiscov = 0, max_num_undiscov = 0;
+	uint64_t total_num_hca = 0;
+	vertex_t *adj_list = NULL;
+	osm_physp_t *p_physp = NULL;
+	link_t *link = NULL, *head = NULL;
+	uint32_t num_sw = 0, adj_list_size = 0;
+	uint8_t lmc = 0;
+	uint16_t sm_lid = 0;
+	cl_heap_t heap;
+
+	OSM_LOG_ENTER(p_mgr->p_log);
+	OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE,
+		"Building graph for df-/sssp routing\n");
+
+	/* if this pointer isn't NULL, this is a reroute step;
+	   old context will be destroyed (adj_list and srcdest2vl_table)
+	 */
+	if (dfsssp_ctx->adj_list)
+		dfsssp_context_destroy(context);
+
+	/* construct the generic heap object to use it in dijkstra;
+	   the ERROR path below asks cl_is_heap_inited() before destroying it
+	 */
+	cl_heap_construct(&heap);
+
+	num_sw = cl_qmap_count(sw_tbl);
+	adj_list_size = num_sw + 1;
+	/* allocate an adjacency list (array), 0. element is reserved for the source (Hca) in the routing algo, others are switches */
+	adj_list = (vertex_t *) malloc(adj_list_size * sizeof(vertex_t));
+	if (!adj_list) {
+		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+			"ERR AD02: cannot allocate memory for adj_list\n");
+		goto ERROR;
+	}
+	for (i = 0; i < adj_list_size; i++)
+		set_default_vertex(&adj_list[i]);
+
+	dfsssp_ctx->adj_list = adj_list;	/* stored in the context right away; every later failure passes the context to dfsssp_context_destroy at ERROR */
+	dfsssp_ctx->adj_list_size = adj_list_size;
+
+	/* count the total number of Hca / LIDs (for lmc>0) in the fabric;
+	   even include base/enhanced switch port 0; base SP0 will have lmc=0
+	 */
+	for (item = cl_qmap_head(port_tbl); item != cl_qmap_end(port_tbl);
+	     item = cl_qmap_next(item)) {
+		p_port = (osm_port_t *) item;
+		if (osm_node_get_type(p_port->p_node) == IB_NODE_TYPE_CA ||
+		    osm_node_get_type(p_port->p_node) == IB_NODE_TYPE_SWITCH) {
+			lmc = osm_port_get_lmc(p_port);
+			total_num_hca += (1 << lmc);
+		}
+	}
+
+	i = 1;			/* fill adj_list -> start with index 1 */
+	for (item = cl_qmap_head(sw_tbl); item != cl_qmap_end(sw_tbl);
+	     item = cl_qmap_next(item), i++) {
+		sw = (osm_switch_t *) item;
+		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+			"Processing switch with GUID 0x%" PRIx64 "\n",
+			cl_ntoh64(osm_node_get_node_guid(sw->p_node)));
+
+		adj_list[i].guid =
+		    cl_ntoh64(osm_node_get_node_guid(sw->p_node));
+		adj_list[i].lid =
+		    cl_ntoh16(osm_node_get_base_lid(sw->p_node, 0));
+		adj_list[i].sw = sw;
+
+		link = (link_t *) malloc(sizeof(link_t));
+		if (!link) {
+			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+				"ERR AD03: cannot allocate memory for a link\n");
+			goto ERROR;
+		}
+		head = link;	/* dummy list head: it is freed again after the port loop, only head->next is stored in adj_list[i].links */
+		head->next = NULL;
+
+		/* add SP0 to number of CA connected to a switch */
+		lmc = osm_node_get_lmc(sw->p_node, 0);
+		adj_list[i].num_hca += (1 << lmc);
+
+		/* iterate over all ports in the switch, start with port 1 (port 0 is a management port) */
+		for (port = 1; port < sw->num_ports; port++) {
+			/* get the node behind the port */
+			remote_node =
+			    osm_node_get_remote_node(sw->p_node, port,
+						     &remote_port);
+			/* if there is no remote node on this port or it's the same switch -> try next port */
+			if (!remote_node || remote_node->sw == sw)
+				continue;
+			/* make sure the link is healthy */
+			p_physp = osm_node_get_physp_ptr(sw->p_node, port);
+			if (!p_physp || !osm_link_is_healthy(p_physp))
+				continue;
+			/* if there is a Hca connected -> count and cycle */
+			if (!remote_node->sw) {
+				lmc = osm_node_get_lmc(remote_node, (uint32_t)remote_port);
+				adj_list[i].num_hca += (1 << lmc);
+				continue;
+			}
+			/* filter out throttled links to improve performance */
+			if (p_mgr->p_subn->opt.avoid_throttled_links &&
+			    osm_link_is_throttled(p_physp, has_fdr10)) {
+				OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
+					"Detected and ignoring throttled link:"
+					" 0x%" PRIx64 "/P%" PRIu8
+					" <--> 0x%" PRIx64 "/P%" PRIu8 "\n",
+					cl_ntoh64(osm_node_get_node_guid(sw->p_node)),
+					port,
+					cl_ntoh64(osm_node_get_node_guid(remote_node)),
+					remote_port);
+				continue;
+			}
+			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+				"Node 0x%" PRIx64 ", remote node 0x%" PRIx64
+				", port %" PRIu8 ", remote port %" PRIu8 "\n",
+				cl_ntoh64(osm_node_get_node_guid(sw->p_node)),
+				cl_ntoh64(osm_node_get_node_guid(remote_node)),
+				port, remote_port);
+
+			link->next = (link_t *) malloc(sizeof(link_t));
+			if (!link->next) {
+				OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+					"ERR AD08: cannot allocate memory for a link\n");
+				while (head) {	/* this switch's chain is not yet stored in adj_list[i].links, so it is released here by hand */
+					link = head;
+					head = head->next;
+					free(link);
+				}
+				goto ERROR;
+			}
+			link = link->next;
+			set_default_link(link);
+			link->guid =
+			    cl_ntoh64(osm_node_get_node_guid(remote_node));
+			link->from = i;
+			link->from_port = port;
+			link->to_port = remote_port;
+			link->weight = total_num_hca * total_num_hca;	/* initialize with P^2 to force shortest paths */
+		}
+
+		adj_list[i].links = head->next;
+		free(head);
+	}
+	/* connect the links with their second adjacent node in the list:
+	   link->to becomes the adj_list index whose guid equals link->guid
+	 */
+	for (i = 1; i < adj_list_size; i++) {
+		link = adj_list[i].links;
+		while (link) {
+			for (j = 1; j < adj_list_size; j++) {
+				if (link->guid == adj_list[j].guid) {
+					link->to = j;
+					break;
+				}
+			}
+			link = link->next;
+		}
+	}
+
+	/* do one dry run to determine connectivity issues;
+	   the source is the port found for the master SM base lid
+	 */
+	sm_lid = p_mgr->p_subn->master_sm_base_lid;
+	p_port = osm_get_port_by_lid(p_mgr->p_subn, sm_lid);
+	err = dijkstra(p_mgr, &heap, adj_list, adj_list_size, p_port, sm_lid);
+	if (err) {
+		goto ERROR;
+	} else {
+		/* if sm is running on a switch, then dijkstra doesn't
+		   initialize the used_link for this switch
+		 */
+		if (osm_node_get_type(p_port->p_node) != IB_NODE_TYPE_CA)
+			max_num_undiscov = 1;
+		for (i = 1; i < adj_list_size; i++)
+			undiscov += (adj_list[i].used_link) ? 0 : 1;
+		if (max_num_undiscov < undiscov) {
+			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+				"ERR AD0C: unsupported network state (detached"
+				" and inaccessible switches found; gracefully"
+				" shutdown this routing engine)\n");
+			goto ERROR;
+		}
+	}
+	/* delete the heap which is not needed anymore */
+	cl_heap_destroy(&heap);
+
+	/* print the discovered graph */
+	if (OSM_LOG_IS_ACTIVE_V2(p_mgr->p_log, OSM_LOG_DEBUG))
+		dfsssp_print_graph(p_mgr, adj_list, adj_list_size);
+
+	OSM_LOG_EXIT(p_mgr->p_log);
+	return 0;
+
+ERROR:	/* common failure exit: destroy the heap if it was initialized, destroy the context and report -1 */
+	if (cl_is_heap_inited(&heap))
+		cl_heap_destroy(&heap);
+	dfsssp_context_destroy(context);
+	return -1;
+}
+/* log (debug level) the route of every vertex in state DISCOVERED: starting
+   at that vertex the used_link entries are followed via used_link->from
+   until a vertex without used_link is reached; for each visited vertex its
+   guid and the to_port of its used_link are printed
+*/
+static void print_routes(osm_ucast_mgr_t * p_mgr, vertex_t * adj_list,
+			 uint32_t adj_list_size, osm_port_t * port)
+{
+	uint32_t i = 0, j = 0;
+
+	for (i = 1; i < adj_list_size; i++) {
+		if (adj_list[i].state == DISCOVERED) {
+			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+				"Route from 0x%" PRIx64 " (%s) to 0x%" PRIx64
+				" (%s):\n", adj_list[i].guid,
+				adj_list[i].sw->p_node->print_desc,
+				cl_ntoh64(osm_node_get_node_guid(port->p_node)),
+				port->p_node->print_desc);
+			j = i;
+			while (adj_list[j].used_link) {
+				if (j > 0) {
+					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+						" 0x%" PRIx64
+						" (%s) routes thru port %" PRIu8
+						"\n", adj_list[j].guid,
+						adj_list[j].sw->p_node->
+						print_desc,
+						adj_list[j].used_link->to_port);
+				} else {
+
OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+						" 0x%" PRIx64
+						" (%s) routes thru port %" PRIu8
+						"\n", adj_list[j].guid,
+						port->p_node->print_desc,
+						adj_list[j].used_link->to_port);
+				}
+				j = adj_list[j].used_link->from;
+			}
+		}
+	}
+}
+
+/* callback function for the cl_heap to update the index;
+   context is treated as a vertex_t and its heap_index member receives
+   new_index; a NULL context is ignored;
+   dijkstra registers this function in its cl_heap_init call
+*/
+static void apply_index_update(const void * context, const size_t new_index)
+{
+	vertex_t *heap_elem = (vertex_t *) context;
+	if (heap_elem)
+		heap_elem->heap_index = new_index;
+}
+
+/* dijkstra step from one source to all switches in the df-/sssp graph
+
+   - the heap is initialized on first use, otherwise resized
+   - all switch vertices (index >= 1) are reset and inserted with key INF
+   - if port belongs to a CA, adj_list[0] becomes the source and gets one
+     link (weight 1) to the switch behind the port; otherwise the switch
+     vertex with the guid of port's node is the source
+   - afterwards used_link, distance and hops of the vertices describe the
+     paths found
+
+   returns 0 on success; otherwise the failing cl_status_t of a heap call,
+   or 1 if the link of adj_list[0] cannot be allocated or the link of the
+   source CA is not healthy
+*/
+static int dijkstra(osm_ucast_mgr_t * p_mgr, cl_heap_t * p_heap,
+		    vertex_t * adj_list, uint32_t adj_list_size,
+		    osm_port_t * port, uint16_t lid)
+{
+	uint32_t i = 0, j = 0, index = 0;
+	osm_node_t *remote_node = NULL;
+	uint8_t remote_port = 0;
+	vertex_t *current = NULL;
+	link_t *link = NULL;
+	uint64_t guid = 0;
+	cl_status_t ret = CL_SUCCESS;
+
+	OSM_LOG_ENTER(p_mgr->p_log);
+
+	/* build a 4-ary heap to find the node with minimum distance */
+	if (!cl_is_heap_inited(p_heap))
+		ret = cl_heap_init(p_heap, adj_list_size, 4,
+				   &apply_index_update, NULL);
+	else
+		ret = cl_heap_resize(p_heap, adj_list_size);
+	if (ret != CL_SUCCESS) {
+		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+			"ERR AD09: cannot allocate memory or resize heap\n");
+		return ret;
+	}
+
+	/* reset all switches for new round with a new source for dijkstra */
+	for (i = 1; i < adj_list_size; i++) {
+		adj_list[i].hops = 0;
+		adj_list[i].used_link = NULL;
+		adj_list[i].distance = INF;
+		adj_list[i].state = UNDISCOVERED;
+		ret = cl_heap_insert(p_heap, INF, &adj_list[i]);
+		if (ret != CL_SUCCESS) {
+			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+				"ERR AD11: cl_heap_insert failed\n");
+			return ret;
+		}
+	}
+
+	/* if behind port is a Hca -> set adj_list[0] */
+	if (osm_node_get_type(port->p_node) == IB_NODE_TYPE_CA) {
+		/* save old link to prevent many mallocs after set_default_... */
+		link = adj_list[0].links;
+		/* initialize adj_list[0] (the source for the routing, a Hca) */
+		set_default_vertex(&adj_list[0]);
+		adj_list[0].guid =
+		    cl_ntoh64(osm_node_get_node_guid(port->p_node));
+		adj_list[0].lid = lid;
+		index = 0;
+		/* write saved link back to new adj_list[0] */
+		adj_list[0].links = link;
+
+		/* initialize link to neighbor for adj_list[0];
+		   make sure the link is healthy
+		 */
+		if (port->p_physp && osm_link_is_healthy(port->p_physp)) {
+			remote_node =
+			    osm_node_get_remote_node(port->p_node,
+						     port->p_physp->port_num,
+						     &remote_port);
+			/* if there is no remote node on this port or it's the same Hca -> ignore */
+			if (remote_node
+			    && (osm_node_get_type(remote_node) ==
+				IB_NODE_TYPE_SWITCH)) {
+				if (!(adj_list[0].links)) {
+					adj_list[0].links =
+					    (link_t *) malloc(sizeof(link_t));
+					if (!(adj_list[0].links)) {
+						OSM_LOG(p_mgr->p_log,
+							OSM_LOG_ERROR,
+							"ERR AD07: cannot allocate memory for a link\n");
+						return 1;
+					}
+				}
+				set_default_link(adj_list[0].links);
+				adj_list[0].links->guid =
+				    cl_ntoh64(osm_node_get_node_guid
+					      (remote_node));
+				adj_list[0].links->from_port =
+				    port->p_physp->port_num;
+				adj_list[0].links->to_port = remote_port;
+				adj_list[0].links->weight = 1;
+				for (j = 1; j < adj_list_size; j++) {
+					if (adj_list[0].links->guid ==
+					    adj_list[j].guid) {
+						adj_list[0].links->to = j;
+						break;
+					}
+				}
+			}
+		} else {
+			/* if link is unhealthy then there's a severe issue */
+			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+				"ERR AD0B: unsupported network state (CA with"
+				" unhealthy link state discovered; should have"
+				" been filtered out before already; gracefully"
+				" shutdown this routing engine)\n");
+			return 1;
+		}
+		ret = cl_heap_insert(p_heap, INF, &adj_list[0]);
+		if (ret != CL_SUCCESS) {
+			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+				"ERR AD13: cl_heap_insert failed\n");
+			return ret;
+		}
+		/* if behind port is a switch -> search switch in adj_list */
+	} else {
+		/* reset adj_list[0], if links=NULL reset was done before, then skip */
+		if (adj_list[0].links) {
+			free(adj_list[0].links);
+			set_default_vertex(&adj_list[0]);
+		}
+		/* search for the switch which is the source in this round */
+		guid = cl_ntoh64(osm_node_get_node_guid(port->p_node));
+		for (i = 1; i < adj_list_size; i++) {
+			if (guid == adj_list[i].guid) {
+				index = i;
+				break;
+			}
+		}
+	}
+
+	/* source in dijkstra */
+	adj_list[index].distance = 0;
+	adj_list[index].state = DISCOVERED;
+	adj_list[index].hops = 0;	/* the source has hop count = 0 */
+	ret = cl_heap_modify_key(p_heap, adj_list[index].distance,
+				 adj_list[index].heap_index);
+	if (ret != CL_SUCCESS) {
+		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+			"ERR AD10: index out of bounds in cl_heap_modify_key\n");
+		return ret;
+	}
+
+	current = (vertex_t *) cl_heap_extract_root(p_heap);
+	while (current) {
+		current->state = DISCOVERED;
+		if (current->used_link)	/* increment the number of hops to the source for each new node */
+			current->hops =
+			    adj_list[current->used_link->from].hops + 1;
+
+		/* add/update nodes which aren't discovered but accessible */
+		for (link = current->links; link != NULL; link = link->next) {
+			if ((adj_list[link->to].state != DISCOVERED)
+			    && (current->distance + link->weight <
+				adj_list[link->to].distance)) {
+				adj_list[link->to].used_link = link;
+				adj_list[link->to].distance =
+				    current->distance + link->weight;
+				ret = cl_heap_modify_key(p_heap,
+							 adj_list[link->to].distance,
+							 adj_list[link->to].heap_index);
+				if (ret != CL_SUCCESS) {
+					OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+						"ERR AD12: index out of bounds in cl_heap_modify_key\n");
+					return ret;
+				}
+			}
+		}
+
+		current = (vertex_t *) cl_heap_extract_root(p_heap);
+	}
+
+	OSM_LOG_EXIT(p_mgr->p_log);
+	return 0;
+}
+
+/* update the linear forwarding tables of all switches with the information
+   from the last dijkstra step;
+   for every switch vertex with a used_link the to_port of that link is
+   written to new_lft[lid] and the hop count is set;
+   returns 0, or 1 if that port is OSM_NO_PATH or has no physical port
+*/
+static int update_lft(osm_ucast_mgr_t * p_mgr, vertex_t * adj_list,
+		      uint32_t adj_list_size, osm_port_t * p_port, uint16_t lid)
+{
+	uint32_t i = 0;
+	uint8_t port =
0;
+	uint8_t hops = 0;
+	osm_switch_t *p_sw = NULL;
+	boolean_t is_ignored_by_port_prof = FALSE;
+	osm_physp_t *p = NULL;
+	cl_status_t ret;
+
+	OSM_LOG_ENTER(p_mgr->p_log);
+
+	for (i = 1; i < adj_list_size; i++) {
+		/* if no route goes thru this switch -> cycle */
+		if (!(adj_list[i].used_link))
+			continue;
+
+		p_sw = adj_list[i].sw;
+		hops = adj_list[i].hops;
+		port = adj_list[i].used_link->to_port;
+		/* the used_link is the link that was used in dijkstra to reach this node,
+		   so the to_port is the local port on this node
+		 */
+
+		if (port == OSM_NO_PATH) {	/* if clause shouldn't be possible in this routing, but who cares */
+			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+				"ERR AD06: No path to get to LID %" PRIu16
+				" from switch 0x%" PRIx64 "\n", lid,
+				cl_ntoh64(osm_node_get_node_guid
+					  (p_sw->p_node)));
+
+			/* nothing is written for a non existing port; the
+			   routing step is aborted
+			 */
+			return 1;
+		} else {
+			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+				"Routing LID %" PRIu16 " to port %" PRIu8
+				" for switch 0x%" PRIx64 "\n", lid, port,
+				cl_ntoh64(osm_node_get_node_guid
+					  (p_sw->p_node)));
+
+			p = osm_node_get_physp_ptr(p_sw->p_node, port);
+			if (!p) {
+				/* the blank before "not found" keeps the GUID
+				   readable in the log line
+				 */
+				OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+					"ERR AD0A: Physical port %d of Node GUID 0x%"
+					PRIx64 " not found\n", port,
+					cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)));
+				return 1;
+			}
+
+			/* we would like to optionally ignore this port in equalization
+			   as in the case of the Mellanox Anafa Internal PCI TCA port
+			 */
+			is_ignored_by_port_prof = p->is_prof_ignored;
+
+			/* We also would ignore this route if the target lid is of
+			   a switch and the port_profile_switch_node is not TRUE
+			 */
+			if (!p_mgr->p_subn->opt.port_profile_switch_nodes)
+				is_ignored_by_port_prof |=
+				    (osm_node_get_type(p_port->p_node) ==
+				     IB_NODE_TYPE_SWITCH);
+		}
+
+		/* to support lmc > 0 the functions alloc_ports_priv, free_ports_priv, find_and_add_remote_sys
+		   from minhop aren't needed cause osm_switch_recommend_path is implicitly calculated
+		   for each LID pair thru dijkstra;
+		   for each port the dijkstra algorithm calculates (max_lid_ho - min_lid_ho)-times maybe
+		   disjoint routes to spread the bandwidth -> different routes for one port and lmc>0
+		 */
+
+		/* set port in LFT */
+		p_sw->new_lft[lid] = port;
+		if (!is_ignored_by_port_prof) {
+			/* update the number of path routing thru this port */
+			osm_switch_count_path(p_sw, port);
+		}
+		/* set the hop count from this switch to the lid;
+		   a failure is only logged, the remaining switches are
+		   still processed
+		 */
+		ret = osm_switch_set_hops(p_sw, lid, port, hops);
+		if (ret != CL_SUCCESS)
+			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+				"ERR AD05: cannot set hops for LID %" PRIu16
+				" at switch 0x%" PRIx64 "\n", lid,
+				cl_ntoh64(osm_node_get_node_guid
+					  (p_sw->p_node)));
+	}
+
+	OSM_LOG_EXIT(p_mgr->p_log);
+	return 0;
+}
+
+/* the function updates the multicast group membership information
+   similar to create_mgrp_switch_map (osm_mcast_mgr.c)
+   => with it we can identify if a switch needs to be processed
+   or not in update_mcft
+
+   port_list holds osm_mcast_work_obj_t entries; a member port on a switch
+   sets is_mc_member of that switch, every other member port increments
+   num_of_mcm of the switch on the remote side of its link
+*/
+static void update_mgrp_membership(cl_qlist_t * port_list)
+{
+	osm_mcast_work_obj_t *wobj = NULL;
+	osm_port_t *port = NULL;
+	osm_switch_t *sw = NULL;
+	cl_list_item_t *i = NULL;
+
+	for (i = cl_qlist_head(port_list); i != cl_qlist_end(port_list);
+	     i = cl_qlist_next(i)) {
+		wobj = cl_item_obj(i, wobj, list_item);
+		port = wobj->p_port;
+		if (port->p_node->sw) {
+			sw = port->p_node->sw;
+			sw->is_mc_member = 1;
+		} else if (port->p_physp && port->p_physp->p_remote_physp &&
+			   port->p_physp->p_remote_physp->p_node->sw) {
+			sw = port->p_physp->p_remote_physp->p_node->sw;
+			sw->num_of_mcm++;
+		}
+		/* else: the member port has no remote port or its neighbor
+		   is not a switch -> no switch to account it to; skipping
+		   avoids a NULL pointer dereference
+		 */
+	}
+}
+
+/* reset is_mc_member and num_of_mcm for future computations;
+   vertices flagged as dropped are skipped
+*/
+static void reset_mgrp_membership(vertex_t * adj_list, uint32_t adj_list_size)
+{
+	uint32_t i = 0;
+
+	for (i = 1; i < adj_list_size; i++) {
+		if (adj_list[i].dropped)
+			continue;
+
+		adj_list[i].sw->is_mc_member = 0;
+		adj_list[i].sw->num_of_mcm = 0;
+	}
+}
+
+/* update the multicast forwarding tables of all switches with the information
+   from the previous dijkstra step for the current mlid
+*/
+static int update_mcft(osm_sm_t *
p_sm, vertex_t * adj_list,
+		       uint32_t adj_list_size, uint16_t mlid_ho,
+		       cl_qmap_t * port_map, osm_switch_t * root_sw)
+{
+	uint32_t i = 0;
+	uint8_t port = 0, remote_port = 0;
+	uint8_t upstream_port = 0, downstream_port = 0;
+	ib_net64_t guid = 0;
+	osm_switch_t *p_sw = NULL;
+	osm_node_t *remote_node = NULL;
+	osm_physp_t *p_physp = NULL;
+	osm_mcast_tbl_t *p_tbl = NULL;
+	vertex_t *curr_adj = NULL;
+
+	OSM_LOG_ENTER(p_sm->p_log);
+
+	for (i = 1; i < adj_list_size; i++) {
+		if (adj_list[i].dropped)
+			continue;
+
+		p_sw = adj_list[i].sw;
+		/* adj_list[].guid is stored in host byte order when the graph
+		   is built, so it is printed without another conversion
+		 */
+		OSM_LOG(p_sm->p_log, OSM_LOG_VERBOSE,
+			"Processing switch 0x%016" PRIx64
+			" (%s) for MLID 0x%X\n", adj_list[i].guid,
+			p_sw->p_node->print_desc, mlid_ho);
+
+		/* if a) the switch does not support mcast or
+		   b) no ports of this switch are part of the mcast group
+		   then cycle
+		 */
+		if (osm_switch_supports_mcast(p_sw) == FALSE ||
+		    (p_sw->num_of_mcm == 0 && !(p_sw->is_mc_member)))
+			continue;
+
+		p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);
+
+		/* add all ports of this sw to the mcast table,
+		   if they are part of the mcast grp
+		 */
+		if (p_sw->is_mc_member)
+			osm_mcast_tbl_set(p_tbl, mlid_ho, 0);
+		for (port = 1; port < p_sw->num_ports; port++) {
+			/* get the node behind the port */
+			remote_node =
+			    osm_node_get_remote_node(p_sw->p_node, port,
+						     &remote_port);
+			/* check if connected and its not the same switch */
+			if (!remote_node || remote_node->sw == p_sw)
+				continue;
+			/* make sure the link is healthy */
+			p_physp = osm_node_get_physp_ptr(p_sw->p_node, port);
+			if (!p_physp || !osm_link_is_healthy(p_physp))
+				continue;
+			/* we don't add upstream ports in this step */
+			if (osm_node_get_type(remote_node) != IB_NODE_TYPE_CA)
+				continue;
+
+			guid = osm_physp_get_port_guid(osm_node_get_physp_ptr(
+						       remote_node,
+						       remote_port));
+			if (cl_qmap_get(port_map, guid)
+			    != cl_qmap_end(port_map))
+				osm_mcast_tbl_set(p_tbl, mlid_ho, port);
+		}
+
+		/* now we have to add the upstream port of 'this' switch and
+		   the downstream port of the next switch to the mcast table
+		   until we reach the root_sw
+		 */
+		curr_adj = &adj_list[i];
+		while (curr_adj->sw != root_sw) {
+			/* the used_link is the link that was used in dijkstra to reach this node,
+			   so the to_port is the local (upstream) port on curr_adj->sw
+			 */
+			upstream_port = curr_adj->used_link->to_port;
+			osm_mcast_tbl_set(p_tbl, mlid_ho, upstream_port);
+
+			/* now we go one step in direction root_sw and add the
+			   downstream port for the spanning tree
+			 */
+			downstream_port = curr_adj->used_link->from_port;
+			p_tbl = osm_switch_get_mcast_tbl_ptr(
+				adj_list[curr_adj->used_link->from].sw);
+			osm_mcast_tbl_set(p_tbl, mlid_ho, downstream_port);
+
+			curr_adj = &adj_list[curr_adj->used_link->from];
+		}
+	}
+
+	OSM_LOG_EXIT(p_sm->p_log);
+	return 0;
+}
+
+/* increment the edge weights of the df-/sssp graph which represent the number
+   of paths on this link;
+   for every switch with a used_link, its num_hca is added to each link on
+   the chain of used_link entries starting at that switch
+*/
+static void update_weights(osm_ucast_mgr_t * p_mgr, vertex_t * adj_list,
+			   uint32_t adj_list_size)
+{
+	uint32_t i = 0, j = 0;
+	uint32_t additional_weight = 0;
+
+	OSM_LOG_ENTER(p_mgr->p_log);
+
+	for (i = 1; i < adj_list_size; i++) {
+		/* if no route goes thru this switch -> cycle */
+		if (!(adj_list[i].used_link))
+			continue;
+		additional_weight = adj_list[i].num_hca;
+
+		j = i;
+		while (adj_list[j].used_link) {
+			/* update the link from pre to this node */
+			adj_list[j].used_link->weight += additional_weight;
+
+			j = adj_list[j].used_link->from;
+		}
+	}
+
+	OSM_LOG_EXIT(p_mgr->p_log);
+}
+
+/* get the largest number of virtual lanes which is supported by all switches
+   in the subnet;
+   the result is always in the range 1 ... 15; 1 is returned as well if no
+   switch port with a remote port reports operational VLs
+*/
+static uint8_t get_avail_vl_in_subn(osm_ucast_mgr_t * p_mgr)
+{
+	uint32_t i = 0;
+	uint8_t vls_avail = 0xFF, port_vls_avail = 0;
+	cl_qmap_t *sw_tbl = &p_mgr->p_subn->sw_guid_tbl;
+	cl_map_item_t *item = NULL;
+	osm_switch_t *sw = NULL;
+
+	/* traverse all switches to get the number of available virtual lanes in the subnet */
+	for (item = cl_qmap_head(sw_tbl); item != cl_qmap_end(sw_tbl);
+	     item = cl_qmap_next(item)) {
+		sw = (osm_switch_t *) item;
+
+		/* ignore management port 0 */
+		for (i = 1; i < osm_node_get_num_physp(sw->p_node); i++) {
+			osm_physp_t *p_physp =
+			    osm_node_get_physp_ptr(sw->p_node, i);
+
+			if (p_physp && p_physp->p_remote_physp) {
+				port_vls_avail =
+				    ib_port_info_get_op_vls(&p_physp->
+							    port_info);
+				if (port_vls_avail
+				    && port_vls_avail < vls_avail)
+					vls_avail = port_vls_avail;
+			}
+		}
+	}
+
+	/* vls_avail still holds its start value 0xFF if no connected switch
+	   port was found; shifting by 0xFE below would be undefined
+	   behaviour, so fall back to the minimum of one VL
+	 */
+	if (vls_avail == 0xFF)
+		return 1;
+
+	/* ib_port_info_get_op_vls gives values 1 ... 5 (s. IBAS 14.2.5.6) */
+	vls_avail = 1 << (vls_avail - 1);
+
+	/* set boundaries (s. IBAS 3.5.7) */
+	if (vls_avail > 15)
+		vls_avail = 15;
+	if (vls_avail < 1)
+		vls_avail = 1;
+
+	return vls_avail;
+}
+
+/* search for cycles in the channel dependency graph to identify possible
+   deadlocks in the network;
+   assign new virtual lanes to some paths to break the deadlocks
+*/
+static int dfsssp_remove_deadlocks(dfsssp_context_t * dfsssp_ctx)
+{
+	osm_ucast_mgr_t *p_mgr = (osm_ucast_mgr_t *) dfsssp_ctx->p_mgr;
+
+	cl_qlist_t *port_tbl = &p_mgr->port_order_list;	/* 1 management port per switch + 1 or 2 ports for each Hca */
+	cl_list_item_t *item1 = NULL, *item2 = NULL;
+	osm_port_t *src_port = NULL, *dest_port = NULL;
+
+	uint32_t i = 0, j = 0, err = 0;
+	uint8_t vl = 0, test_vl = 0, vl_avail = 0, vl_needed = 1;
+	double most_avg_paths = 0.0;
+	cdg_node_t **cdg = NULL, *start_here = NULL, *cycle = NULL;
+	cdg_link_t *weakest_link = NULL;
+	uint32_t srcdest = 0;
+
+	vltable_t *srcdest2vl_table = NULL;
+	uint8_t lmc = 0;
+	uint16_t slid = 0, dlid = 0, min_lid_ho = 0, max_lid_ho =
+	    0, min_lid_ho2 = 0, max_lid_ho2 = 0;
+	uint64_t *paths_per_vl = NULL;
+	uint64_t from = 0, to = 0, count = 0;
+	uint8_t *split_count = NULL;
+	uint8_t ntype = 0;
+
+	OSM_LOG_ENTER(p_mgr->p_log);
+	OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE,
+		"Assign each src/dest pair a Virtual Lanes, to remove deadlocks in the routing\n");
+
+	vl_avail = get_avail_vl_in_subn(p_mgr);
+	OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
+		"Virtual Lanes
available: %" PRIu8 "\n", vl_avail); + + paths_per_vl = (uint64_t *) malloc(vl_avail * sizeof(uint64_t)); + if (!paths_per_vl) { + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, + "ERR AD22: cannot allocate memory for paths_per_vl\n"); + return 1; + } + memset(paths_per_vl, 0, vl_avail * sizeof(uint64_t)); + + cdg = (cdg_node_t **) malloc(vl_avail * sizeof(cdg_node_t *)); + if (!cdg) { + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, + "ERR AD23: cannot allocate memory for cdg\n"); + free(paths_per_vl); + return 1; + } + for (i = 0; i < vl_avail; i++) + cdg[i] = NULL; + + count = 0; + /* count all ports (also multiple LIDs) of type CA or SP0 for size of VL table */ + for (item1 = cl_qlist_head(port_tbl); item1 != cl_qlist_end(port_tbl); + item1 = cl_qlist_next(item1)) { + dest_port = (osm_port_t *)cl_item_obj(item1, dest_port, + list_item); + ntype = osm_node_get_type(dest_port->p_node); + if (ntype == IB_NODE_TYPE_CA || ntype == IB_NODE_TYPE_SWITCH) { + /* only SP0 with SLtoVLMapping support will be processed */ + if (ntype == IB_NODE_TYPE_SWITCH + && !(dest_port->p_physp->port_info.capability_mask + & IB_PORT_CAP_HAS_SL_MAP)) + continue; + + lmc = osm_port_get_lmc(dest_port); + count += (1 << lmc); + } + } + /* allocate VL table and indexing array */ + err = vltable_alloc(&srcdest2vl_table, count); + if (err) { + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, + "ERR AD26: cannot allocate memory for srcdest2vl_table\n"); + goto ERROR; + } + + i = 0; + /* fill lids into indexing array */ + for (item1 = cl_qlist_head(port_tbl); item1 != cl_qlist_end(port_tbl); + item1 = cl_qlist_next(item1)) { + dest_port = (osm_port_t *)cl_item_obj(item1, dest_port, + list_item); + ntype = osm_node_get_type(dest_port->p_node); + if (ntype == IB_NODE_TYPE_CA || ntype == IB_NODE_TYPE_SWITCH) { + /* only SP0 with SLtoVLMapping support will be processed */ + if (ntype == IB_NODE_TYPE_SWITCH + && !(dest_port->p_physp->port_info.capability_mask + & IB_PORT_CAP_HAS_SL_MAP)) + continue; + + 
osm_port_get_lid_range_ho(dest_port, &min_lid_ho, + &max_lid_ho); + for (dlid = min_lid_ho; dlid <= max_lid_ho; dlid++, i++) + srcdest2vl_table->lids[i] = cl_hton16(dlid); + } + } + /* sort lids */ + vltable_sort_lids(srcdest2vl_table); + + test_vl = 0; + /* fill cdg[0] with routes from each src/dest port combination for all Hca/SP0 in the subnet */ + for (item1 = cl_qlist_head(port_tbl); item1 != cl_qlist_end(port_tbl); + item1 = cl_qlist_next(item1)) { + dest_port = (osm_port_t *)cl_item_obj(item1, dest_port, + list_item); + ntype = osm_node_get_type(dest_port->p_node); + if ((ntype != IB_NODE_TYPE_CA && ntype != IB_NODE_TYPE_SWITCH) + || !(dest_port->p_physp->port_info.capability_mask + & IB_PORT_CAP_HAS_SL_MAP)) + continue; + + for (item2 = cl_qlist_head(port_tbl); + item2 != cl_qlist_end(port_tbl); + item2 = cl_qlist_next(item2)) { + src_port = (osm_port_t *)cl_item_obj(item2, src_port, + list_item); + ntype = osm_node_get_type(src_port->p_node); + if ((ntype != IB_NODE_TYPE_CA + && ntype != IB_NODE_TYPE_SWITCH) + || !(src_port->p_physp->port_info.capability_mask + & IB_PORT_CAP_HAS_SL_MAP)) + continue; + + if (src_port != dest_port) { + /* iterate over LIDs of src and dest port */ + osm_port_get_lid_range_ho(src_port, &min_lid_ho, + &max_lid_ho); + for (slid = min_lid_ho; slid <= max_lid_ho; + slid++) { + osm_port_get_lid_range_ho + (dest_port, &min_lid_ho2, + &max_lid_ho2); + for (dlid = min_lid_ho2; + dlid <= max_lid_ho2; + dlid++) { + + /* try to add the path to cdg[0] */ + err = + update_channel_dep_graph + (&(cdg[test_vl]), + src_port, slid, + dest_port, dlid); + if (err) { + OSM_LOG(p_mgr-> + p_log, + OSM_LOG_ERROR, + "ERR AD14: cannot allocate memory for cdg node or link in update_channel_dep_graph(...)\n"); + goto ERROR; + } + /* add the combination / corresponding virtual lane to the VL table */ + vltable_insert + (srcdest2vl_table, + cl_hton16(slid), + cl_hton16(dlid), + test_vl); + paths_per_vl[test_vl]++; + + } + + } + } + + } + } + 
dfsssp_ctx->srcdest2vl_table = srcdest2vl_table; + + /* test all cdg for cycles and break the cycles by moving paths on the weakest link to the next cdg */ + for (test_vl = 0; test_vl < vl_avail - 1; test_vl++) { + start_here = cdg[test_vl]; + while (start_here) { + cycle = + search_cycle_in_channel_dep_graph(cdg[test_vl], + start_here); + + if (cycle) { + vl_needed = test_vl + 2; + + /* calc weakest link n cycle */ + weakest_link = get_weakest_link_in_cycle(cycle); + if (!weakest_link) { + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, + "ERR AD27: something went wrong in get_weakest_link_in_cycle(...)\n"); + err = 1; + goto ERROR; + } + + paths_per_vl[test_vl] -= + weakest_link->num_pairs; + paths_per_vl[test_vl + 1] += + weakest_link->num_pairs; + + /* move all paths on this link to the next cdg */ + for (i = 0; i < weakest_link->num_pairs; i++) { + srcdest = + get_next_srcdest_pair(weakest_link, + i); + slid = (uint16_t) (srcdest >> 16); + dlid = + (uint16_t) ((srcdest << 16) >> 16); + + /* only move if not moved in a previous step */ + if (test_vl != + (uint8_t) + vltable_get_vl(srcdest2vl_table, + cl_hton16(slid), + cl_hton16(dlid))) { + /* this path has been moved + before -> don't count + */ + paths_per_vl[test_vl]++; + paths_per_vl[test_vl + 1]--; + continue; + } + + src_port = + osm_get_port_by_lid(p_mgr->p_subn, + cl_hton16 + (slid)); + dest_port = + osm_get_port_by_lid(p_mgr->p_subn, + cl_hton16 + (dlid)); + + /* remove path from current cdg / vl */ + err = + remove_path_from_cdg(& + (cdg[test_vl]), + src_port, slid, + dest_port, + dlid); + if (err) { + OSM_LOG(p_mgr->p_log, + OSM_LOG_ERROR, + "ERR AD44: something went wrong in remove_path_from_cdg(...)\n"); + goto ERROR; + } + + /* add path to next cdg / vl */ + err = + update_channel_dep_graph(& + (cdg + [test_vl + + 1]), + src_port, + slid, + dest_port, + dlid); + if (err) { + OSM_LOG(p_mgr->p_log, + OSM_LOG_ERROR, + "ERR AD14: cannot allocate memory for cdg node or link in update_channel_dep_graph(...)\n"); + 
goto ERROR; + } + vltable_insert(srcdest2vl_table, + cl_hton16(slid), + cl_hton16(dlid), + test_vl + 1); + } + + if (weakest_link->num_pairs) + free(weakest_link->srcdest_pairs); + if (weakest_link) + free(weakest_link); + } + + start_here = cycle; + } + } + + /* test the last avail cdg for a cycle; + if there is one, than vl_needed > vl_avail + */ + start_here = cdg[vl_avail - 1]; + if (start_here) { + cycle = + search_cycle_in_channel_dep_graph(cdg[vl_avail - 1], + start_here); + if (cycle) { + vl_needed = vl_avail + 1; + } + } + + OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, + "Virtual Lanes needed: %" PRIu8 "\n", vl_needed); + if (OSM_LOG_IS_ACTIVE_V2(p_mgr->p_log, OSM_LOG_INFO)) { + OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, + "Paths per VL (before balancing):\n"); + for (i = 0; i < vl_avail; i++) + OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, + " %" PRIu32 ". lane: %" PRIu64 "\n", i, + paths_per_vl[i]); + } + + OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE, + "Balancing the paths on the available Virtual Lanes\n"); + + /* optimal balancing virtual lanes, under condition: no additional cycle checks; + sl/vl != 0 might be assigned to loopback packets (i.e. slid/dlid on the + same port for lmc>0), but thats no problem, see IBAS 10.2.2.3 + */ + split_count = (uint8_t *) calloc(vl_avail, sizeof(uint8_t)); + if (!split_count) { + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, + "ERR AD24: cannot allocate memory for split_count, skip balancing\n"); + err = 1; + goto ERROR; + } + /* initial state: paths for VLs won't be separated */ + for (i = 0; i < ((vl_needed < vl_avail) ? 
vl_needed : vl_avail); i++) + split_count[i] = 1; + dfsssp_ctx->vl_split_count = split_count; + /* balancing is necessary if we have empty VLs */ + if (vl_needed < vl_avail) { + /* split paths of VLs until we find an equal distribution */ + for (i = vl_needed; i < vl_avail; i++) { + /* find VL with most paths in it */ + vl = 0; + most_avg_paths = 0.0; + for (test_vl = 0; test_vl < vl_needed; test_vl++) { + if (most_avg_paths < + ((double)paths_per_vl[test_vl] / + split_count[test_vl])) { + vl = test_vl; + most_avg_paths = + (double)paths_per_vl[test_vl] / + split_count[test_vl]; + } + } + split_count[vl]++; + } + /* change the VL assignment depending on split_count for + all VLs except VL 0 + */ + for (from = vl_needed - 1; from > 0; from--) { + /* how much space needed for others? */ + to = 0; + for (i = 0; i < from; i++) + to += split_count[i]; + count = paths_per_vl[from]; + vltable_change_vl(srcdest2vl_table, from, to, count); + /* change also the information within the split_count + array; this is important for fast calculation later + */ + split_count[to] = split_count[from]; + split_count[from] = 0; + paths_per_vl[to] = paths_per_vl[from]; + paths_per_vl[from] = 0; + } + } else if (vl_needed > vl_avail) { + /* routing not possible, a further development would be the LASH-TOR approach (update: LASH-TOR isn't possible, there is a mistake in the theory) */ + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, + "ERR AD25: Not enough VLs available (avail=%d, needed=%d); Stopping dfsssp routing!\n", + vl_avail, vl_needed); + err = 1; + goto ERROR; + } + /* else { no balancing } */ + + if (OSM_LOG_IS_ACTIVE_V2(p_mgr->p_log, OSM_LOG_DEBUG)) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Virtual Lanes per src/dest combination after balancing:\n"); + vltable_print(p_mgr, srcdest2vl_table); + } + if (OSM_LOG_IS_ACTIVE_V2(p_mgr->p_log, OSM_LOG_INFO)) { + OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, + "Approx. #paths per VL (after balancing):\n"); + j = 0; + count = 1; /* to prevent div. 
by 0 */ + for (i = 0; i < vl_avail; i++) { + if (split_count[i] > 0) { + j = i; + count = split_count[i]; + } + OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, + " %" PRIu32 ". lane: %" PRIu64 "\n", i, + paths_per_vl[j] / count); + } + } + + free(paths_per_vl); + + /* deallocate channel dependency graphs */ + for (i = 0; i < vl_avail; i++) + cdg_dealloc(&cdg[i]); + free(cdg); + + OSM_LOG_EXIT(p_mgr->p_log); + return 0; + +ERROR: + free(paths_per_vl); + + for (i = 0; i < vl_avail; i++) + cdg_dealloc(&cdg[i]); + free(cdg); + + vltable_dealloc(&srcdest2vl_table); + dfsssp_ctx->srcdest2vl_table = NULL; + + return err; +}
+
+/* meta function which calls subfunctions for dijkstra, update lft and weights,
+   (and remove deadlocks) to calculate the routing for the subnet
+
+   context: the dfsssp_context_t that was registered as routing engine context
+   returns: 0 on success, -1 on any failure
+
+   Steps:
+     1. reset new_lft of every switch and route each switch's own LIDs to port 0
+     2. build p_mgr->port_order_list: compute nodes, then I/O nodes, then all
+        remaining end ports, and finally the switches themselves (SP0)
+     3. for every LID of every port in that list: dijkstra, update_lft,
+        update_weights
+     4. for the DFSSSP engine only: dfsssp_remove_deadlocks
+
+   Every resource acquired here (sw_list, both guid maps, the heap and the
+   port order list) is released on the success path and on the ERROR path.
+*/
+static int dfsssp_do_dijkstra_routing(void *context)
+{
+	dfsssp_context_t *dfsssp_ctx = (dfsssp_context_t *) context;
+	osm_ucast_mgr_t *p_mgr = (osm_ucast_mgr_t *) dfsssp_ctx->p_mgr;
+	vertex_t *adj_list = (vertex_t *) dfsssp_ctx->adj_list;
+	uint32_t adj_list_size = dfsssp_ctx->adj_list_size;
+	cl_heap_t heap;
+
+	vertex_t **sw_list = NULL;
+	uint32_t sw_list_size = 0;
+	uint64_t guid = 0;
+	cl_qlist_t *qlist = NULL;
+	cl_list_item_t *qlist_item = NULL;
+
+	cl_qmap_t *sw_tbl = &p_mgr->p_subn->sw_guid_tbl;
+	cl_qmap_t cn_tbl, io_tbl, *p_mixed_tbl = NULL;
+	cl_map_item_t *item = NULL;
+	osm_switch_t *sw = NULL;
+	osm_port_t *port = NULL;
+	uint32_t i = 0, err = 0;
+	uint16_t lid = 0, min_lid_ho = 0, max_lid_ho = 0;
+	uint8_t lmc = 0;
+	boolean_t cn_nodes_provided = FALSE, io_nodes_provided = FALSE;
+	/* TRUE while cn_tbl/io_tbl are initialized and may still own entries */
+	boolean_t guid_maps_live = FALSE;
+
+	OSM_LOG_ENTER(p_mgr->p_log);
+	OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE,
+		"Calculating shortest path from all Hca/switches to all\n");
+
+	cl_qmap_init(&cn_tbl);
+	cl_qmap_init(&io_tbl);
+	guid_maps_live = TRUE;
+	p_mixed_tbl = &cn_tbl;
+
+	cl_qlist_init(&p_mgr->port_order_list);
+
+	/* reset the new_lft for each switch */
+	for (item = cl_qmap_head(sw_tbl); item != cl_qmap_end(sw_tbl);
+	     item = cl_qmap_next(item)) {
+		sw = (osm_switch_t *) item;
+		/* initialize LIDs in buffer to invalid port number */
+		memset(sw->new_lft, OSM_NO_PATH, sw->max_lid_ho + 1);
+		/* initialize LFT and hop count for bsp0/esp0 of the switch */
+		min_lid_ho = cl_ntoh16(osm_node_get_base_lid(sw->p_node, 0));
+		lmc = osm_node_get_lmc(sw->p_node, 0);
+		for (i = min_lid_ho; i < min_lid_ho + (1 << lmc); i++) {
+			/* for each switch the port to the 'self'lid is the management port 0 */
+			sw->new_lft[i] = 0;
+			/* the hop count to the 'self'lid is 0 for each switch */
+			osm_switch_set_hops(sw, i, 0, 0);
+		}
+	}
+
+	/* construct the generic heap object to use it in dijkstra; this has to
+	   happen before the first 'goto ERROR', because the error path asks
+	   cl_is_heap_inited(&heap)
+	 */
+	cl_heap_construct(&heap);
+
+	/* we need an intermediate array of pointers to switches in adj_list;
+	   this array will be sorted in respect to num_hca (descending);
+	   adj_list[0] is skipped, see the i + 1 below
+	 */
+	sw_list_size = adj_list_size - 1;
+	sw_list = (vertex_t **)calloc(sw_list_size, sizeof(vertex_t *));
+	if (!sw_list) {
+		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+			"ERR AD29: cannot allocate memory for sw_list in dfsssp_do_dijkstra_routing\n");
+		goto ERROR;
+	}
+
+	/* fill the array with references to the 'real' sw in adj_list */
+	for (i = 0; i < sw_list_size; i++)
+		sw_list[i] = &(adj_list[i + 1]);
+
+	/* sort the sw_list in descending order */
+	sw_list_sort_by_num_hca(sw_list, sw_list_size);
+
+	/* parse compute node guid file, if provided by the user */
+	if (p_mgr->p_subn->opt.cn_guid_file) {
+		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+			"Parsing compute nodes from file %s\n",
+			p_mgr->p_subn->opt.cn_guid_file);
+
+		/* a failed parse may leave entries in cn_tbl; the ERROR path
+		   frees them because guid_maps_live is still TRUE
+		 */
+		if (parse_node_map(p_mgr->p_subn->opt.cn_guid_file,
+				   add_guid_to_map, &cn_tbl)) {
+			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+				"ERR AD33: Problem parsing compute node guid file\n");
+			goto ERROR;
+		}
+
+		if (cl_is_qmap_empty(&cn_tbl)) {
+			OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
+				"WRN AD34: compute node guids file contains no valid guids\n");
+		} else {
+			cn_nodes_provided = TRUE;
+		}
+	}
+
+	/* parse I/O guid file, if provided by the user */
+	if (p_mgr->p_subn->opt.io_guid_file) {
+		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+			"Parsing I/O nodes from file %s\n",
+			p_mgr->p_subn->opt.io_guid_file);
+
+		if (parse_node_map(p_mgr->p_subn->opt.io_guid_file,
+				   add_guid_to_map, &io_tbl)) {
+			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+				"ERR AD35: Problem parsing I/O guid file\n");
+			goto ERROR;
+		}
+
+		if (cl_is_qmap_empty(&io_tbl)) {
+			OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
+				"WRN AD36: I/O node guids file contains no valid guids\n");
+		} else {
+			io_nodes_provided = TRUE;
+		}
+	}
+
+	/* if we mix Hca/Tca/SP0 during the dijkstra routing, we might end up
+	   in rare cases with a bad balancing for Hca<->Hca connections, i.e.
+	   some inter-switch links get oversubscribed with paths;
+	   therefore: add Hca ports first to ensure good Hca<->Hca balancing
+	 */
+	if (cn_nodes_provided) {
+		for (i = 0; i < sw_list_size; i++) {
+			if (!sw_list[i] || !sw_list[i]->sw) {
+				OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+					"ERR AD30: corrupted sw_list array in dfsssp_do_dijkstra_routing\n");
+				goto ERROR;
+			}
+			sw = (osm_switch_t *)(sw_list[i]->sw);
+			add_sw_endports_to_order_list(sw, p_mgr, &cn_tbl, TRUE);
+		}
+	}
+	/* then: add Tca ports to ensure good Hca->Tca balancing and separate
+	   paths towards I/O nodes on the same switch (if possible)
+	 */
+	if (io_nodes_provided) {
+		for (i = 0; i < sw_list_size; i++) {
+			if (!sw_list[i] || !sw_list[i]->sw) {
+				OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+					"ERR AD32: corrupted sw_list array in dfsssp_do_dijkstra_routing\n");
+				goto ERROR;
+			}
+			sw = (osm_switch_t *)(sw_list[i]->sw);
+			add_sw_endports_to_order_list(sw, p_mgr, &io_tbl, TRUE);
+		}
+	}
+	/* then: add anything else, such as administration nodes, ... */
+	if (cn_nodes_provided && io_nodes_provided) {
+		cl_qmap_merge(&cn_tbl, &io_tbl);
+	} else if (io_nodes_provided) {
+		p_mixed_tbl = &io_tbl;
+	}
+	for (i = 0; i < sw_list_size; i++) {
+		if (!sw_list[i] || !sw_list[i]->sw) {
+			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+				"ERR AD39: corrupted sw_list array in dfsssp_do_dijkstra_routing\n");
+			goto ERROR;
+		}
+		sw = (osm_switch_t *)(sw_list[i]->sw);
+		add_sw_endports_to_order_list(sw, p_mgr, p_mixed_tbl, FALSE);
+	}
+	/* last: add SP0 afterwards which have lower priority for balancing */
+	for (i = 0; i < sw_list_size; i++) {
+		if (!sw_list[i] || !sw_list[i]->sw) {
+			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+				"ERR AD31: corrupted sw_list array in dfsssp_do_dijkstra_routing\n");
+			goto ERROR;
+		}
+		sw = (osm_switch_t *)(sw_list[i]->sw);
+		guid = cl_ntoh64(osm_node_get_node_guid(sw->p_node));
+		add_guid_to_order_list(guid, p_mgr);
+	}
+
+	/* the intermediate array lived long enough */
+	free(sw_list);
+	sw_list = NULL;
+	/* same is true for the compute node and I/O guid map; clear the flag
+	   so that a later 'goto ERROR' does not destroy the maps twice
+	 */
+	destroy_guid_map(&cn_tbl);
+	destroy_guid_map(&io_tbl);
+	guid_maps_live = FALSE;
+
+	/* do the routing for the each Hca in the subnet and each switch
+	   in the subnet (to add the routes to base/enhanced SP0)
+	 */
+	qlist = &p_mgr->port_order_list;
+	for (qlist_item = cl_qlist_head(qlist);
+	     qlist_item != cl_qlist_end(qlist);
+	     qlist_item = cl_qlist_next(qlist_item)) {
+		port = (osm_port_t *)cl_item_obj(qlist_item, port, list_item);
+
+		/* calculate shortest path with dijkstra from node to all switches/Hca */
+		if (osm_node_get_type(port->p_node) == IB_NODE_TYPE_CA) {
+			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+				"Processing Hca with GUID 0x%" PRIx64 "\n",
+				cl_ntoh64(osm_node_get_node_guid
+					  (port->p_node)));
+		} else if (osm_node_get_type(port->p_node) == IB_NODE_TYPE_SWITCH) {
+			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+				"Processing switch with GUID 0x%" PRIx64 "\n",
+				cl_ntoh64(osm_node_get_node_guid
+					  (port->p_node)));
+		} else {
+			/* we don't handle routers, in case they show up */
+			continue;
+		}
+
+		/* distribute the LID range across the ports that can reach those LIDs
+		   to have disjoint paths for one destination port with lmc>0;
+		   for switches with bsp0: min=max; with esp0: max>min if lmc>0
+		 */
+		osm_port_get_lid_range_ho(port, &min_lid_ho, &max_lid_ho);
+		for (lid = min_lid_ho; lid <= max_lid_ho; lid++) {
+			/* do dijkstra from this Hca/LID/SP0 to each switch */
+			err = dijkstra(p_mgr, &heap, adj_list, adj_list_size,
+				       port, lid);
+			if (err)
+				goto ERROR;
+			if (OSM_LOG_IS_ACTIVE_V2(p_mgr->p_log, OSM_LOG_DEBUG))
+				print_routes(p_mgr, adj_list, adj_list_size,
+					     port);
+
+			/* make an update for the linear forwarding tables of the switches */
+			err = update_lft(p_mgr, adj_list, adj_list_size, port,
+					 lid);
+			if (err)
+				goto ERROR;
+
+			/* add weights for calculated routes to adjust the weights for the next cycle */
+			update_weights(p_mgr, adj_list, adj_list_size);
+
+			if (OSM_LOG_IS_ACTIVE_V2(p_mgr->p_log, OSM_LOG_DEBUG))
+				dfsssp_print_graph(p_mgr, adj_list,
+						   adj_list_size);
+		}
+	}
+
+	/* try deadlock removal only for the dfsssp routing (not for the sssp case, which is a subset of the dfsssp algorithm) */
+	if (dfsssp_ctx->routing_type == OSM_ROUTING_ENGINE_TYPE_DFSSSP) {
+		/* remove potential deadlocks by assigning different virtual lanes to src/dest paths and balance the lanes */
+		err = dfsssp_remove_deadlocks(dfsssp_ctx);
+		if (err)
+			goto ERROR;
+	} else if (dfsssp_ctx->routing_type == OSM_ROUTING_ENGINE_TYPE_SSSP) {
+		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
+			"SSSP routing specified -> skipping deadlock removal thru dfsssp_remove_deadlocks(...)\n");
+	} else {
+		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
+			"ERR AD28: wrong routing engine specified in dfsssp_ctx\n");
+		goto ERROR;
+	}
+
+	/* list not needed after the dijkstra steps and deadlock removal */
+	cl_qlist_remove_all(&p_mgr->port_order_list);
+
+	/* delete the heap which is not needed anymore */
+	cl_heap_destroy(&heap);
+
+	/* print the new_lft for each switch after routing is done */
+	if (OSM_LOG_IS_ACTIVE_V2(p_mgr->p_log, OSM_LOG_DEBUG)) {
+		for (item = cl_qmap_head(sw_tbl); item != cl_qmap_end(sw_tbl);
+		     item = cl_qmap_next(item)) {
+			sw = (osm_switch_t *) item;
+			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+				"Summary of the (new) LFT for switch 0x%" PRIx64
+				" (%s):\n",
+				cl_ntoh64(osm_node_get_node_guid(sw->p_node)),
+				sw->p_node->print_desc);
+			for (i = 0; i < sw->max_lid_ho + 1; i++)
+				if (sw->new_lft[i] != OSM_NO_PATH) {
+					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+						" for LID=%" PRIu32
+						" use port=%" PRIu8 "\n", i,
+						sw->new_lft[i]);
+				}
+		}
+	}
+
+	OSM_LOG_EXIT(p_mgr->p_log);
+	return 0;
+
+ERROR:
+	if (!cl_is_qlist_empty(&p_mgr->port_order_list))
+		cl_qlist_remove_all(&p_mgr->port_order_list);
+	/* free the guid maps regardless of the *_nodes_provided flags: a failed
+	   parse_node_map() leaves those flags FALSE but may have added entries;
+	   destroying an empty map is what the success path does as well
+	 */
+	if (guid_maps_live) {
+		destroy_guid_map(&cn_tbl);
+		destroy_guid_map(&io_tbl);
+	}
+	free(sw_list);
+	if (cl_is_heap_inited(&heap))
+		cl_heap_destroy(&heap);
+	/* balance the OSM_LOG_ENTER at the top on the failure path, too */
+	OSM_LOG_EXIT(p_mgr->p_log);
+	return -1;
+}
+
+/* meta function which calls subfunctions for finding the optimal switch + for the spanning tree, performing a dijkstra step with this sw as root, + and calculating the mcast table for MLID +*/ +static ib_api_status_t dfsssp_do_mcast_routing(void * context, + osm_mgrp_box_t * mbox) +{ + dfsssp_context_t *dfsssp_ctx = (dfsssp_context_t *) context; + osm_ucast_mgr_t *p_mgr = (osm_ucast_mgr_t *) dfsssp_ctx->p_mgr; + osm_sm_t *sm = (osm_sm_t *) p_mgr->sm; + vertex_t *adj_list = (vertex_t *) dfsssp_ctx->adj_list; + uint32_t adj_list_size = dfsssp_ctx->adj_list_size; + cl_qlist_t mcastgrp_port_list; + cl_qmap_t mcastgrp_port_map; + osm_switch_t *root_sw = NULL, *p_sw = NULL; + osm_port_t *port = NULL; + ib_net16_t lid = 0; + uint32_t err = 0, num_ports = 0, i = 0; + ib_net64_t guid = 0; + ib_api_status_t status = IB_SUCCESS; + cl_heap_t heap; + + OSM_LOG_ENTER(sm->p_log); + + /* using the ucast cache feature with dfsssp might mean that a leaf sw + got removed
(and got back) without calling dfsssp_build_graph + and therefore the adj_list (and pointers to osm's internal switches) + could be outdated (here we have no knowledge if it has happened, so + unfortunately a check is necessary... still better than rebuilding + adj_list every time we arrive here) + */ + if (p_mgr->p_subn->opt.use_ucast_cache && p_mgr->cache_valid) { + for (i = 1; i < adj_list_size; i++) { + guid = cl_hton64(adj_list[i].guid); + p_sw = osm_get_switch_by_guid(p_mgr->p_subn, guid); + if (p_sw) { + /* check if switch came back from the dead */ + if (adj_list[i].dropped) + adj_list[i].dropped = FALSE; + + /* verify that sw object has not been moved + (this can happen for a leaf switch, if it + was dropped and came back later without a + rerouting), otherwise we have to update + dfsssp's internal switch list with the new + sw pointer + */ + if (p_sw == adj_list[i].sw) + continue; + else + adj_list[i].sw = p_sw; + } else { + /* if a switch from adj_list is not in the + sw_guid_tbl anymore, then the only reason is + that it was a leaf switch and opensm dropped + it without calling a rerouting + -> calling dijkstra is no problem, since it + is a leaf and different from root_sw + -> only update_mcft and reset_mgrp_membership + need to be aware of these dropped switches + */ + if (!adj_list[i].dropped) + adj_list[i].dropped = TRUE; + } + } + } + + /* construct the generic heap opject to use it in dijkstra */ + cl_heap_construct(&heap); + + /* create a map and a list of all ports which are member in the mcast + group; map for searching elements and list for iteration + */ + if (osm_mcast_make_port_list_and_map(&mcastgrp_port_list, + &mcastgrp_port_map, mbox)) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR AD50: " + "Insufficient memory to make port list\n"); + status = IB_ERROR; + goto Exit; + } + + num_ports = cl_qlist_count(&mcastgrp_port_list); + if (num_ports < 2) { + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "MLID 0x%X has %u members - nothing to do\n", + 
mbox->mlid, num_ports); + goto Exit; + } + + /* find the root switch for the spanning tree, which has the smallest + hops count to all LIDs in the mcast group + */ + root_sw = osm_mcast_mgr_find_root_switch(sm, &mcastgrp_port_list); + if (!root_sw) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR AD51: " + "Unable to locate a suitable switch for group 0x%X\n", + mbox->mlid); + status = IB_ERROR; + goto Exit; + } + + /* a) start one dijkstra step from the root switch to generate a + spanning tree + b) this might be a bit of an overkill to span the whole + network, if there are only a few ports in the mcast group, but + its only one dijkstra step for each mcast group and we did many + steps before in the ucast routing for each LID in the subnet; + c) we can use the subnet structure from the ucast routing, and + don't even have to reset the link weights (=> therefore the mcast + spanning tree will use less 'growded' links in the network) + d) the mcast dfsssp algorithm will not change the link weights + */ + lid = osm_node_get_base_lid(root_sw->p_node, 0); + port = osm_get_port_by_lid(sm->p_subn, lid); + err = dijkstra(p_mgr, &heap, adj_list, adj_list_size, port, lid); + if (err) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR AD52: " + "Dijkstra step for mcast failed for group 0x%X\n", + mbox->mlid); + status = IB_ERROR; + goto Exit; + } + + /* set mcast group membership again for update_mcft + (unfortunately: osm_mcast_mgr_find_root_switch resets it) + */ + update_mgrp_membership(&mcastgrp_port_list); + + /* update the mcast forwarding tables of the switches */ + err = update_mcft(sm, adj_list, adj_list_size, mbox->mlid, + &mcastgrp_port_map, root_sw); + if (err) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR AD53: " + "Update of mcast forwarding tables failed for group 0x%X\n", + mbox->mlid); + status = IB_ERROR; + goto Exit; + } + +Exit: + if (cl_is_heap_inited(&heap)) + cl_heap_destroy(&heap); + reset_mgrp_membership(adj_list, adj_list_size); + 
osm_mcast_drop_port_list(&mcastgrp_port_list); + OSM_LOG_EXIT(sm->p_log); + return status; +}
+
+/* called from extern in QP creation process to gain the the service level and
+   the virtual lane respectively for a <s,d> pair
+
+   context:             the dfsssp_context_t of the routing engine (may be NULL)
+   hint_for_default_sl: value returned whenever no VL can be looked up
+   slid, dlid:          source/destination LID, passed on unchanged to
+                        osm_get_port_by_lid() and vltable_get_vl()
+
+   returns the VL stored in srcdest2vl_table for the pair; if vl_split_count
+   holds a count > 1 for that VL, a random offset below that count is added
+*/
+static uint8_t get_dfsssp_sl(void *context, uint8_t hint_for_default_sl,
+			     const ib_net16_t slid, const ib_net16_t dlid)
+{
+	dfsssp_context_t *dfsssp_ctx = (dfsssp_context_t *) context;
+	vltable_t *srcdest2vl_table = NULL;
+	uint8_t *vl_split_count = NULL;
+	osm_ucast_mgr_t *p_mgr = NULL;
+	int32_t res = 0;
+
+	/* only the dfsssp engine maintains a VL table */
+	if (!dfsssp_ctx
+	    || dfsssp_ctx->routing_type != OSM_ROUTING_ENGINE_TYPE_DFSSSP)
+		return hint_for_default_sl;
+
+	p_mgr = (osm_ucast_mgr_t *) dfsssp_ctx->p_mgr;
+	srcdest2vl_table = (vltable_t *) (dfsssp_ctx->srcdest2vl_table);
+	vl_split_count = (uint8_t *) (dfsssp_ctx->vl_split_count);
+
+	/* both LIDs must belong to a known port */
+	if (!osm_get_port_by_lid(p_mgr->p_subn, slid))
+		return hint_for_default_sl;
+
+	if (!osm_get_port_by_lid(p_mgr->p_subn, dlid))
+		return hint_for_default_sl;
+
+	if (!srcdest2vl_table)
+		return hint_for_default_sl;
+
+	res = vltable_get_vl(srcdest2vl_table, slid, dlid);
+	if (res < 0)
+		return hint_for_default_sl;
+
+	/* we will randomly distribute the traffic over multiple VLs if
+	   necessary for good balancing; therefore vl_split_count provides
+	   the number of VLs to use for certain traffic;
+	   vl_split_count starts out as NULL (see dfsssp_context_create), so
+	   it is checked before being indexed
+	 */
+	if (vl_split_count && vl_split_count[res] > 1)
+		return (uint8_t) (res + rand() % (vl_split_count[res]));
+
+	return (uint8_t) res;
+}
+
+/* allocate a dfsssp context for the given routing engine type
+
+   the context starts with an empty graph (adj_list NULL, size 0) and without
+   VL information (srcdest2vl_table and vl_split_count NULL);
+   returns NULL (after logging ERR AD04) if the allocation fails
+*/
+static dfsssp_context_t *dfsssp_context_create(osm_opensm_t * p_osm,
+					       osm_routing_engine_type_t
+					       routing_type)
+{
+	dfsssp_context_t *dfsssp_ctx = NULL;
+
+	/* allocate memory */
+	dfsssp_ctx = (dfsssp_context_t *) malloc(sizeof(dfsssp_context_t));
+	if (!dfsssp_ctx) {
+		OSM_LOG(p_osm->sm.ucast_mgr.p_log, OSM_LOG_ERROR,
+			"ERR AD04: cannot allocate memory for dfsssp_ctx in dfsssp_context_create\n");
+		return NULL;
+	}
+
+	/* set initial values */
+	dfsssp_ctx->routing_type = routing_type;
+	dfsssp_ctx->p_mgr = (osm_ucast_mgr_t *) & (p_osm->sm.ucast_mgr);
+	dfsssp_ctx->adj_list = NULL;
+	dfsssp_ctx->adj_list_size = 0;
+	dfsssp_ctx->srcdest2vl_table = NULL;
+	dfsssp_ctx->vl_split_count = NULL;
+
+	return dfsssp_ctx;
+}
+
+/* release everything the context owns (link lists and adj_list itself, the
+   srcdest2vl table, vl_split_count) and reset the fields; the context
+   structure itself is not freed here, see delete()
+*/
+static void dfsssp_context_destroy(void *context)
+{
+	dfsssp_context_t *dfsssp_ctx = (dfsssp_context_t *) context;
+	vertex_t *adj_list = (vertex_t *) (dfsssp_ctx->adj_list);
+	uint32_t i = 0;
+	link_t *link = NULL, *tmp = NULL;
+
+	/* free adj_list; the adj_list test keeps the loop from touching a
+	   graph that was never built
+	 */
+	for (i = 0; adj_list && i < dfsssp_ctx->adj_list_size; i++) {
+		link = adj_list[i].links;
+		while (link) {
+			tmp = link;
+			link = link->next;
+			free(tmp);
+		}
+	}
+	free(adj_list);
+	dfsssp_ctx->adj_list = NULL;
+	dfsssp_ctx->adj_list_size = 0;
+
+	/* free srcdest2vl table and the split count information table
+	   (can be done, because dfsssp_context_destroy is called after
+	   osm_get_dfsssp_sl)
+	 */
+	vltable_dealloc(&(dfsssp_ctx->srcdest2vl_table));
+	dfsssp_ctx->srcdest2vl_table = NULL;
+
+	free(dfsssp_ctx->vl_split_count);
+	dfsssp_ctx->vl_split_count = NULL;
+}
+
+/* destroy callback of the routing engine: tears down the context contents and
+   frees the context itself; a NULL context is ignored
+*/
+static void delete(void *context)
+{
+	if (!context)
+		return;
+	dfsssp_context_destroy(context);
+
+	free(context);
+}
+
+/* register the dfsssp routing engine in r
+   returns 0 on success and 1 if the context could not be allocated
+*/
+int osm_ucast_dfsssp_setup(struct osm_routing_engine *r, osm_opensm_t * p_osm)
+{
+	/* create context container and add ucast management object */
+	dfsssp_context_t *dfsssp_context =
+	    dfsssp_context_create(p_osm, OSM_ROUTING_ENGINE_TYPE_DFSSSP);
+	if (!dfsssp_context) {
+		return 1;	/* alloc failed -> skip this routing */
+	}
+
+	/* reset function pointers to dfsssp routines */
+	r->context = (void *)dfsssp_context;
+	r->build_lid_matrices = dfsssp_build_graph;
+	r->ucast_build_fwd_tables = dfsssp_do_dijkstra_routing;
+	r->mcast_build_stree = dfsssp_do_mcast_routing;
+	r->path_sl = get_dfsssp_sl;
+	r->destroy = delete;
+
+	/* we initialize with the current time to achieve a 'good' randomized
+	   assignment in get_dfsssp_sl(...)
+	 */
+	srand(time(NULL));
+
+	return 0;
+}
+
+/* register the sssp routing engine in r; same callbacks as dfsssp except that
+   no path_sl callback is installed
+   returns 0 on success and 1 if the context could not be allocated
+*/
+int osm_ucast_sssp_setup(struct osm_routing_engine *r, osm_opensm_t * p_osm)
+{
+	/* create context container and add ucast management object */
+	dfsssp_context_t *dfsssp_context =
+	    dfsssp_context_create(p_osm, OSM_ROUTING_ENGINE_TYPE_SSSP);
+	if (!dfsssp_context) {
+		return 1;	/* alloc failed -> skip this routing */
+	}
+
+	/* reset function pointers to sssp routines */
+	r->context = (void *)dfsssp_context;
+	r->build_lid_matrices = dfsssp_build_graph;
+	r->ucast_build_fwd_tables = dfsssp_do_dijkstra_routing;
+	r->mcast_build_stree = dfsssp_do_mcast_routing;
+	r->destroy = delete;
+
+	return 0;
+}
diff --git a/opensm/osm_ucast_dnup.c b/opensm/osm_ucast_dnup.c new file mode 100644 index 0000000..d3e5383 --- /dev/null +++ b/opensm/osm_ucast_dnup.c @@ -0,0 +1,499 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2007,2009 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * Copyright (c) 2009 Battelle Memorial Institue. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer.
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of Up Down Algorithm using ranking & Min Hop + * Calculation functions + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_UCAST_DNUP_C +#include +#include +#include + +/* //////////////////////////// */ +/* Local types */ +/* //////////////////////////// */ + +/* direction */ +typedef enum dnup_switch_dir { + UP = 0, + DOWN, + EQUAL +} dnup_switch_dir_t; + +/* dnup structure */ +typedef struct dnup { + osm_opensm_t *p_osm; +} dnup_t; + +struct dnup_node { + cl_list_item_t list; + osm_switch_t *sw; + dnup_switch_dir_t dir; + unsigned rank; + unsigned visited; +}; + +/* This function returns direction based on rank and guid info of current & + remote ports */ +static dnup_switch_dir_t dnup_get_dir(unsigned cur_rank, unsigned rem_rank) +{ + /* HACK: comes to solve root nodes connection, in a classic subnet root nodes do not connect + directly, but in case they are we assign to root node an UP direction to allow DNUP to discover + the subnet correctly (and not from the point of view of the last root node). 
+ */ + if (!cur_rank && !rem_rank) + return EQUAL; + + if (cur_rank < rem_rank) + return DOWN; + else if (cur_rank > rem_rank) + return UP; + else + return EQUAL; +}
+
+/**********************************************************************
+ * This function does the bfs of min hop table calculation by guid index
+ * as a starting point.
+ *
+ * p_log        - log object
+ * p_subn       - not used by this function
+ * p_sw         - switch whose base LID (port 0) the hop counts are
+ *                calculated for
+ * prune_weight - 0: a step from an UP switch to a DOWN switch is skipped;
+ *                otherwise such a step is taken with prune_weight added to
+ *                the hop count, as long as the sum stays below 64
+ * max_hops     - if not NULL, raised to the largest hop count written
+ *
+ * Always returns 0.
+ **********************************************************************/
+static int dnup_bfs_by_node(IN osm_log_t * p_log, IN osm_subn_t * p_subn,
+			    IN osm_switch_t * p_sw, IN uint8_t prune_weight,
+			    OUT uint8_t * max_hops)
+{
+	uint8_t pn, pn_rem;
+	cl_qlist_t list;
+	uint16_t lid;
+	struct dnup_node *u;
+	dnup_switch_dir_t next_dir, current_dir;
+
+	OSM_LOG_ENTER(p_log);
+
+	lid = osm_node_get_base_lid(p_sw->p_node, 0);
+	lid = cl_ntoh16(lid);
+	osm_switch_set_hops(p_sw, lid, 0, 0);
+
+	OSM_LOG(p_log, OSM_LOG_DEBUG,
+		"Starting from switch - port GUID 0x%" PRIx64 " lid %u\n",
+		cl_ntoh64(p_sw->p_node->node_info.port_guid), lid);
+
+	u = p_sw->priv;
+	u->dir = DOWN;
+
+	/* Update list with the new element */
+	cl_qlist_init(&list);
+	cl_qlist_insert_tail(&list, &u->list);
+
+	/* BFS the list till no next element */
+	while (!cl_is_qlist_empty(&list)) {
+		u = (struct dnup_node *)cl_qlist_remove_head(&list);
+		u->visited = 0;	/* cleanup */
+		current_dir = u->dir;
+		/* Go over all ports of the switch and find unvisited remote nodes */
+		for (pn = 1; pn < u->sw->num_ports; pn++) {
+			osm_node_t *p_remote_node;
+			struct dnup_node *rem_u;
+			uint8_t current_min_hop, remote_min_hop,
+			    set_hop_return_value;
+			osm_switch_t *p_remote_sw;
+			unsigned relaxed_hops;
+
+			p_remote_node =
+			    osm_node_get_remote_node(u->sw->p_node, pn,
+						     &pn_rem);
+			/* If no remote node OR remote node is not a SWITCH
+			   continue to next pn */
+			if (!p_remote_node || !p_remote_node->sw)
+				continue;
+			/* Fetch remote guid only after validation of remote node */
+			p_remote_sw = p_remote_node->sw;
+			rem_u = p_remote_sw->priv;
+			/* Decide which direction to mark it (UP/DOWN) */
+			next_dir = dnup_get_dir(u->rank, rem_u->rank);
+
+			/* Set MinHop value for the current lid */
+			current_min_hop = osm_switch_get_least_hops(u->sw, lid);
+			/* Check hop count if better insert into list && update
+			   the remote node Min Hop Table */
+			remote_min_hop =
+			    osm_switch_get_hop_count(p_remote_sw, lid, pn_rem);
+
+			/* Check if this is a legal step : the only illegal step is going
+			   from UP to DOWN */
+			if ((current_dir == UP) && (next_dir == DOWN)) {
+				OSM_LOG(p_log, OSM_LOG_DEBUG,
+					"Avoiding move from 0x%016" PRIx64
+					" to 0x%016" PRIx64 "\n",
+					cl_ntoh64(osm_node_get_node_guid(u->sw->p_node)),
+					cl_ntoh64(osm_node_get_node_guid(p_remote_node)));
+				/* Illegal step. If prune_weight is set, allow it with an
+				 * additional weight
+				 */
+				if (!prune_weight)
+					continue;
+
+				/* add in a wider type, so that the sum cannot
+				 * wrap around below the limit of 64
+				 */
+				relaxed_hops =
+				    (unsigned)current_min_hop + prune_weight;
+				if (relaxed_hops >= 64) {
+					OSM_LOG(p_log, OSM_LOG_ERROR,
+						"ERR AE02: Too many hops on subnet,"
+						" can't relax illegal Dn/Up transition.\n");
+					osm_switch_set_hops(p_remote_sw, lid,
+							    pn_rem, OSM_NO_PATH);
+					/* do not fall through: the hop update
+					 * below compares against the value read
+					 * before OSM_NO_PATH was written and
+					 * could overwrite it again
+					 */
+					continue;
+				}
+				current_min_hop = (uint8_t)relaxed_hops;
+			}
+			if (current_min_hop + 1 < remote_min_hop) {
+				set_hop_return_value =
+				    osm_switch_set_hops(p_remote_sw, lid,
+							pn_rem,
+							current_min_hop + 1);
+				if (max_hops && current_min_hop + 1 > *max_hops) {
+					*max_hops = current_min_hop + 1;
+				}
+				if (set_hop_return_value) {
+					OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AE01: "
+						"Invalid value returned from set min hop is: %d\n",
+						set_hop_return_value);
+				}
+				/* Check if remote port has already been visited */
+				if (!rem_u->visited) {
+					/* Insert dnup_switch item into the list */
+					rem_u->dir = next_dir;
+					rem_u->visited = 1;
+					cl_qlist_insert_tail(&list,
+							     &rem_u->list);
+				}
+			}
+		}
+	}
+
+	OSM_LOG_EXIT(p_log);
+	return 0;
+}
+
+/* NOTE : PLS check if we need to decide that the first */ +/* rank is a SWITCH for BFS purpose */ +static int dnup_subn_rank(IN dnup_t * p_dnup) +{ + osm_switch_t *p_sw; + osm_physp_t *p_physp, *p_remote_physp; + cl_qlist_t list; + cl_map_item_t *item; + struct dnup_node *u, *remote_u; +
uint8_t num_ports, port_num; + osm_log_t *p_log = &p_dnup->p_osm->log; + unsigned max_rank = 0; + + OSM_LOG_ENTER(p_log); + cl_qlist_init(&list); + + /* add all node level switches to the list */ + for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl); + item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl); + item = cl_qmap_next(item)) { + p_sw = (osm_switch_t *)item; + u = p_sw->priv; + if (u->rank == 0) + cl_qlist_insert_tail(&list, &u->list); + } + + /* BFS the list till it's empty */ + while (!cl_is_qlist_empty(&list)) { + u = (struct dnup_node *)cl_qlist_remove_head(&list); + /* Go over all remote nodes and rank them (if not already visited) */ + p_sw = u->sw; + num_ports = p_sw->num_ports; + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Handling switch GUID 0x%" PRIx64 "\n", + cl_ntoh64(osm_node_get_node_guid(p_sw->p_node))); + for (port_num = 1; port_num < num_ports; port_num++) { + ib_net64_t port_guid; + + /* Current port fetched in order to get remote side */ + p_physp = + osm_node_get_physp_ptr(p_sw->p_node, port_num); + + if (!p_physp) + continue; + + p_remote_physp = p_physp->p_remote_physp; + + /* + make sure that all the following occur on p_remote_physp: + 1. The port isn't NULL + 2. It is a switch + */ + if (p_remote_physp && p_remote_physp->p_node->sw) { + remote_u = p_remote_physp->p_node->sw->priv; + port_guid = p_remote_physp->port_guid; + + if (remote_u->rank > u->rank + 1) { + remote_u->rank = u->rank + 1; + max_rank = remote_u->rank; + cl_qlist_insert_tail(&list, + &remote_u->list); + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Rank of port GUID 0x%" PRIx64 + " = %u\n", cl_ntoh64(port_guid), + remote_u->rank); + } + } + } + } + + /* Print Summary of ranking */ + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Subnet ranking completed. 
Max Node Rank = %d\n", max_rank); + OSM_LOG_EXIT(p_log); + return 0; +} + +static int dnup_set_min_hop_table(IN dnup_t * p_dnup) +{ + osm_subn_t *p_subn = &p_dnup->p_osm->subn; + osm_log_t *p_log = &p_dnup->p_osm->log; + osm_switch_t *p_sw; + struct dnup_node *u; + cl_map_item_t *item; + uint8_t max_hops = 0; + + OSM_LOG_ENTER(p_log); + + /* Go over all the switches in the subnet - for each init their Min Hop + Table */ + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Init Min Hop Table of all switches [\n"); + + for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl); + item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl); + item = cl_qmap_next(item)) { + p_sw = (osm_switch_t *)item; + /* Clear Min Hop Table */ + osm_switch_clear_hops(p_sw); + } + + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Init Min Hop Table of all switches ]\n"); + + /* Now do the BFS for each port in the subnet */ + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "BFS through all port guids in the subnet [\n"); + + for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl); + item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl); + item = cl_qmap_next(item)) { + p_sw = (osm_switch_t *)item; + dnup_bfs_by_node(p_log, p_subn, p_sw, 0, &max_hops); + } + if(p_subn->opt.connect_roots) { + /*This is probably not necessary, by I am more comfortable + * clearing any possible side effects from the previous + * dnup routing pass + */ + for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl); + item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl); + item = cl_qmap_next(item)) { + p_sw = (osm_switch_t *)item; + osm_switch_clear_hops(p_sw); + u = (struct dnup_node *) p_sw->priv; + u->visited = 0; + } + for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl); + item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl); + item = cl_qmap_next(item)) { + p_sw = (osm_switch_t *)item; + dnup_bfs_by_node(p_log, p_subn, p_sw, max_hops + 1, NULL); + } + } + + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "BFS through all port guids in the subnet ]\n"); 
+ /* Cleanup */ + OSM_LOG_EXIT(p_log); + return 0; +}
+
+/* Rank the switches and fill their Min Hop Tables.
+ * Returns -1 if the subnet has no switches, otherwise the result of
+ * dnup_set_min_hop_table().
+ */
+static int dnup_build_lid_matrices(IN dnup_t * p_dnup)
+{
+	osm_log_t *p_log = &p_dnup->p_osm->log;
+	int status;
+
+	OSM_LOG_ENTER(p_log);
+
+	OSM_LOG(p_log, OSM_LOG_VERBOSE,
+		"Ranking all port guids in the list\n");
+	/* Check if it's not a switched subnet */
+	if (cl_is_qmap_empty(&p_dnup->p_osm->subn.sw_guid_tbl)) {
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AEOB: "
+			"This is not a switched subnet, cannot perform DNUP algorithm\n");
+		status = -1;
+		goto _exit;
+	}
+
+	/* Rank the subnet switches */
+	dnup_subn_rank(p_dnup);
+
+	/* After multiple ranking need to set Min Hop Table by DnUp algorithm */
+	OSM_LOG(p_log, OSM_LOG_VERBOSE,
+		"Setting all switches' Min Hop Table\n");
+	status = dnup_set_min_hop_table(p_dnup);
+
+_exit:
+	OSM_LOG_EXIT(p_log);
+	return status;
+}
+
+/* Allocate the per-switch bookkeeping node for sw.
+ * All fields start out as zero, except sw and rank (0xffffffff, i.e. not
+ * ranked yet). Returns NULL if the allocation fails.
+ */
+static struct dnup_node *create_dnup_node(osm_switch_t * sw)
+{
+	struct dnup_node *u;
+
+	u = calloc(1, sizeof(*u));
+	if (!u)
+		return NULL;
+	u->sw = sw;
+	u->rank = 0xffffffff;
+	return u;
+}
+
+/* Detach u from its switch (priv is reset to NULL) and free it.
+ * A NULL node is ignored.
+ */
+static void delete_dnup_node(struct dnup_node *u)
+{
+	if (!u)
+		return;
+	u->sw->priv = NULL;
+	free(u);
+}
+
+/* DNUP callback function
+ *
+ * Attaches a dnup_node to every switch, marks every switch that has a
+ * non-switch node behind one of its ports as rank 0 leaf, runs
+ * dnup_build_lid_matrices() and detaches the nodes again.
+ * Returns -1 if a node cannot be allocated or if no leaf switch exists,
+ * otherwise the result of dnup_build_lid_matrices(). The nodes created by
+ * this call are released on every path.
+ */
+static int dnup_lid_matrices(void *ctx)
+{
+	dnup_t *p_dnup = ctx;
+	osm_log_t *p_log = &p_dnup->p_osm->log;
+	cl_qmap_t *p_sw_tbl = &p_dnup->p_osm->subn.sw_guid_tbl;
+	cl_map_item_t *item;
+	/* first map item that does NOT carry a node created by this call */
+	const cl_map_item_t *p_nodes_end = cl_qmap_end(p_sw_tbl);
+	osm_switch_t *p_sw;
+	int ret = 0;
+	int num_leafs = 0;
+	uint8_t pn, pn_rem;
+
+	OSM_LOG_ENTER(p_log);
+
+	for (item = cl_qmap_head(p_sw_tbl);
+	     item != cl_qmap_end(p_sw_tbl);
+	     item = cl_qmap_next(item)) {
+		p_sw = (osm_switch_t *)item;
+		p_sw->priv = create_dnup_node(p_sw);
+		if (!p_sw->priv) {
+			OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AE0C: "
+				"cannot create dnup node\n");
+			/* only the switches before this one got a node */
+			p_nodes_end = item;
+			ret = -1;
+			goto cleanup;
+		}
+	}
+
+	/* First setup node level nodes */
+	for (item = cl_qmap_head(p_sw_tbl);
+	     item != cl_qmap_end(p_sw_tbl);
+	     item = cl_qmap_next(item)) {
+		p_sw = (osm_switch_t *)item;
+
+		for (pn = 0; pn < p_sw->num_ports; pn++) {
+			osm_node_t *p_remote_node;
+			struct dnup_node *u;
+
+			p_remote_node =
+			    osm_node_get_remote_node(p_sw->p_node, pn, &pn_rem);
+			/* a remote node without switch object makes this
+			 * switch a leaf
+			 */
+			if (!p_remote_node || p_remote_node->sw)
+				continue;
+
+			u = p_sw->priv;
+			u->rank = 0;
+			OSM_LOG(p_log, OSM_LOG_VERBOSE,
+				"(%s) rank 0 leaf switch\n",
+				p_sw->p_node->print_desc);
+			num_leafs++;
+			break;
+		}
+	}
+
+	if (num_leafs == 0) {
+		OSM_LOG(p_log, OSM_LOG_ERROR,
+			"ERR AE0D: No leaf switches found, DnUp routing failed\n");
+		ret = -1;
+		goto cleanup;
+	}
+
+	ret = dnup_build_lid_matrices(p_dnup);
+
+cleanup:
+	/* release exactly the nodes that were created above */
+	for (item = cl_qmap_head(p_sw_tbl);
+	     item != p_nodes_end;
+	     item = cl_qmap_next(item)) {
+		p_sw = (osm_switch_t *)item;
+		delete_dnup_node(p_sw->priv);
+	}
+
+	OSM_LOG_EXIT(p_log);
+	return ret;
+}
+
+/* destroy callback: frees the dnup_t allocated in osm_ucast_dnup_setup() */
+static void dnup_delete(void *context)
+{
+	free(context);
+}
+
+/* Register the DNUP routing engine in r.
+ * Returns 0 on success and -1 if the context cannot be allocated.
+ */
+int osm_ucast_dnup_setup(struct osm_routing_engine *r, osm_opensm_t *osm)
+{
+	dnup_t *dnup;
+
+	OSM_LOG_ENTER(&osm->log);
+
+	dnup = calloc(1, sizeof(dnup_t));
+	if (!dnup) {
+		/* keep enter/exit log records balanced on failure, too */
+		OSM_LOG_EXIT(&osm->log);
+		return -1;
+	}
+
+	dnup->p_osm = osm;
+
+	r->context = dnup;
+	r->destroy = dnup_delete;
+	r->build_lid_matrices = dnup_lid_matrices;
+
+	OSM_LOG_EXIT(&osm->log);
+	return 0;
+}
diff --git a/opensm/osm_ucast_file.c b/opensm/osm_ucast_file.c new file mode 100644 index 0000000..f891281 --- /dev/null +++ b/opensm/osm_ucast_file.c @@ -0,0 +1,406 @@ +/* + * Copyright (c) 2006-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2006,2008-2009 Mellanox Technologies LTD. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +/* + * Abstract: + * Implementation of OpenSM unicast routing module which loads + * routes from the dump file + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include + +#include +#include +#include +#include +#define FILE_ID OSM_FILE_UCAST_FILE_C +#include +#include +#include + +static uint16_t remap_lid(osm_opensm_t * p_osm, uint16_t lid, ib_net64_t guid) +{ + osm_port_t *p_port; + uint16_t min_lid, max_lid; + uint8_t lmc; + + p_port = osm_get_port_by_guid(&p_osm->subn, guid); + if (!p_port) { + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "cannot find port guid 0x%016" PRIx64 + " , will use the same lid\n", cl_ntoh64(guid)); + return lid; + } + + osm_port_get_lid_range_ho(p_port, &min_lid, &max_lid); + if (min_lid <= lid && lid <= max_lid) + return lid; + + lmc = osm_port_get_lmc(p_port); + return min_lid + (lid & ((1 << lmc) - 1)); +} + +static void add_path(osm_opensm_t * p_osm, + osm_switch_t * p_sw, uint16_t lid, uint8_t port_num, + ib_net64_t port_guid) +{ + uint16_t new_lid; + uint8_t old_port; + + new_lid = port_guid ? 
remap_lid(p_osm, lid, port_guid) : lid; + old_port = osm_switch_get_port_by_lid(p_sw, new_lid, OSM_LFT); + if (old_port != OSM_NO_PATH && old_port != port_num) { + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "LID collision is detected on switch " + "0x016%" PRIx64 ", will overwrite LID %u entry\n", + cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)), + new_lid); + } + + p_sw->new_lft[new_lid] = port_num; + if (!(p_osm->subn.opt.port_profile_switch_nodes && port_guid && + osm_get_switch_by_guid(&p_osm->subn, port_guid))) + osm_switch_count_path(p_sw, port_num); + + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, + "route 0x%04x(was 0x%04x) %u 0x%016" PRIx64 + " is added to switch 0x%016" PRIx64 "\n", + new_lid, lid, port_num, cl_ntoh64(port_guid), + cl_ntoh64(osm_node_get_node_guid(p_sw->p_node))); +} + +static void add_lid_hops(osm_opensm_t * p_osm, osm_switch_t * p_sw, + uint16_t lid, ib_net64_t guid, + uint8_t hops[], unsigned len) +{ + uint8_t i; + + if (len > p_sw->num_ports) + len = p_sw->num_ports; + + for (i = 0; i < len; i++) + osm_switch_set_hops(p_sw, lid, i, hops[i]); +} + +static int do_ucast_file_load(void *context) +{ + char line[1024]; + char *file_name; + FILE *file; + ib_net64_t sw_guid, port_guid; + osm_opensm_t *p_osm = context; + osm_switch_t *p_sw; + uint16_t lid; + uint8_t port_num; + unsigned lineno; + int status = -1; + + file_name = p_osm->subn.opt.lfts_file; + if (!file_name) { + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "LFTs file name is not given; " + "using default routing algorithm\n"); + return 1; + } + + file = fopen(file_name, "r"); + if (!file) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR | OSM_LOG_SYS, "ERR 6302: " + "Can't open ucast dump file \'%s\': %m\n", file_name); + goto Exit; + } + + lineno = 0; + p_sw = NULL; + + while (fgets(line, sizeof(line) - 1, file) != NULL) { + char *p, *q; + lineno++; + + p = line; + while (isspace(*p)) + p++; + + if (*p == '#') + continue; + + if (!strncmp(p, "Multicast mlids", 15)) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR 
| OSM_LOG_SYS, + "ERR 6303: " + "Multicast dump file detected; " + "skipping parsing. Using default " + "routing algorithm\n"); + } else if (!strncmp(p, "Unicast lids", 12)) { + q = strstr(p, " guid 0x"); + if (!q) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, + "PARSE ERROR: %s:%u: " + "cannot parse switch definition\n", + file_name, lineno); + goto Exit; + } + p = q + 8; + sw_guid = strtoull(p, &q, 16); + if (q == p || !isspace(*q)) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, + "PARSE ERROR: %s:%u: " + "cannot parse switch guid: \'%s\'\n", + file_name, lineno, p); + goto Exit; + } + sw_guid = cl_hton64(sw_guid); + + p_sw = osm_get_switch_by_guid(&p_osm->subn, sw_guid); + if (!p_sw) { + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "cannot find switch %016" PRIx64 "\n", + cl_ntoh64(sw_guid)); + continue; + } + memset(p_sw->new_lft, OSM_NO_PATH, p_sw->lft_size); + } else if (p_sw && !strncmp(p, "0x", 2)) { + p += 2; + lid = (uint16_t) strtoul(p, &q, 16); + if (q == p || !isspace(*q)) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, + "PARSE ERROR: %s:%u: " + "cannot parse lid: \'%s\'\n", + file_name, lineno, p); + goto Exit; + } + p = q; + while (isspace(*p)) + p++; + port_num = (uint8_t) strtoul(p, &q, 10); + if (q == p || !isspace(*q)) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, + "PARSE ERROR: %s:%u: " + "cannot parse port: \'%s\'\n", + file_name, lineno, p); + goto Exit; + } + if (port_num >= + osm_node_get_num_physp(p_sw->p_node)) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, + "Invalid port %d found " + "for switch %016" PRIx64 "\n", + port_num, + cl_ntoh64(osm_node_get_node_guid + (p_sw->p_node))); + goto Exit; + } + + p = q; + /* additionally try to extract guid */ + q = strstr(p, " portguid 0x"); + if (!q) { + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "PARSE WARNING: %s:%u: " + "cannot find port guid " + "(maybe broken dump): \'%s\'\n", + file_name, lineno, p); + port_guid = 0; + } else { + p = q + 12; + port_guid = strtoull(p, &q, 16); + if (q == p || (!isspace(*q) && *q != ':')) { + 
OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "PARSE WARNING: %s:%u: " + "cannot parse port guid " + "(maybe broken dump): \'%s\'\n", + file_name, lineno, p); + port_guid = 0; + } + } + port_guid = cl_hton64(port_guid); + add_path(p_osm, p_sw, lid, port_num, port_guid); + } + } + status = 0; +Exit: + if (file) + fclose(file); + return status; +} + +static int do_lid_matrix_file_load(void *context) +{ + char line[1024]; + uint8_t hops[256]; + char *file_name; + FILE *file; + ib_net64_t guid; + osm_opensm_t *p_osm = context; + osm_switch_t *p_sw; + unsigned lineno; + uint16_t lid; + int status = -1; + + file_name = p_osm->subn.opt.lid_matrix_dump_file; + if (!file_name) { + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "lid matrix file name is not given; " + "using default lid matrix generation algorithm\n"); + return 1; + } + + file = fopen(file_name, "r"); + if (!file) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR | OSM_LOG_SYS, "ERR 6305: " + "Can't open lid matrix file \'%s\': %m\n", file_name); + goto Exit; + } + + lineno = 0; + p_sw = NULL; + + while (fgets(line, sizeof(line) - 1, file) != NULL) { + char *p, *q; + lineno++; + + p = line; + while (isspace(*p)) + p++; + + if (*p == '#') + continue; + + if (!strncmp(p, "Switch", 6)) { + q = strstr(p, " guid 0x"); + if (!q) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, + "PARSE ERROR: %s:%u: " + "cannot parse switch definition\n", + file_name, lineno); + goto Exit; + } + p = q + 8; + guid = strtoull(p, &q, 16); + if (q == p || !isspace(*q)) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, + "PARSE ERROR: %s:%u: " + "cannot parse switch guid: \'%s\'\n", + file_name, lineno, p); + goto Exit; + } + guid = cl_hton64(guid); + + p_sw = osm_get_switch_by_guid(&p_osm->subn, guid); + if (!p_sw) { + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "cannot find switch %016" PRIx64 "\n", + cl_ntoh64(guid)); + continue; + } + } else if (p_sw && !strncmp(p, "0x", 2)) { + unsigned long num; + unsigned len = 0; + + memset(hops, 0xff, sizeof(hops)); + + p += 2; + num = 
strtoul(p, &q, 16); + if (num > 0xffff || q == p || + (*q != ':' && !isspace(*q))) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, + "PARSE ERROR: %s:%u: " + "cannot parse lid: \'%s\'\n", + file_name, lineno, p); + goto Exit; + } + /* Just checked the range, so casting is safe */ + lid = (uint16_t) num; + p = q; + while (isspace(*p) || *p == ':') + p++; + while (len < 256 && *p && *p != '#') { + num = strtoul(p, &q, 16); + if (num > 0xff || q == p) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, + "PARSE ERROR: %s:%u: " + "cannot parse hops number: \'%s\'\n", + file_name, lineno, p); + goto Exit; + } + /* Just checked the range, so casting is safe */ + hops[len++] = (uint8_t) num; + p = q; + while (isspace(*p)) + p++; + } + /* additionally try to extract guid */ + q = strstr(p, " portguid 0x"); + if (!q) { + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "PARSE WARNING: %s:%u: " + "cannot find port guid " + "(maybe broken dump): \'%s\'\n", + file_name, lineno, p); + guid = 0; + } else { + p = q + 12; + guid = strtoull(p, &q, 16); + if (q == p || !isspace(*q)) { + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "PARSE WARNING: %s:%u: " + "cannot parse port guid " + "(maybe broken dump): \'%s\'\n", + file_name, lineno, p); + guid = 0; + } + } + guid = cl_hton64(guid); + add_lid_hops(p_osm, p_sw, lid, guid, hops, len); + } + } + status = 0; +Exit: + if (file) + fclose(file); + return status; +} + +int osm_ucast_file_setup(struct osm_routing_engine *r, osm_opensm_t *osm) +{ + r->context = osm; + r->build_lid_matrices = do_lid_matrix_file_load; + r->ucast_build_fwd_tables = do_ucast_file_load; + return 0; +} diff --git a/opensm/osm_ucast_ftree.c b/opensm/osm_ucast_ftree.c new file mode 100644 index 0000000..a4b32e5 --- /dev/null +++ b/opensm/osm_ucast_ftree.c @@ -0,0 +1,4343 @@ +/* + * Copyright (c) 2009 Simula Research Laboratory. All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. 
+ * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of OpenSM FatTree routing + */ + +#if HAVE_CONFIG_H +# include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_UCAST_FTREE_C +#include +#include + +/* + * FatTree rank is bounded between 2 and 8: + * - Tree of rank 1 has only trivial routing paths, + * so no need to use FatTree routing. + * - Why maximum rank is 8: + * Each node (switch) is assigned a unique tuple. 
+ * Switches are stored in two cl_qmaps - one is + * ordered by guid, and the other by a key that is + * generated from tuple. Since cl_qmap supports only + * a 64-bit key, the maximal tuple length is 8 bytes. + * which means that maximal tree rank is 8. + * Note that the above also implies that each switch + * can have at max 255 up/down ports. + */ + +#define FAT_TREE_MIN_RANK 2 +#define FAT_TREE_MAX_RANK 8 + +typedef enum { + FTREE_DIRECTION_DOWN = -1, + FTREE_DIRECTION_SAME, + FTREE_DIRECTION_UP +} ftree_direction_t; + +/*************************************************** + ** + ** Forward references + ** + ***************************************************/ +struct ftree_sw_t_; +struct ftree_hca_t_; +struct ftree_port_t_; +struct ftree_port_group_t_; +struct ftree_fabric_t_; + +/*************************************************** + ** + ** ftree_tuple_t definition + ** + ***************************************************/ + +#define FTREE_TUPLE_BUFF_LEN 1024 +#define FTREE_TUPLE_LEN 8 + +typedef uint8_t ftree_tuple_t[FTREE_TUPLE_LEN]; +typedef uint64_t ftree_tuple_key_t; + +/*************************************************** + ** + ** ftree_sw_table_element_t definition + ** + ***************************************************/ + +typedef struct { + cl_map_item_t map_item; + struct ftree_sw_t_ *p_sw; +} ftree_sw_tbl_element_t; + +/*************************************************** + ** + ** ftree_port_t definition + ** + ***************************************************/ + +typedef struct ftree_port_t_ { + cl_map_item_t map_item; + uint8_t port_num; /* port number on the current node */ + uint8_t remote_port_num; /* port number on the remote node */ + uint32_t counter_up; /* number of allocated routes upwards */ + uint32_t counter_down; /* number of allocated routes downwards */ +} ftree_port_t; + +/*************************************************** + ** + ** ftree_port_group_t definition + ** + ***************************************************/ + 
+typedef union ftree_hca_or_sw_ {	/* owner of a port group: exactly one member is valid, selected by the accompanying node type */
+	struct ftree_hca_t_ *p_hca;
+	struct ftree_sw_t_ *p_sw;
+} ftree_hca_or_sw;
+
+typedef struct ftree_port_group_t_ {	/* all ports of one node that lead to the same remote LID */
+	cl_map_item_t map_item;
+	uint16_t lid;	/* lid of the current node */
+	uint16_t remote_lid;	/* lid of the remote node */
+	ib_net64_t port_guid;	/* port guid of this port */
+	ib_net64_t node_guid;	/* this node's guid */
+	uint8_t node_type;	/* this node's type */
+	ib_net64_t remote_port_guid;	/* port guid of the remote port */
+	ib_net64_t remote_node_guid;	/* node guid of the remote node */
+	uint8_t remote_node_type;	/* IB_NODE_TYPE_{CA,SWITCH,ROUTER,...} */
+	ftree_hca_or_sw hca_or_sw;	/* pointer to this hca/switch */
+	ftree_hca_or_sw remote_hca_or_sw;	/* pointer to remote hca/switch */
+	cl_ptr_vector_t ports;	/* vector of ports to the same lid */
+	boolean_t is_cn;	/* whether this port is a compute node */
+	boolean_t is_io;	/* whether this port is an I/O node */
+	uint32_t counter_down;	/* number of allocated routes downwards */
+	uint32_t counter_up;	/* number of allocated routes upwards */
+} ftree_port_group_t;
+
+/***************************************************
+ **
+ ** ftree_sw_t definition
+ **
+ ** Fat-tree view of one switch.  Created by sw_create(), which
+ ** sizes the three port group arrays by the node's physical port
+ ** count and the hops table by max_lid_ho + 1.
+ **
+ ***************************************************/
+
+typedef struct ftree_sw_t_ {
+	cl_map_item_t map_item;
+	osm_switch_t *p_osm_sw;	/* underlying OpenSM switch object */
+	uint32_t rank;	/* 0xFFFFFFFF until the switch is ranked */
+	ftree_tuple_t tuple;	/* filled with 0xFF until an index is assigned */
+	uint16_t lid;	/* base lid of port 0, host order */
+	ftree_port_group_t **down_port_groups;	/* one slot per physical port */
+	uint8_t down_port_groups_num;	/* slots of down_port_groups in use */
+	ftree_port_group_t **sibling_port_groups;	/* one slot per physical port */
+	uint8_t sibling_port_groups_num;	/* slots of sibling_port_groups in use */
+	ftree_port_group_t **up_port_groups;	/* one slot per physical port */
+	uint8_t up_port_groups_num;	/* slots of up_port_groups in use */
+	boolean_t is_leaf;
+	unsigned down_port_groups_idx;
+	uint8_t *hops;	/* local min hop count per target lid, OSM_NO_PATH initially */
+	uint32_t min_counter_down;
+	boolean_t counter_up_changed;
+} ftree_sw_t;
+
+/***************************************************
+ **
+ ** ftree_hca_t definition
+ **
+ ** Fat-tree view of one channel adapter node, created by
+ ** hca_create().
+ **
+ ***************************************************/
+
+typedef struct ftree_hca_t_ {
+	cl_map_item_t map_item;
+	osm_node_t *p_osm_node;	/* underlying OpenSM node object */
+	ftree_port_group_t **up_port_groups;	/* one slot per physical port */
+	uint8_t *disconnected_ports;	/* zero-initialised, physical port count + 1 bytes */
+	uint16_t up_port_groups_num;	/* slots of up_port_groups in use */
+	unsigned cn_num;
+} ftree_hca_t;
+
+/***************************************************
+ **
+ ** ftree_fabric_t definition
+ **
+ ** Top-level state of the routing engine.
+ ** NOTE(review): per the rank comment near the top of this file,
+ ** switches live in two maps, one keyed by guid and one by tuple
+ ** key - presumably sw_tbl and sw_by_tuple_tbl; confirm.
+ **
+ ***************************************************/
+
+typedef struct ftree_fabric_t_ {
+	osm_opensm_t *p_osm;
+	osm_subn_t *p_subn;	/* returned by ftree_get_subnet() */
+	cl_qmap_t hca_tbl;
+	cl_qmap_t sw_tbl;
+	cl_qmap_t sw_by_tuple_tbl;
+	cl_qmap_t cn_guid_tbl;
+	cl_qmap_t io_guid_tbl;
+	unsigned cn_num;
+	unsigned ca_ports;
+	uint8_t leaf_switch_rank;
+	uint8_t max_switch_rank;
+	ftree_sw_t **leaf_switches;
+	uint32_t leaf_switches_num;
+	uint16_t max_cn_per_leaf;
+	uint16_t lft_max_lid;
+	boolean_t fabric_built;
+} ftree_fabric_t;
+
+static inline osm_subn_t *ftree_get_subnet(IN ftree_fabric_t * p_ftree)
+{	/* accessor for the subnet the fabric was built from */
+	return p_ftree->p_subn;
+}
+
+/***************************************************
+ **
+ ** comparators
+ **
+ ** compare_switches_by_index: p1 and p2 point to ftree_sw_t
+ ** pointers.  Tuples are compared byte by byte from index 0;
+ ** returns 1, -1 or 0 for greater, smaller or equal.
+ **
+ ***************************************************/
+
+static int compare_switches_by_index(IN const void *p1, IN const void *p2)
+{
+	ftree_sw_t **pp_sw1 = (ftree_sw_t **) p1;
+	ftree_sw_t **pp_sw2 = (ftree_sw_t **) p2;
+
+	uint16_t i;
+	for (i = 0; i < FTREE_TUPLE_LEN; i++) {
+		if ((*pp_sw1)->tuple[i] > (*pp_sw2)->tuple[i])
+			return 1;
+		if ((*pp_sw1)->tuple[i] < (*pp_sw2)->tuple[i])
+			return -1;
+	}
+	return 0;	/* all FTREE_TUPLE_LEN bytes equal */
+}
+
+/***************************************************
+ * p1 and p2 point to ftree_port_group_t pointers.  Orders two
+ * port groups by the tuple of the switch at their remote end;
+ * reads remote_hca_or_sw.p_sw, so both remote ends are assumed
+ * to be switches.
+ ***************************************************/
+
+static int
+compare_port_groups_by_remote_switch_index(IN const void *p1, IN const void *p2)
+{
+	ftree_port_group_t **pp_g1 = (ftree_port_group_t **) p1;
+	ftree_port_group_t **pp_g2 = (ftree_port_group_t **) p2;
+
+	return
+	    compare_switches_by_index(&((*pp_g1)->remote_hca_or_sw.p_sw),
+				      &((*pp_g2)->remote_hca_or_sw.p_sw));
+}
+
+/***************************************************
+ **
+ ** ftree_tuple_t functions
+ **
+ ** tuple_init: marks every byte of the tuple as 0xFF, the
+ ** "not assigned" value.
+ **
+ ***************************************************/
+
+static void tuple_init(IN ftree_tuple_t tuple)
+{
+	memset(tuple, 0xFF, FTREE_TUPLE_LEN);
+}
+
+/***************************************************/
+
+/*
+ * A tuple counts as assigned once its first byte differs from the 0xFF
+ * filler.
+ */
+static inline boolean_t tuple_assigned(IN ftree_tuple_t tuple)
+{
+	return !(tuple[0] == 0xFF);
+}
+
+/***************************************************/
+
+#define FTREE_TUPLE_BUFFERS_NUM 6
+
+/*
+ * Render a tuple as dot-separated decimal bytes ("1.0.3"), stopping at the
+ * first 0xFF byte.
+ *
+ * The text lives in one of FTREE_TUPLE_BUFFERS_NUM static buffers used in
+ * rotation, so up to that many results may be alive at once (e.g. as
+ * arguments of a single log call).  Returns a fixed literal when the tuple
+ * is unassigned or would not fit.
+ */
+static const char *tuple_to_str(IN ftree_tuple_t tuple)
+{
+	static char buffer[FTREE_TUPLE_BUFFERS_NUM][FTREE_TUPLE_BUFF_LEN];
+	static uint8_t ind = 0;
+	char *out;
+	size_t used = 0;
+	uint32_t pos;
+
+	if (!tuple_assigned(tuple))
+		return "INDEX.NOT.ASSIGNED";
+
+	out = buffer[ind];
+	out[0] = '\0';
+
+	for (pos = 0; pos < FTREE_TUPLE_LEN; pos++) {
+		if (tuple[pos] == 0xFF)
+			break;
+		/* keep room for a separator plus one more number */
+		if (used + 10 > FTREE_TUPLE_BUFF_LEN)
+			return "INDEX.TOO.LONG";
+		if (pos > 0)
+			out[used++] = '.';
+		used += sprintf(out + used, "%u", tuple[pos]);
+	}
+
+	/* hand out the next buffer on the following call */
+	ind = (ind + 1) % FTREE_TUPLE_BUFFERS_NUM;
+	return out;
+}				/* tuple_to_str() */
+
+/***************************************************/
+
+/*
+ * Pack the FTREE_TUPLE_LEN tuple bytes into a map key by raw copy.
+ */
+static inline ftree_tuple_key_t tuple_to_key(IN ftree_tuple_t tuple)
+{
+	ftree_tuple_key_t packed;
+
+	memcpy(&packed, tuple, FTREE_TUPLE_LEN);
+	return packed;
+}
+
+#if 0
+/***************************************************/
+
+static inline void tuple_from_key(IN ftree_tuple_t tuple,
+				  IN ftree_tuple_key_t key)
+{
+	memcpy(tuple, &key, FTREE_TUPLE_LEN);
+}
+#endif
+
+/***************************************************
+ **
+ ** ftree_sw_tbl_element_t functions
+ **
+ ***************************************************/
+
+/*
+ * Allocate a zeroed table element that refers to p_sw.
+ * Returns NULL when the allocation fails.
+ */
+static ftree_sw_tbl_element_t *sw_tbl_element_create(IN ftree_sw_t * p_sw)
+{
+	ftree_sw_tbl_element_t *p_new = calloc(1, sizeof(*p_new));
+
+	if (p_new)
+		p_new->p_sw = p_sw;
+	return p_new;
+}
+
+/***************************************************/
+
+/*
+ * Release a table element; the switch it refers to is left untouched.
+ */
+static void sw_tbl_element_destroy(IN ftree_sw_tbl_element_t * p_element)
+{
+	free(p_element);
+}
+
+/*************************************************** + ** + ** ftree_port_t functions + ** + ***************************************************/ + +static ftree_port_t *port_create(IN uint8_t port_num, + IN uint8_t remote_port_num) +{ + ftree_port_t *p_port = (ftree_port_t *) malloc(sizeof(ftree_port_t)); + if (!p_port) + return NULL; + memset(p_port, 0, sizeof(ftree_port_t)); + + p_port->port_num = port_num; + p_port->remote_port_num = remote_port_num; + + return p_port; +} + +/***************************************************/ + +static void port_destroy(IN ftree_port_t * p_port) +{ + free(p_port); +} + +/*************************************************** + ** + ** ftree_port_group_t functions + ** + ***************************************************/ + +static ftree_port_group_t *port_group_create(IN uint16_t lid, + IN uint16_t remote_lid, + IN ib_net64_t port_guid, + IN ib_net64_t node_guid, + IN uint8_t node_type, + IN void *p_hca_or_sw, + IN ib_net64_t remote_port_guid, + IN ib_net64_t remote_node_guid, + IN uint8_t remote_node_type, + IN void *p_remote_hca_or_sw, + IN boolean_t is_cn, + IN boolean_t is_io) +{ + ftree_port_group_t *p_group = + (ftree_port_group_t *) malloc(sizeof(ftree_port_group_t)); + if (p_group == NULL) + return NULL; + memset(p_group, 0, sizeof(ftree_port_group_t)); + + p_group->lid = lid; + p_group->remote_lid = remote_lid; + memcpy(&p_group->port_guid, &port_guid, sizeof(ib_net64_t)); + memcpy(&p_group->node_guid, &node_guid, sizeof(ib_net64_t)); + memcpy(&p_group->remote_port_guid, &remote_port_guid, + sizeof(ib_net64_t)); + memcpy(&p_group->remote_node_guid, &remote_node_guid, + sizeof(ib_net64_t)); + + p_group->node_type = node_type; + switch (node_type) { + case IB_NODE_TYPE_CA: + p_group->hca_or_sw.p_hca = (ftree_hca_t *) p_hca_or_sw; + break; + case IB_NODE_TYPE_SWITCH: + p_group->hca_or_sw.p_sw = (ftree_sw_t *) p_hca_or_sw; + break; + default: + /* we shouldn't get here - port is created only in hca or switch */ + 
CL_ASSERT(0); + } + + p_group->remote_node_type = remote_node_type; + switch (remote_node_type) { + case IB_NODE_TYPE_CA: + p_group->remote_hca_or_sw.p_hca = + (ftree_hca_t *) p_remote_hca_or_sw; + break; + case IB_NODE_TYPE_SWITCH: + p_group->remote_hca_or_sw.p_sw = + (ftree_sw_t *) p_remote_hca_or_sw; + break; + default: + /* we shouldn't get here - port is created only in hca or switch */ + CL_ASSERT(0); + } + + cl_ptr_vector_init(&p_group->ports, 0, /* min size */ + 8); /* grow size */ + p_group->is_cn = is_cn; + p_group->is_io = is_io; + return p_group; +} /* port_group_create() */ + +/***************************************************/ + +static void port_group_destroy(IN ftree_port_group_t * p_group) +{ + uint32_t i; + uint32_t size; + ftree_port_t *p_port; + + if (!p_group) + return; + + /* remove all the elements of p_group->ports vector */ + size = cl_ptr_vector_get_size(&p_group->ports); + for (i = 0; i < size; i++) + if (cl_ptr_vector_at(&p_group->ports, i, (void *)&p_port) == CL_SUCCESS) + port_destroy(p_port); + + cl_ptr_vector_destroy(&p_group->ports); + free(p_group); +} /* port_group_destroy() */ + +/***************************************************/ + +static void port_group_dump(IN ftree_fabric_t * p_ftree, + IN ftree_port_group_t * p_group, + IN ftree_direction_t direction) +{ + ftree_port_t *p_port; + uint32_t size; + uint32_t i; + char *buff; + + if (!p_group) + return; + + if (!OSM_LOG_IS_ACTIVE_V2(&p_ftree->p_osm->log, OSM_LOG_DEBUG)) + return; + + size = cl_ptr_vector_get_size(&p_group->ports); + + buff = calloc(10, 1024); + if (!buff) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB33: " + "Failed to allocate buffer\n"); + return; + } + + for (i = 0; i < size; i++) { + cl_ptr_vector_at(&p_group->ports, i, (void *)&p_port); + CL_ASSERT(p_port); + + if (i != 0) + strcat(buff, ", "); + sprintf(buff + strlen(buff), "%u", p_port->port_num); + } + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + " Port Group of size %u, port(s): %s, 
direction: %s\n" + " Local <--> Remote GUID (LID):" + "0x%016" PRIx64 " (0x%04x) <--> 0x%016" PRIx64 " (0x%04x)\n", + size, buff, + (direction == FTREE_DIRECTION_DOWN) ? "DOWN" : (direction == + FTREE_DIRECTION_SAME) + ? "SIBLING" : "UP", cl_ntoh64(p_group->port_guid), + p_group->lid, cl_ntoh64(p_group->remote_port_guid), + p_group->remote_lid); + + free(buff); + +} /* port_group_dump() */ + +/***************************************************/ + +static void port_group_add_port(IN ftree_port_group_t * p_group, + IN uint8_t port_num, IN uint8_t remote_port_num) +{ + uint16_t i; + ftree_port_t *p_port; + + for (i = 0; i < cl_ptr_vector_get_size(&p_group->ports); i++) { + cl_ptr_vector_at(&p_group->ports, i, (void *)&p_port); + if (p_port->port_num == port_num) + return; + } + + p_port = port_create(port_num, remote_port_num); + CL_ASSERT(p_port); + cl_ptr_vector_insert(&p_group->ports, p_port, NULL); +} + +/*************************************************** + ** + ** ftree_sw_t functions + ** + ***************************************************/ + +static ftree_sw_t *sw_create(IN osm_switch_t * p_osm_sw) +{ + ftree_sw_t *p_sw; + uint8_t ports_num; + + /* make sure that the switch has ports */ + if (p_osm_sw->num_ports == 1) + return NULL; + + p_sw = (ftree_sw_t *) malloc(sizeof(ftree_sw_t)); + if (p_sw == NULL) + return NULL; + memset(p_sw, 0, sizeof(ftree_sw_t)); + + p_sw->p_osm_sw = p_osm_sw; + p_sw->rank = 0xFFFFFFFF; + tuple_init(p_sw->tuple); + + p_sw->lid = + cl_ntoh16(osm_node_get_base_lid(p_sw->p_osm_sw->p_node, 0)); + + ports_num = osm_node_get_num_physp(p_sw->p_osm_sw->p_node); + p_sw->down_port_groups = + (ftree_port_group_t **) malloc(ports_num * + sizeof(ftree_port_group_t *)); + if (p_sw->down_port_groups == NULL) + goto FREE_P_SW; + memset(p_sw->down_port_groups, 0, ports_num * sizeof(ftree_port_group_t *)); + + p_sw->up_port_groups = + (ftree_port_group_t **) malloc(ports_num * + sizeof(ftree_port_group_t *)); + if (p_sw->up_port_groups == NULL) + 
goto FREE_DOWN; + memset(p_sw->up_port_groups, 0, ports_num * sizeof(ftree_port_group_t *)); + + p_sw->sibling_port_groups = + (ftree_port_group_t **) malloc(ports_num * + sizeof(ftree_port_group_t *)); + if (p_sw->sibling_port_groups == NULL) + goto FREE_UP; + memset(p_sw->sibling_port_groups, 0, ports_num * sizeof(ftree_port_group_t *)); + + /* initialize lft buffer */ + memset(p_osm_sw->new_lft, OSM_NO_PATH, p_osm_sw->lft_size); + p_sw->hops = malloc((p_osm_sw->max_lid_ho + 1) * sizeof(*(p_sw->hops))); + if (p_sw->hops == NULL) + goto FREE_SIBLING; + + memset(p_sw->hops, OSM_NO_PATH, p_osm_sw->max_lid_ho + 1); + + return p_sw; + +FREE_SIBLING: + free(p_sw->sibling_port_groups); +FREE_UP: + free(p_sw->up_port_groups); +FREE_DOWN: + free(p_sw->down_port_groups); +FREE_P_SW: + free(p_sw); + return NULL; +} /* sw_create() */ + +/***************************************************/ + +static void sw_destroy(IN ftree_sw_t * p_sw) +{ + uint8_t i; + + if (!p_sw) + return; + free(p_sw->hops); + + for (i = 0; i < p_sw->down_port_groups_num; i++) + port_group_destroy(p_sw->down_port_groups[i]); + for (i = 0; i < p_sw->sibling_port_groups_num; i++) + port_group_destroy(p_sw->sibling_port_groups[i]); + for (i = 0; i < p_sw->up_port_groups_num; i++) + port_group_destroy(p_sw->up_port_groups[i]); + free(p_sw->down_port_groups); + free(p_sw->sibling_port_groups); + free(p_sw->up_port_groups); + + free(p_sw); +} /* sw_destroy() */ + +/***************************************************/ + +static uint64_t sw_get_guid_no(IN ftree_sw_t * p_sw) +{ + if (!p_sw) + return 0; + return osm_node_get_node_guid(p_sw->p_osm_sw->p_node); +} + +/***************************************************/ + +static uint64_t sw_get_guid_ho(IN ftree_sw_t * p_sw) +{ + return cl_ntoh64(sw_get_guid_no(p_sw)); +} + +/***************************************************/ + +static void sw_dump(IN ftree_fabric_t * p_ftree, IN ftree_sw_t * p_sw) +{ + uint32_t i; + + if (!p_sw) + return; + + if 
(!OSM_LOG_IS_ACTIVE_V2(&p_ftree->p_osm->log, OSM_LOG_DEBUG)) + return; + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Switch index: %s, GUID: 0x%016" PRIx64 + ", Ports: %u DOWN, %u SIBLINGS, %u UP\n", + tuple_to_str(p_sw->tuple), sw_get_guid_ho(p_sw), + p_sw->down_port_groups_num, p_sw->sibling_port_groups_num, + p_sw->up_port_groups_num); + + for (i = 0; i < p_sw->down_port_groups_num; i++) + port_group_dump(p_ftree, p_sw->down_port_groups[i], + FTREE_DIRECTION_DOWN); + for (i = 0; i < p_sw->sibling_port_groups_num; i++) + port_group_dump(p_ftree, p_sw->sibling_port_groups[i], + FTREE_DIRECTION_SAME); + for (i = 0; i < p_sw->up_port_groups_num; i++) + port_group_dump(p_ftree, p_sw->up_port_groups[i], + FTREE_DIRECTION_UP); + +} /* sw_dump() */ + +/***************************************************/ + +static boolean_t sw_ranked(IN ftree_sw_t * p_sw) +{ + return (p_sw->rank != 0xFFFFFFFF); +} + +/***************************************************/ + +static ftree_port_group_t *sw_get_port_group_by_remote_lid(IN ftree_sw_t * p_sw, + IN uint16_t + remote_lid, + IN ftree_direction_t + direction) +{ + uint32_t i; + uint32_t size; + ftree_port_group_t **port_groups; + + if (direction == FTREE_DIRECTION_UP) { + port_groups = p_sw->up_port_groups; + size = p_sw->up_port_groups_num; + } else if (direction == FTREE_DIRECTION_SAME) { + port_groups = p_sw->sibling_port_groups; + size = p_sw->sibling_port_groups_num; + } else { + port_groups = p_sw->down_port_groups; + size = p_sw->down_port_groups_num; + } + + for (i = 0; i < size; i++) + if (remote_lid == port_groups[i]->remote_lid) + return port_groups[i]; + + return NULL; +} /* sw_get_port_group_by_remote_lid() */ + +/***************************************************/ + +static void sw_add_port(IN ftree_sw_t * p_sw, IN uint8_t port_num, + IN uint8_t remote_port_num, IN uint16_t lid, + IN uint16_t remote_lid, IN ib_net64_t port_guid, + IN ib_net64_t remote_port_guid, + IN ib_net64_t remote_node_guid, + IN uint8_t 
remote_node_type, + IN void *p_remote_hca_or_sw, + IN ftree_direction_t direction) +{ + ftree_port_group_t *p_group = + sw_get_port_group_by_remote_lid(p_sw, remote_lid, direction); + + if (!p_group) { + p_group = port_group_create(lid, remote_lid, + port_guid, sw_get_guid_no(p_sw), + IB_NODE_TYPE_SWITCH, p_sw, + remote_port_guid, remote_node_guid, + remote_node_type, + p_remote_hca_or_sw, FALSE, FALSE); + CL_ASSERT(p_group); + + if (direction == FTREE_DIRECTION_UP) { + p_sw->up_port_groups[p_sw->up_port_groups_num++] = + p_group; + } else if (direction == FTREE_DIRECTION_SAME) { + p_sw-> + sibling_port_groups[p_sw->sibling_port_groups_num++] + = p_group; + } else + p_sw->down_port_groups[p_sw->down_port_groups_num++] = + p_group; + } + port_group_add_port(p_group, port_num, remote_port_num); + +} /* sw_add_port() */ + +/***************************************************/ + +static inline cl_status_t sw_set_hops(IN ftree_sw_t * p_sw, IN uint16_t lid, + IN uint8_t port_num, IN uint8_t hops, + IN boolean_t is_target_sw) +{ + /* set local min hop table(LID) */ + p_sw->hops[lid] = hops; + if (is_target_sw) + return osm_switch_set_hops(p_sw->p_osm_sw, lid, port_num, hops); + return 0; +} + +/***************************************************/ + +static int set_hops_on_remote_sw(IN ftree_port_group_t * p_group, + IN uint16_t target_lid, IN uint8_t hops, + IN boolean_t is_target_sw) +{ + ftree_port_t *p_port; + uint8_t i, ports_num; + ftree_sw_t *p_remote_sw = p_group->remote_hca_or_sw.p_sw; + + /* if lid is a switch, we set the min hop table in the osm_switch struct */ + CL_ASSERT(p_group->remote_node_type == IB_NODE_TYPE_SWITCH); + p_remote_sw->hops[target_lid] = hops; + + /* If target lid is a switch we set the min hop table values + * for each port on the associated osm_sw struct */ + if (!is_target_sw) + return 0; + + ports_num = (uint8_t) cl_ptr_vector_get_size(&p_group->ports); + for (i = 0; i < ports_num; i++) { + cl_ptr_vector_at(&p_group->ports, i, (void 
*)&p_port); + if (sw_set_hops(p_remote_sw, target_lid, + p_port->remote_port_num, hops, is_target_sw)) + return -1; + } + return 0; +} + +/***************************************************/ + +static inline uint8_t +sw_get_least_hops(IN ftree_sw_t * p_sw, IN uint16_t target_lid) +{ + CL_ASSERT(p_sw->hops != NULL); + return p_sw->hops[target_lid]; +} + +/*************************************************** + ** + ** ftree_hca_t functions + ** + ***************************************************/ + +static ftree_hca_t *hca_create(IN osm_node_t * p_osm_node) +{ + ftree_hca_t *p_hca = (ftree_hca_t *) malloc(sizeof(ftree_hca_t)); + if (p_hca == NULL) + return NULL; + memset(p_hca, 0, sizeof(ftree_hca_t)); + + p_hca->p_osm_node = p_osm_node; + p_hca->up_port_groups = (ftree_port_group_t **) + malloc(osm_node_get_num_physp(p_hca->p_osm_node) * + sizeof(ftree_port_group_t *)); + if (!p_hca->up_port_groups) { + free(p_hca); + return NULL; + } + memset(p_hca->up_port_groups, 0, osm_node_get_num_physp(p_hca->p_osm_node) * + sizeof(ftree_port_group_t *)); + + p_hca->disconnected_ports = (uint8_t *) + calloc(osm_node_get_num_physp(p_hca->p_osm_node) + 1, sizeof(uint8_t)); + if (!p_hca->disconnected_ports) { + free(p_hca->up_port_groups); + free(p_hca); + return NULL; + } + p_hca->up_port_groups_num = 0; + return p_hca; +} + +/***************************************************/ + +static void hca_destroy(IN ftree_hca_t * p_hca) +{ + uint32_t i; + + if (!p_hca) + return; + + for (i = 0; i < p_hca->up_port_groups_num; i++) + port_group_destroy(p_hca->up_port_groups[i]); + + free(p_hca->up_port_groups); + free(p_hca->disconnected_ports); + + free(p_hca); +} + +/***************************************************/ + +static uint64_t hca_get_guid_no(IN ftree_hca_t * p_hca) +{ + if (!p_hca) + return 0; + return osm_node_get_node_guid(p_hca->p_osm_node); +} + +/***************************************************/ + +static uint64_t hca_get_guid_ho(IN ftree_hca_t * p_hca) +{ + return 
cl_ntoh64(hca_get_guid_no(p_hca)); +} + +/***************************************************/ + +static void hca_dump(IN ftree_fabric_t * p_ftree, IN ftree_hca_t * p_hca) +{ + uint32_t i; + + if (!p_hca) + return; + + if (!OSM_LOG_IS_ACTIVE_V2(&p_ftree->p_osm->log, OSM_LOG_DEBUG)) + return; + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "CA GUID: 0x%016" PRIx64 ", Ports: %u UP\n", + hca_get_guid_ho(p_hca), p_hca->up_port_groups_num); + + for (i = 0; i < p_hca->up_port_groups_num; i++) + port_group_dump(p_ftree, p_hca->up_port_groups[i], + FTREE_DIRECTION_UP); +} + +static ftree_port_group_t *hca_get_port_group_by_lid(IN ftree_hca_t * + p_hca, + IN uint16_t + lid) +{ + uint32_t i; + for (i = 0; i < p_hca->up_port_groups_num; i++) + if (lid == + p_hca->up_port_groups[i]->lid) + return p_hca->up_port_groups[i]; + + return NULL; +} +/***************************************************/ + +static void hca_add_port(IN ftree_fabric_t * p_ftree, + IN ftree_hca_t * p_hca, IN uint8_t port_num, + IN uint8_t remote_port_num, IN uint16_t lid, + IN uint16_t remote_lid, IN ib_net64_t port_guid, + IN ib_net64_t remote_port_guid, + IN ib_net64_t remote_node_guid, + IN uint8_t remote_node_type, + IN void *p_remote_hca_or_sw, IN boolean_t is_cn, + IN boolean_t is_io) +{ + ftree_port_group_t *p_group; + + /* this function is supposed to be called only for adding ports + in hca's that lead to switches */ + CL_ASSERT(remote_node_type == IB_NODE_TYPE_SWITCH); + + p_group = hca_get_port_group_by_lid(p_hca, lid); + + if (!p_group) { + p_group = port_group_create(lid, remote_lid, + port_guid, hca_get_guid_no(p_hca), + IB_NODE_TYPE_CA, p_hca, + remote_port_guid, remote_node_guid, + remote_node_type, + p_remote_hca_or_sw, is_cn, is_io); + CL_ASSERT(p_group); + p_hca->up_port_groups[p_hca->up_port_groups_num++] = p_group; + port_group_add_port(p_group, port_num, remote_port_num); + } else + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, + "ERR AB32: Duplicated LID for CA GUID: 0x%016" 
PRIx64 "\n", + cl_ntoh64(port_guid)); +} /* hca_add_port() */ + +/*************************************************** + ** + ** ftree_fabric_t functions + ** + ***************************************************/ + +static ftree_fabric_t *fabric_create() +{ + ftree_fabric_t *p_ftree = + (ftree_fabric_t *) malloc(sizeof(ftree_fabric_t)); + if (p_ftree == NULL) + return NULL; + + memset(p_ftree, 0, sizeof(ftree_fabric_t)); + + cl_qmap_init(&p_ftree->hca_tbl); + cl_qmap_init(&p_ftree->sw_tbl); + cl_qmap_init(&p_ftree->sw_by_tuple_tbl); + cl_qmap_init(&p_ftree->cn_guid_tbl); + cl_qmap_init(&p_ftree->io_guid_tbl); + + return p_ftree; +} + +/***************************************************/ + +static void fabric_clear(ftree_fabric_t * p_ftree) +{ + ftree_hca_t *p_hca; + ftree_hca_t *p_next_hca; + ftree_sw_t *p_sw; + ftree_sw_t *p_next_sw; + ftree_sw_tbl_element_t *p_element; + ftree_sw_tbl_element_t *p_next_element; + name_map_item_t *p_guid_element, *p_next_guid_element; + + if (!p_ftree) + return; + + /* remove all the elements of hca_tbl */ + + p_next_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl); + while (p_next_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) { + p_hca = p_next_hca; + p_next_hca = (ftree_hca_t *) cl_qmap_next(&p_hca->map_item); + hca_destroy(p_hca); + } + cl_qmap_remove_all(&p_ftree->hca_tbl); + + /* remove all the elements of sw_tbl */ + + p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); + while (p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) { + p_sw = p_next_sw; + p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item); + sw_destroy(p_sw); + } + cl_qmap_remove_all(&p_ftree->sw_tbl); + + /* remove all the elements of sw_by_tuple_tbl */ + + p_next_element = + (ftree_sw_tbl_element_t *) cl_qmap_head(&p_ftree->sw_by_tuple_tbl); + while (p_next_element != (ftree_sw_tbl_element_t *) + cl_qmap_end(&p_ftree->sw_by_tuple_tbl)) { + p_element = p_next_element; + p_next_element = (ftree_sw_tbl_element_t *) + 
cl_qmap_next(&p_element->map_item); + sw_tbl_element_destroy(p_element); + } + cl_qmap_remove_all(&p_ftree->sw_by_tuple_tbl); + + /* remove all the elements of cn_guid_tbl */ + p_next_guid_element = + (name_map_item_t *) cl_qmap_head(&p_ftree->cn_guid_tbl); + while (p_next_guid_element != + (name_map_item_t *) cl_qmap_end(&p_ftree->cn_guid_tbl)) { + p_guid_element = p_next_guid_element; + p_next_guid_element = + (name_map_item_t *) cl_qmap_next(&p_guid_element->item); + free(p_guid_element); + } + cl_qmap_remove_all(&p_ftree->cn_guid_tbl); + + /* remove all the elements of io_guid_tbl */ + p_next_guid_element = + (name_map_item_t *) cl_qmap_head(&p_ftree->io_guid_tbl); + while (p_next_guid_element != + (name_map_item_t *) cl_qmap_end(&p_ftree->io_guid_tbl)) { + p_guid_element = p_next_guid_element; + p_next_guid_element = + (name_map_item_t *) cl_qmap_next(&p_guid_element->item); + free(p_guid_element); + } + cl_qmap_remove_all(&p_ftree->io_guid_tbl); + + /* free the leaf switches array */ + if ((p_ftree->leaf_switches_num > 0) && (p_ftree->leaf_switches)) + free(p_ftree->leaf_switches); + + p_ftree->leaf_switches_num = 0; + p_ftree->cn_num = 0; + p_ftree->ca_ports = 0; + p_ftree->leaf_switch_rank = 0; + p_ftree->max_switch_rank = 0; + p_ftree->max_cn_per_leaf = 0; + p_ftree->lft_max_lid = 0; + p_ftree->leaf_switches = NULL; + p_ftree->fabric_built = FALSE; + +} /* fabric_destroy() */ + +/***************************************************/ + +static void fabric_destroy(ftree_fabric_t * p_ftree) +{ + if (!p_ftree) + return; + fabric_clear(p_ftree); + free(p_ftree); +} + +/***************************************************/ + +static uint8_t fabric_get_rank(ftree_fabric_t * p_ftree) +{ + return p_ftree->leaf_switch_rank + 1; +} + +/***************************************************/ + +static void fabric_add_hca(ftree_fabric_t * p_ftree, osm_node_t * p_osm_node) +{ + ftree_hca_t *p_hca; + + CL_ASSERT(osm_node_get_type(p_osm_node) == IB_NODE_TYPE_CA); + + p_hca = 
hca_create(p_osm_node); + if (!p_hca) + return; + + cl_qmap_insert(&p_ftree->hca_tbl, p_osm_node->node_info.node_guid, + &p_hca->map_item); +} + +/***************************************************/ + +static void fabric_add_sw(ftree_fabric_t * p_ftree, osm_switch_t * p_osm_sw) +{ + ftree_sw_t *p_sw; + + CL_ASSERT(osm_node_get_type(p_osm_sw->p_node) == IB_NODE_TYPE_SWITCH); + + p_sw = sw_create(p_osm_sw); + if (!p_sw) + return; + + cl_qmap_insert(&p_ftree->sw_tbl, p_osm_sw->p_node->node_info.node_guid, + &p_sw->map_item); + + /* track the max lid (in host order) that exists in the fabric */ + if (p_sw->lid > p_ftree->lft_max_lid) + p_ftree->lft_max_lid = p_sw->lid; +} + +/***************************************************/ + +static void fabric_add_sw_by_tuple(IN ftree_fabric_t * p_ftree, + IN ftree_sw_t * p_sw) +{ + CL_ASSERT(tuple_assigned(p_sw->tuple)); + + cl_qmap_insert(&p_ftree->sw_by_tuple_tbl, tuple_to_key(p_sw->tuple), + &sw_tbl_element_create(p_sw)->map_item); +} + +/***************************************************/ + +static ftree_sw_t *fabric_get_sw_by_tuple(IN ftree_fabric_t * p_ftree, + IN ftree_tuple_t tuple) +{ + ftree_sw_tbl_element_t *p_element; + + CL_ASSERT(tuple_assigned(tuple)); + + tuple_to_key(tuple); + + p_element = + (ftree_sw_tbl_element_t *) cl_qmap_get(&p_ftree->sw_by_tuple_tbl, + tuple_to_key(tuple)); + if (p_element == + (ftree_sw_tbl_element_t *) cl_qmap_end(&p_ftree->sw_by_tuple_tbl)) + return NULL; + + return p_element->p_sw; +} + +/***************************************************/ + +static ftree_sw_t *fabric_get_sw_by_guid(IN ftree_fabric_t * p_ftree, + IN uint64_t guid) +{ + ftree_sw_t *p_sw; + p_sw = (ftree_sw_t *) cl_qmap_get(&p_ftree->sw_tbl, guid); + if (p_sw == (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) + return NULL; + return p_sw; +} + +/***************************************************/ + +static ftree_hca_t *fabric_get_hca_by_guid(IN ftree_fabric_t * p_ftree, + IN uint64_t guid) +{ + ftree_hca_t *p_hca; + 
p_hca = (ftree_hca_t *) cl_qmap_get(&p_ftree->hca_tbl, guid); + if (p_hca == (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) + return NULL; + return p_hca; +} + +/***************************************************/ + +static void fabric_dump(ftree_fabric_t * p_ftree) +{ + uint32_t i; + ftree_hca_t *p_hca; + ftree_sw_t *p_sw; + + if (!OSM_LOG_IS_ACTIVE_V2(&p_ftree->p_osm->log, OSM_LOG_DEBUG)) + return; + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "\n" + " |-------------------------------|\n" + " |- Full fabric topology dump -|\n" + " |-------------------------------|\n\n"); + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "-- CAs:\n"); + + for (p_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl); + p_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl); + p_hca = (ftree_hca_t *) cl_qmap_next(&p_hca->map_item)) { + hca_dump(p_ftree, p_hca); + } + + for (i = 0; i <= p_ftree->max_switch_rank; i++) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "-- Rank %u switches\n", i); + for (p_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); + p_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl); + p_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item)) { + if (p_sw->rank == i) + sw_dump(p_ftree, p_sw); + } + } + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "\n" + " |---------------------------------------|\n" + " |- Full fabric topology dump completed -|\n" + " |---------------------------------------|\n\n"); +} /* fabric_dump() */ + +/***************************************************/ + +static void fabric_dump_general_info(IN ftree_fabric_t * p_ftree) +{ + uint32_t i, j; + ftree_sw_t *p_sw; + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, + "General fabric topology info\n"); + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, + "============================\n"); + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, + " - FatTree rank (roots to leaf switches): %u\n", + p_ftree->leaf_switch_rank + 1); + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, + " - FatTree max switch 
rank: %u\n", p_ftree->max_switch_rank); + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, + " - Fabric has %u CAs, %u CA ports (%u of them CNs), %u switches\n", + cl_qmap_count(&p_ftree->hca_tbl), p_ftree->ca_ports, + p_ftree->cn_num, cl_qmap_count(&p_ftree->sw_tbl)); + + CL_ASSERT(p_ftree->ca_ports >= p_ftree->cn_num); + + for (i = 0; i <= p_ftree->max_switch_rank; i++) { + j = 0; + for (p_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); + p_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl); + p_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item)) { + if (p_sw->rank == i) + j++; + } + if (i == 0) + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, + " - Fabric has %u switches at rank %u (roots)\n", + j, i); + else if (i == p_ftree->leaf_switch_rank) + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, + " - Fabric has %u switches at rank %u (%u of them leafs)\n", + j, i, p_ftree->leaf_switches_num); + else + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, + " - Fabric has %u switches at rank %u\n", j, + i); + } + + if (OSM_LOG_IS_ACTIVE_V2(&p_ftree->p_osm->log, OSM_LOG_VERBOSE)) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + " - Root switches:\n"); + for (p_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); + p_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl); + p_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item)) { + if (p_sw->rank == 0) + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + " GUID: 0x%016" PRIx64 + ", LID: %u, Index %s\n", + sw_get_guid_ho(p_sw), + p_sw->lid, + tuple_to_str(p_sw->tuple)); + } + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + " - Leaf switches (sorted by index):\n"); + for (i = 0; i < p_ftree->leaf_switches_num; i++) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + " GUID: 0x%016" PRIx64 + ", LID: %u, Index %s\n", + sw_get_guid_ho(p_ftree->leaf_switches[i]), + p_ftree->leaf_switches[i]->lid, + tuple_to_str(p_ftree->leaf_switches[i]->tuple)); + } + } +} /* fabric_dump_general_info() */ + 
+/***************************************************/ + +static void fabric_dump_hca_ordering(IN ftree_fabric_t * p_ftree) +{ + ftree_hca_t *p_hca; + ftree_sw_t *p_sw; + ftree_port_group_t *p_group_on_sw; + ftree_port_group_t *p_group_on_hca; + int rename_status = 0; + uint32_t i; + uint32_t j; + unsigned printed_hcas_on_leaf; + + char path[1024], path_tmp[1032]; + FILE *p_hca_ordering_file; + const char *filename = "opensm-ftree-ca-order.dump"; + + snprintf(path, sizeof(path), "%s/%s", + p_ftree->p_osm->subn.opt.dump_files_dir, filename); + + snprintf(path_tmp, sizeof(path_tmp), "%s.tmp", path); + + p_hca_ordering_file = fopen(path_tmp, "w"); + if (!p_hca_ordering_file) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB01: " + "cannot open file \'%s\': %s\n", path_tmp, + strerror(errno)); + return; + } + + /* for each leaf switch (in indexing order) */ + for (i = 0; i < p_ftree->leaf_switches_num; i++) { + p_sw = p_ftree->leaf_switches[i]; + printed_hcas_on_leaf = 0; + + /* for each real CA (CNs and not) connected to this switch */ + for (j = 0; j < p_sw->down_port_groups_num; j++) { + p_group_on_sw = p_sw->down_port_groups[j]; + + if (p_group_on_sw->remote_node_type != IB_NODE_TYPE_CA) + continue; + + p_hca = p_group_on_sw->remote_hca_or_sw.p_hca; + p_group_on_hca = + hca_get_port_group_by_lid(p_hca, + p_group_on_sw-> + remote_lid); + + /* treat non-compute nodes as dummies */ + if (!p_group_on_hca->is_cn) + continue; + + fprintf(p_hca_ordering_file, "0x%04x\t%s\n", + p_group_on_hca->lid, + p_hca->p_osm_node->print_desc); + + printed_hcas_on_leaf++; + } + + /* now print missing HCAs */ + for (j = 0; + j < (p_ftree->max_cn_per_leaf - printed_hcas_on_leaf); j++) + fprintf(p_hca_ordering_file, "0xFFFF\tDUMMY\n"); + + } + /* done going through all the leaf switches */ + + fclose(p_hca_ordering_file); + + rename_status = rename(path_tmp, path); + if (rename_status) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB03: " + "cannot rename file \'%s\': 
%s\n", path_tmp, + strerror(errno)); + } +} /* fabric_dump_hca_ordering() */ + +/***************************************************/ + +static void fabric_assign_tuple(IN ftree_fabric_t * p_ftree, + IN ftree_sw_t * p_sw, + IN ftree_tuple_t new_tuple) +{ + memcpy(p_sw->tuple, new_tuple, FTREE_TUPLE_LEN); + fabric_add_sw_by_tuple(p_ftree, p_sw); +} + +/***************************************************/ + +static void fabric_assign_first_tuple(IN ftree_fabric_t * p_ftree, + IN ftree_sw_t * p_sw, + IN unsigned int subtree) +{ + uint8_t i; + ftree_tuple_t new_tuple; + + if (p_ftree->leaf_switch_rank >= FTREE_TUPLE_LEN) + return; + + tuple_init(new_tuple); + new_tuple[0] = (uint8_t) p_sw->rank; + + for (i = 1; i <= p_ftree->leaf_switch_rank; i++) + new_tuple[i] = 0; + + if (p_sw->rank == 0) { + if (p_ftree->leaf_switch_rank > 1) + new_tuple[p_ftree->leaf_switch_rank] = subtree; + + for (i = 0; i < 0xFF; i++) { + new_tuple[1] = i; + if (fabric_get_sw_by_tuple(p_ftree, new_tuple) == NULL) + break; + } + if (i == 0xFF) { + /* new tuple not found - there are more than 255 ports in one direction */ + return; + } + } + fabric_assign_tuple(p_ftree, p_sw, new_tuple); +} + +/***************************************************/ + +static void fabric_get_new_tuple(IN ftree_fabric_t * p_ftree, + OUT ftree_tuple_t new_tuple, + IN ftree_tuple_t from_tuple, + IN ftree_direction_t direction) +{ + ftree_sw_t *p_sw; + ftree_tuple_t temp_tuple; + uint8_t var_index; + uint8_t i; + + tuple_init(new_tuple); + memcpy(temp_tuple, from_tuple, FTREE_TUPLE_LEN); + + if (direction == FTREE_DIRECTION_DOWN) { + temp_tuple[0]++; + var_index = from_tuple[0] + 1; + } else { + temp_tuple[0]--; + var_index = from_tuple[0]; + } + + for (i = 0; i < 0xFF; i++) { + temp_tuple[var_index] = i; + p_sw = fabric_get_sw_by_tuple(p_ftree, temp_tuple); + if (p_sw == NULL) /* found free tuple */ + break; + } + + if (i == 0xFF) { + /* new tuple not found - there are more than 255 ports in one direction */ + return; 
+ } + memcpy(new_tuple, temp_tuple, FTREE_TUPLE_LEN); + +} /* fabric_get_new_tuple() */ + +/***************************************************/ + +static inline boolean_t fabric_roots_provided(IN ftree_fabric_t * p_ftree) +{ + return (p_ftree->p_osm->subn.opt.root_guid_file != NULL); +} + +/***************************************************/ + +static inline boolean_t fabric_cns_provided(IN ftree_fabric_t * p_ftree) +{ + return (p_ftree->p_osm->subn.opt.cn_guid_file != NULL); +} + +/***************************************************/ + +static inline boolean_t fabric_ios_provided(IN ftree_fabric_t * p_ftree) +{ + return (p_ftree->p_osm->subn.opt.io_guid_file != NULL); +} + +/***************************************************/ + +static int fabric_mark_leaf_switches(IN ftree_fabric_t * p_ftree) +{ + ftree_sw_t *p_sw; + ftree_hca_t *p_hca; + ftree_hca_t *p_next_hca; + unsigned i; + int res = 0; + + OSM_LOG_ENTER(&p_ftree->p_osm->log); + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Marking leaf switches in fabric\n"); + + /* Scan all the CAs, if they have CNs - find CN port and mark switch + that is connected to this port as leaf switch. + Also, ensure that this marked leaf has rank of p_ftree->leaf_switch_rank. 
*/ + p_next_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl); + while (p_next_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) { + p_hca = p_next_hca; + p_next_hca = (ftree_hca_t *) cl_qmap_next(&p_hca->map_item); + if (!p_hca->cn_num) + continue; + + for (i = 0; i < p_hca->up_port_groups_num; i++) { + if (!p_hca->up_port_groups[i]->is_cn) + continue; + + /* In CAs, port group alway has one port, and since this + port group is CN, we know that this port is compute node */ + CL_ASSERT(p_hca->up_port_groups[i]->remote_node_type == + IB_NODE_TYPE_SWITCH); + p_sw = p_hca->up_port_groups[i]->remote_hca_or_sw.p_sw; + + /* check if this switch was already processed */ + if (p_sw->is_leaf) + continue; + p_sw->is_leaf = TRUE; + + /* ensure that this leaf switch is at the correct tree level */ + if (p_sw->rank != p_ftree->leaf_switch_rank) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, + "ERR AB26: CN port 0x%" PRIx64 + " is connected to switch 0x%" PRIx64 + " with rank %u, " + "while FatTree leaf rank is %u\n", + cl_ntoh64(p_hca-> + up_port_groups[i]->port_guid), + sw_get_guid_ho(p_sw), p_sw->rank, + p_ftree->leaf_switch_rank); + res = -1; + goto Exit; + + } + } + } + +Exit: + OSM_LOG_EXIT(&p_ftree->p_osm->log); + return res; +} /* fabric_mark_leaf_switches() */ + +/***************************************************/ +static void bfs_fabric_indexing(IN ftree_fabric_t * p_ftree, + IN ftree_sw_t *p_first_sw) +{ + ftree_sw_t *p_remote_sw; + ftree_sw_t *p_sw = NULL; + ftree_tuple_t new_tuple; + uint32_t i; + cl_list_t bfs_list; + + OSM_LOG_ENTER(&p_ftree->p_osm->log); + cl_list_init(&bfs_list, cl_qmap_count(&p_ftree->sw_tbl)); + /* + * Now run BFS and assign indexes to all switches + * Pseudo code of the algorithm is as follows: + * + * * Add first switch to BFS queue + * * While (BFS queue not empty) + * - Pop the switch from the head of the queue + * - Scan all the downward and upward ports + * - For each port + * + Get the remote switch + * + Assign index to 
the remote switch + * + Add remote switch to the BFS queue + */ + + cl_list_insert_tail(&bfs_list, p_first_sw); + + while (!cl_is_list_empty(&bfs_list)) { + p_sw = (ftree_sw_t *) cl_list_remove_head(&bfs_list); + + /* Discover all the nodes from ports that are pointing down */ + + if (p_sw->rank >= p_ftree->leaf_switch_rank) { + /* whether downward ports are pointing to CAs or switches, + we don't assign indexes to switches that are located + lower than leaf switches */ + } else { + /* This is not the leaf switch */ + for (i = 0; i < p_sw->down_port_groups_num; i++) { + /* Work with port groups that are pointing to switches only. + No need to assign indexing to HCAs */ + if (p_sw-> + down_port_groups[i]->remote_node_type != + IB_NODE_TYPE_SWITCH) + continue; + + p_remote_sw = + p_sw->down_port_groups[i]-> + remote_hca_or_sw.p_sw; + if (tuple_assigned(p_remote_sw->tuple)) { + /* this switch has been already indexed */ + continue; + } + /* allocate new tuple */ + fabric_get_new_tuple(p_ftree, new_tuple, + p_sw->tuple, + FTREE_DIRECTION_DOWN); + /* Assign the new tuple to the remote switch. + This fuction also adds the switch into the switch_by_tuple table. */ + fabric_assign_tuple(p_ftree, p_remote_sw, + new_tuple); + + /* add the newly discovered switch to the BFS queue */ + cl_list_insert_tail(&bfs_list, p_remote_sw); + } + /* Done assigning indexes to all the remote switches + that are pointed by the downgoing ports. + Now sort port groups according to remote index. */ + qsort(p_sw->down_port_groups, /* array */ + p_sw->down_port_groups_num, /* number of elements */ + sizeof(ftree_port_group_t *), /* size of each element */ + compare_port_groups_by_remote_switch_index); /* comparator */ + } + + /* Done indexing switches from ports that go down. + Now do the same with ports that are pointing up. 
+ if we started from root (rank == 0), the leaf is bsf termination point */ + + if (p_sw->rank != 0 && (p_first_sw->rank != 0 || !p_sw->is_leaf)) { + /* This is not the root switch, which means that all the ports + that are pointing up are taking us to another switches. */ + for (i = 0; i < p_sw->up_port_groups_num; i++) { + p_remote_sw = + p_sw->up_port_groups[i]-> + remote_hca_or_sw.p_sw; + if (tuple_assigned(p_remote_sw->tuple)) + continue; + /* allocate new tuple */ + fabric_get_new_tuple(p_ftree, new_tuple, + p_sw->tuple, + FTREE_DIRECTION_UP); + /* Assign the new tuple to the remote switch. + This fuction also adds the switch to the + switch_by_tuple table. */ + fabric_assign_tuple(p_ftree, + p_remote_sw, new_tuple); + /* add the newly discovered switch to the BFS queue */ + cl_list_insert_tail(&bfs_list, p_remote_sw); + } + /* Done assigning indexes to all the remote switches + that are pointed by the upgoing ports. + Now sort port groups according to remote index. */ + qsort(p_sw->up_port_groups, /* array */ + p_sw->up_port_groups_num, /* number of elements */ + sizeof(ftree_port_group_t *), /* size of each element */ + compare_port_groups_by_remote_switch_index); /* comparator */ + } + /* Done assigning indexes to all the switches that are directly connected + to the current switch - go to the next switch in the BFS queue */ + } + cl_list_destroy(&bfs_list); + + OSM_LOG_EXIT(&p_ftree->p_osm->log); +} + +static void fabric_make_indexing(IN ftree_fabric_t * p_ftree) +{ + ftree_sw_t *p_sw = NULL; + unsigned int subtree = 0; + OSM_LOG_ENTER(&p_ftree->p_osm->log); + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Starting FatTree indexing\n"); + + /* using the first switch as a starting point for indexing algorithm. 
*/ + for (p_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); + p_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl); + p_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item)) { + if (ftree_get_subnet(p_ftree)->opt.quasi_ftree_indexing) { + /* find first root switch */ + if (p_sw->rank != 0) + continue; + } else { + /* find first leaf switch */ + if (!p_sw->is_leaf) + continue; + } + /* Assign the first tuple to the switch that is used as BFS starting point + in the subtree. + The tuple will be as follows: [rank].0...0.subtree + This fuction also adds the switch it into the switch_by_tuple table. */ + if (!tuple_assigned(p_sw->tuple)) { + fabric_assign_first_tuple(p_ftree, p_sw, subtree++); + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Indexing starting point:\n" + " - Switch rank : %u\n" + " - Switch index : %s\n" + " - Node LID : %u\n" + " - Node GUID : 0x%016" + PRIx64 "\n", p_sw->rank, tuple_to_str(p_sw->tuple), + p_sw->lid, sw_get_guid_ho(p_sw)); + } + + bfs_fabric_indexing(p_ftree, p_sw); + + if (ftree_get_subnet(p_ftree)->opt.quasi_ftree_indexing == FALSE) + goto Exit; + } + p_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); + while (p_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) { + if (p_sw->is_leaf) { + qsort(p_sw->up_port_groups, /* array */ + p_sw->up_port_groups_num, /* number of elements */ + sizeof(ftree_port_group_t *), /* size of each element */ + compare_port_groups_by_remote_switch_index); /* comparator */ + } + p_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item); + + } +Exit: + OSM_LOG_EXIT(&p_ftree->p_osm->log); +} /* fabric_make_indexing() */ +/***************************************************/ + +static int fabric_create_leaf_switch_array(IN ftree_fabric_t * p_ftree) +{ + ftree_sw_t *p_sw; + ftree_sw_t *p_next_sw; + ftree_sw_t **all_switches_at_leaf_level; + unsigned i; + unsigned all_leaf_idx = 0; + unsigned first_leaf_idx; + unsigned last_leaf_idx; + int res = 0; + + OSM_LOG_ENTER(&p_ftree->p_osm->log); + + /* create array 
of ALL the switches that have leaf rank */ + all_switches_at_leaf_level = (ftree_sw_t **) + malloc(cl_qmap_count(&p_ftree->sw_tbl) * sizeof(ftree_sw_t *)); + if (!all_switches_at_leaf_level) { + osm_log_v2(&p_ftree->p_osm->log, OSM_LOG_SYS, FILE_ID, + "Fat-tree routing: Memory allocation failed\n"); + res = -1; + goto Exit; + } + memset(all_switches_at_leaf_level, 0, + cl_qmap_count(&p_ftree->sw_tbl) * sizeof(ftree_sw_t *)); + + p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); + while (p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) { + p_sw = p_next_sw; + p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item); + if (p_sw->rank == p_ftree->leaf_switch_rank) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Adding switch 0x%" PRIx64 + " to full leaf switch array\n", + sw_get_guid_ho(p_sw)); + all_switches_at_leaf_level[all_leaf_idx++] = p_sw; + } + } + + /* quick-sort array of leaf switches by index */ + qsort(all_switches_at_leaf_level, /* array */ + all_leaf_idx, /* number of elements */ + sizeof(ftree_sw_t *), /* size of each element */ + compare_switches_by_index); /* comparator */ + + /* check the first and the last REAL leaf (the one + that has CNs) in the array of all the leafs */ + + first_leaf_idx = all_leaf_idx; + last_leaf_idx = 0; + for (i = 0; i < all_leaf_idx; i++) { + if (all_switches_at_leaf_level[i]->is_leaf) { + if (i < first_leaf_idx) + first_leaf_idx = i; + last_leaf_idx = i; + } + } + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Full leaf array info: first_leaf_idx = %u, last_leaf_idx = %u\n", + first_leaf_idx, last_leaf_idx); + + if (first_leaf_idx >= last_leaf_idx) { + osm_log_v2(&p_ftree->p_osm->log, OSM_LOG_INFO, FILE_ID, + "Failed to find leaf switches - topology is not " + "fat-tree\n"); + res = -1; + goto Exit; + } + + /* Create array of REAL leaf switches, sorted by index. + This array may contain switches at the same rank w/o CNs, + in case this is the order of indexing. 
*/ + p_ftree->leaf_switches_num = last_leaf_idx - first_leaf_idx + 1; + p_ftree->leaf_switches = (ftree_sw_t **) + malloc(p_ftree->leaf_switches_num * sizeof(ftree_sw_t *)); + if (!p_ftree->leaf_switches) { + osm_log_v2(&p_ftree->p_osm->log, OSM_LOG_SYS, FILE_ID, + "Fat-tree routing: Memory allocation failed\n"); + res = -1; + goto Exit; + } + + memcpy(p_ftree->leaf_switches, + &(all_switches_at_leaf_level[first_leaf_idx]), + p_ftree->leaf_switches_num * sizeof(ftree_sw_t *)); + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Created array of %u leaf switches\n", + p_ftree->leaf_switches_num); + +Exit: + free(all_switches_at_leaf_level); + OSM_LOG_EXIT(&p_ftree->p_osm->log); + return res; +} /* fabric_create_leaf_switch_array() */ + +/***************************************************/ + +static void fabric_set_max_cn_per_leaf(IN ftree_fabric_t * p_ftree) +{ + unsigned i; + unsigned j; + unsigned cns_on_this_leaf; + ftree_sw_t *p_sw; + ftree_port_group_t *p_group, *p_up_group; + ftree_hca_t *p_hca; + + for (i = 0; i < p_ftree->leaf_switches_num; i++) { + p_sw = p_ftree->leaf_switches[i]; + cns_on_this_leaf = 0; + for (j = 0; j < p_sw->down_port_groups_num; j++) { + p_group = p_sw->down_port_groups[j]; + if (p_group->remote_node_type != IB_NODE_TYPE_CA) + continue; + p_hca = p_group->remote_hca_or_sw.p_hca; + /* + * Get the hca port group corresponding + * to the LID of remote HCA port + */ + p_up_group = hca_get_port_group_by_lid(p_hca, + p_group->remote_lid); + + CL_ASSERT(p_up_group); + + if (p_up_group->is_cn) + cns_on_this_leaf++; + } + if (cns_on_this_leaf > p_ftree->max_cn_per_leaf) + p_ftree->max_cn_per_leaf = cns_on_this_leaf; + } +} /* fabric_set_max_cn_per_leaf() */ + +/***************************************************/ + +static boolean_t fabric_validate_topology(IN ftree_fabric_t * p_ftree) +{ + ftree_port_group_t *p_group; + ftree_port_group_t *p_ref_group; + ftree_sw_t *p_sw; + ftree_sw_t *p_next_sw; + ftree_sw_t **reference_sw_arr; + uint16_t 
tree_rank = fabric_get_rank(p_ftree); + boolean_t res = TRUE; + uint8_t i; + + OSM_LOG_ENTER(&p_ftree->p_osm->log); + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Validating fabric topology\n"); + + reference_sw_arr = + (ftree_sw_t **) malloc(tree_rank * sizeof(ftree_sw_t *)); + if (reference_sw_arr == NULL) { + osm_log_v2(&p_ftree->p_osm->log, OSM_LOG_SYS, FILE_ID, + "Fat-tree routing: Memory allocation failed\n"); + return FALSE; + } + memset(reference_sw_arr, 0, tree_rank * sizeof(ftree_sw_t *)); + + p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); + while (res && p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) { + p_sw = p_next_sw; + p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item); + + if (!reference_sw_arr[p_sw->rank]) + /* This is the first switch in the current level that + we're checking - use it as a reference */ + reference_sw_arr[p_sw->rank] = p_sw; + else { + /* compare this switch properties to the reference switch */ + + if (reference_sw_arr[p_sw->rank]->up_port_groups_num != + p_sw->up_port_groups_num) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, + "ERR AB09: Different number of upward port groups on switches:\n" + " GUID 0x%016" PRIx64 + ", LID %u, Index %s - %u groups\n" + " GUID 0x%016" PRIx64 + ", LID %u, Index %s - %u groups\n", + sw_get_guid_ho + (reference_sw_arr[p_sw->rank]), + reference_sw_arr[p_sw->rank]->lid, + tuple_to_str + (reference_sw_arr[p_sw->rank]->tuple), + reference_sw_arr[p_sw-> + rank]-> + up_port_groups_num, + sw_get_guid_ho(p_sw), p_sw->lid, + tuple_to_str(p_sw->tuple), + p_sw->up_port_groups_num); + res = FALSE; + break; + } + + if (p_sw->rank != (tree_rank - 1) && + reference_sw_arr[p_sw-> + rank]->down_port_groups_num != + p_sw->down_port_groups_num) { + /* we're allowing some hca's to be missing */ + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, + "ERR AB0A: Different number of downward port groups on switches:\n" + " GUID 0x%016" PRIx64 + ", LID %u, Index %s - %u port groups\n" + " 
GUID 0x%016" PRIx64 + ", LID %u, Index %s - %u port groups\n", + sw_get_guid_ho + (reference_sw_arr[p_sw->rank]), + reference_sw_arr[p_sw->rank]->lid, + tuple_to_str + (reference_sw_arr[p_sw->rank]->tuple), + reference_sw_arr[p_sw-> + rank]-> + down_port_groups_num, + sw_get_guid_ho(p_sw), p_sw->lid, + tuple_to_str(p_sw->tuple), + p_sw->down_port_groups_num); + res = FALSE; + break; + } + + if (reference_sw_arr[p_sw->rank]->up_port_groups_num != + 0) { + p_ref_group = + reference_sw_arr[p_sw-> + rank]->up_port_groups[0]; + for (i = 0; i < p_sw->up_port_groups_num; i++) { + p_group = p_sw->up_port_groups[i]; + if (cl_ptr_vector_get_size + (&p_ref_group->ports) != + cl_ptr_vector_get_size + (&p_group->ports)) { + OSM_LOG(&p_ftree->p_osm->log, + OSM_LOG_ERROR, + "ERR AB0B: Different number of ports in an upward port group on switches:\n" + " GUID 0x%016" + PRIx64 + ", LID %u, Index %s - %u ports\n" + " GUID 0x%016" + PRIx64 + ", LID %u, Index %s - %u ports\n", + sw_get_guid_ho + (reference_sw_arr + [p_sw->rank]), + reference_sw_arr[p_sw-> + rank]-> + lid, + tuple_to_str + (reference_sw_arr + [p_sw->rank]->tuple), + cl_ptr_vector_get_size + (&p_ref_group->ports), + sw_get_guid_ho(p_sw), + p_sw->lid, + tuple_to_str(p_sw-> + tuple), + cl_ptr_vector_get_size + (&p_group->ports)); + res = FALSE; + break; + } + } + } + if (reference_sw_arr[p_sw->rank]->down_port_groups_num + != 0 && p_sw->rank != (tree_rank - 1)) { + /* we're allowing some hca's to be missing */ + p_ref_group = + reference_sw_arr[p_sw-> + rank]->down_port_groups[0]; + for (i = 0; i < p_sw->down_port_groups_num; i++) { + p_group = p_sw->down_port_groups[0]; + if (cl_ptr_vector_get_size + (&p_ref_group->ports) != + cl_ptr_vector_get_size + (&p_group->ports)) { + OSM_LOG(&p_ftree->p_osm->log, + OSM_LOG_ERROR, + "ERR AB0C: Different number of ports in an downward port group on switches:\n" + " GUID 0x%016" + PRIx64 + ", LID %u, Index %s - %u ports\n" + " GUID 0x%016" + PRIx64 + ", LID %u, Index %s - %u 
ports\n", + sw_get_guid_ho + (reference_sw_arr + [p_sw->rank]), + reference_sw_arr[p_sw-> + rank]-> + lid, + tuple_to_str + (reference_sw_arr + [p_sw->rank]->tuple), + cl_ptr_vector_get_size + (&p_ref_group->ports), + sw_get_guid_ho(p_sw), + p_sw->lid, + tuple_to_str(p_sw-> + tuple), + cl_ptr_vector_get_size + (&p_group->ports)); + res = FALSE; + break; + } + } + } + } /* end of else */ + } /* end of while */ + + if (res == TRUE) + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Fabric topology has been identified as FatTree\n"); + else + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, + "ERR AB0D: Fabric topology hasn't been identified as FatTree\n"); + + free(reference_sw_arr); + OSM_LOG_EXIT(&p_ftree->p_osm->log); + return res; +} /* fabric_validate_topology() */ + +/*************************************************** + ***************************************************/ + +static void set_sw_fwd_table(IN cl_map_item_t * const p_map_item, + IN void *context) +{ + ftree_sw_t *p_sw = (ftree_sw_t * const)p_map_item; + ftree_fabric_t *p_ftree = (ftree_fabric_t *) context; + + p_sw->p_osm_sw->max_lid_ho = p_ftree->lft_max_lid; +} + +/*************************************************** + ***************************************************/ + +/* + * Function: Finds the least loaded port group and stores its counter + * Given : A switch + */ +static inline void recalculate_min_counter_down(ftree_sw_t * p_sw) +{ + uint32_t min = (1 << 30); + uint32_t i; + for (i = 0; i < p_sw->down_port_groups_num; i++) { + if (p_sw->down_port_groups[i]->counter_down < min) { + min = p_sw->down_port_groups[i]->counter_down; + } + } + p_sw->min_counter_down = min; + return; +} + +/* + * Function: Return the counter value of the least loaded down port group + * Given : A switch + */ +static inline uint32_t find_lowest_loaded_group_on_sw(ftree_sw_t * p_sw) +{ + return p_sw->min_counter_down; +} + +/* + * Function: Compare the load of two port groups and return which is the least loaded + * 
Given : Two port groups with remote switch + * When both port groups are equally loaded, it picks the one whom + * remote switch down ports are least loaded. + * This way, it prefers the switch from where it will be easier to go down (creating upward routes). + * If both are equal, it picks the lowest INDEX to be deterministic. + */ +static inline int port_group_compare_load_down(const ftree_port_group_t * p1, + const ftree_port_group_t * p2) +{ + int temp = p1->counter_down - p2->counter_down; + if (temp > 0) + return 1; + if (temp < 0) + return -1; + + /* Find the less loaded remote sw and choose this one */ + do { + uint32_t load1 = + find_lowest_loaded_group_on_sw(p1->remote_hca_or_sw.p_sw); + uint32_t load2 = + find_lowest_loaded_group_on_sw(p2->remote_hca_or_sw.p_sw); + temp = load1 - load2; + if (temp > 0) + return 1; + } while (0); + /* If they are both equal, choose the lowest index */ + return compare_port_groups_by_remote_switch_index(&p1, &p2); +} + +static inline int port_group_compare_load_up(const ftree_port_group_t * p1, + const ftree_port_group_t * p2) +{ + int temp = p1->counter_up - p2->counter_up; + if (temp > 0) + return 1; + if (temp < 0) + return -1; + + /* If they are both equal, choose the lowest index */ + return compare_port_groups_by_remote_switch_index (&p1,&p2); +} + +/* + * Function: Sorts an array of port group by up load order + * Given : A port group array and its length + * As the list is mostly sorted, we used a bubble sort instead of qsort + * as it is much faster. + * + * Important note: + * This function and bubble_sort_down must NOT be factorized. + * Although most of the code is the same and a function pointer could be used + * for the compareason function, it would prevent the compareason function to be inlined + * and cost a great deal to performances. 
+ */ +static inline void +bubble_sort_up(ftree_port_group_t ** p_group_array, uint32_t nmemb) +{ + uint32_t i = 0; + uint32_t j = 0; + ftree_port_group_t *tmp = p_group_array[0]; + + /* As this function is a great number of times, we only go into the loop + * if one of the port counters has changed, thus saving some tests */ + if (tmp->hca_or_sw.p_sw->counter_up_changed == FALSE) { + return; + } + /* While we did modifications on the array order */ + /* i may grew above array length but next loop will fail and tmp will be null for the next time + * this way we save a test i < nmemb for each pass through the loop */ + for (i = 0; tmp; i++) { + /* Assume the array is orderd */ + tmp = NULL; + /* Comparing elements j and j-1 */ + for (j = 1; j < (nmemb - i); j++) { + /* If they are the wrong way around */ + if (port_group_compare_load_up(p_group_array[j], + p_group_array[j - 1]) < 0) { + /* We invert them */ + tmp = p_group_array[j - 1]; + p_group_array[j - 1] = p_group_array[j]; + p_group_array[j] = tmp; + /* This sets tmp != NULL so the main loop will make another pass */ + } + } + } + + /* We have reordered the array so as long noone changes the counter + * it's not necessary to do it again */ + p_group_array[0]->hca_or_sw.p_sw->counter_up_changed = FALSE; +} + +static inline void +bubble_sort_siblings(ftree_port_group_t ** p_group_array, uint32_t nmemb) +{ + uint32_t i = 0; + uint32_t j = 0; + ftree_port_group_t *tmp = p_group_array[0]; + + /* While we did modifications on the array order */ + /* i may grew above array length but next loop will fail and tmp will be null for the next time + * this way we save a test i < nmemb for each pass through the loop */ + for (i = 0; tmp != NULL; i++) { + /* Assume the array is orderd */ + tmp = NULL; + /* Comparing elements j and j-1 */ + for (j = 1; j < (nmemb - i); j++) { + /* If they are the wrong way around */ + if (port_group_compare_load_up(p_group_array[j], + p_group_array[j - 1]) < 0) { + /* We invert them */ + tmp = 
p_group_array[j - 1]; + p_group_array[j - 1] = p_group_array[j]; + p_group_array[j] = tmp; + } + } + } +} + +/* + * Function: Sorts an array of port group. Order is decide through + * port_group_compare_load_down ( up counters, least load remote switch, biggest GUID) + * Given : A port group array and its length. Each port group points to a remote switch (not a HCA) + * As the list is mostly sorted, we used a bubble sort instead of qsort + * as it is much faster. + * + * Important note: + * This function and bubble_sort_up must NOT be factorized. + * Although most of the code is the same and a function pointer could be used + * for the compareason function, it would prevent the compareason function to be inlined + * and cost a great deal to performances. + */ +static inline void +bubble_sort_down(ftree_port_group_t ** p_group_array, uint32_t nmemb) +{ + uint32_t i = 0; + uint32_t j = 0; + ftree_port_group_t *tmp = p_group_array[0]; + + /* While we did modifications on the array order */ + /* i may grew above array length but next loop will fail and tmp will be null for the next time + * this way we save a test i < nmemb for each pass through the loop */ + for (i = 0; tmp; i++) { + /* Assume the array is orderd */ + tmp = NULL; + /* Comparing elements j and j-1 */ + for (j = 1; j < (nmemb - i); j++) { + /* If they are the wrong way around */ + if (port_group_compare_load_down + (p_group_array[j], p_group_array[j - 1]) < 0) { + /* We invert them */ + tmp = p_group_array[j - 1]; + p_group_array[j - 1] = p_group_array[j]; + p_group_array[j] = tmp; + + } + } + } +} + +/*************************************************** + ***************************************************/ + +/* + * Function: assign-up-going-port-by-descending-down + * Given : a switch and a LID + * Pseudo code: + * foreach down-going-port-group (in indexing order) + * skip this group if the LFT(LID) port is part of this group + * find the least loaded port of the group (scan in indexing order) + * 
r-port is the remote port connected to it + * assign the remote switch node LFT(LID) to r-port + * increase r-port usage counter + * assign-up-going-port-by-descending-down to r-port node (recursion) + */ + +static boolean_t +fabric_route_upgoing_by_going_down(IN ftree_fabric_t * p_ftree, + IN ftree_sw_t * p_sw, + IN ftree_sw_t * p_prev_sw, + IN uint16_t target_lid, + IN boolean_t is_main_path, + IN boolean_t is_target_a_sw, + IN uint8_t current_hops) +{ + ftree_sw_t *p_remote_sw; + uint16_t ports_num; + ftree_port_group_t *p_group; + ftree_port_t *p_port; + ftree_port_t *p_min_port; + uint16_t j; + uint16_t k; + boolean_t created_route = FALSE; + boolean_t routed = 0; + uint8_t least_hops; + + /* if there is no down-going ports */ + if (p_sw->down_port_groups_num == 0) + return FALSE; + + /* foreach down-going port group (in load order) */ + bubble_sort_up(p_sw->down_port_groups, p_sw->down_port_groups_num); + + if (p_sw->sibling_port_groups_num > 0) + bubble_sort_siblings(p_sw->sibling_port_groups, + p_sw->sibling_port_groups_num); + + for (k = 0; + k < + (p_sw->down_port_groups_num + + ((target_lid != 0) ? p_sw->sibling_port_groups_num : 0)); k++) { + + if (k < p_sw->down_port_groups_num) { + p_group = p_sw->down_port_groups[k]; + } else { + p_group = + p_sw->sibling_port_groups[k - + p_sw-> + down_port_groups_num]; + } + + /* If this port group doesn't point to a switch, mark + that the route was created and skip to the next group */ + if (p_group->remote_node_type != IB_NODE_TYPE_SWITCH) { + created_route = TRUE; + continue; + } + + if (p_prev_sw + && p_group->remote_lid == p_prev_sw->lid) { + /* This port group has a port that was used when we entered this switch, + which means that the current group points to the switch where we were + at the previous step of the algorithm (before going up). + Skipping this group. 
*/ + continue; + } + + /* find the least loaded port of the group (in indexing order) */ + p_min_port = NULL; + ports_num = (uint16_t) cl_ptr_vector_get_size(&p_group->ports); + if(ports_num == 0) + continue; + + for (j = 0; j < ports_num; j++) { + cl_ptr_vector_at(&p_group->ports, j, (void *)&p_port); + /* first port that we're checking - set as port with the lowest load */ + /* or this port is less loaded - use it as min */ + if (!p_min_port || + p_port->counter_up < p_min_port->counter_up) + p_min_port = p_port; + } + /* At this point we have selected a port in this group with the + lowest load of upgoing routes. + Set on the remote switch how to get to the target_lid - + set LFT(target_lid) on the remote switch to the remote port */ + p_remote_sw = p_group->remote_hca_or_sw.p_sw; + least_hops = sw_get_least_hops(p_remote_sw, target_lid); + + if (least_hops != OSM_NO_PATH) { + /* Loop in the fabric - we already routed the remote switch + on our way UP, and now we see it again on our way DOWN */ + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Loop of length %d in the fabric:\n " + "Switch %s (LID %u) closes loop through switch %s (LID %u)\n", + current_hops, + tuple_to_str(p_remote_sw->tuple), + p_group->lid, + tuple_to_str(p_sw->tuple), + p_group->remote_lid); + /* We skip only if we have come through a longer path */ + if (current_hops + 1 >= least_hops) + continue; + } + + /* Four possible cases: + * + * 1. is_main_path == TRUE: + * - going DOWN(TRUE,TRUE) through ALL the groups + * + promoting port counter + * + setting path in remote switch fwd tbl + * + setting hops in remote switch on all the ports of each group + * + * 2. 
is_main_path == FALSE: + * - going DOWN(TRUE,FALSE) through ALL the groups but only if + * the remote (lower) switch hasn't been already configured + * for this target LID (or with a longer path) + * + promoting port counter + * + setting path in remote switch fwd tbl if it hasn't been set yet + * + setting hops in remote switch on all the ports of each group + * if it hasn't been set yet + */ + + /* setting fwd tbl port only */ + p_remote_sw->p_osm_sw->new_lft[target_lid] = + p_min_port->remote_port_num; + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Switch %s: set path to CA LID %u through port %u\n", + tuple_to_str(p_remote_sw->tuple), + target_lid, p_min_port->remote_port_num); + + /* On the remote switch that is pointed by the p_group, + set hops for ALL the ports in the remote group. */ + + set_hops_on_remote_sw(p_group, target_lid, + current_hops + 1, is_target_a_sw); + + /* Recursion step: + Assign upgoing ports by stepping down, starting on REMOTE switch */ + routed = fabric_route_upgoing_by_going_down(p_ftree, p_remote_sw, /* remote switch - used as a route-upgoing alg. start point */ + NULL, /* prev. 
position - NULL to mark that we went down and not up */ + target_lid, /* LID that we're routing to */ + is_main_path, /* whether this is path to HCA that should by tracked by counters */ + is_target_a_sw, /* Whether target lid is a switch or not */ + current_hops + 1); /* Number of hops done to this point */ + created_route |= routed; + /* Counters are promoted only if a route toward a node is created */ + if (routed) { + p_min_port->counter_up++; + p_group->counter_up++; + p_group->hca_or_sw.p_sw->counter_up_changed = TRUE; + } + } + /* done scanning all the down-going port groups */ + + /* if the route was created, promote the index that + indicates which group should we start with when + going through all the downgoing groups */ + if (created_route) + p_sw->down_port_groups_idx = (p_sw->down_port_groups_idx + 1) + % p_sw->down_port_groups_num; + + return created_route; +} /* fabric_route_upgoing_by_going_down() */ + +/***************************************************/ + +/* + * Function: assign-down-going-port-by-ascending-up + * Given : a switch and a LID + * Pseudo code: + * find the least loaded port of all the upgoing groups (scan in indexing order) + * assign the LFT(LID) of remote switch to that port + * track that port usage + * assign-up-going-port-by-descending-down on CURRENT switch + * assign-down-going-port-by-ascending-up on REMOTE switch (recursion) + */ + +static boolean_t +fabric_route_downgoing_by_going_up(IN ftree_fabric_t * p_ftree, + IN ftree_sw_t * p_sw, + IN ftree_sw_t * p_prev_sw, + IN uint16_t target_lid, + IN boolean_t is_main_path, + IN boolean_t is_target_a_sw, + IN uint16_t reverse_hop_credit, + IN uint16_t reverse_hops, + IN uint8_t current_hops) +{ + ftree_sw_t *p_remote_sw; + uint16_t ports_num; + ftree_port_group_t *p_group; + ftree_port_t *p_port; + ftree_port_group_t *p_min_group; + ftree_port_t *p_min_port; + uint16_t i; + uint16_t j; + boolean_t created_route = FALSE; + boolean_t routed = FALSE; + + + /* Assign upgoing ports 
by stepping down, starting on THIS switch */ + created_route = fabric_route_upgoing_by_going_down(p_ftree, p_sw, /* local switch - used as a route-upgoing alg. start point */ + p_prev_sw, /* switch that we went up from (NULL means that we went down) */ + target_lid, /* LID that we're routing to */ + is_main_path, /* whether this path to HCA should by tracked by counters */ + is_target_a_sw, /* Whether target lid is a switch or not */ + current_hops); /* Number of hops done up to this point */ + + /* recursion stop condition - if it's a root switch, */ + if (p_sw->rank == 0) { + if (reverse_hop_credit > 0) { + /* We go up by going down as we have some reverse_hop_credit left */ + /* We use the index to scatter a bit the reverse up routes */ + p_sw->down_port_groups_idx = + (p_sw->down_port_groups_idx + + 1) % p_sw->down_port_groups_num; + i = p_sw->down_port_groups_idx; + for (j = 0; j < p_sw->down_port_groups_num; j++) { + + p_group = p_sw->down_port_groups[i]; + i = (i + 1) % p_sw->down_port_groups_num; + + /* Skip this port group unless it points to a switch */ + if (p_group->remote_node_type != + IB_NODE_TYPE_SWITCH) + continue; + p_remote_sw = p_group->remote_hca_or_sw.p_sw; + + created_route |= fabric_route_downgoing_by_going_up(p_ftree, p_remote_sw, /* remote switch - used as a route-downgoing alg. next step point */ + p_sw, /* this switch - prev. 
position switch for the function */ + target_lid, /* LID that we're routing to */ + is_main_path, /* whether this is path to HCA that should by tracked by counters */ + is_target_a_sw, /* Whether target lid is a switch or not */ + reverse_hop_credit - 1, /* Remaining reverse_hops allowed */ + reverse_hops + 1, /* Number of reverse_hops done up to this point */ + current_hops + + + 1); + } + + } + return created_route; + } + + /* We should generate a list of port sorted by load so we can find easily the least + * going port and explore the other pots on secondary routes more easily (and quickly) */ + bubble_sort_down(p_sw->up_port_groups, p_sw->up_port_groups_num); + + p_min_group = p_sw->up_port_groups[0]; + /* Find the least loaded upgoing port in the selected group */ + p_min_port = NULL; + ports_num = (uint16_t) cl_ptr_vector_get_size(&p_min_group->ports); + for (j = 0; j < ports_num; j++) { + cl_ptr_vector_at(&p_min_group->ports, j, (void *)&p_port); + if (!p_min_port) { + /* first port that we're checking - use + it as a port with the lowest load */ + p_min_port = p_port; + } else if (p_port->counter_down < p_min_port->counter_down) { + /* this port is less loaded - use it as min */ + p_min_port = p_port; + } + } + + /* At this point we have selected a group and port with the + lowest load of downgoing routes. + Set on the remote switch how to get to the target_lid - + set LFT(target_lid) on the remote switch to the remote port */ + p_remote_sw = p_min_group->remote_hca_or_sw.p_sw; + + /* Four possible cases: + * + * 1. 
is_main_path == TRUE: + * - going UP(TRUE,TRUE) on selected min_group and min_port + * + promoting port counter + * + setting path in remote switch fwd tbl + * + setting hops in remote switch on all the ports of selected group + * - going UP(TRUE,FALSE) on rest of the groups, each time on port 0 + * + NOT promoting port counter + * + setting path in remote switch fwd tbl if it hasn't been set yet + * + setting hops in remote switch on all the ports of each group + * if it hasn't been set yet + * + * 2. is_main_path == FALSE: + * - going UP(TRUE,FALSE) on ALL the groups, each time on port 0, + * but only if the remote (upper) switch hasn't been already + * configured for this target LID + * + NOT promoting port counter + * + setting path in remote switch fwd tbl if it hasn't been set yet + * + setting hops in remote switch on all the ports of each group + * if it hasn't been set yet + */ + + /* covering first half of case 1, and case 3 */ + if (is_main_path) { + if (p_sw->is_leaf) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + " - Routing MAIN path for %s CA LID %u: %s --> %s\n", + (target_lid != 0) ? "real" : "DUMMY", + target_lid, + tuple_to_str(p_sw->tuple), + tuple_to_str(p_remote_sw->tuple)); + } + /* The number of downgoing routes is tracked in the + p_group->counter_down p_port->counter_down counters of the + group and port that belong to the lower side of the link + (on switch with higher rank) */ + p_min_group->counter_down++; + p_min_port->counter_down++; + if (p_min_group->counter_down == + (p_min_group->remote_hca_or_sw.p_sw->min_counter_down + + 1)) { + recalculate_min_counter_down + (p_min_group->remote_hca_or_sw.p_sw); + } + + /* This LID may already be in the LFT in the reverse_hop feature is used */ + /* We update the LFT only if this LID isn't already present. 
*/ + + /* skip if target lid has been already set on remote switch fwd tbl (with a bigger hop count) */ + if ((p_remote_sw->p_osm_sw->new_lft[target_lid] == OSM_NO_PATH) + || + (current_hops + 1 < + sw_get_least_hops(p_remote_sw, target_lid))) { + + p_remote_sw->p_osm_sw->new_lft[target_lid] = + p_min_port->remote_port_num; + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Switch %s: set path to CA LID %u through port %u\n", + tuple_to_str(p_remote_sw->tuple), + target_lid, + p_min_port->remote_port_num); + + /* On the remote switch that is pointed by the min_group, + set hops for ALL the ports in the remote group. */ + + set_hops_on_remote_sw(p_min_group, target_lid, + current_hops + 1, + is_target_a_sw); + } + /* Recursion step: Assign downgoing ports by stepping up, starting on REMOTE switch. */ + created_route |= fabric_route_downgoing_by_going_up(p_ftree, + p_remote_sw, /* remote switch - used as a route-downgoing alg. next step point */ + p_sw, /* this switch - prev. position switch for the function */ + target_lid, /* LID that we're routing to */ + is_main_path, /* whether this is path to HCA that should by tracked by counters */ + is_target_a_sw, /* Whether target lid is a switch or not */ + reverse_hop_credit, /* Remaining reverse_hops allowed */ + reverse_hops, /* Number of reverse_hops done up to this point */ + current_hops + 1); + } + + /* What's left to do at this point: + * + * 1. is_main_path == TRUE: + * - going UP(TRUE,FALSE) on rest of the groups, each time on port 0, + * but only if the remote (upper) switch hasn't been already + * configured for this target LID + * + NOT promoting port counter + * + setting path in remote switch fwd tbl if it hasn't been set yet + * + setting hops in remote switch on all the ports of each group + * if it hasn't been set yet + * + * 2. 
is_main_path == FALSE: + * - going UP(TRUE,FALSE) on ALL the groups, each time on port 0, + * but only if the remote (upper) switch hasn't been already + * configured for this target LID + * + NOT promoting port counter + * + setting path in remote switch fwd tbl if it hasn't been set yet + * + setting hops in remote switch on all the ports of each group + * if it hasn't been set yet + * + * These two rules can be rephrased this way: + * - foreach UP port group + * + if remote switch has been set with the target LID + * - skip this port group + * + else + * - select port 0 + * - do NOT promote port counter + * - set path in remote switch fwd tbl + * - set hops in remote switch on all the ports of this group + * - go UP(TRUE,FALSE) to the remote switch + */ + + for (i = is_main_path ? 1 : 0; i < p_sw->up_port_groups_num; i++) { + p_group = p_sw->up_port_groups[i]; + p_remote_sw = p_group->remote_hca_or_sw.p_sw; + + /* skip if target lid has been already set on remote switch fwd tbl (with a bigger hop count) */ + if (p_remote_sw->p_osm_sw->new_lft[target_lid] != OSM_NO_PATH) + if (current_hops + 1 >= + sw_get_least_hops(p_remote_sw, target_lid)) + continue; + + if (p_sw->is_leaf) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + " - Routing SECONDARY path for LID %u: %s --> %s\n", + target_lid, + tuple_to_str(p_sw->tuple), + tuple_to_str(p_remote_sw->tuple)); + } + + /* Routing REAL lids on SECONDARY path means routing + switch-to-switch or switch-to-CA paths. + We can safely assume that switch will initiate very + few traffic, so there's no point wasting runtime on + trying to balance these routes - always pick port 0. 
*/ + p_min_port = NULL; + ports_num = (uint16_t) cl_ptr_vector_get_size(&p_group->ports); + if(ports_num == 0) + continue; + for (j = 0; j < ports_num; j++) { + cl_ptr_vector_at(&p_group->ports, j, (void *)&p_port); + if (!p_min_port) { + /* first port that we're checking - use + it as a port with the lowest load */ + p_min_port = p_port; + } else if (p_port->counter_down < + p_min_port->counter_down) { + /* this port is less loaded - use it as min */ + p_min_port = p_port; + } + } + + p_port = p_min_port; + p_remote_sw->p_osm_sw->new_lft[target_lid] = + p_port->remote_port_num; + + /* On the remote switch that is pointed by the p_group, + set hops for ALL the ports in the remote group. */ + + set_hops_on_remote_sw(p_group, target_lid, + current_hops + 1, is_target_a_sw); + + /* Recursion step: + Assign downgoing ports by stepping up, starting on REMOTE switch. */ + routed = fabric_route_downgoing_by_going_up(p_ftree, p_remote_sw, /* remote switch - used as a route-downgoing alg. next step point */ + p_sw, /* this switch - prev. 
position switch for the function */ + target_lid, /* LID that we're routing to */ + FALSE, /* whether this is path to HCA that should by tracked by counters */ + is_target_a_sw, /* Whether target lid is a switch or not */ + reverse_hop_credit, /* Remaining reverse_hops allowed */ + reverse_hops, /* Number of reverse_hops done up to this point */ + current_hops + 1); + created_route |= routed; + } + + /* Now doing the same thing with horizontal links */ + if (p_sw->sibling_port_groups_num > 0) + bubble_sort_down(p_sw->sibling_port_groups, + p_sw->sibling_port_groups_num); + + for (i = 0; i < p_sw->sibling_port_groups_num; i++) { + p_group = p_sw->sibling_port_groups[i]; + p_remote_sw = p_group->remote_hca_or_sw.p_sw; + + /* skip if target lid has been already set on remote switch fwd tbl (with a bigger hop count) */ + if (p_remote_sw->p_osm_sw->new_lft[target_lid] != OSM_NO_PATH) + if (current_hops + 1 >= + sw_get_least_hops(p_remote_sw, target_lid)) + continue; + + if (p_sw->is_leaf) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + " - Routing SECONDARY path for LID %u: %s --> %s\n", + target_lid, + tuple_to_str(p_sw->tuple), + tuple_to_str(p_remote_sw->tuple)); + } + + /* Routing REAL lids on SECONDARY path means routing + switch-to-switch or switch-to-CA paths. + We can safely assume that switch will initiate very + few traffic, so there's no point wasting runtime on + trying to balance these routes - always pick port 0. 
*/ + + p_min_port = NULL; + ports_num = (uint16_t) cl_ptr_vector_get_size(&p_group->ports); + for (j = 0; j < ports_num; j++) { + cl_ptr_vector_at(&p_group->ports, j, (void *)&p_port); + if (!p_min_port) { + /* first port that we're checking - use + it as a port with the lowest load */ + p_min_port = p_port; + } else if (p_port->counter_down < + p_min_port->counter_down) { + /* this port is less loaded - use it as min */ + p_min_port = p_port; + } + } + + p_port = p_min_port; + p_remote_sw->p_osm_sw->new_lft[target_lid] = + p_port->remote_port_num; + + /* On the remote switch that is pointed by the p_group, + set hops for ALL the ports in the remote group. */ + + set_hops_on_remote_sw(p_group, target_lid, + current_hops + 1, is_target_a_sw); + + /* Recursion step: + Assign downgoing ports by stepping up, starting on REMOTE switch. */ + routed = fabric_route_downgoing_by_going_up(p_ftree, p_remote_sw, /* remote switch - used as a route-downgoing alg. next step point */ + p_sw, /* this switch - prev. 
position switch for the function */ + target_lid, /* LID that we're routing to */ + FALSE, /* whether this is path to HCA that should by tracked by counters */ + is_target_a_sw, /* Whether target lid is a switch or not */ + reverse_hop_credit, /* Remaining reverse_hops allowed */ + reverse_hops, /* Number of reverse_hops done up to this point */ + current_hops + 1); + created_route |= routed; + if (routed) { + p_min_group->counter_down++; + p_min_port->counter_down++; + } + } + + /* If we don't have any reverse hop credits, we are done */ + if (reverse_hop_credit == 0) + return created_route; + + if (p_sw->is_leaf) + return created_route; + + /* We explore all the down group ports */ + /* We try to reverse jump for each of them */ + /* They already have a route to us from the upgoing_by_going_down started earlier */ + /* This is only so it'll continue exploring up, after this step backwards */ + for (i = 0; i < p_sw->down_port_groups_num; i++) { + p_group = p_sw->down_port_groups[i]; + p_remote_sw = p_group->remote_hca_or_sw.p_sw; + + /* Skip this port group unless it points to a switch */ + if (p_group->remote_node_type != IB_NODE_TYPE_SWITCH) + continue; + + /* Recursion step: + Assign downgoing ports by stepping up, fter doing one step down starting on REMOTE switch. */ + created_route |= fabric_route_downgoing_by_going_up(p_ftree, p_remote_sw, /* remote switch - used as a route-downgoing alg. next step point */ + p_sw, /* this switch - prev. 
position switch for the function */ + target_lid, /* LID that we're routing to */ + TRUE, /* whether this is path to HCA that should by tracked by counters */ + is_target_a_sw, /* Whether target lid is a switch or not */ + reverse_hop_credit - 1, /* Remaining reverse_hops allowed */ + reverse_hops + 1, /* Number of reverse_hops done up to this point */ + current_hops + + 1); + } + return created_route; + +} /* ftree_fabric_route_downgoing_by_going_up() */ + +/***************************************************/ + +/* + * Pseudo code: + * foreach leaf switch (in indexing order) + * for each compute node (in indexing order) + * obtain the LID of the compute node + * set local LFT(LID) of the port connecting to compute node + * call assign-down-going-port-by-ascending-up(TRUE,TRUE) on CURRENT switch + * for each MISSING compute node + * call assign-down-going-port-by-ascending-up(FALSE,TRUE) on CURRENT switch + */ + +static void fabric_route_to_cns(IN ftree_fabric_t * p_ftree) +{ + ftree_sw_t *p_sw; + ftree_hca_t *p_hca; + ftree_port_group_t *p_leaf_port_group; + ftree_port_group_t *p_hca_port_group; + ftree_port_t *p_port; + unsigned int i, j; + uint16_t hca_lid; + unsigned routed_targets_on_leaf; + + OSM_LOG_ENTER(&p_ftree->p_osm->log); + + /* for each leaf switch (in indexing order) */ + for (i = 0; i < p_ftree->leaf_switches_num; i++) { + p_sw = p_ftree->leaf_switches[i]; + routed_targets_on_leaf = 0; + + /* for each HCA connected to this switch */ + for (j = 0; j < p_sw->down_port_groups_num; j++) { + p_leaf_port_group = p_sw->down_port_groups[j]; + + /* work with this port group only if the remote node is CA */ + if (p_leaf_port_group->remote_node_type != + IB_NODE_TYPE_CA) + continue; + + p_hca = p_leaf_port_group->remote_hca_or_sw.p_hca; + + /* work with this port group only if remote HCA has CNs */ + if (!p_hca->cn_num) + continue; + + p_hca_port_group = + hca_get_port_group_by_lid(p_hca, + p_leaf_port_group-> + remote_lid); + CL_ASSERT(p_hca_port_group); + + 
/* work with this port group only if remote port is CN */ + if (!p_hca_port_group->is_cn) + continue; + + /* obtain the LID of HCA port */ + hca_lid = p_leaf_port_group->remote_lid; + + /* set local LFT(LID) to the port that is connected to HCA */ + cl_ptr_vector_at(&p_leaf_port_group->ports, 0, + (void *)&p_port); + p_sw->p_osm_sw->new_lft[hca_lid] = p_port->port_num; + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Switch %s: set path to CN LID %u through port %u\n", + tuple_to_str(p_sw->tuple), + hca_lid, p_port->port_num); + + /* set local min hop table(LID) to route to the CA */ + sw_set_hops(p_sw, hca_lid, p_port->port_num, 1, FALSE); + + /* Assign downgoing ports by stepping up. + Since we're routing here only CNs, we're routing it as REAL + LID and updating fat-tree balancing counters. */ + fabric_route_downgoing_by_going_up(p_ftree, p_sw, /* local switch - used as a route-downgoing alg. start point */ + NULL, /* prev. position switch */ + hca_lid, /* LID that we're routing to */ + TRUE, /* whether this path to HCA should by tracked by counters */ + FALSE, /* whether target lid is a switch or not */ + 0, /* Number of reverse hops allowed */ + 0, /* Number of reverse hops done yet */ + 1); /* Number of hops done yet */ + + /* count how many real targets have been routed from this leaf switch */ + routed_targets_on_leaf++; + } + + /* We're done with the real targets (all CNs) of this leaf switch. + Now route the dummy HCAs that are missing or that are non-CNs. + When routing to dummy HCAs we don't fill lid matrices. 
*/ + if (p_ftree->max_cn_per_leaf > routed_targets_on_leaf) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Routing %u dummy CAs\n", + p_ftree->max_cn_per_leaf - + p_sw->down_port_groups_num); + for (j = 0; j < + p_ftree->max_cn_per_leaf - routed_targets_on_leaf; + j++) { + ftree_sw_t *p_next_sw, *p_ftree_sw; + sw_set_hops(p_sw, 0, 0xFF, 1, FALSE); + /* assign downgoing ports by stepping up */ + fabric_route_downgoing_by_going_up(p_ftree, p_sw, /* local switch - used as a route-downgoing alg. start point */ + NULL, /* prev. position switch */ + 0, /* LID that we're routing to - ignored for dummy HCA */ + TRUE, /* whether this path to HCA should by tracked by counters */ + FALSE, /* Whether the target LID is a switch or not */ + 0, /* Number of reverse hops allowed */ + 0, /* Number of reverse hops done yet */ + 1); /* Number of hops done yet */ + + p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); + /* need to clean the LID 0 hops for dummy node */ + while (p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) { + p_ftree_sw = p_next_sw; + p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_ftree_sw->map_item); + p_ftree_sw->hops[0] = OSM_NO_PATH; + p_ftree_sw->p_osm_sw->new_lft[0] = OSM_NO_PATH; + } + + } + } + } + /* done going through all the leaf switches */ + OSM_LOG_EXIT(&p_ftree->p_osm->log); +} /* fabric_route_to_cns() */ + +/***************************************************/ + +/* + * Pseudo code: + * foreach HCA non-CN port in fabric + * obtain the LID of the HCA port + * get switch that is connected to this HCA port + * set switch LFT(LID) to the port connected to the HCA port + * call assign-down-going-port-by-ascending-up(TRUE,TRUE) on the switch + * + * Routing to these HCAs is routing a REAL hca lid on MAIN path. 
+ * We want to allow load-leveling of the traffic to the non-CNs, + * because such nodes may include IO nodes with heavy usage + * - we should set fwd tables + * - we should update port counters + * Routing to non-CNs is done after routing to CNs, so updated port + * counters will not affect CN-to-CN routing. + */ + +static void fabric_route_to_non_cns(IN ftree_fabric_t * p_ftree) +{ + ftree_sw_t *p_sw; + ftree_hca_t *p_hca; + ftree_hca_t *p_next_hca; + ftree_port_t *p_hca_port; + ftree_port_group_t *p_hca_port_group; + uint16_t hca_lid; + unsigned port_num_on_switch; + unsigned i; + + OSM_LOG_ENTER(&p_ftree->p_osm->log); + + p_next_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl); + while (p_next_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) { + p_hca = p_next_hca; + p_next_hca = (ftree_hca_t *) cl_qmap_next(&p_hca->map_item); + + for (i = 0; i < p_hca->up_port_groups_num; i++) { + p_hca_port_group = p_hca->up_port_groups[i]; + + /* skip this port if it's CN, in which case it has been already routed */ + if (p_hca_port_group->is_cn) + continue; + + /* skip this port if it is not connected to switch */ + if (p_hca_port_group->remote_node_type != + IB_NODE_TYPE_SWITCH) + continue; + + p_sw = p_hca_port_group->remote_hca_or_sw.p_sw; + hca_lid = p_hca_port_group->lid; + + /* set switches LFT(LID) to the port that is connected to HCA */ + cl_ptr_vector_at(&p_hca_port_group->ports, 0, + (void *)&p_hca_port); + port_num_on_switch = p_hca_port->remote_port_num; + p_sw->p_osm_sw->new_lft[hca_lid] = port_num_on_switch; + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Switch %s: set path to non-CN HCA LID %u through port %u\n", + tuple_to_str(p_sw->tuple), + hca_lid, port_num_on_switch); + + /* set local min hop table(LID) to route to the CA */ + sw_set_hops(p_sw, hca_lid, port_num_on_switch, /* port num */ + 1, FALSE); /* hops */ + + /* Assign downgoing ports by stepping up. + We're routing REAL targets. 
They are not CNs and not included + in the leafs array, but we treat them as MAIN path to allow load + leveling, which means that the counters will be updated. */ + fabric_route_downgoing_by_going_up(p_ftree, p_sw, /* local switch - used as a route-downgoing alg. start point */ + NULL, /* prev. position switch */ + hca_lid, /* LID that we're routing to */ + TRUE, /* whether this path to HCA should by tracked by counters */ + FALSE, /* Whether the target LID is a switch or not */ + p_hca_port_group->is_io ? p_ftree->p_osm->subn.opt.max_reverse_hops : 0, /* Number or reverse hops allowed */ + 0, /* Number or reverse hops done yet */ + 1); /* Number of hops done yet */ + } + /* done with all the port groups of this HCA - go to next HCA */ + } + + OSM_LOG_EXIT(&p_ftree->p_osm->log); +} /* fabric_route_to_non_cns() */ + +/***************************************************/ + +/* + * Pseudo code: + * foreach switch in fabric + * obtain its LID + * set local LFT(LID) to port 0 + * call assign-down-going-port-by-ascending-up(TRUE,FALSE) on CURRENT switch + * + * Routing to switch is similar to routing a REAL hca lid on SECONDARY path: + * - we should set fwd tables + * - we should NOT update port counters + */ + +static void fabric_route_to_switches(IN ftree_fabric_t * p_ftree) +{ + ftree_sw_t *p_sw; + ftree_sw_t *p_next_sw; + + OSM_LOG_ENTER(&p_ftree->p_osm->log); + + p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); + while (p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) { + p_sw = p_next_sw; + p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item); + + /* set local LFT(LID) to 0 (route to itself) */ + p_sw->p_osm_sw->new_lft[p_sw->lid] = 0; + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Switch %s (LID %u): routing switch-to-switch paths\n", + tuple_to_str(p_sw->tuple), p_sw->lid); + + /* set min hop table of the switch to itself */ + sw_set_hops(p_sw, p_sw->lid, 0, /* port_num */ + 0, TRUE); /* hops */ + + 
fabric_route_downgoing_by_going_up(p_ftree, p_sw, /* local switch - used as a route-downgoing alg. start point */ + NULL, /* prev. position switch */ + p_sw->lid, /* LID that we're routing to */ + FALSE, /* whether this path to HCA should by tracked by counters */ + TRUE, /* Whether the target LID is a switch or not */ + 0, /* Number of reverse hops allowed */ + 0, /* Number of reverse hops done yet */ + 0); /* Number of hops done yet */ + } + + OSM_LOG_EXIT(&p_ftree->p_osm->log); +} /* fabric_route_to_switches() */ + +/*************************************************** + ***************************************************/ + +static void fabric_route_roots(IN ftree_fabric_t * p_ftree) +{ + uint16_t lid; + uint8_t port_num; + osm_port_t *p_port; + ftree_sw_t *p_sw; + ftree_sw_t *p_leaf_sw; + + OSM_LOG_ENTER(&p_ftree->p_osm->log); + + /* + * We need a switch that will accomodate all the down/up turns in + * the fabric. Having these turn in a single place in the fabric + * will not create credit loops. + * So we need to select this switch. + * The idea here is to chose leaf with the highest index. I don't + * have any theory to back me up on this. It's just a general thought + * that this way the switch that might be a bottleneck for many mcast + * groups will be far away from the OpenSM, so it will draw the + * multicast traffic away from the SM. + */ + + p_leaf_sw = p_ftree->leaf_switches[p_ftree->leaf_switches_num-1]; + + /* + * Now go over all the switches in the fabric that + * have lower rank, and route the missing LIDs to + * the selected leaf switch. + * In short, this leaf switch now poses a target + * for all those missing LIDs. 
+ */ + + for (p_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); + p_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl); + p_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item)) { + + if (p_sw->rank >= p_ftree->leaf_switch_rank) + continue; + + for (lid = 1; lid <= p_leaf_sw->p_osm_sw->max_lid_ho; lid ++) { + + if (p_sw->p_osm_sw->new_lft[lid] != OSM_NO_PATH || + p_leaf_sw->hops[lid] == OSM_NO_PATH) + continue; + + p_port = osm_get_port_by_lid_ho(&p_ftree->p_osm->subn, + lid); + + /* we're interested only in switches */ + if (!p_port || !p_port->p_node->sw) + continue; + + /* + * the missing LID will be routed through the same + * port that routes to the selected leaf switch + */ + port_num = p_sw->p_osm_sw->new_lft[p_leaf_sw->lid]; + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Switch %s: setting path to LID %u " + "through port %u\n", + tuple_to_str(p_sw->tuple), lid, port_num); + + /* set local lft */ + p_sw->p_osm_sw->new_lft[lid] = port_num; + + /* + * Set local min hop table. + * The distance to the target LID is a distance + * to the selected leaf switch plus the distance + * from the leaf to the target LID. 
+ */ + sw_set_hops(p_sw, lid, port_num, + p_sw->hops[p_leaf_sw->lid] + + p_leaf_sw->hops[lid], TRUE); + } + } + + OSM_LOG_EXIT(&p_ftree->p_osm->log); +} /* fabric_route_roots() */ + +/***************************************************/ + +static int fabric_populate_nodes(IN ftree_fabric_t * p_ftree) +{ + osm_node_t *p_osm_node; + osm_node_t *p_next_osm_node; + + OSM_LOG_ENTER(&p_ftree->p_osm->log); + + p_next_osm_node = + (osm_node_t *) cl_qmap_head(&p_ftree->p_osm->subn.node_guid_tbl); + while (p_next_osm_node != + (osm_node_t *) cl_qmap_end(&p_ftree->p_osm-> + subn.node_guid_tbl)) { + p_osm_node = p_next_osm_node; + p_next_osm_node = + (osm_node_t *) cl_qmap_next(&p_osm_node->map_item); + switch (osm_node_get_type(p_osm_node)) { + case IB_NODE_TYPE_CA: + fabric_add_hca(p_ftree, p_osm_node); + break; + case IB_NODE_TYPE_ROUTER: + break; + case IB_NODE_TYPE_SWITCH: + fabric_add_sw(p_ftree, p_osm_node->sw); + break; + default: + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, + "ERR AB0E: " "Node GUID 0x%016" PRIx64 + " - Unknown node type: %s\n", + cl_ntoh64(osm_node_get_node_guid(p_osm_node)), + ib_get_node_type_str(osm_node_get_type + (p_osm_node))); + OSM_LOG_EXIT(&p_ftree->p_osm->log); + return -1; + } + } + + OSM_LOG_EXIT(&p_ftree->p_osm->log); + return 0; +} /* fabric_populate_nodes() */ + +/*************************************************** + ***************************************************/ + +static boolean_t sw_update_rank(IN ftree_sw_t * p_sw, IN uint32_t new_rank) +{ + if (sw_ranked(p_sw) && p_sw->rank <= new_rank) + return FALSE; + p_sw->rank = new_rank; + return TRUE; + +} + +/***************************************************/ + +static void rank_switches_from_leafs(IN ftree_fabric_t * p_ftree, + IN cl_list_t * p_ranking_bfs_list) +{ + ftree_sw_t *p_sw; + ftree_sw_t *p_remote_sw; + osm_node_t *p_node; + osm_node_t *p_remote_node; + osm_physp_t *p_osm_port; + uint8_t i; + unsigned max_rank = 0; + + while (!cl_is_list_empty(p_ranking_bfs_list)) { + 
p_sw = (ftree_sw_t *) cl_list_remove_head(p_ranking_bfs_list); + p_node = p_sw->p_osm_sw->p_node; + + /* note: skipping port 0 on switches */ + for (i = 1; i < osm_node_get_num_physp(p_node); i++) { + p_osm_port = osm_node_get_physp_ptr(p_node, i); + if (!p_osm_port || !osm_link_is_healthy(p_osm_port)) + continue; + + p_remote_node = + osm_node_get_remote_node(p_node, i, NULL); + if (!p_remote_node) + continue; + if (osm_node_get_type(p_remote_node) != + IB_NODE_TYPE_SWITCH) + continue; + + p_remote_sw = fabric_get_sw_by_guid(p_ftree, + osm_node_get_node_guid + (p_remote_node)); + if (!p_remote_sw) { + /* remote node is not a switch */ + continue; + } + + /* if needed, rank the remote switch and add it to the BFS list */ + if (sw_update_rank(p_remote_sw, p_sw->rank + 1)) { + max_rank = p_remote_sw->rank; + cl_list_insert_tail(p_ranking_bfs_list, + p_remote_sw); + } + } + } + + /* set FatTree maximal switch rank */ + p_ftree->max_switch_rank = max_rank; + +} /* rank_switches_from_leafs() */ + +/***************************************************/ + +static int rank_leaf_switches(IN ftree_fabric_t * p_ftree, + IN ftree_hca_t * p_hca, + IN cl_list_t * p_ranking_bfs_list) +{ + ftree_sw_t *p_sw; + osm_node_t *p_osm_node = p_hca->p_osm_node; + osm_node_t *p_remote_osm_node; + osm_physp_t *p_osm_port; + static uint8_t i = 0; + int res = 0; + + OSM_LOG_ENTER(&p_ftree->p_osm->log); + + for (i = 0; i < osm_node_get_num_physp(p_osm_node); i++) { + p_osm_port = osm_node_get_physp_ptr(p_osm_node, i); + if (!p_osm_port || !osm_link_is_healthy(p_osm_port)) + continue; + + p_remote_osm_node = + osm_node_get_remote_node(p_osm_node, i, NULL); + if (!p_remote_osm_node) + continue; + + switch (osm_node_get_type(p_remote_osm_node)) { + case IB_NODE_TYPE_CA: + /* HCA connected directly to another HCA - not FatTree */ + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, + "ERR AB0F: " + "CA connected directly to another CA: " "0x%016" + PRIx64 " <---> 0x%016" PRIx64 "\n", + 
hca_get_guid_ho(p_hca), + cl_ntoh64(osm_node_get_node_guid + (p_remote_osm_node))); + res = -1; + goto Exit; + + case IB_NODE_TYPE_ROUTER: + /* leaving this port - proceeding to the next one */ + continue; + + case IB_NODE_TYPE_SWITCH: + /* continue with this port */ + break; + + default: + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, + "ERR AB10: Node GUID 0x%016" PRIx64 + " - Unknown node type: %s\n", + cl_ntoh64(osm_node_get_node_guid + (p_remote_osm_node)), + ib_get_node_type_str(osm_node_get_type + (p_remote_osm_node))); + res = -1; + goto Exit; + } + + /* remote node is switch */ + + p_sw = fabric_get_sw_by_guid(p_ftree, + osm_node_get_node_guid + (p_osm_port->p_remote_physp-> + p_node)); + CL_ASSERT(p_sw); + + /* if needed, rank the remote switch and add it to the BFS list */ + + if (!sw_update_rank(p_sw, 0)) + continue; + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Marking rank of switch that is directly connected to CA:\n" + " - CA guid : 0x%016" + PRIx64 "\n" + " - Switch guid: 0x%016" + PRIx64 "\n" + " - Switch LID : %u\n", + hca_get_guid_ho(p_hca), + sw_get_guid_ho(p_sw), p_sw->lid); + cl_list_insert_tail(p_ranking_bfs_list, p_sw); + } + +Exit: + OSM_LOG_EXIT(&p_ftree->p_osm->log); + return res; +} /* rank_leaf_switches() */ + +/***************************************************/ + +static void sw_reverse_rank(IN cl_map_item_t * const p_map_item, + IN void *context) +{ + ftree_fabric_t *p_ftree = (ftree_fabric_t *) context; + ftree_sw_t *p_sw = (ftree_sw_t * const)p_map_item; + if (p_sw->rank != 0xFFFFFFFF) + p_sw->rank = p_ftree->max_switch_rank - p_sw->rank; +} + +/*************************************************** + ***************************************************/ + +static int +fabric_construct_hca_ports(IN ftree_fabric_t * p_ftree, IN ftree_hca_t * p_hca) +{ + ftree_sw_t *p_remote_sw; + osm_node_t *p_node = p_hca->p_osm_node; + osm_node_t *p_remote_node; + uint8_t remote_node_type; + ib_net64_t remote_node_guid; + osm_physp_t 
*p_remote_osm_port; + uint8_t i; + uint8_t remote_port_num; + boolean_t is_cn; + boolean_t is_in_cn_file; + boolean_t is_io; + boolean_t is_cns_file_provided = fabric_cns_provided(p_ftree); + boolean_t is_ios_file_provided = fabric_ios_provided(p_ftree); + int res = 0; + + for (i = 0; i < osm_node_get_num_physp(p_node); i++) { + osm_physp_t *p_osm_port = osm_node_get_physp_ptr(p_node, i); + is_io = FALSE; + is_cn = TRUE; + is_in_cn_file = FALSE; + + if (!p_osm_port || !osm_link_is_healthy(p_osm_port)) + continue; + + if (p_hca->disconnected_ports[i]) + continue; + + p_remote_osm_port = osm_physp_get_remote(p_osm_port); + p_remote_node = + osm_node_get_remote_node(p_node, i, &remote_port_num); + + if (!p_remote_osm_port || !p_remote_node) + continue; + + remote_node_type = osm_node_get_type(p_remote_node); + remote_node_guid = osm_node_get_node_guid(p_remote_node); + + switch (remote_node_type) { + case IB_NODE_TYPE_ROUTER: + /* leaving this port - proceeding to the next one */ + continue; + + case IB_NODE_TYPE_CA: + /* HCA connected directly to another HCA - not FatTree */ + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, + "ERR AB11: " + "CA connected directly to another CA: " "0x%016" + PRIx64 " <---> 0x%016" PRIx64 "\n", + cl_ntoh64(osm_node_get_node_guid(p_node)), + cl_ntoh64(remote_node_guid)); + res = -1; + goto Exit; + + case IB_NODE_TYPE_SWITCH: + /* continue with this port */ + break; + + default: + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, + "ERR AB12: Node GUID 0x%016" PRIx64 + " - Unknown node type: %s\n", + cl_ntoh64(remote_node_guid), + ib_get_node_type_str(remote_node_type)); + res = -1; + goto Exit; + } + + /* remote node is switch */ + + p_remote_sw = fabric_get_sw_by_guid(p_ftree, remote_node_guid); + CL_ASSERT(p_remote_sw); + + /* If CN file is not supplied, then all the CAs considered as Compute Nodes. + Otherwise all the CAs are not CNs, and only guids that are present in the + CN file will be marked as compute nodes. 
*/ + if (is_cns_file_provided == TRUE) { + name_map_item_t *p_elem = (name_map_item_t *) + cl_qmap_get(&p_ftree->cn_guid_tbl, + cl_ntoh64(osm_physp_get_port_guid + (p_osm_port))); + if (p_elem == (name_map_item_t *) + cl_qmap_end(&p_ftree->cn_guid_tbl)) + is_cn = FALSE; + else + is_in_cn_file = TRUE; + } + if (is_in_cn_file == FALSE && is_ios_file_provided == TRUE) { + name_map_item_t *p_elem = (name_map_item_t *) + cl_qmap_get(&p_ftree->io_guid_tbl, + cl_ntoh64(osm_physp_get_port_guid + (p_osm_port))); + if (p_elem != (name_map_item_t *) + cl_qmap_end(&p_ftree->io_guid_tbl)) { + is_io = TRUE; + is_cn = FALSE; + } + } + + if (is_cn) { + p_ftree->cn_num++; + p_hca->cn_num++; + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Marking CN port GUID 0x%016" PRIx64 "\n", + cl_ntoh64(osm_physp_get_port_guid(p_osm_port))); + } else if (is_io) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Marking I/O port GUID 0x%016" PRIx64 "\n", + cl_ntoh64(osm_physp_get_port_guid(p_osm_port))); + } else { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Marking non-CN port GUID 0x%016" PRIx64 "\n", + cl_ntoh64(osm_physp_get_port_guid(p_osm_port))); + } + p_ftree->ca_ports++; + + hca_add_port(p_ftree, + p_hca, /* local ftree_hca object */ + i, /* local port number */ + remote_port_num, /* remote port number */ + cl_ntoh16(osm_node_get_base_lid(p_node, i)), /* local lid */ + cl_ntoh16(osm_node_get_base_lid(p_remote_node, 0)), /* remote lid */ + osm_physp_get_port_guid(p_osm_port), /* local port guid */ + osm_physp_get_port_guid(p_remote_osm_port), /* remote port guid */ + remote_node_guid, /* remote node guid */ + remote_node_type, /* remote node type */ + (void *)p_remote_sw, /* remote ftree_hca/sw object */ + is_cn, is_io); /* whether this port is compute node */ + } + +Exit: + return res; +} /* fabric_construct_hca_ports() */ + +/*************************************************** + ***************************************************/ + +static int fabric_construct_sw_ports(IN 
ftree_fabric_t * p_ftree, + IN ftree_sw_t * p_sw) +{ + ftree_hca_t *p_remote_hca; + ftree_sw_t *p_remote_sw; + osm_node_t *p_node = p_sw->p_osm_sw->p_node; + osm_node_t *p_remote_node; + uint16_t remote_lid; + uint8_t remote_node_type; + ib_net64_t remote_node_guid; + osm_physp_t *p_remote_osm_port; + ftree_direction_t direction; + void *p_remote_hca_or_sw; + uint8_t i; + uint8_t remote_port_num; + int res = 0; + + CL_ASSERT(osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH); + + for (i = 1; i < osm_node_get_num_physp(p_node); i++) { + osm_physp_t *p_osm_port = osm_node_get_physp_ptr(p_node, i); + if (!p_osm_port || !osm_link_is_healthy(p_osm_port)) + continue; + + p_remote_osm_port = osm_physp_get_remote(p_osm_port); + if (!p_remote_osm_port) + continue; + + p_remote_node = + osm_node_get_remote_node(p_node, i, &remote_port_num); + if (!p_remote_node) + continue; + + /* ignore any loopback connection on switch */ + if (p_node == p_remote_node) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Ignoring loopback on switch GUID 0x%016" PRIx64 + ", LID %u, rank %u\n", + sw_get_guid_ho(p_sw), + p_sw->lid, p_sw->rank); + continue; + } + + remote_node_type = osm_node_get_type(p_remote_node); + remote_node_guid = osm_node_get_node_guid(p_remote_node); + + switch (remote_node_type) { + case IB_NODE_TYPE_ROUTER: + /* leaving this port - proceeding to the next one */ + continue; + + case IB_NODE_TYPE_CA: + /* switch connected to hca */ + + p_remote_hca = + fabric_get_hca_by_guid(p_ftree, remote_node_guid); + CL_ASSERT(p_remote_hca); + + p_remote_hca_or_sw = (void *)p_remote_hca; + direction = FTREE_DIRECTION_DOWN; + + remote_lid = + cl_ntoh16(osm_physp_get_base_lid(p_remote_osm_port)); + break; + + case IB_NODE_TYPE_SWITCH: + /* switch connected to another switch */ + + p_remote_sw = + fabric_get_sw_by_guid(p_ftree, remote_node_guid); + CL_ASSERT(p_remote_sw); + + p_remote_hca_or_sw = (void *)p_remote_sw; + + if (p_sw->rank > p_remote_sw->rank) { + direction = 
FTREE_DIRECTION_UP; + } else if (p_sw->rank == p_remote_sw->rank) { + direction = FTREE_DIRECTION_SAME; + } else + direction = FTREE_DIRECTION_DOWN; + + /* switch LID is only in port 0 port_info structure */ + remote_lid = + cl_ntoh16(osm_node_get_base_lid(p_remote_node, 0)); + + break; + + default: + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, + "ERR AB13: Node GUID 0x%016" PRIx64 + " - Unknown node type: %s\n", + cl_ntoh64(remote_node_guid), + ib_get_node_type_str(remote_node_type)); + res = -1; + goto Exit; + } + sw_add_port(p_sw, /* local ftree_sw object */ + i, /* local port number */ + remote_port_num, /* remote port number */ + p_sw->lid, /* local lid */ + remote_lid, /* remote lid */ + osm_physp_get_port_guid(p_osm_port), /* local port guid */ + osm_physp_get_port_guid(p_remote_osm_port), /* remote port guid */ + remote_node_guid, /* remote node guid */ + remote_node_type, /* remote node type */ + p_remote_hca_or_sw, /* remote ftree_hca/sw object */ + direction); /* port direction (up or down) */ + + /* Track the max lid (in host order) that exists in the fabric */ + if (remote_lid > p_ftree->lft_max_lid) + p_ftree->lft_max_lid = remote_lid; + } + +Exit: + return res; +} /* fabric_construct_sw_ports() */ + +/*************************************************** + ***************************************************/ +struct rank_root_cxt { + ftree_fabric_t *fabric; + cl_list_t *list; +}; +/*************************************************** + ***************************************************/ +static int rank_root_sw_by_guid(void *cxt, uint64_t guid, char *p) +{ + struct rank_root_cxt *c = cxt; + ftree_sw_t *sw; + + sw = fabric_get_sw_by_guid(c->fabric, cl_hton64(guid)); + if (!sw) { + /* the specified root guid wasn't found in the fabric */ + OSM_LOG(&c->fabric->p_osm->log, OSM_LOG_ERROR, "ERR AB24: " + "Root switch GUID 0x%" PRIx64 " not found\n", guid); + return 0; + } + + OSM_LOG(&c->fabric->p_osm->log, OSM_LOG_DEBUG, + "Ranking root switch with GUID 
0x%" PRIx64 "\n", guid); + sw->rank = 0; + cl_list_insert_tail(c->list, sw); + + return 0; +} +/*************************************************** + ***************************************************/ +static boolean_t fabric_load_roots(IN ftree_fabric_t * p_ftree, + IN cl_list_t* p_ranking_bfs_list) +{ + struct rank_root_cxt context; + unsigned num_roots; + + if (p_ranking_bfs_list) { + + /* Rank all the roots and add them to list */ + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Fetching root nodes from file %s\n", + p_ftree->p_osm->subn.opt.root_guid_file); + + context.fabric = p_ftree; + context.list = p_ranking_bfs_list; + if (parse_node_map(p_ftree->p_osm->subn.opt.root_guid_file, + rank_root_sw_by_guid, &context)) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB2A: " + "cannot parse root guids file \'%s\'\n", + p_ftree->p_osm->subn.opt.root_guid_file); + return FALSE; + } + + num_roots = cl_list_count(p_ranking_bfs_list); + if (!num_roots) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB25: " + "No valid roots supplied\n"); + return FALSE; + } + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Ranked %u valid root switches\n", num_roots); + } + return TRUE; +} +/*************************************************** + ***************************************************/ +static int fabric_rank_from_roots(IN ftree_fabric_t * p_ftree, + IN cl_list_t* p_ranking_bfs_list) +{ + osm_node_t *p_osm_node; + osm_node_t *p_remote_osm_node; + osm_physp_t *p_osm_physp; + ftree_sw_t *p_sw; + ftree_sw_t *p_remote_sw; + int res = 0; + unsigned max_rank = 0; + unsigned i; + + OSM_LOG_ENTER(&p_ftree->p_osm->log); + + if (!p_ranking_bfs_list) { + res = -1; + goto Exit; + } + while (!cl_is_list_empty(p_ranking_bfs_list)) { + p_sw = (ftree_sw_t *) cl_list_remove_head(p_ranking_bfs_list); + p_osm_node = p_sw->p_osm_sw->p_node; + + /* note: skipping port 0 on switches */ + for (i = 1; i < osm_node_get_num_physp(p_osm_node); i++) { + p_osm_physp = 
osm_node_get_physp_ptr(p_osm_node, i); + if (!p_osm_physp || !osm_link_is_healthy(p_osm_physp)) + continue; + + p_remote_osm_node = + osm_node_get_remote_node(p_osm_node, i, NULL); + if (!p_remote_osm_node) + continue; + + if (osm_node_get_type(p_remote_osm_node) != + IB_NODE_TYPE_SWITCH) + continue; + + p_remote_sw = fabric_get_sw_by_guid(p_ftree, + osm_node_get_node_guid + (p_remote_osm_node)); + CL_ASSERT(p_remote_sw); + + /* if needed, rank the remote switch and add it to the BFS list */ + if (sw_update_rank(p_remote_sw, p_sw->rank + 1)) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Ranking switch 0x%" PRIx64 + " with rank %u\n", + sw_get_guid_ho(p_remote_sw), + p_remote_sw->rank); + max_rank = p_remote_sw->rank; + cl_list_insert_tail(p_ranking_bfs_list, + p_remote_sw); + } + } + /* done with ports of this switch - go to the next switch in the list */ + } + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Subnet ranking completed. Max Node Rank = %u\n", max_rank); + + /* set FatTree maximal switch rank */ + p_ftree->max_switch_rank = max_rank; + +Exit: + OSM_LOG_EXIT(&p_ftree->p_osm->log); + return res; +} /* fabric_rank_from_roots() */ + +/*************************************************** + ***************************************************/ + +static int fabric_rank_from_hcas(IN ftree_fabric_t * p_ftree) +{ + ftree_hca_t *p_hca; + ftree_hca_t *p_next_hca; + cl_list_t ranking_bfs_list; + int res = 0; + + OSM_LOG_ENTER(&p_ftree->p_osm->log); + + cl_list_init(&ranking_bfs_list, 10); + + /* Mark REVERSED rank of all the switches in the subnet. + Start from switches that are connected to hca's, and + scan all the switches in the subnet. 
*/ + p_next_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl); + while (p_next_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) { + p_hca = p_next_hca; + p_next_hca = (ftree_hca_t *) cl_qmap_next(&p_hca->map_item); + if (rank_leaf_switches(p_ftree, p_hca, &ranking_bfs_list) != 0) { + res = -1; + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, + "ERR AB14: " + "Subnet ranking failed - subnet is not FatTree"); + goto Exit; + } + } + + /* Now rank rest of the switches in the fabric, while the + list already contains all the ranked leaf switches */ + rank_switches_from_leafs(p_ftree, &ranking_bfs_list); + + /* fix ranking of the switches by reversing the ranking direction */ + cl_qmap_apply_func(&p_ftree->sw_tbl, sw_reverse_rank, (void *)p_ftree); + +Exit: + cl_list_destroy(&ranking_bfs_list); + OSM_LOG_EXIT(&p_ftree->p_osm->log); + return res; +} /* fabric_rank_from_hcas() */ + +/*************************************************** + * After ranking from HCA's we want to re-rank using + * the roots + ***************************************************/ +static int fabric_rerank_using_root(IN ftree_fabric_t * p_ftree, + IN cl_list_t* p_ranking_bfs_list) +{ + ftree_sw_t *p_sw = NULL; + ftree_sw_t *p_next_sw; + int res; + + OSM_LOG_ENTER(&p_ftree->p_osm->log); + + p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); + while (p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) { + p_sw = p_next_sw; + p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item); + if (p_sw->rank == 0) + cl_list_insert_tail(p_ranking_bfs_list, p_sw); + else + p_sw->rank = 0xFFFFFFFF; + } + res = fabric_rank_from_roots(p_ftree, p_ranking_bfs_list); + OSM_LOG_EXIT(&p_ftree->p_osm->log); + return res; +} +/*************************************************** + ***************************************************/ +static int fabric_rank(IN ftree_fabric_t * p_ftree) +{ + int res = -1; + cl_list_t ranking_bfs_list; + + OSM_LOG_ENTER(&p_ftree->p_osm->log); + 
cl_list_init(&ranking_bfs_list, 10); + + if (fabric_roots_provided(p_ftree) && + fabric_load_roots(p_ftree, &ranking_bfs_list)) + res = fabric_rank_from_roots(p_ftree, &ranking_bfs_list); + else { + res = fabric_rank_from_hcas(p_ftree); + if (!res) + res = fabric_rerank_using_root(p_ftree, &ranking_bfs_list); + } + + if (res) + goto Exit; + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "FatTree max switch rank is %u\n", p_ftree->max_switch_rank); + +Exit: + cl_list_destroy(&ranking_bfs_list); + OSM_LOG_EXIT(&p_ftree->p_osm->log); + return res; +} /* fabric_rank() */ + +/*************************************************** + ***************************************************/ + +static void fabric_set_leaf_rank(IN ftree_fabric_t * p_ftree) +{ + unsigned i; + ftree_sw_t *p_sw; + ftree_hca_t *p_hca = NULL; + ftree_hca_t *p_next_hca; + + OSM_LOG_ENTER(&p_ftree->p_osm->log); + + if (!fabric_roots_provided(p_ftree)) { + /* If root file is not provided, the fabric has to be pure fat-tree + in terms of ranking. Thus, leaf switches rank is the max rank. */ + p_ftree->leaf_switch_rank = p_ftree->max_switch_rank; + } else { + /* Find the first CN and set the leaf_switch_rank to the rank + of the switch that is connected to this CN. Later we will + ensure that all the leaf switches have the same rank. */ + p_next_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl); + while (p_next_hca != + (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) { + p_hca = p_next_hca; + if (p_hca->cn_num) + break; + p_next_hca = + (ftree_hca_t *) cl_qmap_next(&p_hca->map_item); + } + /* we know that there are CNs in the fabric, so just to be sure... 
*/ + CL_ASSERT(p_next_hca != + (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)); + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Selected CN port GUID 0x%" PRIx64 "\n", + hca_get_guid_ho(p_hca)); + + for (i = 0; (i < p_hca->up_port_groups_num) + && (!p_hca->up_port_groups[i]->is_cn); i++) + ; + + CL_ASSERT(i < p_hca->up_port_groups_num); + CL_ASSERT(p_hca->up_port_groups[i]->remote_node_type == + IB_NODE_TYPE_SWITCH); + + p_sw = p_hca->up_port_groups[i]->remote_hca_or_sw.p_sw; + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Selected leaf switch GUID 0x%" PRIx64 ", rank %u\n", + sw_get_guid_ho(p_sw), p_sw->rank); + p_ftree->leaf_switch_rank = p_sw->rank; + } + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "FatTree leaf switch rank is %u\n", p_ftree->leaf_switch_rank); + OSM_LOG_EXIT(&p_ftree->p_osm->log); +} /* fabric_set_leaf_rank() */ + +/*************************************************** + ***************************************************/ + +static int fabric_populate_ports(IN ftree_fabric_t * p_ftree) +{ + ftree_hca_t *p_hca; + ftree_hca_t *p_next_hca; + ftree_sw_t *p_sw; + ftree_sw_t *p_next_sw; + int res = 0; + + OSM_LOG_ENTER(&p_ftree->p_osm->log); + + p_next_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl); + while (p_next_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) { + p_hca = p_next_hca; + p_next_hca = (ftree_hca_t *) cl_qmap_next(&p_hca->map_item); + if (fabric_construct_hca_ports(p_ftree, p_hca) != 0) { + res = -1; + goto Exit; + } + } + + p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); + while (p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) { + p_sw = p_next_sw; + p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item); + if (fabric_construct_sw_ports(p_ftree, p_sw) != 0) { + res = -1; + goto Exit; + } + } +Exit: + OSM_LOG_EXIT(&p_ftree->p_osm->log); + return res; +} /* fabric_populate_ports() */ + +/*************************************************** + 
***************************************************/
+/*
+ * Callback handed to parse_node_map(): store one GUID in a GUID table.
+ *
+ *   cxt  - the cl_qmap_t that receives the entry
+ *   guid - map key, also recorded in the new item
+ *   p    - not used by this callback
+ *
+ * Returns -1 if the item cannot be allocated, 0 in every other case.
+ */
+static int add_guid_item_to_map(void *cxt, uint64_t guid, char *p)
+{
+	cl_qmap_t *guid_tbl = cxt;
+	name_map_item_t *new_item = malloc(sizeof(*new_item));
+
+	if (new_item == NULL)
+		return -1;
+
+	new_item->guid = guid;
+
+	/* When the map hands back something other than our item, ours was
+	   not stored (presumably the key already exists), so release it.
+	   This is not reported as an error. */
+	if ((name_map_item_t *) cl_qmap_insert(guid_tbl, guid,
+					       &new_item->item) != new_item)
+		free(new_item);
+
+	return 0;
+}
+
+/*
+ * Load the compute-node and I/O-node GUID files, when the corresponding
+ * options are set, into cn_guid_tbl and io_guid_tbl.
+ *
+ * A file that fails to parse, or that yields an empty table, is an error.
+ * Returns 0 on success and -1 on the first failure.
+ */
+static int fabric_read_guid_files(IN ftree_fabric_t * p_ftree)
+{
+	/* Pessimistic default; cleared only after every requested file
+	   has been loaded successfully. */
+	int status = -1;
+
+	OSM_LOG_ENTER(&p_ftree->p_osm->log);
+
+	if (!fabric_cns_provided(p_ftree))
+		goto Read_IOs;
+
+	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
+		"Fetching compute nodes from file %s\n",
+		p_ftree->p_osm->subn.opt.cn_guid_file);
+
+	if (parse_node_map(p_ftree->p_osm->subn.opt.cn_guid_file,
+			   add_guid_item_to_map,
+			   &p_ftree->cn_guid_tbl) != 0) {
+		OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR,
+			"ERR AB23: " "Problem parsing CN guid file\n");
+		goto Exit;
+	}
+
+	if (cl_qmap_count(&p_ftree->cn_guid_tbl) == 0) {
+		OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR,
+			"ERR AB27: "
+			"Compute node guids file has no valid guids\n");
+		goto Exit;
+	}
+
+Read_IOs:
+	if (fabric_ios_provided(p_ftree)) {
+		OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
+			"Fetching I/O nodes from file %s\n",
+			p_ftree->p_osm->subn.opt.io_guid_file);
+
+		if (parse_node_map(p_ftree->p_osm->subn.opt.io_guid_file,
+				   add_guid_item_to_map,
+				   &p_ftree->io_guid_tbl) != 0) {
+			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR,
+				"ERR AB28: Problem parsing I/O guid file\n");
+			goto Exit;
+		}
+
+		if (cl_qmap_count(&p_ftree->io_guid_tbl) == 0) {
+			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR,
+				"ERR AB29: "
+				"I/O node guids file has no valid guids\n");
+			goto Exit;
+		}
+	}
+
+	status = 0;
+Exit:
+	OSM_LOG_EXIT(&p_ftree->p_osm->log);
+	return status;
+}				/*fabric_read_guid_files() */
+
+/***************************************************
+ ***************************************************/
+/* Get a Sw and
remove all depended HCA's, meaning all
+ * HCA's which this is the only switch they are connected
+ * to.
+ *
+ * Nothing is actually deleted here: for every port of p_sw whose peer is
+ * a CA known to the fat-tree, the peer's port number is flagged in that
+ * HCA's disconnected_ports[] table.
+ *
+ * Returns the number of CA ports flagged by this call (one per switch
+ * port that faces a known HCA).
+ */
+static int remove_depended_hca(IN ftree_fabric_t *p_ftree, IN ftree_sw_t *p_sw)
+{
+	ftree_hca_t *p_hca;
+	int counter = 0;
+	int port_num;
+	uint8_t remote_port_num;
+	osm_physp_t* physp;
+	osm_physp_t* remote_physp;
+	osm_node_t* sw_node;
+	uint64_t remote_hca_guid;
+
+	sw_node = p_sw->p_osm_sw->p_node;
+	for (port_num = 0; port_num < sw_node->physp_tbl_size; port_num++) {
+		physp = osm_node_get_physp_ptr(sw_node, port_num);
+		if (!physp || !physp->p_remote_physp)
+			continue;	/* port is not linked to anything */
+
+		remote_physp = physp->p_remote_physp;
+		if (osm_node_get_type(remote_physp->p_node) != IB_NODE_TYPE_CA)
+			continue;	/* only CA peers are of interest */
+
+		remote_hca_guid = osm_node_get_node_guid(remote_physp->p_node);
+		p_hca = fabric_get_hca_by_guid(p_ftree, remote_hca_guid);
+		if (!p_hca)
+			continue;	/* CA is not part of the fat-tree tables */
+
+		remote_port_num = osm_physp_get_port_num(remote_physp);
+		p_hca->disconnected_ports[remote_port_num] = 1;
+		/* The original never incremented the counter, so the caller
+		   always logged "0 dependent hca's". */
+		counter++;
+	}
+	return counter;
+}
+/***************************************************
+ ***************************************************/
+/*
+ * Drop every switch that did not receive a rank from sw_tbl and destroy
+ * it, flagging the CA ports that were attached to it on the way.
+ */
+static void fabric_remove_unranked_sw(IN ftree_fabric_t *p_ftree)
+{
+	ftree_sw_t *p_sw = NULL;
+	ftree_sw_t *p_next_sw;
+	int removed_hca;
+	int count = 0;
+
+	p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl);
+	while (p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) {
+		p_sw = p_next_sw;
+		/* fetch the successor first: p_sw may be unlinked below */
+		p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item);
+		if (!sw_ranked(p_sw)) {
+			cl_qmap_remove_item(&p_ftree->sw_tbl,&p_sw->map_item);
+			removed_hca = remove_depended_hca(p_ftree, p_sw);
+			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
+				"Removing Unranked sw 0x%" PRIx64 " (with %d dependent hca's)\n",
+				sw_get_guid_ho(p_sw),removed_hca);
+			sw_destroy(p_sw);
+			count++;
+		}
+	}
+	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
+		"Removed %d invalid switches\n", count);
+}
+/***************************************************
+ ***************************************************/
+static int construct_fabric(IN void
*context) +{ + ftree_fabric_t *p_ftree = context; + int status = 0; + + OSM_LOG_ENTER(&p_ftree->p_osm->log); + + fabric_clear(p_ftree); + + if (p_ftree->p_osm->subn.opt.lmc > 0) { + osm_log_v2(&p_ftree->p_osm->log, OSM_LOG_INFO, FILE_ID, + "LMC > 0 is not supported by fat-tree routing.\n" + "Falling back to default routing\n"); + status = -1; + goto Exit; + } + + if (cl_qmap_count(&p_ftree->p_osm->subn.sw_guid_tbl) < 2) { + osm_log_v2(&p_ftree->p_osm->log, OSM_LOG_INFO, FILE_ID, + "Fabric has %u switches - topology is not fat-tree.\n" + "Falling back to default routing\n", + cl_qmap_count(&p_ftree->p_osm->subn.sw_guid_tbl)); + status = -1; + goto Exit; + } + + if ((cl_qmap_count(&p_ftree->p_osm->subn.node_guid_tbl) - + cl_qmap_count(&p_ftree->p_osm->subn.sw_guid_tbl)) < 2) { + osm_log_v2(&p_ftree->p_osm->log, OSM_LOG_INFO, FILE_ID, + "Fabric has %u nodes (%u switches) - topology is not fat-tree.\n" + "Falling back to default routing\n", + cl_qmap_count(&p_ftree->p_osm->subn.node_guid_tbl), + cl_qmap_count(&p_ftree->p_osm->subn.sw_guid_tbl)); + status = -1; + goto Exit; + } + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "\n" + " |----------------------------------------|\n" + " |- Starting FatTree fabric construction -|\n" + " |----------------------------------------|\n\n"); + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Populating FatTree Switch and CA tables\n"); + if (fabric_populate_nodes(p_ftree) != 0) { + osm_log_v2(&p_ftree->p_osm->log, OSM_LOG_INFO, FILE_ID, + "Fabric topology is not fat-tree - " + "falling back to default routing\n"); + status = -1; + goto Exit; + } + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Reading guid files provided by user\n"); + if (fabric_read_guid_files(p_ftree) != 0) { + osm_log_v2(&p_ftree->p_osm->log, OSM_LOG_INFO, FILE_ID, + "Failed reading guid files - " + "falling back to default routing\n"); + status = -1; + goto Exit; + } + + if (cl_qmap_count(&p_ftree->hca_tbl) < 2) { + osm_log_v2(&p_ftree->p_osm->log, 
OSM_LOG_INFO, FILE_ID, + "Fabric has %u CAs - topology is not fat-tree.\n" + "Falling back to default routing\n", + cl_qmap_count(&p_ftree->hca_tbl)); + status = -1; + goto Exit; + } + + /* Rank all the switches in the fabric. + After that we will know only fabric max switch rank. + We will be able to check leaf switches rank and the + whole tree rank after filling ports and marking CNs. */ + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Ranking FatTree\n"); + if (fabric_rank(p_ftree) != 0) { + osm_log_v2(&p_ftree->p_osm->log, OSM_LOG_INFO, FILE_ID, + "Failed ranking the tree\n"); + status = -1; + goto Exit; + } + fabric_remove_unranked_sw(p_ftree); + + if (p_ftree->max_switch_rank == 0 && + cl_qmap_count(&p_ftree->sw_tbl) > 1) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, + "ERR AB2B: Found more than one root on fabric with " + "maximum rank 0\n"); + status = -1; + goto Exit; + } + + /* For each hca and switch, construct array of ports. + This is done after the whole FatTree data structure is ready, + because we want the ports to have pointers to ftree_{sw,hca}_t + objects, and we need the switches to be already ranked because + that's how the port direction is determined. */ + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Populating CA & switch ports\n"); + if (fabric_populate_ports(p_ftree) != 0) { + osm_log_v2(&p_ftree->p_osm->log, OSM_LOG_INFO, FILE_ID, + "Fabric topology is not a fat-tree\n"); + status = -1; + goto Exit; + } else if (p_ftree->cn_num == 0) { + osm_log_v2(&p_ftree->p_osm->log, OSM_LOG_INFO, FILE_ID, + "Fabric has no valid compute nodes\n"); + status = -1; + goto Exit; + } + + /* Now that the CA ports have been created and CNs were marked, + we can complete the fabric ranking - set leaf switches rank. 
*/ + fabric_set_leaf_rank(p_ftree); + + if (fabric_get_rank(p_ftree) > FAT_TREE_MAX_RANK || + fabric_get_rank(p_ftree) < FAT_TREE_MIN_RANK) { + osm_log_v2(&p_ftree->p_osm->log, OSM_LOG_INFO, FILE_ID, + "Fabric rank is %u (should be between %u and %u)\n", + fabric_get_rank(p_ftree), FAT_TREE_MIN_RANK, + FAT_TREE_MAX_RANK); + status = -1; + goto Exit; + } + + /* Mark all the switches in the fabric with rank equal to + p_ftree->leaf_switch_rank and that are also connected to CNs. + As a by-product, this function also runs basic topology + validation - it checks that all the CNs are at the same rank. */ + if (fabric_mark_leaf_switches(p_ftree)) { + osm_log_v2(&p_ftree->p_osm->log, OSM_LOG_INFO, FILE_ID, + "Fabric topology is not a fat-tree\n"); + status = -1; + goto Exit; + } + + /* Assign index to all the switches in the fabric. + This function also sorts leaf switch array by the switch index, + sorts all the port arrays of the indexed switches by remote + switch index, and creates switch-by-tuple table (sw_by_tuple_tbl) */ + fabric_make_indexing(p_ftree); + + /* Create leaf switch array sorted by index. + This array contains switches with rank equal to p_ftree->leaf_switch_rank + and that are also connected to CNs (REAL leafs), and it may contain + switches at the same leaf rank w/o CNs, if this is the order of indexing. + In any case, the first and the last switches in the array are REAL leafs. 
*/ + if (fabric_create_leaf_switch_array(p_ftree)) { + osm_log_v2(&p_ftree->p_osm->log, OSM_LOG_INFO, FILE_ID, + "Fabric topology is not a fat-tree\n"); + status = -1; + goto Exit; + } + + /* calculate and set ftree.max_cn_per_leaf field */ + fabric_set_max_cn_per_leaf(p_ftree); + + /* print general info about fabric topology */ + fabric_dump_general_info(p_ftree); + + /* dump full tree topology */ + if (OSM_LOG_IS_ACTIVE_V2(&p_ftree->p_osm->log, OSM_LOG_DEBUG)) + fabric_dump(p_ftree); + + /* the fabric is required to be PURE fat-tree only if the root + guid file hasn't been provided by user */ + if (!fabric_roots_provided(p_ftree) && + !fabric_validate_topology(p_ftree)) { + osm_log_v2(&p_ftree->p_osm->log, OSM_LOG_INFO, FILE_ID, + "Fabric topology is not a fat-tree\n"); + status = -1; + goto Exit; + } + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Max LID in switch LFTs: %u\n", p_ftree->lft_max_lid); + + /* Build the full lid matrices needed for multicast routing */ + osm_ucast_mgr_build_lid_matrices(&p_ftree->p_osm->sm.ucast_mgr); + +Exit: + if (status != 0) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Clearing FatTree Fabric data structures\n"); + fabric_clear(p_ftree); + } else + p_ftree->fabric_built = TRUE; + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "\n" + " |--------------------------------------------------|\n" + " |- Done constructing FatTree fabric (status = %d) -|\n" + " |--------------------------------------------------|\n\n", + status); + + OSM_LOG_EXIT(&p_ftree->p_osm->log); + return status; +} /* construct_fabric() */ + +/*************************************************** + ***************************************************/ + +static int do_routing(IN void *context) +{ + ftree_fabric_t *p_ftree = context; + int status = 0; + + OSM_LOG_ENTER(&p_ftree->p_osm->log); + + if (!p_ftree->fabric_built) { + status = -1; + goto Exit; + } + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Starting FatTree routing\n"); + + 
OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Filling switch forwarding tables for Compute Nodes\n"); + fabric_route_to_cns(p_ftree); + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Filling switch forwarding tables for non-CN targets\n"); + fabric_route_to_non_cns(p_ftree); + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Filling switch forwarding tables for switch-to-switch paths\n"); + fabric_route_to_switches(p_ftree); + + if (p_ftree->p_osm->subn.opt.connect_roots) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Connecting switches that are unreachable within " + "Up/Down rules\n"); + fabric_route_roots(p_ftree); + } + + /* for each switch, set its fwd table */ + cl_qmap_apply_func(&p_ftree->sw_tbl, set_sw_fwd_table, (void *)p_ftree); + + /* write out hca ordering file */ + fabric_dump_hca_ordering(p_ftree); + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "FatTree routing is done\n"); + +Exit: + OSM_LOG_EXIT(&p_ftree->p_osm->log); + return status; +} + +/*************************************************** + ***************************************************/ + +static void delete(IN void *context) +{ + if (!context) + return; + fabric_destroy((ftree_fabric_t *) context); +} + +/*************************************************** + ***************************************************/ + +int osm_ucast_ftree_setup(struct osm_routing_engine *r, osm_opensm_t * p_osm) +{ + ftree_fabric_t *p_ftree = fabric_create(); + if (!p_ftree) + return -1; + + p_ftree->p_osm = p_osm; + p_ftree->p_subn = p_osm->sm.ucast_mgr.p_subn; + + r->context = (void *)p_ftree; + r->build_lid_matrices = construct_fabric; + r->ucast_build_fwd_tables = do_routing; + r->destroy = delete; + + return 0; +} diff --git a/opensm/osm_ucast_lash.c b/opensm/osm_ucast_lash.c new file mode 100644 index 0000000..c8c0a3f --- /dev/null +++ b/opensm/osm_ucast_lash.c @@ -0,0 +1,1331 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. 
+ * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2007 Simula Research Laboratory. All rights reserved. + * Copyright (c) 2007 Silicon Graphics Inc. All rights reserved. + * Copyright (c) 2008,2009 System Fabric Works, Inc. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * Copyright (c) 2009-2011 ZIH, TU Dresden, Federal Republic of Germany. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +/* + * Abstract: + * Implementation of LASH algorithm Calculation functions + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_UCAST_LASH_C +#include +#include +#include +#include +#include + +typedef struct _reachable_dest { + int switch_id; + struct _reachable_dest *next; +} reachable_dest_t; + +static void connect_switches(lash_t * p_lash, int sw1, int sw2, int phy_port_1) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + unsigned num = p_lash->switches[sw1]->node->num_links; + switch_t *s1 = p_lash->switches[sw1]; + mesh_node_t *node = s1->node; + switch_t *s2; + link_t *l; + unsigned int i; + + /* + * if doing mesh analysis: + * - do not consider connections to self + * - collapse multiple connections between + * pair of switches to a single locical link + */ + if (p_lash->p_osm->subn.opt.do_mesh_analysis) { + if (sw1 == sw2) + return; + + /* see if we are already linked to sw2 */ + for (i = 0; i < num; i++) { + l = node->links[i]; + + if (node->links[i]->switch_id == sw2) { + l->ports[l->num_ports++] = phy_port_1; + return; + } + } + } + + l = node->links[num]; + l->switch_id = sw2; + l->link_id = -1; + l->ports[l->num_ports++] = phy_port_1; + + s2 = p_lash->switches[sw2]; + for (i = 0; i < s2->node->num_links; i++) { + if (s2->node->links[i]->switch_id == sw1) { + s2->node->links[i]->link_id = num; + l->link_id = i; + break; + } + } + + node->num_links++; + + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "LASH connect: %d, %d, %d\n", sw1, sw2, phy_port_1); +} + +static osm_switch_t *get_osm_switch_from_port(const osm_port_t * port) +{ + osm_physp_t *p = port->p_physp; + if (p->p_node->sw) + return p->p_node->sw; + else if (p->p_remote_physp && p->p_remote_physp->p_node->sw) + return p->p_remote_physp->p_node->sw; + return NULL; +} + +static int cycle_exists(cdg_vertex_t * start, cdg_vertex_t * current, + cdg_vertex_t * prev, int visit_num) +{ + int i, 
new_visit_num; + int cycle_found = 0; + + if (current != NULL && current->visiting_number > 0) { + if (visit_num > current->visiting_number && current->seen == 0) { + cycle_found = 1; + } + } else { + if (current == NULL) { + current = start; + CL_ASSERT(prev == NULL); + } + + current->visiting_number = visit_num; + + if (prev != NULL) { + prev->next = current; + CL_ASSERT(prev->to == current->from); + CL_ASSERT(prev->visiting_number > 0); + } + + new_visit_num = visit_num + 1; + + for (i = 0; i < current->num_deps; i++) { + cycle_found = + cycle_exists(start, current->deps[i].v, current, + new_visit_num); + if (cycle_found == 1) + i = current->num_deps; + } + + current->seen = 1; + if (prev != NULL) + prev->next = NULL; + } + + return cycle_found; +} + +static inline int get_next_switch(lash_t *p_lash, int sw, int link) +{ + return p_lash->switches[sw]->node->links[link]->switch_id; +} + +static void remove_semipermanent_depend_for_sp(lash_t * p_lash, int sw, + int dest_switch, int lane) +{ + switch_t **switches = p_lash->switches; + cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix; + int i_next_switch, output_link, i, next_link, i_next_next_switch, + depend = 0; + cdg_vertex_t *v; + int __attribute__((unused)) found; + + output_link = switches[sw]->routing_table[dest_switch].out_link; + i_next_switch = get_next_switch(p_lash, sw, output_link); + + while (sw != dest_switch) { + v = cdg_vertex_matrix[lane][sw][i_next_switch]; + CL_ASSERT(v != NULL); + + if (v->num_using_vertex == 1) { + + cdg_vertex_matrix[lane][sw][i_next_switch] = NULL; + + free(v); + } else { + v->num_using_vertex--; + if (i_next_switch != dest_switch) { + next_link = + switches[i_next_switch]->routing_table[dest_switch].out_link; + i_next_next_switch = get_next_switch(p_lash, i_next_switch, next_link); + found = 0; + + for (i = 0; i < v->num_deps; i++) + if (v->deps[i].v == + cdg_vertex_matrix[lane][i_next_switch] + [i_next_next_switch]) { + found = 1; + depend = i; + } + + 
CL_ASSERT(found);
+
+				if (v->deps[depend].num_used == 1) {
+					for (i = depend;
+					     i < v->num_deps - 1; i++) {
+						v->deps[i].v = v->deps[i + 1].v;
+						v->deps[i].num_used =
+						    v->deps[i + 1].num_used;
+					}
+
+					v->num_deps--;
+				} else
+					v->deps[depend].num_used--;
+			}
+		}
+
+		sw = i_next_switch;
+		output_link = switches[sw]->routing_table[dest_switch].out_link;
+
+		if (sw != dest_switch)
+			i_next_switch = get_next_switch(p_lash, sw, output_link);
+	}
+}
+
+/* Append sw to the BFS queue; it must not have been queued before. */
+inline static void enqueue(cl_list_t * bfsq, switch_t * sw)
+{
+	CL_ASSERT(sw->q_state == UNQUEUED);
+	sw->q_state = Q_MEMBER;
+	cl_list_insert_tail(bfsq, sw);
+}
+
+/* Pop the head of the BFS queue into *sw and mark it MST_MEMBER. */
+inline static void dequeue(cl_list_t * bfsq, switch_t ** sw)
+{
+	*sw = (switch_t *) cl_list_remove_head(bfsq);
+	CL_ASSERT((*sw)->q_state == Q_MEMBER);
+	(*sw)->q_state = MST_MEMBER;
+}
+
+/*
+ * Return the index of the first link of sw that leads to switch_to.
+ * When no such link exists the loop falls through and num_links is
+ * returned, which is one past the last valid link index.
+ */
+static int get_phys_connection(switch_t *sw, int switch_to)
+{
+	unsigned int i;
+
+	for (i = 0; i < sw->node->num_links; i++)
+		if (sw->node->links[i]->switch_id == switch_to)
+			return i;
+	return i;
+}
+
+/*
+ * Breadth-first walk over the switch links starting at switches[ir].
+ * Each switch reached for the first time has its id appended to the
+ * dij_channels[] of the switch it was discovered from.
+ */
+static void shortest_path(lash_t * p_lash, int ir)
+{
+	switch_t **switches = p_lash->switches, *sw, *swi;
+	unsigned int i;
+	cl_list_t bfsq;
+
+	cl_list_construct(&bfsq);
+	cl_list_init(&bfsq, 20);
+
+	enqueue(&bfsq, switches[ir]);
+
+	while (!cl_is_list_empty(&bfsq)) {
+		dequeue(&bfsq, &sw);
+		for (i = 0; i < sw->node->num_links; i++) {
+			swi = switches[sw->node->links[i]->switch_id];
+			if (swi->q_state == UNQUEUED) {
+				enqueue(&bfsq, swi);
+				sw->dij_channels[sw->used_channels++] = swi->id;
+			}
+		}
+	}
+
+	cl_list_destroy(&bfsq);
+}
+
+/* Release every node of a reachable_dest_t list; a NULL list is fine. */
+static void free_reachable_dests(reachable_dest_t * list)
+{
+	reachable_dest_t *next;
+
+	while (list != NULL) {
+		next = list->next;
+		free(list);
+		list = next;
+	}
+}
+
+/*
+ * Recursively walk the tree recorded in dij_channels[] below switch sw_id.
+ * For every destination reachable through a child, the out_link of this
+ * switch towards that destination is set, unless it is already set.
+ *
+ * On success returns 0 and stores in *destinations a malloc'ed list that
+ * starts with this switch and continues with everything reachable below
+ * it; the caller frees the list.
+ * On failure returns -1 and sets *destinations to NULL. Lists collected
+ * from children so far are released here (the original leaked them).
+ */
+static int generate_routing_func_for_mst(lash_t * p_lash, int sw_id,
+					 reachable_dest_t ** destinations)
+{
+	int i, next_switch;
+	switch_t *sw = p_lash->switches[sw_id];
+	int num_channels = sw->used_channels;
+	reachable_dest_t *dest, *i_dest, *concat_dest = NULL, *prev;
+
+	*destinations = NULL;
+
+	for (i = 0; i < num_channels; i++) {
+		next_switch = sw->dij_channels[i];
+		if (generate_routing_func_for_mst(p_lash, next_switch, &dest))
+			goto Error;
+
+		i_dest = dest;
+		prev = i_dest;
+
+		while (i_dest != NULL) {
+			if (sw->routing_table[i_dest->switch_id].out_link ==
+			    NONE)
+				sw->routing_table[i_dest->switch_id].out_link =
+				    get_phys_connection(sw, next_switch);
+
+			prev = i_dest;
+			i_dest = i_dest->next;
+		}
+
+		/* splice the child's list in front of what we have so far */
+		CL_ASSERT(prev->next == NULL);
+		prev->next = concat_dest;
+		concat_dest = dest;
+	}
+
+	i_dest = (reachable_dest_t *) malloc(sizeof(reachable_dest_t));
+	if (!i_dest)
+		goto Error;
+	i_dest->switch_id = sw->id;
+	i_dest->next = concat_dest;
+	*destinations = i_dest;
+	return 0;
+
+Error:
+	free_reachable_dests(concat_dest);
+	return -1;
+}
+
+static int generate_cdg_for_sp(lash_t * p_lash, int sw, int dest_switch,
+			       int lane)
+{
+	unsigned num_switches = p_lash->num_switches;
+	switch_t **switches = p_lash->switches;
+	cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix;
+	int next_switch, output_link, j, exists;
+	cdg_vertex_t *v, *prev = NULL;
+
+	output_link = switches[sw]->routing_table[dest_switch].out_link;
+	next_switch = get_next_switch(p_lash, sw, output_link);
+
+	while (sw != dest_switch) {
+
+		if (cdg_vertex_matrix[lane][sw][next_switch] == NULL) {
+			v = calloc(1, sizeof(*v) + (num_switches - 1) * sizeof(v->deps[0]));
+			if (!v)
+				return -1;
+			v->from = sw;
+			v->to = next_switch;
+			v->temp = 1;
+			cdg_vertex_matrix[lane][sw][next_switch] = v;
+		} else
+			v = cdg_vertex_matrix[lane][sw][next_switch];
+
+		v->num_using_vertex++;
+
+		if (prev != NULL) {
+			exists = 0;
+
+			for (j = 0; j < prev->num_deps; j++)
+				if (prev->deps[j].v == v) {
+					exists = 1;
+					prev->deps[j].num_used++;
+				}
+
+			if (exists == 0) {
+				prev->deps[prev->num_deps].v = v;
+				prev->deps[prev->num_deps].num_used++;
+				prev->num_deps++;
+
+				CL_ASSERT(prev->num_deps < (int)num_switches);
+
+				if (prev->temp == 0)
+					prev->num_temp_depend++;
+
+			}
+		}
+
+		sw = next_switch;
+		output_link = switches[sw]->routing_table[dest_switch].out_link;
+
+		if (sw != dest_switch) {
+			CL_ASSERT(output_link != NONE);
+			next_switch = get_next_switch(p_lash, sw, output_link);
+		}
+
+		prev = v;
+	}
+	return
0; +} + +static void set_temp_depend_to_permanent_for_sp(lash_t * p_lash, int sw, + int dest_switch, int lane) +{ + switch_t **switches = p_lash->switches; + cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix; + int next_switch, output_link; + cdg_vertex_t *v; + + output_link = switches[sw]->routing_table[dest_switch].out_link; + next_switch = get_next_switch(p_lash, sw, output_link); + + while (sw != dest_switch) { + v = cdg_vertex_matrix[lane][sw][next_switch]; + CL_ASSERT(v != NULL); + + if (v->temp == 1) + v->temp = 0; + else + v->num_temp_depend = 0; + + sw = next_switch; + output_link = switches[sw]->routing_table[dest_switch].out_link; + + if (sw != dest_switch) + next_switch = get_next_switch(p_lash, sw, output_link); + } + +} + +static void remove_temp_depend_for_sp(lash_t * p_lash, int sw, int dest_switch, + int lane) +{ + switch_t **switches = p_lash->switches; + cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix; + int next_switch, output_link, i; + cdg_vertex_t *v; + + output_link = switches[sw]->routing_table[dest_switch].out_link; + next_switch = get_next_switch(p_lash, sw, output_link); + + while (sw != dest_switch) { + v = cdg_vertex_matrix[lane][sw][next_switch]; + CL_ASSERT(v != NULL); + + if (v->temp == 1) { + cdg_vertex_matrix[lane][sw][next_switch] = NULL; + free(v); + } else { + CL_ASSERT(v->num_temp_depend <= v->num_deps); + v->num_deps = v->num_deps - v->num_temp_depend; + v->num_temp_depend = 0; + v->num_using_vertex--; + + for (i = v->num_deps; i < p_lash->num_switches - 1; i++) + v->deps[i].num_used = 0; + } + + sw = next_switch; + output_link = switches[sw]->routing_table[dest_switch].out_link; + + if (sw != dest_switch) + next_switch = get_next_switch(p_lash, sw, output_link); + + } +} + +static int balance_virtual_lanes(lash_t * p_lash, unsigned lanes_needed) +{ + unsigned num_switches = p_lash->num_switches; + cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix; + int *num_mst_in_lane = 
p_lash->num_mst_in_lane; + int ***virtual_location = p_lash->virtual_location; + int min_filled_lane, max_filled_lane, trials; + int old_min_filled_lane, old_max_filled_lane, new_num_min_lane, + new_num_max_lane; + unsigned int i, j; + int src, dest, start, next_switch, output_link; + int next_switch2, output_link2; + int stop = 0, cycle_found; + int cycle_found2; + unsigned start_vl = p_lash->p_osm->subn.opt.lash_start_vl; + + max_filled_lane = 0; + min_filled_lane = lanes_needed - 1; + + trials = num_mst_in_lane[max_filled_lane]; + if (lanes_needed == 1) + stop = 1; + + while (stop == 0) { + src = abs(rand()) % (num_switches); + dest = abs(rand()) % (num_switches); + + while (virtual_location[src][dest][max_filled_lane] != 1) { + start = dest; + if (dest == num_switches - 1) + dest = 0; + else + dest++; + + while (dest != start + && virtual_location[src][dest][max_filled_lane] + != 1) { + if (dest == num_switches - 1) + dest = 0; + else + dest++; + } + + if (virtual_location[src][dest][max_filled_lane] != 1) { + if (src == num_switches - 1) + src = 0; + else + src++; + } + } + + if (generate_cdg_for_sp(p_lash, src, dest, min_filled_lane) || + generate_cdg_for_sp(p_lash, dest, src, min_filled_lane)) + return -1; + + output_link = p_lash->switches[src]->routing_table[dest].out_link; + next_switch = get_next_switch(p_lash, src, output_link); + + output_link2 = p_lash->switches[dest]->routing_table[src].out_link; + next_switch2 = get_next_switch(p_lash, dest, output_link2); + + CL_ASSERT(cdg_vertex_matrix[min_filled_lane][src][next_switch] != NULL); + CL_ASSERT(cdg_vertex_matrix[min_filled_lane][dest][next_switch2] != NULL); + + cycle_found = + cycle_exists(cdg_vertex_matrix[min_filled_lane][src][next_switch], NULL, NULL, + 1); + cycle_found2 = + cycle_exists(cdg_vertex_matrix[min_filled_lane][dest][next_switch2], NULL, NULL, + 1); + + for (i = 0; i < num_switches; i++) + for (j = 0; j < num_switches; j++) + if (cdg_vertex_matrix[min_filled_lane][i][j] != NULL) { + 
cdg_vertex_matrix[min_filled_lane][i][j]->visiting_number = + 0; + cdg_vertex_matrix[min_filled_lane][i][j]->seen = 0; + } + + if (cycle_found == 1 || cycle_found2 == 1) { + remove_temp_depend_for_sp(p_lash, src, dest, min_filled_lane); + remove_temp_depend_for_sp(p_lash, dest, src, min_filled_lane); + + virtual_location[src][dest][max_filled_lane] = 2; + virtual_location[dest][src][max_filled_lane] = 2; + trials--; + trials--; + } else { + set_temp_depend_to_permanent_for_sp(p_lash, src, dest, min_filled_lane); + set_temp_depend_to_permanent_for_sp(p_lash, dest, src, min_filled_lane); + + num_mst_in_lane[max_filled_lane]--; + num_mst_in_lane[max_filled_lane]--; + num_mst_in_lane[min_filled_lane]++; + num_mst_in_lane[min_filled_lane]++; + + remove_semipermanent_depend_for_sp(p_lash, src, dest, max_filled_lane); + remove_semipermanent_depend_for_sp(p_lash, dest, src, max_filled_lane); + virtual_location[src][dest][max_filled_lane] = 0; + virtual_location[dest][src][max_filled_lane] = 0; + virtual_location[src][dest][min_filled_lane] = 1; + virtual_location[dest][src][min_filled_lane] = 1; + p_lash->switches[src]->routing_table[dest].lane = min_filled_lane + start_vl; + p_lash->switches[dest]->routing_table[src].lane = min_filled_lane + start_vl; + } + + if (trials == 0) + stop = 1; + else { + if (num_mst_in_lane[max_filled_lane] - num_mst_in_lane[min_filled_lane] < + p_lash->balance_limit) + stop = 1; + } + + old_min_filled_lane = min_filled_lane; + old_max_filled_lane = max_filled_lane; + + new_num_min_lane = MAX_INT; + new_num_max_lane = 0; + + for (i = 0; i < lanes_needed; i++) { + + if (num_mst_in_lane[i] < new_num_min_lane) { + new_num_min_lane = num_mst_in_lane[i]; + min_filled_lane = i; + } + + if (num_mst_in_lane[i] > new_num_max_lane) { + new_num_max_lane = num_mst_in_lane[i]; + max_filled_lane = i; + } + } + + if (old_min_filled_lane != min_filled_lane) { + trials = num_mst_in_lane[max_filled_lane]; + for (i = 0; i < num_switches; i++) + for (j = 0; j < 
num_switches; j++)
+					if (virtual_location[i][j][max_filled_lane] == 2)
+						virtual_location[i][j][max_filled_lane] = 1;
+		}
+
+		if (old_max_filled_lane != max_filled_lane) {
+			trials = num_mst_in_lane[max_filled_lane];
+			for (i = 0; i < num_switches; i++)
+				for (j = 0; j < num_switches; j++)
+					if (virtual_location[i][j][old_max_filled_lane] == 2)
+						virtual_location[i][j][old_max_filled_lane] = 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Allocate the LASH bookkeeping for one OpenSM switch.
+ *
+ * The routing table (one entry per switch in the fabric) is allocated
+ * together with the switch_t and every entry starts out as NONE/NONE.
+ * p_sw->priv is pointed at the new object.
+ *
+ * Returns the new switch_t, or NULL on allocation failure or when
+ * osm_mesh_node_create() fails.
+ */
+static switch_t *switch_create(lash_t * p_lash, unsigned id, osm_switch_t * p_sw)
+{
+	unsigned num_switches = p_lash->num_switches;
+	unsigned num_ports = p_sw->num_ports;
+	switch_t *sw;
+	unsigned int i;
+
+	sw = malloc(sizeof(*sw) + num_switches * sizeof(sw->routing_table[0]));
+	if (!sw)
+		return NULL;
+
+	memset(sw, 0, sizeof(*sw));
+	for (i = 0; i < num_switches; i++) {
+		sw->routing_table[i].out_link = NONE;
+		sw->routing_table[i].lane = NONE;
+	}
+
+	sw->id = id;
+	sw->dij_channels = malloc(num_ports * sizeof(int));
+	if (!sw->dij_channels) {
+		free(sw);
+		return NULL;
+	}
+
+	sw->p_sw = p_sw;
+	p_sw->priv = sw;
+
+	if (osm_mesh_node_create(p_lash, sw)) {
+		/* do not leave the OpenSM switch pointing at freed memory */
+		p_sw->priv = NULL;
+		free(sw->dij_channels);
+		free(sw);
+		return NULL;
+	}
+
+	return sw;
+}
+
+/* Free a switch_t and its dij_channels[] array. p_lash is not used. */
+static void switch_delete(lash_t *p_lash, switch_t * sw)
+{
+	if (sw->dij_channels)
+		free(sw->dij_channels);
+	free(sw);
+}
+
+/* Call osm_mesh_node_delete() for every non-NULL entry of switches[]. */
+static void delete_mesh_switches(lash_t *p_lash)
+{
+	if (p_lash->switches) {
+		unsigned id;
+		for (id = 0; ((int)id) < p_lash->num_switches; id++)
+			if (p_lash->switches[id])
+				osm_mesh_node_delete(p_lash,
+						     p_lash->switches[id]);
+	}
+}
+
+/*
+ * Release cdg_vertex_matrix and virtual_location.
+ *
+ * Every level is checked for NULL before it is indexed, so this is safe
+ * to call after init_lash_structures() failed part-way (the tables are
+ * zero-filled there). The original tested the pointers only after it
+ * had already dereferenced them. Both table pointers are reset to NULL
+ * afterwards so that a second call is harmless.
+ */
+static void free_lash_structures(lash_t * p_lash)
+{
+	unsigned int i, j, k;
+	unsigned num_switches = p_lash->num_switches;
+	osm_log_t *p_log = &p_lash->p_osm->log;
+
+	OSM_LOG_ENTER(p_log);
+
+	delete_mesh_switches(p_lash);
+
+	/* free cdg_vertex_matrix */
+	if (p_lash->cdg_vertex_matrix) {
+		for (i = 0; i < p_lash->vl_min; i++) {
+			if (!p_lash->cdg_vertex_matrix[i])
+				continue;
+			for (j = 0; j < num_switches; j++) {
+				if (!p_lash->cdg_vertex_matrix[i][j])
+					continue;
+				for (k = 0; k < num_switches; k++)
+					free(p_lash->cdg_vertex_matrix[i][j][k]);
+				free(p_lash->cdg_vertex_matrix[i][j]);
+			}
+			free(p_lash->cdg_vertex_matrix[i]);
+		}
+		free(p_lash->cdg_vertex_matrix);
+		p_lash->cdg_vertex_matrix = NULL;
+	}
+
+	/* free virtual_location */
+	if (p_lash->virtual_location) {
+		for (i = 0; i < num_switches; i++) {
+			if (!p_lash->virtual_location[i])
+				continue;
+			for (j = 0; j < num_switches; j++)
+				free(p_lash->virtual_location[i][j]);
+			free(p_lash->virtual_location[i]);
+		}
+		free(p_lash->virtual_location);
+		p_lash->virtual_location = NULL;
+	}
+
+	OSM_LOG_EXIT(p_log);
+}
+
+/*
+ * Allocate and zero the per-run LASH tables:
+ *   cdg_vertex_matrix[vl_min][num_switches][num_switches]  (all NULL)
+ *   virtual_location[num_switches][num_switches][vl_min]   (all 0)
+ * and clear num_mst_in_lane[].
+ *
+ * Every table level comes from calloc(), so after a partial failure the
+ * slots that were never reached are NULL and free_lash_structures() can
+ * walk the tables safely.
+ *
+ * Returns 0 on success, -1 (after logging ERR 4D01) when an allocation
+ * fails. Whatever was allocated before the failure is left in place for
+ * free_lash_structures() to release.
+ */
+static int init_lash_structures(lash_t * p_lash)
+{
+	unsigned vl_min = p_lash->vl_min;
+	unsigned num_switches = p_lash->num_switches;
+	osm_log_t *p_log = &p_lash->p_osm->log;
+	int status = 0;
+	unsigned int i, j;
+
+	OSM_LOG_ENTER(p_log);
+
+	/* Start from a known state: if the first allocation below fails,
+	   the second table must not be left holding a stale pointer. */
+	p_lash->cdg_vertex_matrix = NULL;
+	p_lash->virtual_location = NULL;
+
+	/* initialise cdg_vertex_matrix[num_layers][num_switches][num_switches] */
+	p_lash->cdg_vertex_matrix =
+	    (cdg_vertex_t ****) calloc(vl_min, sizeof(cdg_vertex_t ***));
+	if (p_lash->cdg_vertex_matrix == NULL)
+		goto Exit_Mem_Error;
+
+	for (i = 0; i < vl_min; i++) {
+		p_lash->cdg_vertex_matrix[i] =
+		    (cdg_vertex_t ***) calloc(num_switches,
+					      sizeof(cdg_vertex_t **));
+		if (p_lash->cdg_vertex_matrix[i] == NULL)
+			goto Exit_Mem_Error;
+
+		for (j = 0; j < num_switches; j++) {
+			/* calloc leaves every vertex slot NULL */
+			p_lash->cdg_vertex_matrix[i][j] =
+			    (cdg_vertex_t **) calloc(num_switches,
+						     sizeof(cdg_vertex_t *));
+			if (p_lash->cdg_vertex_matrix[i][j] == NULL)
+				goto Exit_Mem_Error;
+		}
+	}
+
+	/*
+	 * initialise virtual_location[num_switches][num_switches][num_layers],
+	 * default value = 0
+	 */
+	p_lash->virtual_location =
+	    (int ***)calloc(num_switches, sizeof(int **));
+	if (p_lash->virtual_location == NULL)
+		goto Exit_Mem_Error;
+
+	for (i = 0; i < num_switches; i++) {
+		p_lash->virtual_location[i] =
+		    (int **)calloc(num_switches, sizeof(int *));
+		if (p_lash->virtual_location[i] == NULL)
+			goto Exit_Mem_Error;
+
+		for (j = 0; j < num_switches; j++) {
+			/* NOTE(review): the element size is sizeof(int *)
+			   although the elements are int. This over-allocates
+			   and is kept as in the original because code outside
+			   this view indexes the array - confirm before
+			   shrinking it to sizeof(int). */
+			p_lash->virtual_location[i][j] =
+			    (int *)calloc(vl_min, sizeof(int *));
+			if (p_lash->virtual_location[i][j] == NULL)
+				goto Exit_Mem_Error;
+		}
+	}
+
+	/* initialise num_mst_in_lane[num_switches], default 0 */
+	memset(p_lash->num_mst_in_lane, 0,
+	       IB_MAX_NUM_VLS * sizeof(p_lash->num_mst_in_lane[0]));
+
+	goto Exit;
+
+Exit_Mem_Error:
+	status = -1;
+	OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D01: "
+		"Could not allocate required memory for LASH errno %d, errno %d for lack of memory\n",
+		errno, ENOMEM);
+
+Exit:
+	OSM_LOG_EXIT(p_log);
+	return status;
+}
+
+static int lash_core(lash_t * p_lash)
+{
+	osm_log_t *p_log = &p_lash->p_osm->log;
+	unsigned num_switches = p_lash->num_switches;
+	switch_t **switches = p_lash->switches;
+	unsigned lanes_needed = 1;
+	unsigned int i, j, k, dest_switch = 0;
+	reachable_dest_t *dests, *idest;
+	int cycle_found = 0;
+	unsigned v_lane;
+	int stop = 0, output_link, i_next_switch;
+	int output_link2, i_next_switch2;
+	int cycle_found2 = 0;
+	int status = -1;
+	int *switch_bitmap = NULL;	/* Bitmap to check if we have processed this pair */
+	unsigned start_vl = p_lash->p_osm->subn.opt.lash_start_vl;
+
+	OSM_LOG_ENTER(p_log);
+
+	if (p_lash->p_osm->subn.opt.do_mesh_analysis && osm_do_mesh_analysis(p_lash)) {
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D05: Mesh analysis failed\n");
+		goto Exit;
+	}
+
+	for (i = 0; i < num_switches; i++) {
+
+		shortest_path(p_lash, i);
+		if (generate_routing_func_for_mst(p_lash, i, &dests)) {
+			OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D06: "
+				"generate_routing_func_for_mst failed\n");
+			goto Exit;
+		}
+
+		idest = dests;
+		while (idest != NULL) {
+			dests = dests->next;
+			free(idest);
+			idest = dests;
+		}
+
+		for (j = 0; j < num_switches; j++) {
+
switches[j]->used_channels = 0;
+			switches[j]->q_state = UNQUEUED;
+		}
+	}
+	/* one zero-initialised flag per ordered (source, destination) switch pair */
+	switch_bitmap = calloc(num_switches * num_switches, sizeof(int));
+	if (!switch_bitmap) {
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D04: "
+			"Failed allocating switch_bitmap - out of memory\n");
+		goto Exit;
+	}
+	/*
+	 * Assign a lane to every pair of distinct switches. Both directions
+	 * of a pair are handled together and flagged in switch_bitmap, so
+	 * each unordered pair is processed exactly once.
+	 */
+	for (i = 0; i < num_switches; i++) {
+		for (dest_switch = 0; dest_switch < num_switches; dest_switch++)
+			if (dest_switch != i && switch_bitmap[i * num_switches + dest_switch] == 0) {
+				v_lane = 0;
+				stop = 0;
+				/* try the lanes in use, lowest first, until one takes the pair */
+				while (v_lane < lanes_needed && stop == 0) {
+					/* tentatively add both directions to this lane's dependency graph */
+					if (generate_cdg_for_sp(p_lash, i, dest_switch, v_lane) ||
+					    generate_cdg_for_sp(p_lash, dest_switch, i, v_lane)) {
+						OSM_LOG(p_log, OSM_LOG_ERROR,
+							"ERR 4D07: generate_cdg_for_sp failed\n");
+						goto Exit;
+					}
+
+					output_link =
+					    switches[i]->routing_table[dest_switch].out_link;
+					output_link2 =
+					    switches[dest_switch]->routing_table[i].out_link;
+
+					i_next_switch = get_next_switch(p_lash, i, output_link);
+					i_next_switch2 = get_next_switch(p_lash, dest_switch, output_link2);
+
+					CL_ASSERT(p_lash->
+						  cdg_vertex_matrix[v_lane][i][i_next_switch] !=
+						  NULL);
+					CL_ASSERT(p_lash->
+						  cdg_vertex_matrix[v_lane][dest_switch]
+						  [i_next_switch2] != NULL);
+					/* check for a cycle starting at the first hop of each direction */
+					cycle_found =
+					    cycle_exists(p_lash->
+							 cdg_vertex_matrix[v_lane][i]
+							 [i_next_switch], NULL, NULL, 1);
+					cycle_found2 =
+					    cycle_exists(p_lash->
+							 cdg_vertex_matrix[v_lane][dest_switch]
+							 [i_next_switch2], NULL, NULL, 1);
+					/* reset the per-vertex marks (presumably left by cycle_exists()) */
+					for (j = 0; j < num_switches; j++)
+						for (k = 0; k < num_switches; k++)
+							if (p_lash->
+							    cdg_vertex_matrix[v_lane][j][k] !=
+							    NULL) {
+								p_lash->
+								    cdg_vertex_matrix[v_lane][j]
+								    [k]->visiting_number = 0;
+								p_lash->
+								    cdg_vertex_matrix[v_lane][j]
+								    [k]->seen = 0;
+							}
+
+					if (cycle_found == 1 || cycle_found2 == 1) {
+						/* this lane would cycle: undo and move to the next lane */
+						remove_temp_depend_for_sp(p_lash, i, dest_switch,
+									  v_lane);
+						remove_temp_depend_for_sp(p_lash, dest_switch, i,
+									  v_lane);
+						v_lane++;
+					} else {
+						/* no cycle: keep both directions on this lane */
+						set_temp_depend_to_permanent_for_sp(p_lash, i,
+										    dest_switch,
+										    v_lane);
+						set_temp_depend_to_permanent_for_sp(p_lash,
+										    dest_switch, i,
+										    v_lane);
+						stop = 1;
+						/* counted twice, presumably once per direction */
+						p_lash->num_mst_in_lane[v_lane]++;
+						p_lash->num_mst_in_lane[v_lane]++;
+					}
+				}
+				/* lanes are numbered from start_vl on the wire */
+				switches[i]->routing_table[dest_switch].lane = v_lane + start_vl;
+				switches[dest_switch]->routing_table[i].lane = v_lane + start_vl;
+
+				if (cycle_found == 1 || cycle_found2 == 1) {
+					/*
+					 * every lane in use was rejected, so v_lane now equals
+					 * the old lanes_needed: open that lane, if one is left
+					 */
+					if (++lanes_needed > p_lash->vl_min)
+						goto Error_Not_Enough_Lanes;
+
+					if (generate_cdg_for_sp(p_lash, i, dest_switch, v_lane) ||
+					    generate_cdg_for_sp(p_lash, dest_switch, i, v_lane)) {
+						OSM_LOG(p_log, OSM_LOG_ERROR,
+							"ERR 4D08: generate_cdg_for_sp failed\n");
+						goto Exit;
+					}
+
+					set_temp_depend_to_permanent_for_sp(p_lash, i, dest_switch,
+									    v_lane);
+					set_temp_depend_to_permanent_for_sp(p_lash, dest_switch, i,
+									    v_lane);
+
+					p_lash->num_mst_in_lane[v_lane]++;
+					p_lash->num_mst_in_lane[v_lane]++;
+				}
+				p_lash->virtual_location[i][dest_switch][v_lane] = 1;
+				p_lash->virtual_location[dest_switch][i][v_lane] = 1;
+				/* flag both orderings so the reverse pair is skipped later */
+				switch_bitmap[i * num_switches + dest_switch] = 1;
+				switch_bitmap[dest_switch * num_switches + i] = 1;
+			}
+	}
+	/* report the per-lane counts before balancing */
+	for (i = 0; i < lanes_needed; i++)
+		OSM_LOG(p_log, OSM_LOG_INFO, "Lanes in layer %d: %d\n",
+			i, p_lash->num_mst_in_lane[i]);
+
+	OSM_LOG(p_log, OSM_LOG_INFO,
+		"Lanes needed: %d, Balancing\n", lanes_needed);
+
+	if (balance_virtual_lanes(p_lash, lanes_needed)) {
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D09: Balancing failed\n");
+		goto Exit;
+	}
+	/* and again after balancing */
+	for (i = 0; i < lanes_needed; i++)
+		OSM_LOG(p_log, OSM_LOG_INFO, "Lanes in layer %d: %d\n",
+			i, p_lash->num_mst_in_lane[i]);
+
+	status = 0;
+	goto Exit;
+
+Error_Not_Enough_Lanes:
+	OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D02: "
+		"Lane requirements (%d) exceed available lanes (%d)"
+		" with starting lane (%d)\n",
+		lanes_needed, p_lash->vl_min, start_vl);
+Exit:	/* common exit: status is still -1 unless the success path set it to 0 */
+	if (switch_bitmap)
+		free(switch_bitmap);
+	OSM_LOG_EXIT(p_log);
+	return status;
+}
+/*
+ * Return the LASH id stored in the switch_t hung off p_sw->priv.
+ * p_sw->priv is dereferenced unchecked, so it must be non-NULL.
+ */
+static unsigned get_lash_id(osm_switch_t * p_sw)
+{
+	return ((switch_t *) p_sw->priv)->id;
+}
+
+/*
+ * Return the physical port to use for the next route over logical link
+ * 'link' of 'sw'. The link's ports are handed out in rotation: the port
+ * at the link's cursor is returned and the cursor moves on, wrapping to
+ * 0 once it reaches num_ports.
+ */
+static int get_next_port(switch_t *sw, int link)
+{
+	link_t *lnk = sw->node->links[link];
+	int chosen = lnk->next_port;
+
+	/*
+	 * advance the cursor; note if not doing mesh analysis
+	 * then num_ports is always 1, so it wraps straight back to 0
+	 */
+	lnk->next_port++;
+	if (lnk->next_port >= lnk->num_ports)
+		lnk->next_port = 0;
+
+	return lnk->ports[chosen];
+}
+
+/*
+ * Rebuild new_lft for every switch in the subnet from the LASH routing
+ * tables. Each table is first filled with OSM_NO_PATH; then, for every
+ * LID from 1 to the switch's max_lid_ho that belongs to a port:
+ *  - if the port hangs off this very switch, the entry is port 0 when the
+ *    port's node is itself a switch, otherwise the port number of the
+ *    remote end of the port's link;
+ *  - if it hangs off another switch, the entry is the next physical port
+ *    of the LASH out_link towards that switch.
+ * LIDs whose port cannot be mapped to a switch keep OSM_NO_PATH.
+ */
+static void populate_fwd_tbls(lash_t * p_lash)
+{
+	osm_log_t *p_log = &p_lash->p_osm->log;
+	osm_subn_t *p_subn = &p_lash->p_osm->subn;
+	osm_switch_t *cur_sw, *following_sw;
+
+	OSM_LOG_ENTER(p_log);
+
+	/* Go through each switch individually */
+	for (cur_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl);
+	     cur_sw != (osm_switch_t *) cl_qmap_end(&p_subn->sw_guid_tbl);
+	     cur_sw = following_sw) {
+		uint64_t src_guid;
+		switch_t *lash_sw;
+		uint16_t last_lid, lid;
+
+		following_sw = (osm_switch_t *) cl_qmap_next(&cur_sw->map_item);
+
+		last_lid = cur_sw->max_lid_ho;
+		src_guid = cur_sw->p_node->node_info.port_guid;
+		lash_sw = cur_sw->priv;
+
+		/* start from a table without any route */
+		memset(cur_sw->new_lft, OSM_NO_PATH, cur_sw->lft_size);
+
+		for (lid = 1; lid <= last_lid; lid++) {
+			osm_switch_t *dst_sw;
+			osm_port_t *dst_port =
+			    osm_get_port_by_lid_ho(p_subn, lid);
+
+			if (!dst_port)
+				continue;
+
+			dst_sw = get_osm_switch_from_port(dst_port);
+			if (!dst_sw)
+				continue;
+
+			if (dst_sw != cur_sw) {
+				/* remote destination: follow the LASH route */
+				unsigned dst_lash_switch_id =
+				    get_lash_id(dst_sw);
+				uint8_t lash_egress_port =
+				    (uint8_t) lash_sw->
+				    routing_table[dst_lash_switch_id].out_link;
+				uint8_t physical_egress_port =
+				    get_next_port(lash_sw, lash_egress_port);
+
+				cur_sw->new_lft[lid] = physical_egress_port;
+				OSM_LOG(p_log, OSM_LOG_VERBOSE,
+					"LASH fwd SRC GUID 0x%016" PRIx64
+					" src lash id (%d), "
+					"src lid no (%u) src lash port (%d) "
+					"DST GUID 0x%016" PRIx64
+					" src lash id (%d), src lash port (%d)\n",
+					cl_ntoh64(src_guid), lash_sw->id, lid,
+					lash_egress_port,
+					cl_ntoh64(dst_sw->p_node->node_info.
+						  port_guid),
+					dst_lash_switch_id,
+					physical_egress_port);
+			} else {
+				/* the LID is this switch itself or a port attached to it */
+				uint8_t egress_port = port_is_switch(dst_port) ?
+				    0 :
+				    dst_port->p_physp->p_remote_physp->port_num;
+
+				cur_sw->new_lft[lid] = egress_port;
+				OSM_LOG(p_log, OSM_LOG_VERBOSE,
+					"LASH fwd MY SRC SRC GUID 0x%016" PRIx64
+					" src lash id (%d), src lid no (%u) src lash port (%d) "
+					"DST GUID 0x%016" PRIx64
+					" src lash id (%d), src lash port (%d)\n",
+					cl_ntoh64(src_guid), -1, lid,
+					egress_port, cl_ntoh64(src_guid),
+					-1, egress_port);
+			}
+		}		/* for */
+	}
+	OSM_LOG_EXIT(p_log);
+}
+
+static void osm_lash_process_switch(lash_t * p_lash, osm_switch_t * p_sw)
+{
+	osm_log_t *p_log = &p_lash->p_osm->log;
+	int i, port_count;
+	osm_physp_t *p_current_physp, *p_remote_physp;
+	unsigned switch_a_lash_id, switch_b_lash_id;
+
+	OSM_LOG_ENTER(p_log);
+
+	switch_a_lash_id = get_lash_id(p_sw);
+	port_count = osm_node_get_num_physp(p_sw->p_node);
+
+	/* starting at port 1, ignoring management port on switch */
+	for (i = 1; i < port_count; i++) {
+
+		p_current_physp = osm_node_get_physp_ptr(p_sw->p_node, i);
+		if (p_current_physp) {
+			p_remote_physp = p_current_physp->p_remote_physp;
+			if (p_remote_physp && p_remote_physp->p_node->sw) {
+				int physical_port_a_num =
+				    osm_physp_get_port_num(p_current_physp);
+				int physical_port_b_num =
+				    osm_physp_get_port_num(p_remote_physp);
+				switch_b_lash_id =
+				    get_lash_id(p_remote_physp->p_node->sw);
+
+				connect_switches(p_lash,
switch_a_lash_id,
+						 switch_b_lash_id,
+						 physical_port_a_num);
+				OSM_LOG(p_log, OSM_LOG_VERBOSE,
+					"LASH SUCCESS connected G 0x%016" PRIx64
+					" , lash_id(%u), P(%u) " " to G 0x%016"
+					PRIx64 " , lash_id(%u) , P(%u)\n",
+					cl_ntoh64(osm_physp_get_port_guid
+						  (p_current_physp)),
+					switch_a_lash_id, physical_port_a_num,
+					cl_ntoh64(osm_physp_get_port_guid
+						  (p_remote_physp)),
+					switch_b_lash_id, physical_port_b_num);
+			}
+		}
+	}
+
+	OSM_LOG_EXIT(p_log);
+}
+/*
+ * Drop all state left over from the previous LASH run: clear the priv
+ * pointer of every switch in the subnet, delete every switch_t in
+ * p_lash->switches, free the array and leave p_lash->switches NULL.
+ * p_lash->num_switches is not reset here.
+ */
+static void lash_cleanup(lash_t * p_lash)
+{
+	osm_subn_t *p_subn = &p_lash->p_osm->subn;
+	osm_switch_t *p_next_sw, *p_sw;
+
+	/* drop any existing references to old lash switches */
+	p_next_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl);
+	while (p_next_sw != (osm_switch_t *) cl_qmap_end(&p_subn->sw_guid_tbl)) {
+		p_sw = p_next_sw;
+		p_next_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
+		p_sw->priv = NULL;
+	}
+
+	if (p_lash->switches) {
+		unsigned id;
+		for (id = 0; ((int)id) < p_lash->num_switches; id++)
+			if (p_lash->switches[id])	/* slots may be NULL if creation stopped early */
+				switch_delete(p_lash, p_lash->switches[id]);
+		free(p_lash->switches);
+	}
+	p_lash->switches = NULL;
+}
+
+/*
+  static int discover_network_properties()
+     Traverse the topology of the network in order to determine
+      - the number of switches (stored in p_lash->num_switches),
+      - the minimum number of virtual layers (stored in p_lash->vl_min,
+        already reduced by the configured lash_start_vl)
+     A switch_t is created for every switch in the subnet and stored in
+     p_lash->switches[], indexed by its LASH id (ids are assigned in
+     sw_guid_tbl iteration order, starting at 0).
+     Returns 0 on success. Returns -1 if the switch array or a switch_t
+     cannot be allocated, or if lash_start_vl is not below the minimum
+     operational VL count (ERR 4D03); on the allocation failures the
+     partially filled array is left for lash_cleanup() to release.
+*/
+
+static int discover_network_properties(lash_t * p_lash)
+{
+	int i, id = 0;
+	uint8_t vl_min;
+	osm_subn_t *p_subn = &p_lash->p_osm->subn;
+	osm_switch_t *p_next_sw, *p_sw;
+	osm_log_t *p_log = &p_lash->p_osm->log;
+
+	p_lash->num_switches = cl_qmap_count(&p_subn->sw_guid_tbl);
+	/* calloc: unfilled slots stay NULL */
+	p_lash->switches = calloc(p_lash->num_switches, sizeof(switch_t *));
+	if (!p_lash->switches)
+		return -1;
+
+	vl_min = 5;		/* set to a high value */
+
+	p_next_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl);
+	while (p_next_sw != (osm_switch_t *) cl_qmap_end(&p_subn->sw_guid_tbl)) {
+		uint16_t port_count;
+		p_sw = p_next_sw;
+		p_next_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
+
+		p_lash->switches[id] = switch_create(p_lash, id, p_sw);
+		if (!p_lash->switches[id])
+			return -1;
+		id++;
+
+		port_count = osm_node_get_num_physp(p_sw->p_node);
+
+		/* Note, ignoring port 0. management port */
+		for (i = 1; i < port_count; i++) {
+			osm_physp_t *p_current_physp =
+			    osm_node_get_physp_ptr(p_sw->p_node, i);
+			/* only ports with something attached are considered */
+			if (p_current_physp
+			    && p_current_physp->p_remote_physp) {
+
+				ib_port_info_t *p_port_info =
+				    &p_current_physp->port_info;
+				uint8_t port_vl_min =
+				    ib_port_info_get_op_vls(p_port_info);
+				if (port_vl_min && port_vl_min < vl_min)	/* 0 is ignored */
+					vl_min = port_vl_min;
+			}
+		}		/* for */
+	}			/* while */
+	/*
+	 * turn the smallest op_vls code into a lane count, capped at 15
+	 * (presumably the IBA OperationalVLs encoding, where code n stands
+	 * for 2^(n-1) lanes and 5 for 15 - TODO confirm)
+	 */
+	vl_min = 1 << (vl_min - 1);
+	if (vl_min > 15)
+		vl_min = 15;
+
+	if (p_lash->p_osm->subn.opt.lash_start_vl >= vl_min) {
+		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D03: "
+			"Start VL(%d) too high for min operational vl(%d)\n",
+			p_lash->p_osm->subn.opt.lash_start_vl, vl_min);
+		return -1;
+	}
+	/* lanes below lash_start_vl are not available to LASH */
+	p_lash->vl_min = vl_min - p_lash->p_osm->subn.opt.lash_start_vl;
+
+	OSM_LOG(p_log, OSM_LOG_INFO,
+		"min operational vl(%d) start vl(%d) max_switches(%d)\n",
+		p_lash->vl_min, p_lash->p_osm->subn.opt.lash_start_vl,
+		p_lash->num_switches);
+	return 0;
+}
+
+static void process_switches(lash_t * p_lash)
+{
+	osm_switch_t *p_sw, *p_next_sw;
+	osm_subn_t *p_subn = &p_lash->p_osm->subn;
+
+	/* Go through each switch and process it.
i.e build the connection
+	   structure required by LASH */
+	p_next_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl);
+	while (p_next_sw != (osm_switch_t *) cl_qmap_end(&p_subn->sw_guid_tbl)) {
+		p_sw = p_next_sw;
+		p_next_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
+
+		osm_lash_process_switch(p_lash, p_sw);
+	}
+}
+
+/*
+ * Routing-engine callback that builds the forwarding tables with LASH.
+ * 'context' is the lash_t created by lash_create().
+ *
+ * The stages run in order and the first failing one stops the run:
+ * discover the network, allocate the work tables, record the switch
+ * connections, run the LASH core, fill in the forwarding tables.
+ * The work tables are released before returning whenever
+ * p_lash->vl_min is non-zero, whether or not the run succeeded.
+ *
+ * Returns 0 on success, otherwise the status of the failing stage.
+ */
+static int lash_process(void *context)
+{
+	lash_t *lash = context;
+	osm_log_t *logp = &lash->p_osm->log;
+	int rc;
+
+	OSM_LOG_ENTER(logp);
+
+	lash->balance_limit = 6;
+
+	/* everything starts here: forget the previous run */
+	lash_cleanup(lash);
+
+	rc = discover_network_properties(lash);
+	if (rc == 0)
+		rc = init_lash_structures(lash);
+	if (rc == 0) {
+		process_switches(lash);
+		rc = lash_core(lash);
+	}
+	if (rc == 0)
+		populate_fwd_tbls(lash);
+
+	if (lash->vl_min)
+		free_lash_structures(lash);
+	OSM_LOG_EXIT(logp);
+
+	return rc;
+}
+
+/*
+ * Allocate a zero-filled lash_t bound to p_osm.
+ * Returns NULL if the allocation fails.
+ */
+static lash_t *lash_create(osm_opensm_t * p_osm)
+{
+	lash_t *lash = calloc(1, sizeof(lash_t));
+
+	if (lash != NULL)
+		lash->p_osm = p_osm;
+
+	return lash;
+}
+
+/*
+ * Routing-engine destroy callback: delete every switch_t still held in
+ * the context's switches[] array, then free the array and the context.
+ */
+static void lash_delete(void *context)
+{
+	lash_t *lash = context;
+	unsigned idx;
+
+	if (lash->switches != NULL) {
+		idx = 0;
+		while (((int)idx) < lash->num_switches) {
+			if (lash->switches[idx])
+				switch_delete(lash, lash->switches[idx]);
+			idx++;
+		}
+		free(lash->switches);
+	}
+
+	free(lash);
+}
+
+static uint8_t get_lash_sl(void *context, uint8_t path_sl_hint,
+			   const ib_net16_t slid, const ib_net16_t dlid)
+{
+	unsigned dst_id;
+	unsigned src_id;
+	osm_port_t *p_src_port, *p_dst_port;
+	osm_switch_t *p_sw;
+	lash_t *p_lash = context;
+	osm_opensm_t *p_osm = p_lash->p_osm;
+
+	if (!(p_osm->routing_engine_used &&
+	      p_osm->routing_engine_used->type == OSM_ROUTING_ENGINE_TYPE_LASH))
+		return OSM_DEFAULT_SL;
+
+	p_src_port = osm_get_port_by_lid(&p_osm->subn, slid);
+	if (!p_src_port)
+		return OSM_DEFAULT_SL;
+
+	p_dst_port =
osm_get_port_by_lid(&p_osm->subn, dlid);
+	if (!p_dst_port)
+		return OSM_DEFAULT_SL;
+	/* destination side: needs a switch that carries LASH data in priv */
+	p_sw = get_osm_switch_from_port(p_dst_port);
+	if (!p_sw || !p_sw->priv)
+		return OSM_DEFAULT_SL;
+	dst_id = get_lash_id(p_sw);
+	/* source side: p_sw is reused and refers to the source switch from here on */
+	p_sw = get_osm_switch_from_port(p_src_port);
+	if (!p_sw || !p_sw->priv)
+		return OSM_DEFAULT_SL;
+
+	src_id = get_lash_id(p_sw);
+	if (src_id == dst_id)	/* both ends map to the same switch */
+		return p_osm->subn.opt.lash_start_vl;
+	/* otherwise the lane recorded in the source switch's routing table */
+	return (uint8_t) ((switch_t *) p_sw->priv)->routing_table[dst_id].lane;
+}
+/*
+ * Install LASH as routing engine 'r': create the lash_t context and hook
+ * up the build-forwarding-tables, path-SL and destroy callbacks.
+ * Returns 0 on success, -1 if the context cannot be allocated.
+ */
+int osm_ucast_lash_setup(struct osm_routing_engine *r, osm_opensm_t *p_osm)
+{
+	lash_t *p_lash = lash_create(p_osm);
+	if (!p_lash)
+		return -1;
+
+	r->context = p_lash;	/* handed back to each callback as 'context' */
+	r->ucast_build_fwd_tables = lash_process;
+	r->path_sl = get_lash_sl;
+	r->destroy = lash_delete;
+
+	return 0;
+}
diff --git a/opensm/osm_ucast_mgr.c b/opensm/osm_ucast_mgr.c
new file mode 100644
index 0000000..af667a1
--- /dev/null
+++ b/opensm/osm_ucast_mgr.c
@@ -0,0 +1,1196 @@
+/*
+ * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * Abstract:
+ *    Implementation of osm_ucast_mgr_t.
+ * This file implements the Unicast Manager object.
+ */
+/*
+ * NOTE(review): every #include below has lost its header name in this
+ * copy of the patch (presumably stripped together with the angle
+ * brackets when the text was extracted). Restore the names from the
+ * upstream source before trying to build this file.
+ */
+#if HAVE_CONFIG_H
+#  include
+#endif				/* HAVE_CONFIG_H */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#define FILE_ID OSM_FILE_UCAST_MGR_C
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+/* Put a unicast manager object into a known, all-zero state. */
+void osm_ucast_mgr_construct(IN osm_ucast_mgr_t * p_mgr)
+{
+	memset(p_mgr, 0, sizeof(*p_mgr));
+}
+/* Tear down the manager; invalidates the unicast cache if it is marked valid. */
+void osm_ucast_mgr_destroy(IN osm_ucast_mgr_t * p_mgr)
+{
+	CL_ASSERT(p_mgr);
+
+	OSM_LOG_ENTER(p_mgr->p_log);
+
+	if (p_mgr->cache_valid)
+		osm_ucast_cache_invalidate(p_mgr);
+
+	OSM_LOG_EXIT(p_mgr->p_log);
+}
+/*
+ * Zero the manager and bind it to 'sm' (log, subnet and lock are taken
+ * from it). The cache switch table is initialised only when the
+ * use_ucast_cache option is set. As written this always returns
+ * IB_SUCCESS.
+ */
+ib_api_status_t osm_ucast_mgr_init(IN osm_ucast_mgr_t * p_mgr, IN osm_sm_t * sm)
+{
+	ib_api_status_t status = IB_SUCCESS;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	osm_ucast_mgr_construct(p_mgr);
+
+	p_mgr->sm = sm;
+	p_mgr->p_log = sm->p_log;
+	p_mgr->p_subn = sm->p_subn;
+	p_mgr->p_lock = sm->p_lock;
+
+	if (sm->p_subn->opt.use_ucast_cache)
+		cl_qmap_init(&p_mgr->cache_sw_tbl);
+
+	OSM_LOG_EXIT(p_mgr->p_log);
+	return status;
+}
+
+/**********************************************************************
+ Add each switch's own and neighbor LIDs to its LID matrix
+ (cl_qmap_apply_func() callback; 'context' is not used).
+ The switch's own base LID gets hop count 0 on port 0. For every other
+ port whose link leads to a different switch, that switch's base LID is
+ entered on the port with the port's hop weight factor as hop count.
+**********************************************************************/
+static void ucast_mgr_process_hop_0_1(IN cl_map_item_t * p_map_item,
+				      IN void *context)
+{
+	osm_switch_t * p_sw = (osm_switch_t *) p_map_item;
+	osm_node_t *p_remote_node;
+	uint16_t lid, remote_lid;
+	uint8_t i;
+
+	lid = cl_ntoh16(osm_node_get_base_lid(p_sw->p_node, 0));
+	osm_switch_set_hops(p_sw, lid, 0, 0);
+
+	for (i = 1; i < p_sw->num_ports; i++) {
+		osm_physp_t *p = osm_node_get_physp_ptr(p_sw->p_node, i);
+		p_remote_node = (p && p->p_remote_physp) ?
+		    p->p_remote_physp->p_node : NULL;
+		/* only links to another switch count; a link back to this switch is skipped */
+		if (p_remote_node && p_remote_node->sw &&
+		    p_remote_node != p_sw->p_node) {
+			remote_lid = osm_node_get_base_lid(p_remote_node, 0);
+			remote_lid = cl_ntoh16(remote_lid);
+			osm_switch_set_hops(p_sw, remote_lid, i, p->hop_wf);
+		}
+	}
+}
+
+static void ucast_mgr_process_neighbor(IN osm_ucast_mgr_t * p_mgr,
+				       IN osm_switch_t * p_this_sw,
+				       IN osm_switch_t * p_remote_sw,
+				       IN uint8_t port_num,
+				       IN uint8_t remote_port_num)
+{
+	osm_switch_t *p_sw;
+	cl_map_item_t *item;
+	uint16_t lid_ho;
+	uint16_t hops;
+	osm_physp_t *p;
+
+	OSM_LOG_ENTER(p_mgr->p_log);
+
+	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+		"Node 0x%" PRIx64 ", remote node 0x%" PRIx64
+		", port %u, remote port %u\n",
+		cl_ntoh64(osm_node_get_node_guid(p_this_sw->p_node)),
+		cl_ntoh64(osm_node_get_node_guid(p_remote_sw->p_node)),
+		port_num, remote_port_num);
+
+	p = osm_node_get_physp_ptr(p_this_sw->p_node, port_num);
+	/* for every switch LID: can it be reached more cheaply through this neighbor? */
+	for (item = cl_qmap_head(&p_mgr->p_subn->sw_guid_tbl);
+	     item != cl_qmap_end(&p_mgr->p_subn->sw_guid_tbl);
+	     item = cl_qmap_next(item)) {
+		p_sw = (osm_switch_t *) item;
+		lid_ho = cl_ntoh16(osm_node_get_base_lid(p_sw->p_node, 0));
+		hops = osm_switch_get_least_hops(p_remote_sw, lid_ho);
+		if (hops == OSM_NO_PATH)
+			continue;
+		hops += p->hop_wf;	/* cost of the link to the neighbor itself */
+		if (hops <
+		    osm_switch_get_hop_count(p_this_sw, lid_ho, port_num)) {
+			if (osm_switch_set_hops
+			    (p_this_sw, lid_ho, port_num, (uint8_t) hops) != 0)
+				OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A03: "
+					"cannot set hops for lid %u at switch 0x%"
+					PRIx64 "\n", lid_ho,
+					cl_ntoh64(osm_node_get_node_guid
+						  (p_this_sw->p_node)));
+			p_mgr->some_hop_count_set = TRUE;	/* set even when the update failed */
+		}
+	}
+
+
OSM_LOG_EXIT(p_mgr->p_log);
+}
+/*
+ * Return the entry of r->guids[] that describes the node at the far end
+ * of 'port' on switch 'sw', appending a new entry (forwarded_to = 0) if
+ * none matches. With 'dor' set an entry matches only if its remote port
+ * number matches too.
+ * NOTE(review): the physp, its p_remote_physp and the capacity of
+ * r->guids[] are all used unchecked here; callers have to guarantee them.
+ */
+static struct osm_remote_node *find_and_add_remote_sys(osm_switch_t * sw,
+						       uint8_t port,
+						       boolean_t dor, struct
+						       osm_remote_guids_count
+						       *r)
+{
+	unsigned i;
+	osm_physp_t *p = osm_node_get_physp_ptr(sw->p_node, port);
+	osm_node_t *node = p->p_remote_physp->p_node;
+	uint8_t rem_port = osm_physp_get_port_num(p->p_remote_physp);
+
+	for (i = 0; i < r->count; i++)
+		if (r->guids[i].node == node)
+			if (!dor || (r->guids[i].port == rem_port))
+				return &r->guids[i];
+	/* not found: i == r->count here, i.e. the first unused slot */
+	r->guids[i].node = node;
+	r->guids[i].forwarded_to = 0;
+	r->guids[i].port = rem_port;
+	r->count++;
+	return &r->guids[i];
+}
+/*
+ * Choose the egress port of switch p_sw for one LID of p_port (its base
+ * LID plus lid_offset) and store it in p_sw->new_lft[]. Nothing is
+ * written if the port has LID 0, if the offset lies beyond the port's
+ * LID range, or if the chosen port has no physp.
+ * Unless the route is excluded from port profiling, the path is counted
+ * on the chosen port and, when p_port->priv is set, on the remote node
+ * behind that port.
+ */
+static void ucast_mgr_process_port(IN osm_ucast_mgr_t * p_mgr,
+				   IN osm_switch_t * p_sw,
+				   IN osm_port_t * p_port,
+				   IN unsigned lid_offset)
+{
+	uint16_t min_lid_ho;
+	uint16_t max_lid_ho;
+	uint16_t lid_ho;
+	uint8_t port;
+	boolean_t is_ignored_by_port_prof;
+	ib_net64_t node_guid;
+	unsigned start_from = 1;
+
+	OSM_LOG_ENTER(p_mgr->p_log);
+
+	osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
+
+	/* If the lids are zero - then there was some problem with
+	 * the initialization. Don't handle this port. */
+	if (min_lid_ho == 0 || max_lid_ho == 0) {
+		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A04: "
+			"Port 0x%" PRIx64 " (%s port %d) has LID 0. An "
+			"initialization error occurred. Ignoring port\n",
+			cl_ntoh64(osm_port_get_guid(p_port)),
+			p_port->p_node->print_desc,
+			p_port->p_physp->port_num);
+		goto Exit;
+	}
+
+	lid_ho = min_lid_ho + lid_offset;
+	/* this port has fewer LIDs than the offset asks for */
+	if (lid_ho > max_lid_ho)
+		goto Exit;
+
+	if (lid_offset && !p_mgr->is_dor)
+		/* ignore potential overflow - it is handled in osm_switch.c */
+		start_from =
+		    osm_switch_get_port_by_lid(p_sw, lid_ho - 1, OSM_NEW_LFT) + 1;	/* continue after the port used by the previous LID */
+
+	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+		"Processing port 0x%" PRIx64
+		" (\'%s\' port %u), LID %u [%u,%u]\n",
+		cl_ntoh64(osm_port_get_guid(p_port)),
+		p_port->p_node->print_desc, p_port->p_physp->port_num, lid_ho,
+		min_lid_ho, max_lid_ho);
+
+	/* TODO - This should be runtime error, not a CL_ASSERT() */
+	CL_ASSERT(max_lid_ho <= IB_LID_UCAST_END_HO);
+
+	node_guid = osm_node_get_node_guid(p_sw->p_node);
+
+	/*
+	   The lid matrix contains the number of hops to each
+	   lid from each port.  From this information we determine
+	   how best to distribute the LID range across the ports
+	   that can reach those LIDs.
+	 */
+	port = osm_switch_recommend_path(p_sw, p_port, lid_ho, start_from,
+					 p_mgr->p_subn->ignore_existing_lfts,
+					 p_mgr->p_subn->opt.lmc,
+					 p_mgr->is_dor,
+					 p_mgr->p_subn->opt.port_shifting,
+					 !lid_offset && p_port->use_scatter,
+					 OSM_LFT);
+
+	if (port == OSM_NO_PATH) {
+		/* no route: do not try to update the port profile of a non-existent port */
+		is_ignored_by_port_prof = TRUE;
+
+		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+			"No path to get to LID %u from switch 0x%" PRIx64 "\n",
+			lid_ho, cl_ntoh64(node_guid));
+	} else {
+		osm_physp_t *p = osm_node_get_physp_ptr(p_sw->p_node, port);
+		if (!p)
+			goto Exit;
+
+		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+			"Routing LID %u to port %u for switch 0x%" PRIx64 "\n",
+			lid_ho, port, cl_ntoh64(node_guid));
+
+		/*
+		   we would like to optionally ignore this port in equalization
+		   as in the case of the Mellanox Anafa Internal PCI TCA port
+		 */
+		is_ignored_by_port_prof = p->is_prof_ignored;
+
+		/*
+		   We also would ignore this route if the target lid is of
+		   a switch and the port_profile_switch_nodes option is not TRUE
+		 */
+		if (!p_mgr->p_subn->opt.port_profile_switch_nodes)
+			is_ignored_by_port_prof |=
+			    (osm_node_get_type(p_port->p_node) ==
+			     IB_NODE_TYPE_SWITCH);
+	}
+
+	/*
+	   We have selected the port for this LID.
+	   Write it to the forwarding tables.
+	   (OSM_NO_PATH is written as well when no route was found.)
+	 */
+	p_sw->new_lft[lid_ho] = port;
+	if (!is_ignored_by_port_prof) {
+		struct osm_remote_node *rem_node_used;
+		osm_switch_count_path(p_sw, port);
+		if (port > 0 && p_port->priv &&
+		    (rem_node_used = find_and_add_remote_sys(p_sw, port,
+							     p_mgr->is_dor,
+							     p_port->priv)))
+			rem_node_used->forwarded_to++;
+	}
+
+Exit:
+	OSM_LOG_EXIT(p_mgr->p_log);
+}
+
+static void alloc_ports_priv(osm_ucast_mgr_t * mgr)
+{
+	cl_qmap_t *port_tbl = &mgr->p_subn->port_guid_tbl;
+	struct osm_remote_guids_count *r;
+	osm_port_t *port;
+	cl_map_item_t *item;
+	unsigned lmc;
+
+	for (item = cl_qmap_head(port_tbl); item != cl_qmap_end(port_tbl);
+	     item = cl_qmap_next(item)) {
+		port = (osm_port_t *) item;
+		lmc = ib_port_info_get_lmc(&port->p_physp->port_info);
+		r = malloc(sizeof(*r) + sizeof(r->guids[0]) * (1 << lmc));	/* header plus one guids[] entry per LID of the port */
+		if (!r) {
+			OSM_LOG(mgr->p_log, OSM_LOG_ERROR, "ERR 3A09: "
+				"cannot allocate memory to track remote"
+				" systems for lmc > 0\n");
+			port->priv = NULL;
+			continue;
+		}
+		memset(r, 0, sizeof(*r) + sizeof(r->guids[0]) * (1 << lmc));
+
port->priv = r;
+	}
+}
+/* Free and clear the priv pointer of every port in the subnet's port_guid_tbl. */
+static void free_ports_priv(osm_ucast_mgr_t * mgr)
+{
+	cl_qmap_t *port_tbl = &mgr->p_subn->port_guid_tbl;
+	osm_port_t *port;
+	cl_map_item_t *item;
+	for (item = cl_qmap_head(port_tbl); item != cl_qmap_end(port_tbl);
+	     item = cl_qmap_next(item)) {
+		port = (osm_port_t *) item;
+		if (port->priv) {
+			free(port->priv);
+			port->priv = NULL;
+		}
+	}
+}
+/*
+ * Build the new forwarding table of one switch; 'context' is the
+ * osm_ucast_mgr_t. The signature matches the other cl_qmap_apply_func()
+ * callbacks in this file - presumably it is applied per switch the same
+ * way (caller not visible here).
+ * The table is reset to OSM_NO_PATH, then every port in
+ * p_mgr->port_order_list is routed once per LID offset: all base LIDs
+ * first, then all LIDs at offset 1, and so on up to the subnet LMC.
+ * The per-port remote-node trackers are allocated before and freed
+ * after the pass.
+ */
+static void ucast_mgr_process_tbl(IN cl_map_item_t * p_map_item,
+				  IN void *context)
+{
+	osm_ucast_mgr_t *p_mgr = context;
+	osm_switch_t * p_sw = (osm_switch_t *) p_map_item;
+	unsigned i, lids_per_port;
+
+	OSM_LOG_ENTER(p_mgr->p_log);
+
+	CL_ASSERT(p_sw && p_sw->p_node);
+
+	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+		"Processing switch 0x%" PRIx64 "\n",
+		cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)));
+
+	/* Initialize LIDs in buffer to invalid port number. */
+	memset(p_sw->new_lft, OSM_NO_PATH, p_sw->max_lid_ho + 1);
+
+	alloc_ports_priv(p_mgr);
+
+	/*
+	   Iterate through every port setting LID routes for each
+	   port based on base LID and LMC value.
+	 */
+	lids_per_port = 1 << p_mgr->p_subn->opt.lmc;
+	for (i = 0; i < lids_per_port; i++) {
+		cl_qlist_t *list = &p_mgr->port_order_list;
+		cl_list_item_t *item;
+		for (item = cl_qlist_head(list); item != cl_qlist_end(list);
+		     item = cl_qlist_next(item)) {
+			osm_port_t *port = cl_item_obj(item, port, list_item);
+			ucast_mgr_process_port(p_mgr, p_sw, port, i);	/* i is the LID offset */
+		}
+	}
+
+	free_ports_priv(p_mgr);
+
+	OSM_LOG_EXIT(p_mgr->p_log);
+}
+
+static void ucast_mgr_process_neighbors(IN cl_map_item_t * p_map_item,
+					IN void *context)
+{
+	osm_switch_t * p_sw = (osm_switch_t *) p_map_item;
+	osm_ucast_mgr_t * p_mgr = context;
+	osm_node_t *p_node;
+	osm_node_t *p_remote_node;
+	uint32_t port_num;
+	uint8_t remote_port_num;
+	uint32_t num_ports;
+	osm_physp_t *p_physp;
+
+	OSM_LOG_ENTER(p_mgr->p_log);
+
+	p_node = p_sw->p_node;
+
+	CL_ASSERT(p_node);
+	CL_ASSERT(osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH);
+
+	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+		"Processing switch with GUID 0x%" PRIx64 "\n",
+		cl_ntoh64(osm_node_get_node_guid(p_node)));
+
+	num_ports = osm_node_get_num_physp(p_node);
+
+	/*
+	   Start with port 1 to skip the switch's management port.
+	 */
+	for (port_num = 1; port_num < num_ports; port_num++) {
+		p_remote_node = osm_node_get_remote_node(p_node,
+							 (uint8_t) port_num,
+							 &remote_port_num);
+		if (p_remote_node && p_remote_node->sw
+		    && (p_remote_node != p_node)) {
+			/* make sure the link is healthy. If it is not - don't
+			   propagate through it.
*/ + p_physp = osm_node_get_physp_ptr(p_node, port_num); + if (!p_physp || !osm_link_is_healthy(p_physp)) + continue; + + ucast_mgr_process_neighbor(p_mgr, p_sw, + p_remote_node->sw, + (uint8_t) port_num, + remote_port_num); + } + } + + OSM_LOG_EXIT(p_mgr->p_log); +} + +static int set_hop_wf(void *ctx, uint64_t guid, char *p) +{ + osm_ucast_mgr_t *m = ctx; + osm_node_t *node = osm_get_node_by_guid(m->p_subn, cl_hton64(guid)); + osm_physp_t *physp; + unsigned port, hop_wf; + char *e; + + if (!node || !node->sw) { + OSM_LOG(m->p_log, OSM_LOG_DEBUG, + "switch with guid 0x%016" PRIx64 " is not found\n", + guid); + return 0; + } + + if (!p || !*p || !(port = strtoul(p, &e, 0)) || (p == e) || + port >= node->sw->num_ports) { + OSM_LOG(m->p_log, OSM_LOG_DEBUG, + "bad port specified for guid 0x%016" PRIx64 "\n", guid); + return 0; + } + + p = e + 1; + + if (!*p || !(hop_wf = strtoul(p, &e, 0)) || p == e || hop_wf >= 0x100) { + OSM_LOG(m->p_log, OSM_LOG_DEBUG, + "bad hop weight factor specified for guid 0x%016" PRIx64 + "port %u\n", guid, port); + return 0; + } + + physp = osm_node_get_physp_ptr(node, port); + if (!physp) + return 0; + + physp->hop_wf = hop_wf; + + return 0; +} + +static void set_default_hop_wf(cl_map_item_t * p_map_item, void *ctx) +{ + osm_switch_t *sw = (osm_switch_t *) p_map_item; + int i; + + for (i = 1; i < sw->num_ports; i++) { + osm_physp_t *p = osm_node_get_physp_ptr(sw->p_node, i); + if (p) + p->hop_wf = 1; + } +} + +static int set_search_ordering_ports(void *ctx, uint64_t guid, char *p) +{ + osm_subn_t *p_subn = ctx; + osm_node_t *node = osm_get_node_by_guid(p_subn, cl_hton64(guid)); + osm_switch_t *sw; + uint8_t *search_ordering_ports = NULL; + uint8_t port; + unsigned int *ports = NULL; + const int bpw = sizeof(*ports)*8; + int words; + int i = 1; /* port 0 maps to port 0 */ + + if (!node || !(sw = node->sw)) { + OSM_LOG(&p_subn->p_osm->log, OSM_LOG_VERBOSE, + "switch with guid 0x%016" PRIx64 " is not found\n", + guid); + return 0; + } + + if 
(sw->search_ordering_ports) { + OSM_LOG(&p_subn->p_osm->log, OSM_LOG_VERBOSE, + "switch with guid 0x%016" PRIx64 " already listed\n", + guid); + return 0; + } + + search_ordering_ports = malloc(sizeof(*search_ordering_ports)*sw->num_ports); + if (!search_ordering_ports) { + OSM_LOG(&p_subn->p_osm->log, OSM_LOG_ERROR, + "ERR 3A07: cannot allocate memory for search_ordering_ports\n"); + return -1; + } + memset(search_ordering_ports, 0, sizeof(*search_ordering_ports)*sw->num_ports); + + /* the ports array is for record keeping of which ports have + * been seen */ + words = (sw->num_ports + bpw - 1)/bpw; + ports = malloc(words*sizeof(*ports)); + if (!ports) { + OSM_LOG(&p_subn->p_osm->log, OSM_LOG_ERROR, + "ERR 3A08: cannot allocate memory for ports\n"); + free(search_ordering_ports); + return -1; + } + memset(ports, 0, words*sizeof(*ports)); + + while ((*p != '\0') && (*p != '#')) { + char *e; + + port = strtoul(p, &e, 0); + if ((p == e) || (port == 0) || (port >= sw->num_ports) || + !osm_node_get_physp_ptr(node, port)) { + OSM_LOG(&p_subn->p_osm->log, OSM_LOG_VERBOSE, + "bad port %d specified for guid 0x%016" PRIx64 "\n", + port, guid); + free(search_ordering_ports); + free(ports); + return 0; + } + + if (ports[port/bpw] & (1u << (port%bpw))) { + OSM_LOG(&p_subn->p_osm->log, OSM_LOG_VERBOSE, + "port %d already specified for guid 0x%016" PRIx64 "\n", + port, guid); + free(search_ordering_ports); + free(ports); + return 0; + } + + ports[port/bpw] |= (1u << (port%bpw)); + search_ordering_ports[i++] = port; + + p = e; + while (isspace(*p)) { + p++; + } + } + + if (i > 1) { + for (port = 1; port < sw->num_ports; port++) { + /* fill out the rest of the search_ordering_ports array + * in sequence using the remaining unspecified + * ports. 
+ */ + if (!(ports[port/bpw] & (1u << (port%bpw)))) { + search_ordering_ports[i++] = port; + } + } + sw->search_ordering_ports = search_ordering_ports; + } else { + free(search_ordering_ports); + } + + free(ports); + return 0; +} + +int osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * p_mgr) +{ + uint32_t i; + uint32_t iteration_max; + cl_qmap_t *p_sw_guid_tbl; + + p_sw_guid_tbl = &p_mgr->p_subn->sw_guid_tbl; + + OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE, + "Starting switches' Min Hop Table Assignment\n"); + + /* + Set up the weighting factors for the routing. + */ + cl_qmap_apply_func(p_sw_guid_tbl, set_default_hop_wf, NULL); + if (p_mgr->p_subn->opt.hop_weights_file) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Fetching hop weight factor file \'%s\'\n", + p_mgr->p_subn->opt.hop_weights_file); + if (parse_node_map(p_mgr->p_subn->opt.hop_weights_file, + set_hop_wf, p_mgr)) { + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A05: " + "cannot parse hop_weights_file \'%s\'\n", + p_mgr->p_subn->opt.hop_weights_file); + } + } + + /* + Set the switch matrices for each switch's own port 0 LID(s) + then set the lid matrices for each switch's leaf nodes. + */ + cl_qmap_apply_func(p_sw_guid_tbl, ucast_mgr_process_hop_0_1, p_mgr); + + /* + Get the switch matrices for each switch's neighbors. + This process requires a number of iterations equal to + the number of switches in the subnet minus 1. + + In each iteration, a switch learns the lid/port/hop + information (as contained by a switch's lid matrix) from + its immediate neighbors. After each iteration, a switch + (and its neighbors) knows more routing information than + it did on the previous iteration. + Thus, by repeatedly absorbing the routing information of + neighbor switches, every switch eventually learns how to + route all LIDs on the subnet. + + Note that there may not be any switches in the subnet if + we are in simple p2p configuration. 
+ */ + iteration_max = cl_qmap_count(p_sw_guid_tbl); + + /* + If there are switches in the subnet, iterate until the lid + matrix has been constructed. Otherwise, just immediately + indicate we're done if no switches exist. + */ + if (iteration_max) { + iteration_max--; + + /* + we need to find out when the propagation of + hop counts has relaxed. So this flag + is preset to FALSE on each iteration and + if it is still FALSE after the pass we exit the + loop + */ + p_mgr->some_hop_count_set = TRUE; + for (i = 0; (i < iteration_max) && p_mgr->some_hop_count_set; + i++) { + p_mgr->some_hop_count_set = FALSE; + cl_qmap_apply_func(p_sw_guid_tbl, + ucast_mgr_process_neighbors, p_mgr); + } + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Min-hop propagated in %u steps\n", i); + } + + return 0; +} + +static int ucast_mgr_setup_all_switches(osm_subn_t * p_subn) +{ + osm_switch_t *p_sw; + uint16_t lids; + + lids = (uint16_t) cl_ptr_vector_get_size(&p_subn->port_lid_tbl); + lids = lids ? lids - 1 : 0; + + for (p_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl); + p_sw != (osm_switch_t *) cl_qmap_end(&p_subn->sw_guid_tbl); + p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) { + if (osm_switch_prepare_path_rebuild(p_sw, lids)) { + OSM_LOG(&p_subn->p_osm->log, OSM_LOG_ERROR, "ERR 3A0B: " + "cannot setup switch 0x%016" PRIx64 "\n", + cl_ntoh64(osm_node_get_node_guid + (p_sw->p_node))); + return -1; + } + if (p_sw->search_ordering_ports) { + free(p_sw->search_ordering_ports); + p_sw->search_ordering_ports = NULL; + } + } + + if (p_subn->opt.port_search_ordering_file) { + OSM_LOG(&p_subn->p_osm->log, OSM_LOG_DEBUG, + "Fetching dimension ports file \'%s\'\n", + p_subn->opt.port_search_ordering_file); + if (parse_node_map(p_subn->opt.port_search_ordering_file, + set_search_ordering_ports, p_subn)) { + OSM_LOG(&p_subn->p_osm->log, OSM_LOG_ERROR, "ERR 3A0F: " + "cannot parse port_search_ordering_file \'%s\'\n", + p_subn->opt.port_search_ordering_file); + } + } + + 
return 0; +} + +static int add_guid_to_order_list(void *ctx, uint64_t guid, char *p) +{ + osm_ucast_mgr_t *m = ctx; + osm_port_t *port = osm_get_port_by_guid(m->p_subn, cl_hton64(guid)); + + if (!port) { + OSM_LOG(m->p_log, OSM_LOG_DEBUG, + "port guid not found: 0x%016" PRIx64 "\n", guid); + return 0; + } + + if (port->flag) { + OSM_LOG(m->p_log, OSM_LOG_DEBUG, + "port guid specified multiple times 0x%016" PRIx64 "\n", + guid); + return 0; + } + + cl_qlist_insert_tail(&m->port_order_list, &port->list_item); + port->flag = 1; + port->use_scatter = (m->p_subn->opt.guid_routing_order_no_scatter == TRUE) ? 0 : m->p_subn->opt.scatter_ports; + + return 0; +} + +static void add_port_to_order_list(cl_map_item_t * p_map_item, void *ctx) +{ + osm_port_t *port = (osm_port_t *) p_map_item; + osm_ucast_mgr_t *m = ctx; + + if (!port->flag) { + port->use_scatter = m->p_subn->opt.scatter_ports; + cl_qlist_insert_tail(&m->port_order_list, &port->list_item); + } else + port->flag = 0; +} + +static int mark_ignored_port(void *ctx, uint64_t guid, char *p) +{ + osm_ucast_mgr_t *m = ctx; + osm_node_t *node = osm_get_node_by_guid(m->p_subn, cl_hton64(guid)); + osm_physp_t *physp; + unsigned port; + + if (!node || !node->sw) { + OSM_LOG(m->p_log, OSM_LOG_DEBUG, + "switch with guid 0x%016" PRIx64 " is not found\n", + guid); + return 0; + } + + if (!p || !*p || !(port = strtoul(p, NULL, 0)) || + port >= node->sw->num_ports) { + OSM_LOG(m->p_log, OSM_LOG_DEBUG, + "bad port specified for guid 0x%016" PRIx64 "\n", guid); + return 0; + } + + physp = osm_node_get_physp_ptr(node, port); + if (!physp) + return 0; + + physp->is_prof_ignored = 1; + + return 0; +} + +static void clear_prof_ignore_flag(cl_map_item_t * p_map_item, void *ctx) +{ + osm_switch_t *sw = (osm_switch_t *) p_map_item; + int i; + + for (i = 1; i < sw->num_ports; i++) { + osm_physp_t *p = osm_node_get_physp_ptr(sw->p_node, i); + if (p) + p->is_prof_ignored = 0; + } +} + +static void add_sw_endports_to_order_list(osm_switch_t * 
sw, + osm_ucast_mgr_t * m) +{ + osm_port_t *port; + osm_physp_t *p; + int i; + + for (i = 1; i < sw->num_ports; i++) { + p = osm_node_get_physp_ptr(sw->p_node, i); + if (p && p->p_remote_physp && !p->p_remote_physp->p_node->sw) { + port = osm_get_port_by_guid(m->p_subn, + p->p_remote_physp-> + port_guid); + if (!port || port->flag) + continue; + cl_qlist_insert_tail(&m->port_order_list, + &port->list_item); + port->flag = 1; + port->use_scatter = m->p_subn->opt.scatter_ports; + } + } +} + +static void sw_count_endport_links(osm_switch_t * sw) +{ + osm_physp_t *p; + int i; + + sw->endport_links = 0; + for (i = 1; i < sw->num_ports; i++) { + p = osm_node_get_physp_ptr(sw->p_node, i); + if (p && p->p_remote_physp && !p->p_remote_physp->p_node->sw) + sw->endport_links++; + } +} + +static int compar_sw_load(const void *s1, const void *s2) +{ +#define get_sw_endport_links(s) ((*(osm_switch_t * const *)(s))->endport_links) + return get_sw_endport_links(s2) - get_sw_endport_links(s1); +} + +static void sort_ports_by_switch_load(osm_ucast_mgr_t * m) +{ + int i, num = cl_qmap_count(&m->p_subn->sw_guid_tbl); + void **s = malloc((num > 0 ? num : 1) * sizeof(*s)); /* s[0] is set */ + if (!s) { + OSM_LOG(m->p_log, OSM_LOG_ERROR, "ERR 3A0C: " + "No memory, skip by switch load sorting.\n"); + return; + } + s[0] = cl_qmap_head(&m->p_subn->sw_guid_tbl); + for (i = 1; i < num; i++) + s[i] = cl_qmap_next(s[i - 1]); + + for (i = 0; i < num; i++) + sw_count_endport_links(s[i]); + + qsort(s, num, sizeof(*s), compar_sw_load); + + for (i = 0; i < num; i++) + add_sw_endports_to_order_list(s[i], m); + free(s); +} + +static int ucast_mgr_build_lfts(osm_ucast_mgr_t * p_mgr) +{ + cl_qlist_init(&p_mgr->port_order_list); + + if (p_mgr->p_subn->opt.guid_routing_order_file) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Fetching guid routing order file \'%s\'\n", + p_mgr->p_subn->opt.guid_routing_order_file); + + if (parse_node_map(p_mgr->p_subn->opt.guid_routing_order_file, + add_guid_to_order_list, p_mgr)) + OSM_LOG(p_mgr->p_log, 
OSM_LOG_ERROR, "ERR 3A0D: " + "cannot parse guid routing order file \'%s\'\n", + p_mgr->p_subn->opt.guid_routing_order_file); + } + sort_ports_by_switch_load(p_mgr); + + if (p_mgr->p_subn->opt.port_prof_ignore_file) { + cl_qmap_apply_func(&p_mgr->p_subn->sw_guid_tbl, + clear_prof_ignore_flag, NULL); + if (parse_node_map(p_mgr->p_subn->opt.port_prof_ignore_file, + mark_ignored_port, p_mgr)) { + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A0E: " + "cannot parse port prof ignore file \'%s\'\n", + p_mgr->p_subn->opt.port_prof_ignore_file); + } + } + + cl_qmap_apply_func(&p_mgr->p_subn->port_guid_tbl, + add_port_to_order_list, p_mgr); + + cl_qmap_apply_func(&p_mgr->p_subn->sw_guid_tbl, ucast_mgr_process_tbl, + p_mgr); + + cl_qlist_remove_all(&p_mgr->port_order_list); + + return 0; +} + +static void ucast_mgr_set_fwd_top(IN cl_map_item_t * p_map_item, + IN void *cxt) +{ + osm_ucast_mgr_t *p_mgr = cxt; + osm_switch_t * p_sw = (osm_switch_t *) p_map_item; + osm_node_t *p_node; + osm_physp_t *p_physp; + osm_dr_path_t *p_path; + osm_madw_context_t context; + ib_api_status_t status; + ib_switch_info_t si; + boolean_t set_swinfo_require = FALSE; + uint16_t lin_top; + uint8_t life_state; + + CL_ASSERT(p_mgr); + + OSM_LOG_ENTER(p_mgr->p_log); + + CL_ASSERT(p_sw && p_sw->max_lid_ho); + + p_node = p_sw->p_node; + + CL_ASSERT(p_node); + + if (p_mgr->max_lid < p_sw->max_lid_ho) + p_mgr->max_lid = p_sw->max_lid_ho; + + p_physp = osm_node_get_physp_ptr(p_node, 0); + + CL_ASSERT(p_physp); + + p_path = osm_physp_get_dr_path_ptr(p_physp); + + /* + Set the top of the unicast forwarding table. 
+ */ + si = p_sw->switch_info; + lin_top = cl_hton16(p_sw->max_lid_ho); + if (lin_top != si.lin_top) { + set_swinfo_require = TRUE; + si.lin_top = lin_top; + context.si_context.lft_top_change = TRUE; + } else + context.si_context.lft_top_change = FALSE; + + life_state = si.life_state; + ib_switch_info_set_life_time(&si, p_mgr->p_subn->opt.packet_life_time); + + if (life_state != si.life_state) + set_swinfo_require = TRUE; + + if (set_swinfo_require) { + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Setting switch FT top to LID %u\n", p_sw->max_lid_ho); + + context.si_context.light_sweep = FALSE; + context.si_context.node_guid = osm_node_get_node_guid(p_node); + context.si_context.set_method = TRUE; + + status = osm_req_set(p_mgr->sm, p_path, (uint8_t *) & si, + sizeof(si), IB_MAD_ATTR_SWITCH_INFO, + 0, FALSE, + ib_port_info_get_m_key(&p_physp->port_info), + 0, CL_DISP_MSGID_NONE, &context); + + if (status != IB_SUCCESS) + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A06: " + "Sending SwitchInfo attribute failed (%s)\n", + ib_get_err_str(status)); + } + + OSM_LOG_EXIT(p_mgr->p_log); +} + +static int set_lft_block(IN osm_switch_t *p_sw, IN osm_ucast_mgr_t *p_mgr, + IN uint16_t block_id_ho) +{ + osm_madw_context_t context; + osm_dr_path_t *p_path; + osm_physp_t *p_physp; + ib_api_status_t status; + + /* + Send linear forwarding table blocks to the switch + as long as the switch indicates it has blocks needing + configuration. 
+ */ + if (!p_sw->new_lft) { + /* any routing should provide the new_lft */ + CL_ASSERT(p_mgr->p_subn->opt.use_ucast_cache && + p_mgr->cache_valid && !p_sw->need_update); + return -1; + } + + p_physp = osm_node_get_physp_ptr(p_sw->p_node, 0); + if (!p_physp) + return -1; + + p_path = osm_physp_get_dr_path_ptr(p_physp); + + context.lft_context.node_guid = osm_node_get_node_guid(p_sw->p_node); + context.lft_context.set_method = TRUE; + + if (!p_sw->need_update && !p_mgr->p_subn->need_update && + !memcmp(p_sw->new_lft + block_id_ho * IB_SMP_DATA_SIZE, + p_sw->lft + block_id_ho * IB_SMP_DATA_SIZE, + IB_SMP_DATA_SIZE)) + return 0; + + /* + * Zero the stored LFT block, so in case the MAD will end up + * with error, we will resend it in the next sweep. + */ + memset(p_sw->lft + block_id_ho * IB_SMP_DATA_SIZE, 0, + IB_SMP_DATA_SIZE); + + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Writing FT block %u to switch 0x%" PRIx64 "\n", block_id_ho, + cl_ntoh64(context.lft_context.node_guid)); + + status = osm_req_set(p_mgr->sm, p_path, + p_sw->new_lft + block_id_ho * IB_SMP_DATA_SIZE, + IB_SMP_DATA_SIZE, IB_MAD_ATTR_LIN_FWD_TBL, + cl_hton32(block_id_ho), FALSE, + ib_port_info_get_m_key(&p_physp->port_info), + 0, CL_DISP_MSGID_NONE, &context); + + if (status != IB_SUCCESS) { + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A10: " + "Sending linear fwd. tbl. 
block failed (%s)\n", + ib_get_err_str(status)); + return -1; + } + + return 0; +} + +static void ucast_mgr_pipeline_fwd_tbl(osm_ucast_mgr_t * p_mgr) +{ + cl_qmap_t *tbl; + cl_map_item_t *item; + unsigned i, max_block = p_mgr->max_lid / IB_SMP_DATA_SIZE + 1; + + tbl = &p_mgr->p_subn->sw_guid_tbl; + for (i = 0; i < max_block; i++) + for (item = cl_qmap_head(tbl); item != cl_qmap_end(tbl); + item = cl_qmap_next(item)) + set_lft_block((osm_switch_t *)item, p_mgr, i); +} + +void osm_ucast_mgr_set_fwd_tables(osm_ucast_mgr_t * p_mgr) +{ + p_mgr->max_lid = 0; + + cl_qmap_apply_func(&p_mgr->p_subn->sw_guid_tbl, ucast_mgr_set_fwd_top, + p_mgr); + + ucast_mgr_pipeline_fwd_tbl(p_mgr); +} + +static int ucast_mgr_route(struct osm_routing_engine *r, osm_opensm_t * osm) +{ + int ret; + + OSM_LOG(&osm->log, OSM_LOG_VERBOSE, + "building routing with \'%s\' routing algorithm...\n", r->name); + + /* Set the random seed before each lft build to keep the routes in place between sweeps */ + if (osm->subn.opt.scatter_ports) + srandom(osm->subn.opt.scatter_ports); + + if (!r->build_lid_matrices || + (ret = r->build_lid_matrices(r->context)) > 0) + ret = osm_ucast_mgr_build_lid_matrices(&osm->sm.ucast_mgr); + + if (ret < 0) { + OSM_LOG(&osm->log, OSM_LOG_ERROR, + "%s: cannot build lid matrices\n", r->name); + return ret; + } + + if (!r->ucast_build_fwd_tables || + (ret = r->ucast_build_fwd_tables(r->context)) > 0) + ret = ucast_mgr_build_lfts(&osm->sm.ucast_mgr); + + if (ret < 0) { + OSM_LOG(&osm->log, OSM_LOG_ERROR, + "%s: cannot build fwd tables\n", r->name); + return ret; + } + + osm->routing_engine_used = r; + + osm_ucast_mgr_set_fwd_tables(&osm->sm.ucast_mgr); + + return 0; +} + +int osm_ucast_mgr_process(IN osm_ucast_mgr_t * p_mgr) +{ + osm_opensm_t *p_osm; + struct osm_routing_engine *p_routing_eng; + cl_qmap_t *p_sw_guid_tbl; + int failed = 0; + + OSM_LOG_ENTER(p_mgr->p_log); + + p_sw_guid_tbl = &p_mgr->p_subn->sw_guid_tbl; + p_osm = p_mgr->p_subn->p_osm; + p_routing_eng = 
p_osm->routing_engine_list; + + CL_PLOCK_EXCL_ACQUIRE(p_mgr->p_lock); + + /* + If there are no switches in the subnet, we are done. + */ + if (cl_qmap_count(p_sw_guid_tbl) == 0 || + ucast_mgr_setup_all_switches(p_mgr->p_subn) < 0) + goto Exit; + + failed = -1; + p_osm->routing_engine_used = NULL; + while (p_routing_eng) { + failed = ucast_mgr_route(p_routing_eng, p_osm); + if (!failed) + break; + p_routing_eng = p_routing_eng->next; + } + + if (!p_osm->routing_engine_used && + p_osm->no_fallback_routing_engine != TRUE) { + /* If configured routing algorithm failed, use default MinHop */ + failed = ucast_mgr_route(p_osm->default_routing_engine, p_osm); + } + + if (p_osm->routing_engine_used) { + OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, + "%s tables configured on all switches\n", + osm_routing_engine_type_str(p_osm-> + routing_engine_used->type)); + + if (p_mgr->p_subn->opt.use_ucast_cache) + p_mgr->cache_valid = TRUE; + } else { + p_mgr->p_subn->subnet_initialization_error = TRUE; + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, + "No routing engine able to successfully configure " + " switch tables on current fabric\n"); + } +Exit: + CL_PLOCK_RELEASE(p_mgr->p_lock); + OSM_LOG_EXIT(p_mgr->p_log); + return failed; +} + +static int ucast_build_lid_matrices(void *context) +{ + return osm_ucast_mgr_build_lid_matrices(context); +} + +static int ucast_build_lfts(void *context) +{ + return ucast_mgr_build_lfts(context); +} + +int osm_ucast_minhop_setup(struct osm_routing_engine *r, osm_opensm_t * osm) +{ + r->context = &osm->sm.ucast_mgr; + r->build_lid_matrices = ucast_build_lid_matrices; + r->ucast_build_fwd_tables = ucast_build_lfts; + return 0; +} + +static int ucast_dor_build_lfts(void *context) +{ + osm_ucast_mgr_t *mgr = context; + int ret; + + mgr->is_dor = 1; + ret = ucast_mgr_build_lfts(mgr); + mgr->is_dor = 0; + + return ret; +} + +int osm_ucast_dor_setup(struct osm_routing_engine *r, osm_opensm_t * osm) +{ + r->context = &osm->sm.ucast_mgr; + r->build_lid_matrices = 
ucast_build_lid_matrices; + r->ucast_build_fwd_tables = ucast_dor_build_lfts; + return 0; +} + +int ucast_dummy_build_lid_matrices(void *context) +{ + return 0; +} diff --git a/opensm/osm_ucast_nue.c b/opensm/osm_ucast_nue.c new file mode 100644 index 0000000..c6955a0 --- /dev/null +++ b/opensm/osm_ucast_nue.c @@ -0,0 +1,6735 @@ +/* + * Copyright (c) 2009-2016 ZIH, TU Dresden, Federal Republic of Germany. All rights reserved. + * Copyright (C) 2012-2017 Tokyo Institute of Technology. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/*! 
+ * \file osm_ucast_nue.c + * \brief File containing a 100%-applicable, balanced, deadlock-free routing + * + * Abstract: + * Implementation of Nue routing for OpenSM. Nue is a deadlock-free routing + * engine which can be used for arbitrary network topologies and any number + * of virtual lanes (this includes the absence of VLs as well). The paper + * explaining the details of Nue routing is: [1] J. Domke, T. Hoefler and + * S. Matsuoka "Routing on the Dependency Graph: A New Approach to + * Deadlock-Free High-Performance Routing", HPDC'16. An in-depth explanation + * of Nue can be found in Chapter 6 of the similarly named dissertation: + * [2] J. Domke "Routing on the Channel Dependency Graph: A New Approach to + * Deadlock-Free, Destination-Based, High-Performance Routing for Lossless + * Interconnection Networks", 2017, Technische Universitaet Dresden + * (online: http://nbn-resolving.de/urn:nbn:de:bsz:14-qucosa-225902). + * + * \author Jens Domke + */ + +#if HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_UCAST_NUE_C +#include +#include +#include +#include +#include +#if defined (ENABLE_METIS_FOR_NUE) +#include +#endif + +/*! \def Macro for "infinity" to initialize distance in Dijkstra's algorithm. */ +#define INFINITY 0x7FFFFFFF + +/*! \enum Enum to identify node status in search for cycles. */ +enum { + WHITE = 0, /*!< White color for undiscovered nodes. */ + GRAY, /*!< Gray color for discovered nodes. */ + BLACK, /*!< Black for nodes which cannot be part of cycle. */ +}; + +/*! \enum Enum to identify first three statuses of nodes & edges in the cCDG. */ +enum { + BLOCKED = 0, /*!< Forbidden, since it will induce a cycle. */ + UNUSED, /*!< Not used by any path, yet. */ + ESCAPEPATHCOLOR, /*!< Reserved for escape path in the cCDG. */ +}; + +/*! \struct channel + * \brief Bit field identifying parts of a channel/link (lids in ib_net16_t). 
+ */ +typedef struct channel { + unsigned local_lid:16; /*!< Node LID of the start point of a link. */ + unsigned local_port:8; /*!< Node port of the start point of a link. */ + unsigned:0; /* zero width: next field starts a new unit */ + unsigned remote_lid:16; /*!< Node LID of the end point of a link. */ + unsigned remote_port:8; /*!< Node port of the end point of a link. */ +} channel_t; + +/*! \struct network_link + * \brief Network link with basic information and link weights for balancing. + */ +typedef struct network_link { + channel_t link_info; /*!< Identifies LID and port of both ends. */ + struct network_node *to_network_node; /*!< Points to remote switch. */ + struct ccdg_node *corresponding_ccdg_node; /*!< cCDG equivalent. */ + uint64_t weight; /*!< Link weights needed for path balancing. */ +} network_link_t; + +/*! \struct network_node + * \brief Network nodes are the internal representation of fabric switches. + */ +typedef struct network_node { + /* information about the fabric */ + ib_net16_t lid; /*!< LID used as key to sort and to fill LFT. */ + ib_net64_t guid; /*!< Identifier to get information from osm. */ + uint8_t num_base_terminals; /*!< Number of CAs of this switch. */ + uint8_t num_terminals; /*!< Virtual number of CAs (w/ lmc>0). */ + uint8_t num_links; /*!< Number of switch-to-switch links. */ + network_link_t *links; /*!< Array of outgoing sw-to-sw links. */ + osm_switch_t *sw; /*!< Selfpointer into osm's switch struct. */ + boolean_t dropped; /*!< Mark dropped switches (for ucast cache). */ + /* needed for Dijkstra's algorithm on the network */ + network_link_t *used_link; /*!< Path found with Dijkstra's algo. */ + uint8_t hops; /*!< Counting path length. */ + uint64_t distance; /*!< Path length w.r.t edge weights. */ + size_t heap_index; /*!< Helper index for the d-ary heap. */ + /* additionally needed for Dijkstra's on the cdg */ + network_link_t *escape_path; /*!< Store fallback path for impasse. 
*/ + uint8_t num_elem_in_link_stack; /*!< Number of links in stack. */ + network_link_t **stack_used_links; /*!< Stack for backtracking. */ + int32_t found_after_backtracking_step; /*!< Backtracking successful. */ + /* additionally needed for convex subgraph calculation */ + boolean_t in_convex_hull; /*!< TRUE if switch in convex hull. */ + boolean_t processed; /*!< Helper for graph traversal. */ + /* additionally needed for betweenness centrality calculation */ + double betw_centrality; /*!< Measurement of betweenness. */ + double delta; /*!< Helper for betweenness calc. */ + uint64_t sigma; /*!< Helper for betweenness calc. */ + uint8_t num_elem_in_Ps; /*!< Helper for betweenness calc. */ + struct network_node **Ps; /*!< Helper for betweenness calc. */ + uint8_t num_adj_terminals_in_convex_hull; /*!< Helper for betw. */ + /* additionally needed for cCDG escape path assignment */ + boolean_t has_adj_destinations; /*!< Add reverse path to escape path. */ +} network_node_t; + +/*! \struct network + * \brief Stores the internal subnet representation (diff. from osm internals). + */ +typedef struct network { + uint16_t num_nodes; /*!< Number of switches in the subnet. */ + network_node_t *nodes; /*!< Array with all switches of the subnet. */ + cl_heap_t heap; /*!< Heap object for faster Dijkstra's algo. */ +} network_t; + +/*! \struct color + * \brief Colors are used to identify disjoint acyclic subgraphs of the cCDG. + */ +typedef struct color { + uint16_t color_id; /*!< ID for the 'color' value. */ + struct color *real_col; /*!< Ptr to real color after merging graphs. */ +} color_t; + +/*! \struct ccdg_edge + * \brief Object representing an edge of the complete channel dependency graph. + */ +typedef struct ccdg_edge { + channel_t to_channel_id; /*!< Identifies tail vertex of edge. */ + struct ccdg_node *to_ccdg_node; /*!< Pointer to tail vertex of edge. 
*/ + /* color coding to easily identify if cycle search is needed */ + color_t *color; /*!< Pointer to current coloring. */ + boolean_t wet_paint; /*!< TRUE if color changed recently. */ +} ccdg_edge_t; + +/*! \struct ccdg_node + * \brief Object describing a vertex of the complete channel dependency graph. + */ +typedef struct ccdg_node { + /* channel_id is similar to a guid of a node */ + channel_t channel_id; /*!< Identifies LID and port of both ends. */ + uint8_t num_edges; /*!< Number of edges attached to this vertex. */ + ccdg_edge_t *edges; /*!< Array of edges or channel dependencies. */ + /* needed for Dijkstra's algorithm on the cdg */ + network_link_t *corresponding_netw_link; /*!< For fast access. */ + uint64_t distance; /*!< Current path length w.r.t edge weights. */ + size_t heap_index; /*!< Helper index for the d-ary heap. */ + /* color coding to easily identify if cycle search is needed */ + color_t *color; /*!< Pointer to current coloring. */ + boolean_t wet_paint; /*!< TRUE if color changed in this iteration. */ + /* for cycle search in cdg */ + uint8_t status; /*!< Helper for iterative cycle search. */ + uint8_t next_edge_idx; /*!< Save next edge to check after using pre. */ + struct ccdg_node *pre; /*!< Track traversal in cycle search in cCDG. */ +} ccdg_node_t; + +/*! \struct ccdg + * \brief Stores the complete channel dependency graph (nodes, edges, etc). + */ +typedef struct ccdg { + uint32_t num_nodes; /*!< Number of nodes in the complete CDG. */ + ccdg_node_t *nodes; /*!< Array storing nodes of the complete CDG. */ + uint32_t num_colors; /*!< Size of the color array. */ + color_t *color_array; /*!< Distinguish disjoint acyclic sub-CDGs. */ + cl_heap_t heap; /*!< Heap object for faster Dijkstra's algo. */ +} ccdg_t; + +/*! \struct backtracking_candidate + * \brief Stores potential alternative paths in the local backtracking step. 
+ */ +typedef struct backtracking_candidate { + network_link_t *link_to_adj_netw_node; + ccdg_node_t *orig_used_ccdg_node_for_adj_netw_node; +} backtracking_candidate_t; + +/*! \struct nue_context + * \brief Primary structure for Nue (storing graph, cCDG, destinations, etc). + */ +typedef struct nue_context { + /* external parts */ + osm_routing_engine_type_t routing_type; /*!< Name of routing engine. */ + osm_ucast_mgr_t *mgr; /*!< Pointer to osm management object. */ + /* internal parts */ + network_t network; /*!< Network object storing fabric copy. */ + ccdg_t ccdg; /*!< Complete CDG object for the fabric. */ + uint16_t num_destinations[IB_MAX_NUM_VLS]; /*!< Number of desti. */ + ib_net16_t *destinations[IB_MAX_NUM_VLS]; /*!< Array of desti. */ + uint8_t max_vl; /*!< Highest common #VL supported by all. */ + uint8_t max_lmc; /*!< Highest supported LMC across fabric. */ + uint8_t *dlid_to_vl_mapping; /*!< Store VLs to serve path_sl requ. */ +} nue_context_t; + +#if defined (ENABLE_METIS_FOR_NUE) +/*! \struct metis_context + * \brief Complete information about fabric graph to perform partitioning. + */ +typedef struct metis_context { + idx_t nvtxs[1]; /*!< Number of vertices in the graph. */ + idx_t ncon[1]; /*!< Number of balancing constraints. */ + idx_t *xadj; /*!< Number of adjacent nodes per node. */ + idx_t *adjncy; /*!< Array of adjacent nodes per node. */ + idx_t nparts[1]; /*!< Number of parts to split the graph into. */ + idx_t options[METIS_NOPTIONS]; /*!< Array of METIS options. */ + idx_t objval[1]; /*!< Stores the edge-cut of the partitioning. */ + idx_t *part; /*!< Stores partitioning vector of the graph. */ +} metis_context_t; +#endif + +/*************** predefine all internal functions ********************* + **********************************************************************/ +/*! 
\fn add_ccdg_edge_betw_nodes_to_colored_subccdg(const ccdg_t *, + * const ccdg_node_t *, + * const ccdg_node_t *, + * ccdg_edge_t *) + * \brief This fn changes the color pointer of a cCDG edge to the same color + * as the input ccdg_node1 and sets its wet_paint flag to TRUE. + * + * \param[in] ccdg The cCDG object. + * \param[in] ccdg_node1 A vertex of the cCDG. + * \param[in] ccdg_node2 An adjacent vertex in the cCDG. + * \param[in,out] ccdg_edge The edge from ccdg_node1 to ccdg_node2 vertex, or + * NULL which causes the fn to search for the edge. + * The color of this edge will be changed. + * \return NONE + */ +static inline void +add_ccdg_edge_betw_nodes_to_colored_subccdg(const ccdg_t *, + const ccdg_node_t *, + const ccdg_node_t *, + ccdg_edge_t *); + +/*! \fn add_ccdg_node_to_colored_subccdg(const ccdg_t *, + * const ccdg_node_t *, + * ccdg_node_t *) + * \brief This fn colors ccdg_node2 in the same color as ccdg_node1 by changing + * its color pointer, and also colors the edge in between them. + * + * \param[in] ccdg The cCDG object. + * \param[in] ccdg_node1 A vertex of the cCDG. + * \param[in,out] ccdg_node2 An adjacent vertex in the cCDG. The color of this + * vertex will be changed. + * \return NONE + */ +static inline void +add_ccdg_node_to_colored_subccdg(const ccdg_t *, + const ccdg_node_t *, + ccdg_node_t *); + +/*! \fn add_link_to_stack_of_used_links(network_node_t *, + * network_link_t *) + * \brief Appends the input link to the stack contained in the network node + * but only if the link is not already in the stack. + * + * \param[in,out] network_node A network node (switch) object of Nue. + * \param[in] link A link whose tail ends in this network_node. + * \return NONE + */ +static inline void +add_link_to_stack_of_used_links(network_node_t *, + network_link_t *); + +/*! 
\fn attempt_local_backtracking(const osm_ucast_mgr_t *, + * const network_t *, + * const network_node_t *, + * const ccdg_t *, + * const int32_t) + * \brief Check alternative paths within a small radius to find and use valid + * channel dependencies which won't close a cycle in the cCDG. + * + * Function description: A detailed description of this function can be found + * in Section 6.2.6.2 / Algorithm 6.5 of reference [2] (see abstract at the + * beginning of this file). + * + * \param[in] mgr The management object of OpenSM. + * \param[in] network Nue's network object storing the subnet. + * \param[in] source_netw_node Current source for Dijkstra's on cCDG should be + * discarded from the list of candidates. + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in] color Color of current iteration to prevent cycles. + * \return Pointer to a cCDG node if the backtracking was successful, or NULL + * otherwise. + */ +static ccdg_node_t * +attempt_local_backtracking(const osm_ucast_mgr_t *, + const network_t *, + const network_node_t *, + const ccdg_t *, + const int32_t); + +/*! \fn attempt_shortcut_discovery(const osm_ucast_mgr_t *, + * const network_t *, + * const network_node_t *, + * const ccdg_t *, + * const ccdg_node_t *, + * const int32_t color) + * \brief Check for alternative paths or shortcuts through the cCDG (w.r.t + * weight-based distance) after successfully solving an impasse. + * + * Function description: A detailed description of this function can be found + * in Section 6.2.6.3 / Algorithm 6.6 of reference [2] (see abstract at the + * beginning of this file). + * + * \param[in] mgr The management object of OpenSM. + * \param[in] network Nue's network object storing the subnet. + * \param[in] potential_shortcut_netw_node Former undiscovered node. + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in] potential_shortcut_ccdg_node Used cCDG vertex to reach former + * undiscovered node. 
+ * \param[in] color Color of current iteration to prevent cycles. + * \return TRUE if a shortcut was found, or FALSE otherwise. + */ +static boolean_t +attempt_shortcut_discovery(const osm_ucast_mgr_t *, + const network_t *, + const network_node_t *, + const ccdg_t *, + const ccdg_node_t *, + const int32_t color); + +/*! \fn build_complete_cdg(const osm_ucast_mgr_t *, + * const network_t *, + * ccdg_t *, + * const uint32_t) + * \brief Parses the subnet and constructs the complete channel dependency + * graph for this subnet while considering all switch-to-switch links. + * + * Function description: Conceptually, the complete channel dependency graph + * is very similar to the commonly known channel dependency graph (CDG), which + * is created by following calculated routes and connecting vertices (channels + * representing subnet links) if and only if the two corresponding links are + * used by a path from source to target. However, the complete CDG (cCDG) does + * not require actual paths (or assumes every possible path) and connects all + * pairs of cCDG vertices when the corresponding two links are attached to the + * same network switch. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] network Nue's network object storing the subnet. + * \param[in,out] ccdg Nue's internal object storing the complete CDG. + * \param[in] total_num_sw_to_sw_links Number of switch-to-switch links. + * \return Integer 0 for successful cCDG creation, or any integer unequal to 0 + * otherwise. + */ +static int +build_complete_cdg(const osm_ucast_mgr_t *, + const network_t *, + ccdg_t *, + const uint32_t); + +/*! \fn calculate_convex_subnetwork(const osm_ucast_mgr_t *, + * const network_t *, + * ib_net16_t *, + * const uint16_t) + * \brief The fn determines the convex hull of a subset of network nodes. + * + * Function description: The function determines the convex hull of a subset of + * nodes of the network. 
This convex hull is the enclosure of all shortest + * paths between these nodes, therefore we calculate a spanning tree from each + * node and which is traversed in the opposite direction to collect all nodes + * along the shortest paths. The result is the assignment of TRUE to the + * in_convex_hull flag for each node in the convex hull, and FALSE otherwise. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] network Nue's network object storing the subnet. + * \param[in] destinations Destinations for the routing in the current VL. + * \param[in] num_destinations Number of destinations in the destination array. + * \return Integer 0 if calculation was sucessful, or any integer unequal to 0 + * otherwise. + */ +static int +calculate_convex_subnetwork(const osm_ucast_mgr_t *, + const network_t *, + ib_net16_t *, + const uint16_t); + +/*! \fn calculate_spanning_tree_in_network(const osm_ucast_mgr_t *, + * network_t *, + * network_node_t *) + * \brief Calculates a trivial spanning tree for the network. + * + * Function description: The fn operates similar to Dijkstra's algorithm and + * the spanning tree calculation also includes the link weights. The reason + * to consider these link weights is that we don't end up with similar escape + * paths for each virtual layer, since weights change after each routing step. + * The spanning tree is temporarily stored in the escape_path parameter of + * each network node. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] network Nue's network object storing the subnet. + * \param[in] root_node A network node defining the root of the spanning tree. + * \return Integer 0 if calculation was sucessful, or any integer unequal to 0 + * otherwise. + */ +static int +calculate_spanning_tree_in_network(const osm_ucast_mgr_t *, + network_t *, + network_node_t *); + +/*! 
\fn change_fake_ccdg_node_color(const ccdg_t *, + * ccdg_node_t *, + * const int32_t) + * \brief Assigns the right color (colored subgraph) to a temporarily added + * fake channel in the cCDG, which acts as source for Dijkstra's algo. + * + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in] ccdg_node A fake node which acts as Dijkstra's source. + * \param[in] color Color of current iteration to prevent cyclic subgraphs. + * \return NONE + */ +static inline void +change_fake_ccdg_node_color(const ccdg_t *, + ccdg_node_t *, + const int32_t); + +/*! \fn compare_backtracking_candidates_by_distance(const void *, + * const void *) + * \brief Comparator for backtracking candidates of cCDG vertices w.r.t their + * distance parameter. Assumed input type: `backtracking_candidate_t *' + * + * \param[in] btc1 A cCDG vertex. + * \param[in] btc2 A second cCDG vertex. + * \return Negative value for param1 < param2, positive for '>', or 0 for '='. + */ +static inline int +compare_backtracking_candidates_by_distance(const void *, + const void *); + +/*! \fn compare_ccdg_nodes_by_channel_id(const void *, + * const void *) + * \brief Comparator for cCDG vertices w.r.t their assigned channel identifier + * (LIDs/ports combination). Assumed input type: `ccdg_node_t *' + * + * \param[in] cn1 A cCDG vertex. + * \param[in] cn2 A second cCDG vertex. + * \return Negative value for param1 < param2, positive for '>', or 0 for '='. + */ +static inline int +compare_ccdg_nodes_by_channel_id(const void *, + const void *); + +/*! \fn compare_lids(const void *, + * const void *) + * \brief Comparator for two subnet local identifiers (LID). Assumed input + * type: `ib_net16_t *' + * + * \param[in] l1 A LID of a network component (CA or switch). + * \param[in] l2 A second LID of a network component. + * \return Negative value for param1 < param2, positive for '>', or 0 for '='. + */ +static int +compare_lids(const void *, + const void *); + +/*! 
\fn compare_network_nodes_by_lid(const void *, + * const void *) + * \brief Comparator of two network nodes (switches) w.r.t their LIDs. Assumed + * input type: `network_node_t *' + * + * \param[in] n1 A network node of Nue's internal network object. + * \param[in] n2 A second network node. + * \return Negative value for param1 < param2, positive for '>', or 0 for '='. + */ +static int +compare_network_nodes_by_lid(const void *, + const void *); + +/*! \fn compare_two_channel_id(const void *, + * const void *) + * \brief Comparator of two channel IDs (bit field identifying all parts of a + * link). Assumed input type: `channel_t *' + * + * \param[in] c1 A channel identifier. + * \param[in] c2 A second channel ID. + * \return Negative value for param1 < param2, positive for '>', or 0 for '='. + */ +static int +compare_two_channel_id(const void *, + const void *); + +/*! \fn construct_ccdg(ccdg_t *) + * \brief Set all ccdg_t struct parameters to 0, and call cl_heap_construct + * for the heap element in the struct afterwards. + * + * \param[in,out] ccdg Nue's internal object supposed to store a complete CDG. + * \return NONE + */ +static inline void +construct_ccdg(ccdg_t *); + +/*! \fn construct_ccdg_edge(ccdg_edge_t *) + * \brief Set all ccdg_edge_t struct parameters to 0. + * + * \param[in,out] edge An edge of the complete channel dependency graph. + * \return NONE + */ +static inline void +construct_ccdg_edge(ccdg_edge_t *); + +/*! \fn construct_ccdg_node(ccdg_node_t *) + * \brief Set all ccdg_node_t struct parameters to 0. + * + * \param[in,out] node A vertex of the complete channel dependency graph. + * \return NONE + */ +static inline void +construct_ccdg_node(ccdg_node_t *); + +#if defined (ENABLE_METIS_FOR_NUE) +/*! \fn construct_metis_context(metis_context_t *) + * \brief Set all metis_context_t struct parameters to 0. + * + * \param[in,out] metis_ctx The context which will hold the input for METIS. 
+ * \return NONE + */ +static inline void +construct_metis_context(metis_context_t *); +#endif + +/*! \fn construct_network_link(network_link_t *) + * \brief Set all network_link_t struct parameters to 0. + * + * \param[in,out] link A link between two network nodes. + * \return NONE + */ +static inline void +construct_network_link(network_link_t *); + +/*! \fn construct_network(network_t *) + * \brief Set all network_t struct parameters to 0, and call cl_heap_construct + * for the heap element in the struct afterwards. + * + * \param[in,out] network Nue's network object supposed to store the subnet. + * \return NONE + */ +static inline void +construct_network(network_t *); + +/*! \fn construct_network_node(network_node_t *) + * \brief Set all network_node_t struct parameters to 0. + * + * \param[in,out] node A network node of Nue's internal subnet representation. + * \return NONE + */ +static inline void +construct_network_node(network_node_t *); + +/*! \fn create_context(nue_context_t *) + * \brief This fn calls the constructors for the network and ccdg structs, as + * well as allocates arrays to store destinations and VL mappings. + * + * \param[in,out] nue_ctx Nue's context storing graph, cCDG, destinations, etc. + * \return Integer 0 if context initialization was successful, or any integer + * unequal to 0 otherwise. + */ +static int +create_context(nue_context_t *); + +/*! \fn destroy_ccdg(ccdg_t *) + * \brief All allocated memory within the ccdg_t struct is freed, and + * cl_heap_destroy is called for the heap element in the struct. + * + * \param[in,out] ccdg Nue's internal object storing the complete CDG. + * \return NONE + */ +static inline void +destroy_ccdg(ccdg_t *); + +/*! \fn destroy_ccdg_node(ccdg_node_t *) + * \brief All allocated memory within the ccdg_node_t struct is freed. + * + * \param[in,out] node A vertex of the complete channel dependency graph. + * \return NONE + */ +static inline void +destroy_ccdg_node(ccdg_node_t *); + +/*! 
\fn destroy_context(nue_context_t *) + * \brief All allocated memory within the nue_context_t struct is freed. + * + * \param[in,out] nue_ctx Nue's context storing graph, cCDG, destinations, etc. + * \return NONE + */ +static void +destroy_context(nue_context_t *); + +#if defined (ENABLE_METIS_FOR_NUE) +/*! \fn destroy_metis_context(metis_context_t *) + * \brief All allocated memory within the metis_context_t struct is freed. + * + * \param[in,out] metis_ctx The context which holds the input arrays for METIS. + * \return NONE + */ +static inline void +destroy_metis_context(metis_context_t *); +#endif + +/*! \fn destroy_network(network_t *) + * \brief All allocated memory within the network_t struct is freed, and + * cl_heap_destroy is called for the heap element in the struct. + * + * \param[in,out] network Nue's network object storing the subnet. + * \return NONE + */ +static inline void +destroy_network(network_t *); + +/*! \fn destroy_network_node(network_node_t *) + * \brief All allocated memory within the network_node_t struct is freed. + * + * \param[in,out] node A network node (switch) object of Nue. + * \return NONE + */ +static inline void +destroy_network_node(network_node_t *); + +/*! \fn determine_num_adj_terminals_in_convex_hull(const osm_ucast_mgr_t *, + * const network_t *, + * ib_net16_t *, + * const uint16_t) + * \brief Counting the number of terminals (CAs) attached to each switch which + * itself is member of the convex hull (LMC ignored here). + * + * \param[in] mgr The management object of OpenSM. + * \param[in] network Nue's network object storing the subnet. + * \param[in] destinations Destinations for the routing in the current VL. + * \param[in] num_destinations Number of destinations in the destination array. + * \return NONE + */ +static void +determine_num_adj_terminals_in_convex_hull(const osm_ucast_mgr_t *, + const network_t *, + ib_net16_t *, + const uint16_t); + +/*! 
\fn distribute_lids_onto_virtual_layers(nue_context_t *, + * const boolean_t) + * \brief This fn assigns destination LIDs to different virtual layers. + * + * Function description: The fn is redirecting the distribution of routing + * destination to either distribute_lids_with_metis if METIS was found during + * OpenSM installation, or distribute_lids_semi_randomly otherwise. + * + * \param[in] nue_ctx Nue's context storing graph, cCDG, destinations, etc. + * \param[in] include_sw Whether or not to consider switches as destinations. + * \return Integer 0 if distribution was successful, or any integer unequal to 0 + * otherwise. + */ +static inline int +distribute_lids_onto_virtual_layers(nue_context_t *, + const boolean_t); + +#if defined (ENABLE_METIS_FOR_NUE) +/*! \fn distribute_lids_with_metis(nue_context_t *, + * const boolean_t) + * \brief This fn uses METIS to partition the subnet into #VL parts and then + * assigns LIDs to different layers according to the partitioning. + * + * \param[in] nue_ctx Nue's context storing graph, cCDG, destinations, etc. + * \param[in] include_sw Whether or not to consider switches as destinations. + * \return Integer 0 if distribution was successful, or any integer unequal to 0 + * otherwise. + */ +static int +distribute_lids_with_metis(nue_context_t *, + const boolean_t); +#else +/*! \fn distribute_lids_semi_randomly(nue_context_t *, + * const boolean_t) + * \brief This fn randomly assigns destinations to different virtual layers. + * + * \param[in] nue_ctx Nue's context storing graph, cCDG, destinations, etc. + * \param[in] include_sw Whether or not to consider switches as destinations. + * \return Integer 0 if distribution was successful, or any integer unequal to 0 + * otherwise. + */ +static int +distribute_lids_semi_randomly(nue_context_t *, + const boolean_t); +#endif + +/*! 
\fn dry_ccdg_edge_color_betw_nodes(const ccdg_t *, + * const ccdg_node_t *, + * const ccdg_node_t *) + * \brief Change the status of the color for a cCDG edge from temporary to + * permanent, which we call 'drying' the color. + * + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in] ccdg_node1 A vertex of the cCDG. + * \param[in] ccdg_node2 An adjacent vertex in the cCDG. + * \return NONE + */ +static inline void +dry_ccdg_edge_color_betw_nodes(const ccdg_t *, + const ccdg_node_t *, + const ccdg_node_t *); + +/*! \fn dry_ccdg_node_color(ccdg_node_t *) + * \brief Change the status of the color for a cCDG vertex from temporary to + * permanent, which we call 'drying' the color. + * + * \param[in,out] ccdg_node A vertex of the complete CDG. + * \return NONE + */ +static inline void +dry_ccdg_node_color(ccdg_node_t *); + +/*! \fn fix_ccdg_colors(const osm_ucast_mgr_t *, + * const network_t *, + * const network_node_t *, + * const ccdg_t *, + * const ccdg_node_t *) + * \brief Change the status of all colors of cCDG vertices and edges, which are + * actually used after the routing step, from temporary to permanent. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] network Nue's network object storing the subnet. + * \param[in] source_netw_node Current source node for Dijkstra's on the cCDG. + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in] source_ccdg_node Current source cCDG vertex for Dijkstra's algo. + * \return NONE + */ +static void +fix_ccdg_colors(const osm_ucast_mgr_t *, + const network_t *, + const network_node_t *, + const ccdg_t *, + const ccdg_node_t *); + +/*! \fn fix_ccdg_edge_color(ccdg_edge_t *) + * \brief Overwrite the (tmp) color pointer of the cCDG edge with the real + * color after merging individually colored cCDG subgraphs. + * + * \param[in,out] ccdg_edge An edge of the cCDG. + * \return NONE + */ +static inline void +fix_ccdg_edge_color(ccdg_edge_t *); + +/*! 
\fn fix_ccdg_node_color(ccdg_node_t *) + * \brief Overwrite the (tmp) color pointer of the cCDG vertex with the real + * color after merging individually colored cCDG subgraphs. + * + * \param[in,out] ccdg_node A vertex of the cCDG. + * \return NONE + */ +static inline void +fix_ccdg_node_color(ccdg_node_t *); + +/*! \fn found_path_between_ccdg_nodes_in_subgraph(const osm_ucast_mgr_t *, + * const ccdg_t *, + * ccdg_node_t *, + * const ccdg_node_t *, + * const int32_t) + * \brief Search a specifically colored subgraph of the complete CDG for a + * path from a given source vertex to a target vertex. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in] source A source vertex in the cCDG to start the search. + * \param[in] target A target vertex of the cCDG we are looking for. + * \param[in] color The color ID of the subgraph which should be searched. + * \return TRUE if path from source to target was found, or FALSE otherwise. + */ +static boolean_t +found_path_between_ccdg_nodes_in_subgraph(const osm_ucast_mgr_t *, + const ccdg_t *, + ccdg_node_t *, + const ccdg_node_t *, + const int32_t); + +/*! \fn get_base_lids_and_number_of_lids(nue_context_t *) + * \brief Count the total number of CAs (or LIDs for lmc>0) in the fabric. + * + * Function description: The resulting number even includes base/enhanced + * switch port 0 (base SP0 will have lmc=0), hence total number of LIDs. + * All found LIDs (regardless of CA or switch) are stored for later use in the + * nue_ctx->destinations[0] array which will be post-processed by other fn. + * + * \param[in,out] nue_ctx Nue's context storing graph, cCDG, destinations, etc. + * \return Total number of LIDs in the fabric. + */ +static uint64_t +get_base_lids_and_number_of_lids(nue_context_t *); + +/*! 
\fn get_ccdg_edge_betw_nodes(const ccdg_node_t *, + * const ccdg_node_t *) + * \brief This is a helper fn to access cCDG edges, iterating over the input + * vertex1's edge list searching for the input vertex2. + * + * \param[in] ccdg_node1 A cCDG vertex. + * \param[in] ccdg_node2 A second cCDG vertex. + * \return Pointer to the cCDG edge between the input cCDG nodes, or NULL if + * none exist because these two nodes are not adjacent. + */ +static ccdg_edge_t * +get_ccdg_edge_betw_nodes(const ccdg_node_t *, + const ccdg_node_t *); + +/*! \fn get_ccdg_edge_color_betw_nodes(const ccdg_t *, + * const ccdg_node_t *, + * const ccdg_node_t *, + * ccdg_edge_t *) + * \brief The fn determines the currently assigned color for a cCDG edge (if + * NULL, then search for the correct edge between the given vertices). + * + * Function description: Caller must make sure the edge exists and/or the + * two input cCDG vertices are actually adjacent (with edge: vertex1->vertex2) + * since no verification is performed when OpenSM is executed in non-debug + * mode. Hence, a careless approach may lead to segmentation faults. + * + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in] ccdg_node1 A cCDG vertex. + * \param[in] ccdg_node2 A second cCDG vertex. + * \param[in] ccdg_edge An edge of the complete CDG, or NULL. + * \return The color ID associated with the cCDG edge between the input nodes. + */ +static inline uint16_t +get_ccdg_edge_color_betw_nodes(const ccdg_t *, + const ccdg_node_t *, + const ccdg_node_t *, + ccdg_edge_t *); + +/*! \fn get_ccdg_edge_color(const ccdg_t *, + * const ccdg_edge_t *) + * \brief The fn returns the currently assigned color ID for a cCDG edge. + * + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in] ccdg_edge An edge of the complete CDG. + * \return The color ID associated with an input cCDG edge. + */ +static inline uint16_t +get_ccdg_edge_color(const ccdg_t *, + const ccdg_edge_t *); + +/*! 
\fn get_ccdg_node_by_channel_id(const ccdg_t *, + * const channel_t) + * \brief This fn uses the stdlib to find (via binary search) a cCDG vertex + * in a sorted array of vertices. + * + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in] c_id Channel ID bit field identifying all parts of a subnet link. + * \return Pointer to the cCDG node associated with the channel ID (LIDs/ports + * combination), or NULL if no match is found. + */ +static inline ccdg_node_t * +get_ccdg_node_by_channel_id(const ccdg_t *, + const channel_t); + +/*! \fn get_ccdg_node_color(const ccdg_t *, + * const ccdg_node_t *) + * \brief The fn returns the currently assigned color ID for a cCDG vertex. + * + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in] ccdg_node A vertex of the complete channel dependency graph. + * \return The color ID associated with an input cCDG node. + */ +static inline uint16_t +get_ccdg_node_color(const ccdg_t *, + const ccdg_node_t *); + +/*! \fn get_central_node_wrt_subnetwork(const osm_ucast_mgr_t *, + * const network_t *, + * ib_net16_t *, + * const uint16_t, + * network_node_t **, + * uint16_t *) + * \brief This fn implements a slightly modified version of Brandes' algorithm + * for betweenness centrality. + * + * Function description: The slightly modified calculation of the betweenness + * centrality problem is based on the algorithm described by U. Brandes "A + * Faster Algorithm for Betweenness Centrality" in Journal of Mathematical + * Sociology, 2001. We calculate the betweenness value only for switches of + * the subnet, since terminals shouldn't be the most central node w.r.t a + * convex hull anyways. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] network Nue's network object storing the subnet. + * \param[in] destinations Destinations for routing in the current VL. + * \param[in] num_destinations Number of destinations in the array. 
+ * \param[out] central_node Pointer to the network node determined as + * most central w.r.t the given convex hull. + * \param[out] central_node_index Index of this most central node in the node + * array of the network object. + * \return Integer 0 if calculation was successful, or any integer unequal to 0 + * otherwise. + */ +static int +get_central_node_wrt_subnetwork(const osm_ucast_mgr_t *, + const network_t *, + ib_net16_t *, + const uint16_t, + network_node_t **, + uint16_t *); + +/*! \fn get_inverted_channel_id(const channel_t) + * \brief Swap the elements of a channel ID (LID1|P1->LID2|P2) to determine + * the reverse channel (LID2|P2->LID1|P1) of it. + * + * \param[in] in_channel Channel ID bit field identifying all parts of a link. + * \return A new channel ID with inverted head/tail of the corresponding link. + */ +static inline channel_t +get_inverted_channel_id(const channel_t); + +/*! \fn get_lid(const ib_net16_t *, + * const uint32_t, + * const ib_net16_t) + * \brief This fn uses the stdlib to find (via binary search) a subnet LID + * in a sorted array of (destinations) LIDs. + * + * \param[in] lid_array Sorted array of LIDs to perform the search on. + * \param[in] num_lids Number of LIDs in the given array. + * \param[in] lid Target LID to search for. + * \return Pointer to a LID in the array of destinations. + */ +static inline ib_net16_t * +get_lid(const ib_net16_t *, + const uint32_t, + const ib_net16_t); + +/*! \fn get_max_num_vls(const osm_ucast_mgr_t *) + * \brief This fn determines the largest number of VLs, supported by all nodes + * in the subnet, or returns the user supplied number (if it is smaller). + * + * \param[in] mgr The management object of OpenSM. + * \return The largest common number of virtual lanes which is supported by all + * nodes in the subnet, or any smaller number requested by the user. + */ +static uint8_t +get_max_num_vls(const osm_ucast_mgr_t *); + +/*! 
\fn get_network_node_by_lid(const network_t *, + * const ib_net16_t) + * \brief This fn uses the stdlib to find (via binary search) a network node + * for a given LID in a sorted array of network nodes. + * + * \param[in] network Nue's network object storing the subnet. + * \param[in] lid Target LID of the network node to search for. + * \return Pointer to a network object (switch) associated with the input LID. + */ +static inline network_node_t * +get_network_node_by_lid(const network_t *, + const ib_net16_t); + +/*! \fn get_switch_lid(const osm_ucast_mgr_t *, + * const ib_net16_t) + * \brief The fn returns the input LID, assuming it belongs to a subnet switch, + * or the LID of the adjacent switch, if the input belongs to a CA. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] lid Local identifier of a subnet component (either CA or switch). + * \return The LID of a switch in the subnet. + */ +static inline ib_net16_t +get_switch_lid(const osm_ucast_mgr_t *, + const ib_net16_t); + +/*! \fn init_ccdg_colors(const ccdg_t *) + * \brief Iterates over all cCDG vertices and edges to set the color ID/Ptr + * into the UNUSED state. + * + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \return NONE + */ +static void +init_ccdg_colors(const ccdg_t *); + +/*! \fn init_ccdg_edge(ccdg_edge_t *, + * const channel_t) + * \brief Initialize the to_channel_id parameter of the cCDG edge with a given + * ID identifying the channel ID of the tail vertex of this edge. + * + * \param[in,out] edge An edge of the complete CDG. + * \param[in] to_cid Channel ID bit field identifying all parts of a link. + * \return NONE + */ +static inline void +init_ccdg_edge(ccdg_edge_t *, + const channel_t); + +/*! \fn init_ccdg_edge_color(const ccdg_t *, + * ccdg_edge_t *) + * \brief Initialize a cCDG edge to set the color ID/Ptr into the UNUSED state. + * + * \param[in] ccdg Nue's internal object storing the complete CDG. 
+ * \param[in,out] ccdg_edge An edge of the complete CDG. + * \return NONE + */ +static inline void +init_ccdg_edge_color(const ccdg_t *, + ccdg_edge_t *); + +/*! \fn init_ccdg_escape_path_edge_color_betw_nodes(const ccdg_t *, + * const ccdg_node_t *, + * const ccdg_node_t *) + * \brief Initialize a cCDG edge to set the color ID/Ptr into the + * ESCAPEPATHCOLOR state. The edge is determined by the given vertices. + * + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in] ccdg_node1 A cCDG vertex. + * \param[in] ccdg_node2 A second, adjacent cCDG vertex. + * \return NONE + */ +static inline void +init_ccdg_escape_path_edge_color_betw_nodes(const ccdg_t *, + const ccdg_node_t *, + const ccdg_node_t *); + +/*! \fn init_ccdg_escape_path_node_color(const ccdg_t *, + * ccdg_node_t *) + * \brief Initialize a cCDG vertex to set the color ID/Ptr into the + * ESCAPEPATHCOLOR state. + * + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in] ccdg_node A complete CDG vertex. + * \return NONE + */ +static inline void +init_ccdg_escape_path_node_color(const ccdg_t *, + ccdg_node_t *); + +/*! \fn init_ccdg_node(ccdg_node_t *, + * const channel_t, + * const uint8_t, + * ccdg_edge_t *, + * network_link_t *) + * \brief Initialize most parameters of a cCDG vertex describing the graph + * structure of the complete channel dependency graph. + * + * \param[in,out] node A complete CDG vertex. + * \param[in] channel_id Channel ID bit field identifying parts of a link. + * \param[in] num_edges Number of outgoing cCDG edges from this vertex. + * \param[in] edges Array of outgoing cCDG edges. + * \param[in] corresponding_netw_link Pointer to the network link which is + * equivalent to the CDG vertex. + * \return NONE + */ +static inline void +init_ccdg_node(ccdg_node_t *, + const channel_t, + const uint8_t, + ccdg_edge_t *, + network_link_t *); + +/*! 
\fn init_ccdg_node_color(const ccdg_t *, + * ccdg_node_t *) + * \brief Initialize a cCDG vertex to set the color ID/Ptr into the UNUSED + * state. + * + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in,out] ccdg_node A complete CDG vertex. + * \return NONE + */ +static inline void +init_ccdg_node_color(const ccdg_t *, + ccdg_node_t *); + +/*! \fn init_linear_forwarding_tables(const osm_ucast_mgr_t *, + * const network_t *) + * \brief Initialize the new_lft array of a given network node (and corresp. + * subnet switch) with OSM_NO_PATH, except for management port 0 of the switch. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] network Nue's network object storing the subnet. + * \return NONE + */ +static void +init_linear_forwarding_tables(const osm_ucast_mgr_t *, + const network_t *); + +#if defined (ENABLE_METIS_FOR_NUE) +/*! \fn init_metis_context(metis_context_t *, + * const idx_t, + * const idx_t, + * const idx_t, + * const idx_t) + * \brief Initialize basic information and options for a future METIS call to + * partition the subnet. + * + * \param[in,out] metis_ctx The context which holds the input parameters and + * arrays for METIS. + * \param[in] nvtxs The number of vertices in the graph. + * \param[in] nparts The number of parts to partition the graph. + * \param[in] seed Seed to overwrite METIS_OPTION_SEED. + * \param[in] numbering Initializing value for METIS_OPTION_NUMBERING. + * \return NONE + */ +static inline void +init_metis_context(metis_context_t *, + const idx_t, + const idx_t, + const idx_t, + const idx_t); +#endif + +/*! \fn init_network_link(network_link_t *, + * const ib_net16_t, + * const uint8_t, + * const ib_net16_t, + * const uint8_t, + * const uint64_t) + * \brief Initialize the link information (associated LIDs/ports) and weight + * for the internal representation of a subnet link. + * + * \param[in,out] link A link between two network nodes. 
+ * \param[in] loc_lid LID of the head side of the switch-to-switch link. + * \param[in] loc_port Switch port of the head of the link. + * \param[in] rem_lid LID of the tail side of the switch-to-switch link. + * \param[in] rem_port Switch port of the tail of the link. + * \param[in] weight Initializing balancing weight value for this link. + * \return NONE + */ +static inline void +init_network_link(network_link_t *, + const ib_net16_t, + const uint8_t, + const ib_net16_t, + const uint8_t, + const uint64_t); + +/*! \fn init_network_node(network_node_t *, + * const ib_net16_t, + * const ib_net64_t, + * const uint8_t, + * const uint8_t, + * const uint8_t, + * network_link_t *, + * osm_switch_t *) + * \brief Initialize most parameters of a network node (subnet switch) to store + * information such as attached CAs, links, LID, etc. + * + * \param[in,out] node Network node which represents a subnet switch. + * \param[in] lid The base local identifier (LID) of this switch. + * \param[in] guid The global unique identifier of this switch. + * \param[in] num_base_lids Number of base LIDs of attached CAs. + * \param[in] num_lids Number of 'virtual' CAs (by considering LMC). + * \param[in] num_links Number of healthy switch-to-switch links. + * \return Integer 0 if initialization and memory allocation was successful, or + * any integer unequal to 0 otherwise. + */ +static inline int +init_network_node(network_node_t *, + const ib_net16_t, + const ib_net64_t, + const uint8_t, + const uint8_t, + const uint8_t, + network_link_t *, + osm_switch_t *); + +/*! \fn mark_escape_paths(const osm_ucast_mgr_t *, + * network_t *, + * const ccdg_t *, + * ib_net16_t *, + * const uint16_t, + * const boolean_t) + * \brief Calculates a set of 'escape paths' for Nue as fallback option, + * similar to an Up/Down routing tree, in case of unsuccessful routing. 
+ * + * Function description: The function derives the escape paths from a spanning + * tree rooted at the most central node w.r.t the destination nodes in the + * current virtual layer. Escape paths are initial channel dependencies which + * aren't to be 'broken', meaning: they are virtual paths building a backbone + * in case Nue runs into an impasse and can't find all routes towards one + * destination LID. Obviously too many fallbacks would overload the backbone + * and result in worse network throughput similar to an Up/Down routing. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] network Nue's network object storing the subnet. + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in] destinations Destinations for the routing in the current VL. + * \param[in] num_destinations Number of destinations in the destination array. + * \param[in] verify_network_integrity If TRUE, a sanity check is performed to + * determine subnet connectivity issues. + * \return Integer 0 if calculation was successful, or any integer unequal to 0 + * otherwise. + */ +static int +mark_escape_paths(const osm_ucast_mgr_t *, + network_t *, + const ccdg_t *, + ib_net16_t *, + const uint16_t, + const boolean_t); + +/*! \fn mcast_cleanup(const network_t *, + * cl_qlist_t *) + * \brief Reset is_mc_member and num_of_mcm of all network nodes for future + * computations and calls osm_mcast_drop_port_list for the mcast group. + * + * \param[in] network Nue's network object storing the subnet. + * \param[in] mcastgrp_port_list List of ports being member in the mcast group. + * \return NONE + */ +static inline void +mcast_cleanup(const network_t *, + cl_qlist_t *); + +/*! \fn merge_two_colored_subccdg_by_nodes(const ccdg_t *, + * const ccdg_node_t *, + * ccdg_node_t *) + * \brief Merges two uniquely colored, disjoint, acyclic subgraphs of the cCDG + * into one acyclic subgraph (resulting color defined by ccdg_node1). 
+ * + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in] ccdg_node1 A vertex of the cCDG. + * \param[in,out] ccdg_node2 An adjacent vertex in the cCDG. + * \return NONE + */ +static inline void +merge_two_colored_subccdg_by_nodes(const ccdg_t *, + const ccdg_node_t *, + ccdg_node_t *); + +/*! \fn nue_create_context(const osm_opensm_t *, + * const osm_routing_engine_type_t) + * \brief This fn allocates the context for Nue routing and assigns some initial + * parameters for the nue_context_t struct. + * + * \param[in] osm OpenSM's global context object. + * \param[in] routing_type Routing ID (should be OSM_ROUTING_ENGINE_TYPE_NUE). + * \return Pointer to the whole context object for Nue routing, or NULL if + * there was an issue during the allocation process. + */ +static nue_context_t * +nue_create_context(const osm_opensm_t *, + const osm_routing_engine_type_t); + +/*! \fn nue_destroy_context(void *) + * \brief Interface fn exposed to OpenSM - all allocated memory within the + * nue_context_t struct is freed. + * + * \param[in,out] context Pointer to a context object (should be nue_context_t). + * \return NONE + */ +static void +nue_destroy_context(void *); + +/*! \fn nue_discover_network(void *) + * \brief Interface fn exposed to OpenSM - Traverse subnet to gather info about + * connected switches used to create an internal network representation. + * + * \param[in,out] context Pointer to a context object (should be nue_context_t). + * \return Integer 0 if subnet discovery was successful, or any integer unequal + * to 0 otherwise. + */ +static int +nue_discover_network(void *); + +/*! \fn nue_do_mcast_routing(void *, + * osm_mgrp_box_t *) + * \brief Interface fn exposed to OpenSM - Calculates a spanning tree for a + * requested mcast group which will define the mcast forwarding tables. + * + * \param[in,out] context Pointer to a context object (should be nue_context_t). + * \param[in] mbox OpenSM's internal object for mcast requests. 
+ * \return IB_SUCCESS if requested multicast routing tables were calculated + * successfully, or IB_ERROR otherwise. + */ +static ib_api_status_t +nue_do_mcast_routing(void *, + osm_mgrp_box_t *); + +/*! \fn nue_do_ucast_routing(void *) + * \brief Interface fn exposed to OpenSM - Calculates the set of deadlock-free + * ucast forwarding tables for the fabric, regardless of #VL available. + * + * Function description: A detailed description of this function can be found + * in Section 6.2.4 / Algorithm 6.3 of reference [2] (see abstract at the + * beginning of this file). Here is a basic draft of what the function does and + * how the routing on the channel dependency graph works in pseudo code: + * -- Assume N is the set of destinations and k the number of available VLs + * -- Partition N into k disjoint subsets N_1 ,..., N_k of destinations + * -- foreach virtual layer L_i with i in {1 ,..., k} do: + * ----- Create a convex subgraph H_i for N_i + * ----- Identify central n_r in N_i of convex H_i via Brandes' algorithm + * ----- Create a new complete channel dependency graph D_i for layer L_i + * ----- Define escape paths D* in D_i for spanning tree root n_r + * ----- foreach destination node n in N_i do: + * -------- Identify all deadlock-free paths towards n + * -------- Store these paths in ucast forwarding tables + * -------- Update channel weights in D_i for these paths + * + * \param[in,out] context Pointer to a context object (should be nue_context_t). + * \return Integer 0 if unicast routing was successful, or any integer unequal + * to 0 otherwise. + */ +static int +nue_do_ucast_routing(void *); + +/*! \fn nue_get_vl_for_path(void *, + * const uint8_t, + * const ib_net16_t, + * const ib_net16_t) + * \brief Interface fn exposed to OpenSM - The fn returns the virtual layer to + * use for a path towards a given destination. + * + * \param[in] context Ptr to context obj (should be nue_context_t). 
+ * \param[in] hint_for_default_sl Desired/suggested SL in the path request. + * \param[in] slid Source LID for the requested path record. + * \param[in] dlid Destination LID for requested path record. + * \return A virtual lane for the path from source LID to destination LID. + */ +static uint8_t +nue_get_vl_for_path(void *, + const uint8_t, + const ib_net16_t, + const ib_net16_t); + +/*! \fn osm_ucast_nue_setup(struct osm_routing_engine *, + * osm_opensm_t *) + * \brief Interface fn exposed to OpenSM to initialize the Nue routing engine. + * + * \param[in] r OpenSM's internal struct for routings: fn pointers, etc. + * \param[in] osm OpenSM's global context object. + * \return Integer status code (presumably 0 on success - TODO confirm). + */ +int +osm_ucast_nue_setup(struct osm_routing_engine *, + osm_opensm_t *); + +/*! \fn print_ccdg(const osm_ucast_mgr_t *, + * const ccdg_t *, + * const boolean_t) + * \brief Dump the internal cCDG into the OpenSM log file. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in] print_colors If TRUE, then cCDG vertex/edge colors are included. + * \return NONE + */ +static void +print_ccdg(const osm_ucast_mgr_t *, + const ccdg_t *, + const boolean_t); + +/*! \fn print_ccdg_node(const osm_ucast_mgr_t *, + * const ccdg_t *, + * const ccdg_node_t *, + * const uint32_t, + * const boolean_t) + * \brief Dump vertex information and adjacent edges into the OpenSM log file. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in] ccdg_node A vertex of the cCDG. + * \param[in] i Index of the cCDG vertex in the array of vertices. + * \param[in] print_colors If TRUE, then cCDG vertex/edge colors are included. + * \return NONE + */ +static inline void +print_ccdg_node(const osm_ucast_mgr_t *, + const ccdg_t *, + const ccdg_node_t *, + const uint32_t, + const boolean_t); + +/*! 
\fn print_channel_id(const osm_ucast_mgr_t *, + * const channel_t, + * const boolean_t) + * \brief Dump the channel ID of a cCDG vertex into the OpenSM log file or + * into the OpenSM console. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] channel_id Channel ID bit field identifying parts of a link. + * \param[in] console If TRUE, then output is directed to osm console. + * \return NONE + */ +static inline void +print_channel_id(const osm_ucast_mgr_t *, + const channel_t, + const boolean_t); + +/*! \fn print_destination_distribution(const osm_ucast_mgr_t *, + * ib_net16_t **, + * const uint16_t *) + * \brief Dump the destination LIDs assigned to each virtual layer into the + * OpenSM log file. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] destinations Array of destination arrays per virtual layer. + * \param[in] num_dest Array containing the number of destinations assigned + * to each virtual layer. + * \return NONE + */ +static void +print_destination_distribution(const osm_ucast_mgr_t *, + ib_net16_t **, + const uint16_t *); + +/*! \fn print_network(const osm_ucast_mgr_t *, + * const network_t *) + * \brief Dump the discovered, internal representation of the switch-based + * network with additional information into the OpenSM log file. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] network Nue's network object storing the subnet. + * \return NONE + */ +static void +print_network(const osm_ucast_mgr_t *, + const network_t *); + +/*! \fn print_network_link(const osm_ucast_mgr_t *, + * const network_link_t *, + * const uint8_t) + * \brief Dump the information of a switch-to-switch link, i.e. adjacent node + * names, LIDs, GUIDs, etc., into the OpenSM log file. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] link A link between two network nodes. + * \param[in] i Index of this link within the link array of a switch. 
+ * \return NONE + */ +static inline void +print_network_link(const osm_ucast_mgr_t *, + const network_link_t *, + const uint8_t); + +/*! \fn print_network_node(const osm_ucast_mgr_t *, + * const network_node_t *, + * const uint16_t, + * const boolean_t) + * \brief Dump the information of a network node (i.e. switch) into the OpenSM + * log file, including all of its links to adjacent nodes. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] node A network node of the internal subnet representation. + * \param[in] i Index of the node within the node array of network_t. + * \param[in] print_links If TRUE, the outgoing links of the node are printed. + * \return NONE + */ +static inline void +print_network_node(const osm_ucast_mgr_t *, + const network_node_t *, + const uint16_t, + const boolean_t); + +/*! \fn print_routes(const osm_ucast_mgr_t *, + * const network_t *, + * const osm_port_t *, + * const ib_net16_t) + * \brief Dump the calculated egress port (from ucast forwarding tables) for all + * network switches to route traffic towards the destination. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] network Nue's network object storing the subnet. + * \param[in] dest_port OpenSM's internal port object for the destination. + * \param[in] dlid Destination LID of the current routing step. + * \return NONE + */ +static void +print_routes(const osm_ucast_mgr_t *, + const network_t *, + const osm_port_t *, + const ib_net16_t); + +/*! \fn print_spanning_tree(const osm_ucast_mgr_t *, + * const network_t *) + * \brief Dump the calculated spanning tree, which is the basis for the escape + * paths in the current virtual layer, into the OpenSM log file. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] network Nue's network object storing the subnet. + * \return NONE + */ +static void +print_spanning_tree(const osm_ucast_mgr_t *, + const network_t *); + +/*! 
\fn reset_ccdg_color_array(const osm_ucast_mgr_t *, + * ccdg_t *, + * const uint16_t *, + * const uint8_t, + * const uint8_t) + * \brief This fn allocates the color_array used by Nue to track cCDG subgraphs + * for the first call or just resets the color IDs/ptr in the array. + * + * \param[in] mgr The management object of OpenSM. + * \param[in,out] ccdg Nue's internal object storing the complete CDG. + * \param[in] num_destinations Array of number of destinations assigned per VL. + * \param[in] max_vl Number of virtual layers to be used by Nue. + * \param[in] max_lmc Largest LMC assigned to any subnet component. + * \return Integer 0 if successful, or any integer unequal to 0 otherwise. + */ +static int +reset_ccdg_color_array(const osm_ucast_mgr_t *, + ccdg_t *, + const uint16_t *, + const uint8_t, + const uint8_t); + +/*! \fn reset_ccdg_edge_color_betw_nodes(const ccdg_t *, + * const ccdg_node_t *, + * const ccdg_node_t *, + * ccdg_edge_t *) + * \brief This fn changes the color pointer of a cCDG edge into the default + * UNUSED state and sets its wet_paint flag to FALSE. + * + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in] ccdg_node1 A vertex of the cCDG. + * \param[in] ccdg_node2 An adjacent vertex in the cCDG. + * \param[in,out] ccdg_edge The edge from ccdg_node1 to ccdg_node2 vertex, or + * NULL which causes the fn to search for the edge. + * The color of this edge will be reset into UNUSED. + * \return NONE + */ +static inline void +reset_ccdg_edge_color_betw_nodes(const ccdg_t *, + const ccdg_node_t *, + const ccdg_node_t *, + ccdg_edge_t *); + +/*! \fn reset_ccdg_edge_color(const ccdg_t *, + * ccdg_edge_t *) + * \brief This fn changes the color pointer of a given cCDG edge into the + * UNUSED state and sets its wet_paint flag to FALSE. + * + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in,out] ccdg_edge An edge of the complete channel dependency graph. 
+ * \return NONE + */ +static inline void +reset_ccdg_edge_color(const ccdg_t *, + ccdg_edge_t *); + +/*! \fn reset_ccdg_node_color(const ccdg_t *, + * ccdg_node_t *) + * \brief This fn changes the color pointer of a given cCDG vertex into the + * UNUSED state and sets its wet_paint flag to FALSE. + * + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in,out] ccdg_node A vertex of the complete channel dependency graph. + * \return NONE + */ +static inline void +reset_ccdg_node_color(const ccdg_t *, + ccdg_node_t *); + +/*! \fn reset_delta_for_betw_centrality(const network_t *) + * \brief The fn iterates over all network nodes and sets the delta struct + * element of network_node_t back to 0. + * + * \param[in,out] network Nue's network object storing the subnet. + * \return NONE + */ +static void +reset_delta_for_betw_centrality(const network_t *); + +/*! \fn reset_mgrp_membership(const network_t *) + * \brief Reset is_mc_member and num_of_mcm of all network nodes for future + * multicast routing computations. + * + * \param[in,out] network Nue's network object storing the subnet. + * \return NONE + */ +static void +reset_mgrp_membership(const network_t *); + +/*! \fn reset_sigma_distance_Ps_for_betw_centrality(const network_t *) + * \brief The fn iterates over all network nodes and resets three struct + * elements of network_node_t back to 0 or INFINITY, respectively. + * + * \param[in,out] network Nue's network object storing the subnet. + * \return NONE + */ +static void +reset_sigma_distance_Ps_for_betw_centrality(const network_t *); + +/*! \fn route_via_modified_dijkstra_on_ccdg(const osm_ucast_mgr_t *, + * const network_t *, + * ccdg_t *, + * const osm_port_t *, + * const ib_net16_t, + * const int32_t, + * boolean_t *) + * \brief The fn computes shortest paths from a source to all other netw nodes + * but in the complete CDG while complying with the cycle-free constraint. 
+ * + * Function description: A detailed description of this function can be found + * in Section 6.2.4 / Algorithm 6.2 of reference [2] (see abstract at the + * beginning of this file). + * + * \param[in] mgr The management object of OpenSM. + * \param[in] network Nue's network object storing the subnet. + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in] dest_port OpenSM's internal port object for the destination. + * \param[in] dlid Destination LID of the current routing step. + * \param[in] source_color A color ID associated with the current subgraph. + * \param[out] fallback_to_escape_paths The fn sets it to TRUE if Nue routing + * encountered an unsolvable impasse. + * \return Integer 0 if modified Dijkstra's algorithm executed on the cCDG for + * a given input destination LID completed successfully, or any integer + * unequal to 0 otherwise. + */ +static int +route_via_modified_dijkstra_on_ccdg(const osm_ucast_mgr_t *, + const network_t *, + ccdg_t *, + const osm_port_t *, + const ib_net16_t, + const int32_t, + boolean_t *); + +/*! \fn set_ccdg_edge_into_blocked_state(const ccdg_t *, + * ccdg_edge_t *) + * \brief Change a cCDG edge to set the color ID/Ptr into the BLOCKED state, + * and hence prevent any further use since it would close a cycle. + * + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in,out] ccdg_edge An edge of the complete CDG. + * \return NONE + */ +static inline void +set_ccdg_edge_into_blocked_state(const ccdg_t *, + ccdg_edge_t *); + +/*! \fn sort_backtracking_candidates_by_distance(backtracking_candidate_t *, + * const size_t) + * \brief Qsort the backtracking candidates by the distances determined during + * Dijkstra's algo to choose 'best' possible option for a replacement. + * + * \param[in,out] backtracking_candidate_array Array to store candidates. + * \param[in] num_elem_in_array Number of candidates in array. 
+ * \return NONE + */ +static inline void +sort_backtracking_candidates_by_distance(backtracking_candidate_t *, + const size_t); + +/*! \fn sort_ccdg_nodes_by_channel_id(const ccdg_t *) + * \brief This fn uses the stdlib to sort (via qsort) the vertex array of the + * cCDG w.r.t the channel IDs. + * + * \param[in,out] ccdg Nue's internal object storing the complete CDG. + * \return NONE + */ +static inline void +sort_ccdg_nodes_by_channel_id(const ccdg_t *); + +/*! \fn sort_destinations_by_lid(ib_net16_t *, + * const uint32_t) + * \brief This fn uses the stdlib to sort (via qsort) the destination array. + * + * \param[in,out] lid_array Array of destination LIDs within the virtual layer. + * \param[in] num_lids Size of the given array. + * \return NONE + */ +static inline void +sort_destinations_by_lid(ib_net16_t *, + const uint32_t); + +/*! \fn sort_network_nodes_by_lid(const network_t *) + * \brief This fn uses the stdlib to sort (via qsort) the network node array + * w.r.t the LIDs of the nodes. + * + * \param[in,out] network Nue's network object storing the subnet. + * \return NONE + */ +static inline void +sort_network_nodes_by_lid(const network_t *); + +/*! \fn update_ccdg_heap_index(const void *, + * const size_t) + * \brief Callback fn for the cl_heap to update the heap index of a complete + * CDG vertex. + * + * \param[in,out] context A cCDG vertex (assumed input type: `ccdg_node_t'). + * \param[in] new_index The new index of the vertex w.r.t the d-ary heap. + * \return NONE + */ +static void +update_ccdg_heap_index(const void *, + const size_t); + +/*! \fn update_dlid_to_vl_mapping(uint8_t *, + * const ib_net16_t, + * const uint8_t) + * \brief Store a virtual layer for a destination LID for later requests via + * the path_sl interface function. + * + * \param[in,out] dlid_to_vl_mapping Array of virtual layer assignments. + * \param[in] dlid Destination LID for current routing step. + * \param[in] virtual_layer Virtual layer all paths towards dlid use. 
+ * \return NONE + */ +static inline void +update_dlid_to_vl_mapping(uint8_t *, + const ib_net16_t, + const uint8_t); + +/*! \fn update_linear_forwarding_tables(const osm_ucast_mgr_t *, + * const network_t *, + * const osm_port_t *, + * const ib_net16_t) + * \brief Update the ucast linear forwarding tables of all switches towards a + * given destination LID based on the calculated paths. + * + * \param[in] mgr The management object of OpenSM. + * \param[in,out] network Nue's network object storing the subnet. + * \param[in] dest_port OpenSM's internal port object for the destination. + * \param[in] dlid Destination LID for current routing step. + * \return NONE + */ +static void +update_linear_forwarding_tables(const osm_ucast_mgr_t *, + const network_t *, + const osm_port_t *, + const ib_net16_t); + +/*! \fn update_mcast_forwarding_tables(const osm_ucast_mgr_t *, + * const network_t *, + * const uint16_t, + * const cl_qmap_t *, + * const network_node_t *) + * \brief Update the mcast linear forwarding tables of switches participating + * in the mcast for a given multicast LID based on the calculated paths. + * + * \param[in] mgr The management object of OpenSM. + * \param[in,out] network Nue's network object storing the subnet. + * \param[in] mlid_ho The multicast LID in host order. + * \param[in] port_map All subnet ports participating in the multicast. + * \param[in] root_node Network node acting as root for the spanning tree. + * \return NONE + */ +static void +update_mcast_forwarding_tables(const osm_ucast_mgr_t *, + const network_t *, + const uint16_t, + const cl_qmap_t *, + const network_node_t *); + +/*! \fn update_mgrp_membership(cl_qlist_t *) + * \brief The fn updates the multicast group membership information to identify + * whether a switch needs to be processed or not in later iterations. + * + * \param[in,out] port_list OpenSM's internal object storing a 'list' of ports. + * \return NONE + */ +static void +update_mgrp_membership(cl_qlist_t *); + +/*! 
\fn update_netw_heap_index(const void *, + * const size_t) + * \brief Callback fn for the cl_heap to update the heap index of a network + * node. + * + * \param[in,out] context A netw node (assumed input type: `network_node_t'). + * \param[in] new_index The new index of the node w.r.t the d-ary heap. + * \return NONE + */ +static void +update_netw_heap_index(const void *, + const size_t); + +/*! \fn update_network_link_weights(const osm_ucast_mgr_t *, + * const network_t *, + * const ib_net16_t) + * \brief Update the edge weights along the path towards the destination of + * the current routing step. + * + * \param[in] mgr The management object of OpenSM. + * \param[in,out] network Nue's network object storing the subnet. + * \param[in] desti LID of the switch adjacent to the real destination, + * if it's a terminal, or LID of the destination switch. + * \return NONE + */ +static void +update_network_link_weights(const osm_ucast_mgr_t *, + const network_t *, + const ib_net16_t); + +/*! \fn use_escape_paths_to_solve_impass(const osm_ucast_mgr_t *, + * const network_t *, + * const osm_port_t *, + * const ib_net16_t) + * \brief In the rare event of an unsolvable routing impasse, this fn sets the + * 'calculated' routes for a destination to the pre-determined escape path. + * + * \param[in] mgr The management object of OpenSM. + * \param[in,out] network Nue's network object storing the subnet. + * \param[in] dest_port OpenSM's internal port object for the destination. + * \param[in] dlid Destination LID of the current routing step. + * \return NONE + */ +static void +use_escape_paths_to_solve_impass(const osm_ucast_mgr_t *, + const network_t *, + const osm_port_t *, + const ib_net16_t); + +/*! 
\fn using_edge_induces_cycle_in_ccdg(const osm_ucast_mgr_t *, + * const ccdg_t *, + * const ccdg_node_t *, + * ccdg_edge_t *, + * const int32_t) + * \brief Verify that using a cCDG edge as part of a route does not induce any + * cycles in the complete CDG in combination with existing paths. + * + * Function description: A detailed description of this function can be found + * in Section 6.2.6.1 / Algorithm 6.4 of reference [2] (see abstract at the + * beginning of this file). + * + * \param[in] mgr The management object of OpenSM. + * \param[in] ccdg Nue's internal object storing the complete CDG. + * \param[in] head The head side (cCDG vertex) of the ccdg_edge. + * \param[in,out] ccdg_edge An edge of the cCDG, whose color may be adjusted. + * \param[in] color Color ID of current iteration to prevent cycles. + * \return TRUE if adding the given cCDG edge would close a cycle in the + * uniquely colored subgraph of the cCDG, or FALSE otherwise. + */ +static boolean_t +using_edge_induces_cycle_in_ccdg(const osm_ucast_mgr_t *, + const ccdg_t *, + const ccdg_node_t *, + ccdg_edge_t *, + const int32_t); + +#if defined (_DEBUG_) +/*! \fn deep_cpy_ccdg(const osm_ucast_mgr_t *, + * const ccdg_t *, + * ccdg_t *) + * \brief Allocate struct elements or reset (based on out_ccdg input status) + * a verification cCDG and perform a deep copy of the cCDG escape paths. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] in_ccdg Nue's internal object storing the complete CDG. + * \param[in,out] out_ccdg A 1-to-1 deep copy of Nue's internal cCDG object. + * \return TRUE if copying the cCDG was successful, or FALSE otherwise. + */ +static boolean_t +deep_cpy_ccdg(const osm_ucast_mgr_t *, + const ccdg_t *, + ccdg_t *); + +/*! 
\fn is_channel_id_in_verify_ccdg_node_list(const ccdg_t *, + * const channel_t *, + * ccdg_node_t **) + * \brief This fn iterates over the cCDG vertex array to search for a given + * channel ID and returns a pointer to the vertex if found, or NULL. + * + * \param[in] ccdg A 1-to-1 deep copy of Nue's internal cCDG object. + * \param[in] channel_id Channel ID bit field identifying parts of a link. + * \param[out] out_ccdg_node Pointer to a vertex of a (copied) complete CDG. + * \return TRUE if a given channel ID is part of the cCDG vertex list, or FALSE + * otherwise. + */ +static boolean_t +is_channel_id_in_verify_ccdg_node_list(const ccdg_t *, + const channel_t *, + ccdg_node_t **); + +/*! \fn is_channel_id_in_verify_ccdg_edge_list(const ccdg_node_t *, + * const channel_t *, + * ccdg_edge_t **) + * \brief This fn iterates over the edge list of a cCDG vertex to search/verify + * a given channel ID / neighbor vertex and returns the edge, or NULL. + * + * \param[in] ccdg_node A vertex of a (copied) complete CDG. + * \param[in] channel_id Channel ID bit field identifying parts of a link. + * \param[out] out_ccdg_edge Pointer to an edge of a (copied) complete CDG. + * \return TRUE if a given channel ID is part of the cCDG edge list, or FALSE + * otherwise. + */ +static boolean_t +is_channel_id_in_verify_ccdg_edge_list(const ccdg_node_t *, + const channel_t *, + ccdg_edge_t **); + +/*! \fn add_paths_to_verify_ccdg(const osm_ucast_mgr_t *, + * const network_t *, + * const ib_net16_t, + * const ccdg_t *, + * ccdg_t *, + * const boolean_t) + * \brief The fn traverses the network from all switches towards the routing + * desti and adds the channel dependencies to the verification cCDG. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] network Nue's network object storing the subnet. + * \param[in] desti The destination LID of the current routing step. + * \param[in] ccdg Nue's internal object storing the complete CDG. 
+ * \param[in,out] verify_ccdg A copy of the cCDG for validation purposes. + * \param[in] fallback_to_escape_paths Whether or not the real routing ended up + * in an unsolvable impasse for the desti. + * \return TRUE if a set of newly calculated routes is added to the cCDG + * successfully, or FALSE otherwise. + */ +static boolean_t +add_paths_to_verify_ccdg(const osm_ucast_mgr_t *, + const network_t *, + const ib_net16_t, + const ccdg_t *, + ccdg_t *, + const boolean_t); + +/*! \fn is_ccdg_cycle_free(const osm_ucast_mgr_t *, + * const ccdg_t *) + * \brief Perform a search for cycles in the verification (c)CDG which would + * indicate a broken, not deadlock-free Nue routing. + * + * Function description: This fn is a 'naive' implementation to thoroughly check + * the CDG for the absence of cycles, and hence it is SLOW and shouldn't be + * called too often or for gigantic topologies. + * + * \param[in] mgr The management object of OpenSM. + * \param[in] ccdg A copy of the cCDG for validation purposes. + * \return TRUE if the verification CDG is cycle-free, or FALSE otherwise. 
+ */ +static boolean_t +is_ccdg_cycle_free(const osm_ucast_mgr_t *, + const ccdg_t *); +#endif + +/********************************************************************** + **********************************************************************/ + +/************ construct/init/destroy functions for structs ************ + **********************************************************************/ +static inline void construct_network_link(network_link_t * link) +{ /* zero-fill the link so that every field starts out as 0 */ + CL_ASSERT(link); + memset(link, 0, sizeof(network_link_t)); +} +/* Store the local/remote LID and port plus the weight in the given link. */ +static inline void init_network_link(network_link_t * link, + const ib_net16_t loc_lid, + const uint8_t loc_port, + const ib_net16_t rem_lid, + const uint8_t rem_port, + const uint64_t weight) +{ + CL_ASSERT(link); + + link->link_info.local_lid = loc_lid; + link->link_info.local_port = loc_port; + link->link_info.remote_lid = rem_lid; + link->link_info.remote_port = rem_port; + link->weight = weight; +} +/* Zero-fill the node so that all of its fields start out as 0/NULL. */ +static inline void construct_network_node(network_node_t * node) +{ + CL_ASSERT(node); + memset(node, 0, sizeof(network_node_t)); +} +/* Fill in a node; returns -1 if stack_used_links cannot be allocated, else 0. */ +static inline int init_network_node(network_node_t * node, + const ib_net16_t lid, + const ib_net64_t guid, + const uint8_t num_base_lids, + const uint8_t num_lids, + const uint8_t num_links, + network_link_t * links, osm_switch_t * sw) +{ + CL_ASSERT(node); + /* links is stored as is, not copied (freed in destroy_network_node) */ + node->lid = lid; + node->guid = guid; + node->num_base_terminals = num_base_lids; + node->num_terminals = num_lids; + node->num_links = num_links; + node->links = links; + node->stack_used_links = + (network_link_t **) malloc(num_links * sizeof(network_link_t *)); + if (num_links && !node->stack_used_links) + return -1; /* malloc failed although links were requested */ + node->sw = sw; + node->dropped = FALSE; + + return 0; +} +/* Free the links, stack_used_links and Ps arrays of a node; NULL the ptrs. */ +static inline void destroy_network_node(network_node_t * node) +{ + CL_ASSERT(node); + + if (node->links) { + free(node->links); + node->links = NULL; + } + if (node->stack_used_links) { + free(node->stack_used_links); + node->stack_used_links = NULL; + } + if (node->Ps) { + 
free(node->Ps); + node->Ps = NULL; + } +} +/* Zero-fill the network object and call cl_heap_construct on its heap. */ +static inline void construct_network(network_t * network) +{ + CL_ASSERT(network); + memset(network, 0, sizeof(network_t)); + cl_heap_construct(&(network->heap)); +} +/* Destroy all nodes, free the node array, and destroy the heap if inited. */ +static inline void destroy_network(network_t * network) +{ + network_node_t *netw_node_iter = NULL; + uint32_t i = 0; + + CL_ASSERT(network); + + if (network->nodes) { + for (i = 0, netw_node_iter = network->nodes; + i < network->num_nodes; i++, netw_node_iter++) { + destroy_network_node(netw_node_iter); + } + free(network->nodes); + network->nodes = NULL; + } + if (cl_is_heap_inited(&(network->heap))) + cl_heap_destroy(&(network->heap)); +} +/* Zero-fill the cCDG edge so that all of its fields start out as 0/NULL. */ +static inline void construct_ccdg_edge(ccdg_edge_t * edge) +{ + CL_ASSERT(edge); + memset(edge, 0, sizeof(ccdg_edge_t)); +} +/* Set to_channel_id of the edge (presumably the ID of its target vertex). */ +static inline void init_ccdg_edge(ccdg_edge_t * edge, const channel_t to_cid) +{ + CL_ASSERT(edge); + edge->to_channel_id = to_cid; +} +/* Zero-fill the cCDG vertex so that all of its fields start out as 0/NULL. */ +static inline void construct_ccdg_node(ccdg_node_t * node) +{ + CL_ASSERT(node); + memset(node, 0, sizeof(ccdg_node_t)); +} +/* Fill in a vertex; status becomes WHITE, next_edge_idx 0, and pre NULL. */ +static inline void init_ccdg_node(ccdg_node_t * node, + const channel_t channel_id, + const uint8_t num_edges, ccdg_edge_t * edges, + network_link_t * corresponding_netw_link) +{ + CL_ASSERT(node); + /* edges is stored as is, not copied (freed in destroy_ccdg_node) */ + node->channel_id = channel_id; + node->num_edges = num_edges; + node->edges = edges; + node->corresponding_netw_link = corresponding_netw_link; + node->status = WHITE; + node->next_edge_idx = 0; + node->pre = NULL; +} +/* Free the edge array of the vertex (if set) and reset the pointer to NULL. */ +static inline void destroy_ccdg_node(ccdg_node_t * node) +{ + CL_ASSERT(node); + if (node->edges) { + free(node->edges); + node->edges = NULL; + } +} +/* Zero-fill the cCDG object and call cl_heap_construct on its heap. */ +static inline void construct_ccdg(ccdg_t * ccdg) +{ + CL_ASSERT(ccdg); + memset(ccdg, 0, sizeof(ccdg_t)); + cl_heap_construct(&(ccdg->heap)); +} +/* Destroy all vertices, free nodes and color_array, destroy an inited heap. */ +static inline void destroy_ccdg(ccdg_t * ccdg) +{ + ccdg_node_t *ccdg_node_iter = NULL; + uint32_t i = 0; + + CL_ASSERT(ccdg); + + if (ccdg->nodes) { + for (i = 0, ccdg_node_iter = ccdg->nodes; i < ccdg->num_nodes; + 
i++, ccdg_node_iter++) { + destroy_ccdg_node(ccdg_node_iter); + } + free(ccdg->nodes); + ccdg->nodes = NULL; + } + if (ccdg->color_array) { + free(ccdg->color_array); + ccdg->color_array = NULL; + } + if (cl_is_heap_inited(&(ccdg->heap))) + cl_heap_destroy(&(ccdg->heap)); +} + +#if defined (ENABLE_METIS_FOR_NUE) +static inline void construct_metis_context(metis_context_t * metis_ctx) +{ /* zero-fill the METIS context so that every field starts out as 0 */ + CL_ASSERT(metis_ctx); + memset(metis_ctx, 0, sizeof(metis_context_t)); +} +/* Store nvtxs and nparts, set ncon to 1, and prepare the METIS options. */ +static inline void init_metis_context(metis_context_t * metis_ctx, + const idx_t nvtxs, const idx_t nparts, + const idx_t seed, const idx_t numbering) +{ + CL_ASSERT(metis_ctx); + /* nvtxs, ncon and nparts are dereferenced, so they must be valid storage */ + *(metis_ctx->nvtxs) = nvtxs; + *(metis_ctx->ncon) = 1; + *(metis_ctx->nparts) = nparts; + /* start from the METIS defaults, then override seed and numbering */ + METIS_SetDefaultOptions(metis_ctx->options); + metis_ctx->options[METIS_OPTION_SEED] = seed; + metis_ctx->options[METIS_OPTION_NUMBERING] = numbering; +} +/* Free the xadj, adjncy and part arrays (if set) and NULL the pointers. */ +static inline void destroy_metis_context(metis_context_t * metis_ctx) +{ + CL_ASSERT(metis_ctx); + + if (metis_ctx->xadj) { + free(metis_ctx->xadj); + metis_ctx->xadj = NULL; + } + if (metis_ctx->adjncy) { + free(metis_ctx->adjncy); + metis_ctx->adjncy = NULL; + } + if (metis_ctx->part) { + free(metis_ctx->part); + metis_ctx->part = NULL; + } +} +#endif + +/********************************************************************** + **********************************************************************/ + +/******** helper functions to sort/access destinations by lid ********* + **********************************************************************/ +static int compare_lids(const void *l1, const void *l2) +{ /* qsort/bsearch comparator for ib_net16_t elements: returns -1, 0 or 1 */ + ib_net16_t *lid1 = (ib_net16_t *) l1; + ib_net16_t *lid2 = (ib_net16_t *) l2; + /* the values are compared as stored, without byte-order conversion */ + CL_ASSERT(lid1 && lid2); + + if (*lid1 < *lid2) + return -1; + else if (*lid1 > *lid2) + return 1; + else + return 0; +} + +/* use stdlib to sort the lid array (ascending w.r.t. compare_lids) */ +static inline void sort_destinations_by_lid(ib_net16_t * lid_array, + const uint32_t num_lids) +{ + CL_ASSERT(lid_array); + 
qsort(lid_array, num_lids, sizeof(ib_net16_t), compare_lids); +} + +/* use stdlib to find (binary search) a lid in a sorted array, or NULL */ +static inline ib_net16_t *get_lid(const ib_net16_t * lid_array, + const uint32_t num_lids, const ib_net16_t lid) +{ + CL_ASSERT(lid_array); + return bsearch(&lid, lid_array, num_lids, sizeof(ib_net16_t), + compare_lids); +} + +/********************************************************************** + **********************************************************************/ + +/******** helper functions to sort/access network nodes by lids ******* + **********************************************************************/ +static int compare_network_nodes_by_lid(const void *n1, const void *n2) +{ /* qsort/bsearch comparator: orders network nodes by their lid field */ + network_node_t *node1 = (network_node_t *) n1; + network_node_t *node2 = (network_node_t *) n2; + + CL_ASSERT(node1 && node2); + return compare_lids((void *)&(node1->lid), (void *)&(node2->lid)); +} + +/* use stdlib to sort the node array by lid (as required by bsearch) */ +static inline void sort_network_nodes_by_lid(const network_t * network) +{ + CL_ASSERT(network); + qsort(network->nodes, network->num_nodes, sizeof(network_node_t), + compare_network_nodes_by_lid); +} + +/* use stdlib to find (binary search) a node in a sorted array, or NULL */ +static inline network_node_t *get_network_node_by_lid(const network_t * network, + const ib_net16_t lid) +{ + network_node_t key; + + CL_ASSERT(network); + /* zeroed dummy node that only carries the lid as search key */ + construct_network_node(&key); + key.lid = lid; + return bsearch(&key, network->nodes, network->num_nodes, + sizeof(network_node_t), compare_network_nodes_by_lid); +} + +/********************************************************************** + **********************************************************************/ + +/****** helper functions to sort/access ccdg nodes by channel_id ****** + **********************************************************************/ +static inline channel_t get_inverted_channel_id(const channel_t in_channel) +{ /* returns in_channel with local and remote side (lid, port) swapped */ + channel_t out_channel; + + 
out_channel.local_lid = in_channel.remote_lid; + out_channel.local_port = in_channel.remote_port; + out_channel.remote_lid = in_channel.local_lid; + out_channel.remote_port = in_channel.local_port; + + return out_channel; +} +/* qsort/bsearch comparator: compares two channel IDs via packed 64bit keys. */ +static int compare_two_channel_id(const void *c1, const void *c2) +{ + channel_t *channel_id1 = (channel_t *) c1; + channel_t *channel_id2 = (channel_t *) c2; + uint64_t key1 = 0, key2 = 0; + ib_net16_t l_lid_1 = 0, l_lid_2 = 0, r_lid_1 = 0, r_lid_2 = 0; + uint8_t l_port_1 = 0, l_port_2 = 0, r_port_1 = 0, r_port_2 = 0; + + CL_ASSERT(channel_id1 && channel_id2); + + l_lid_1 = (ib_net16_t) channel_id1->local_lid; + l_port_1 = (uint8_t) channel_id1->local_port; + r_lid_1 = (ib_net16_t) channel_id1->remote_lid; + r_port_1 = (uint8_t) channel_id1->remote_port; + + l_lid_2 = (ib_net16_t) channel_id2->local_lid; + l_port_2 = (uint8_t) channel_id2->local_port; + r_lid_2 = (ib_net16_t) channel_id2->remote_lid; + r_port_2 = (uint8_t) channel_id2->remote_port; + /* most significant first: local lid, local port, remote lid, remote port */ + key1 = + (((uint64_t) l_lid_1) << 48) + (((uint64_t) l_port_1) << 32) + + (((uint64_t) r_lid_1) << 16) + ((uint64_t) r_port_1); + + key2 = + (((uint64_t) l_lid_2) << 48) + (((uint64_t) l_port_2) << 32) + + (((uint64_t) r_lid_2) << 16) + ((uint64_t) r_port_2); + + if (key1 < key2) + return -1; + else if (key1 > key2) + return 1; + else + return 0; +} +/* qsort/bsearch comparator: orders cCDG vertices by their channel_id field. */ +static inline int compare_ccdg_nodes_by_channel_id(const void *cn1, + const void *cn2) +{ + ccdg_node_t *ccdg_node1 = (ccdg_node_t *) cn1; + ccdg_node_t *ccdg_node2 = (ccdg_node_t *) cn2; + + CL_ASSERT(ccdg_node1 && ccdg_node2); + return compare_two_channel_id(&(ccdg_node1->channel_id), + &(ccdg_node2->channel_id)); +} + +/* use stdlib to sort the node array w.r.t the channel id */ +static inline void sort_ccdg_nodes_by_channel_id(const ccdg_t * ccdg) +{ + CL_ASSERT(ccdg); + qsort(ccdg->nodes, ccdg->num_nodes, sizeof(ccdg_node_t), + compare_ccdg_nodes_by_channel_id); +} + +/* use stdlib to find (binary search) a node in a sorted array */ 
+static inline ccdg_node_t *get_ccdg_node_by_channel_id(const ccdg_t * ccdg,
+						       const channel_t c_id)
+{
+	ccdg_node_t probe;
+	ccdg_node_t *found = NULL;
+
+	CL_ASSERT(ccdg);
+
+	/* the probe is a freshly constructed node carrying only the id */
+	construct_ccdg_node(&probe);
+	probe.channel_id = c_id;
+
+	found = bsearch(&probe, ccdg->nodes, ccdg->num_nodes,
+			sizeof(ccdg_node_t), compare_ccdg_nodes_by_channel_id);
+	return found;
+}
+
+/**********************************************************************
+ **********************************************************************/
+
+/***************** helper function to access ccdg edges ***************
+ **********************************************************************/
+/* linear scan over the outgoing edges of ccdg_node1; returns the edge
+   which points to ccdg_node2 or NULL if the nodes are not connected */
+static ccdg_edge_t *get_ccdg_edge_betw_nodes(const ccdg_node_t * ccdg_node1,
+					     const ccdg_node_t * ccdg_node2)
+{
+	uint8_t idx = 0;
+
+	CL_ASSERT(ccdg_node1 && ccdg_node2);
+
+	for (idx = 0; idx < ccdg_node1->num_edges; idx++) {
+		if (ccdg_node1->edges[idx].to_ccdg_node == ccdg_node2)
+			return &(ccdg_node1->edges[idx]);
+	}
+	return NULL;
+}
+
+/**********************************************************************
+ **********************************************************************/
+
+/******* helper functions to compare and sort candidate channels*******
+ **** for local backtracking in case of a temporary routing impasse ***/
+/* qsort comparator: orders two backtracking candidates by the distance
+   stored in their orig_used_ccdg_node_for_adj_netw_node (ascending) */
+static inline int compare_backtracking_candidates_by_distance(const void *btc1,
+							      const void *btc2)
+{
+	backtracking_candidate_t *cand1 = (backtracking_candidate_t *) btc1;
+	backtracking_candidate_t *cand2 = (backtracking_candidate_t *) btc2;
+	ccdg_node_t *lhs = NULL, *rhs = NULL;
+
+	CL_ASSERT(cand1 && cand2);
+
+	lhs = cand1->orig_used_ccdg_node_for_adj_netw_node;
+	rhs = cand2->orig_used_ccdg_node_for_adj_netw_node;
+	CL_ASSERT(lhs && rhs);
+
+	/* -1 / 0 / 1 for smaller / equal / larger distance */
+	return (lhs->distance > rhs->distance) -
+	    (lhs->distance < rhs->distance);
+}
+
+/* sort the candidate array in place (stdlib qsort) w.r.t. the distance
+   of the referenced ccdg nodes */
+static inline void
+sort_backtracking_candidates_by_distance(backtracking_candidate_t *
+					 backtracking_candidate_array,
+					 const size_t num_elem_in_array)
+{
+	CL_ASSERT(backtracking_candidate_array);
+
+	qsort(backtracking_candidate_array, num_elem_in_array,
+	      sizeof(backtracking_candidate_t),
+	      compare_backtracking_candidates_by_distance);
+}
+
+/**********************************************************************
+ **********************************************************************/
+
+/****** helper functions to manage disjoint subgraphs of the ccdg *****
+ ******* init colors, set/get routines, merge of subgraphs, etc. ******/
+/* put a ccdg node into the UNUSED color and clear its wet_paint flag */
+static inline void init_ccdg_node_color(const ccdg_t * ccdg,
+					ccdg_node_t * ccdg_node)
+{
+	CL_ASSERT(ccdg && ccdg->color_array && ccdg_node);
+
+	ccdg_node->wet_paint = FALSE;
+	ccdg_node->color = &(ccdg->color_array[UNUSED]);
+}
+
+/* put a ccdg edge into the UNUSED color and clear its wet_paint flag */
+static inline void init_ccdg_edge_color(const ccdg_t * ccdg,
+					ccdg_edge_t * ccdg_edge)
+{
+	CL_ASSERT(ccdg && ccdg->color_array && ccdg_edge);
+
+	ccdg_edge->wet_paint = FALSE;
+	ccdg_edge->color = &(ccdg->color_array[UNUSED]);
+}
+
+/* initialize the color of every node of the ccdg and of all edges
+   leaving these nodes */
+static void init_ccdg_colors(const ccdg_t * ccdg)
+{
+	ccdg_node_t *node = NULL;
+	uint32_t n = 0, e = 0;
+
+	CL_ASSERT(ccdg);
+
+	for (n = 0; n < ccdg->num_nodes; n++) {
+		node = &(ccdg->nodes[n]);
+		init_ccdg_node_color(ccdg, node);
+		for (e = 0; e < node->num_edges; e++)
+			init_ccdg_edge_color(ccdg, &(node->edges[e]));
+	}
+}
+
+/* (re)initialize the color array of the ccdg; the array is allocated on
+   the first call (size derived from the largest per-VL destination count,
+   max_lmc and 3 status colors) and afterwards only reset, so that every
+   color has its index as id and is its own real color.
+   returns 0 on success and -1 if the allocation fails */
+static int reset_ccdg_color_array(const osm_ucast_mgr_t * mgr, ccdg_t * ccdg,
+				  const uint16_t * num_destinations,
+				  const uint8_t max_vl, const uint8_t max_lmc)
+{
+	int32_t max_destinations = 1;
+	uint8_t vl = 0;
+	uint16_t idx = 0;
+
+	CL_ASSERT(mgr && ccdg && num_destinations);
+
+	if (NULL == ccdg->color_array) {
+		/* largest number of destinations assigned to a single VL */
+		for (vl = 0; vl < max_vl; vl++) {
+			if (num_destinations[vl] > max_destinations)
+				max_destinations = num_destinations[vl];
+		}
+		/* worst case: multiple routing steps per base lid (lmc>0),
+		   1 color per destination for the cCDG color coding */
+		max_destinations *= (1 << max_lmc);
+		/* and 3 colors for statuses: blocked, unused, escape paths */
+		max_destinations += 3;
+
+		ccdg->color_array =
+		    (color_t *) malloc(max_destinations * sizeof(color_t));
+		if (NULL == ccdg->color_array) {
+			OSM_LOG(mgr->p_log, OSM_LOG_ERROR,
+				"ERR NUE19: cannot allocate memory for ccdg color array\n");
+			return -1;
+		}
+		ccdg->num_colors = max_destinations;
+	}
+
+	memset(ccdg->color_array, 0, ccdg->num_colors * sizeof(color_t));
+	for (idx = 0; idx < ccdg->num_colors; idx++) {
+		ccdg->color_array[idx].color_id = idx;
+		ccdg->color_array[idx].real_col = &(ccdg->color_array[idx]);
+	}
+
+	return 0;
+}
+
+/* assign the escape path color to a ccdg node */
+static inline void init_ccdg_escape_path_node_color(const ccdg_t * ccdg,
+						    ccdg_node_t * ccdg_node)
+{
+	CL_ASSERT(ccdg && ccdg->color_array && ccdg_node);
+
+	ccdg_node->color = &(ccdg->color_array[ESCAPEPATHCOLOR]);
+}
+
+/* assign the escape path color to the edge ccdg_node1 -> ccdg_node2 */
+static inline void init_ccdg_escape_path_edge_color_betw_nodes(const ccdg_t *
+								ccdg,
+								const ccdg_node_t
+								* ccdg_node1,
+								const ccdg_node_t
+								* ccdg_node2)
+{
+	ccdg_edge_t *edge = NULL;
+
+	CL_ASSERT(ccdg && ccdg->color_array && ccdg_node1 && ccdg_node2);
+
+	edge = get_ccdg_edge_betw_nodes(ccdg_node1, ccdg_node2);
+	CL_ASSERT(edge);
+	edge->color = &(ccdg->color_array[ESCAPEPATHCOLOR]);
+}
+
+/* assign the BLOCKED color to a ccdg edge */
+static inline void set_ccdg_edge_into_blocked_state(const ccdg_t * ccdg,
+						     ccdg_edge_t * ccdg_edge)
+{
+	CL_ASSERT(ccdg && ccdg_edge);
+
+	ccdg_edge->color = &(ccdg->color_array[BLOCKED]);
+}
+
+static inline uint16_t get_ccdg_node_color(const ccdg_t * ccdg,
+					   const ccdg_node_t * ccdg_node)
+{
+	CL_ASSERT(ccdg && ccdg_node &&
ccdg_node->color);
+	return ccdg_node->color->real_col->color_id;
+}
+
+/* id of the real (i.e., merged) color of a ccdg edge */
+static inline uint16_t get_ccdg_edge_color(const ccdg_t * ccdg,
+					   const ccdg_edge_t * ccdg_edge)
+{
+	CL_ASSERT(ccdg_edge && ccdg_edge->color);
+
+	return ccdg_edge->color->real_col->color_id;
+}
+
+/* id of the real color of the edge ccdg_node1 -> ccdg_node2; the edge is
+   searched in ccdg_node1 if the caller does not supply it (NULL) */
+static inline uint16_t get_ccdg_edge_color_betw_nodes(const ccdg_t * ccdg,
+						       const ccdg_node_t *
+						       ccdg_node1,
+						       const ccdg_node_t *
+						       ccdg_node2,
+						       ccdg_edge_t * ccdg_edge)
+{
+	ccdg_edge_t *edge = ccdg_edge;
+
+	CL_ASSERT(ccdg && ccdg_node1 && ccdg_node2);
+
+	if (NULL == edge)
+		edge = get_ccdg_edge_betw_nodes(ccdg_node1, ccdg_node2);
+	CL_ASSERT(edge && edge->to_ccdg_node == ccdg_node2);
+
+	return get_ccdg_edge_color(ccdg, edge);
+}
+
+/* recolor a fake channel (local lid == remote lid, both ports 0); must only
+   be called for such fake channels in the init phase of dijkstra.
+   if the node already has a color above UNUSED the real color of that color
+   is redirected, otherwise the node simply gets the new color */
+static inline void change_fake_ccdg_node_color(const ccdg_t * ccdg,
+						ccdg_node_t * ccdg_node,
+						const int32_t color)
+{
+	CL_ASSERT(ccdg && ccdg->color_array && ccdg_node);
+	CL_ASSERT(ccdg_node->channel_id.local_lid ==
+		  ccdg_node->channel_id.remote_lid
+		  && (0 ==
+		      (ccdg_node->channel_id.local_port |
+		       ccdg_node->channel_id.remote_port)));
+
+	if (get_ccdg_node_color(ccdg, ccdg_node) <= UNUSED) {
+		ccdg_node->color = &(ccdg->color_array[color]);
+		return;
+	}
+	ccdg_node->color->real_col = &(ccdg->color_array[color]);
+}
+
+/* throw a node back into the UNUSED color if its paint is still wet */
+static inline void reset_ccdg_node_color(const ccdg_t * ccdg,
+					 ccdg_node_t * ccdg_node)
+{
+	CL_ASSERT(ccdg && ccdg_node && ccdg_node->color);
+
+	if (!ccdg_node->wet_paint)
+		return;
+
+	ccdg_node->wet_paint = FALSE;
+	ccdg_node->color = &(ccdg->color_array[UNUSED]);
+}
+
+/* throw an edge back into the UNUSED color if its paint is still wet */
+static inline void reset_ccdg_edge_color(const ccdg_t * ccdg,
+					 ccdg_edge_t * ccdg_edge)
+{
+	CL_ASSERT(ccdg && ccdg_edge && ccdg_edge->color);
+
+	if (!ccdg_edge->wet_paint)
+		return;
+
+	/* wet edges are never expected to be blocked */
+	CL_ASSERT(BLOCKED != get_ccdg_edge_color(ccdg, ccdg_edge));
+	ccdg_edge->color = &(ccdg->color_array[UNUSED]);
+	ccdg_edge->wet_paint = FALSE;
+}
+
+/* same as reset_ccdg_edge_color for the edge ccdg_node1 -> ccdg_node2; the
+   edge is searched in ccdg_node1 if the caller does not supply it (NULL) */
+static inline void reset_ccdg_edge_color_betw_nodes(const ccdg_t * ccdg,
+						     const ccdg_node_t *
+						     ccdg_node1,
+						     const ccdg_node_t *
+						     ccdg_node2,
+						     ccdg_edge_t * ccdg_edge)
+{
+	ccdg_edge_t *edge = ccdg_edge;
+
+	CL_ASSERT(ccdg && ccdg_node1 && ccdg_node2);
+
+	if (NULL == edge)
+		edge = get_ccdg_edge_betw_nodes(ccdg_node1, ccdg_node2);
+	CL_ASSERT(edge && edge->to_ccdg_node == ccdg_node2);
+
+	reset_ccdg_edge_color(ccdg, edge);
+}
+
+/* paint the (so far unused) edge ccdg_node1 -> ccdg_node2 with the color of
+   ccdg_node1 and mark the paint as wet; both nodes are expected to have the
+   same color already */
+static inline void add_ccdg_edge_betw_nodes_to_colored_subccdg(const ccdg_t *
+								ccdg,
+								const ccdg_node_t
+								* ccdg_node1,
+								const ccdg_node_t
+								* ccdg_node2,
+								ccdg_edge_t *
+								ccdg_edge)
+{
+	ccdg_edge_t *edge = ccdg_edge;
+
+	CL_ASSERT(ccdg && ccdg_node1 && ccdg_node2);
+	CL_ASSERT(get_ccdg_node_color(ccdg, ccdg_node1) ==
+		  get_ccdg_node_color(ccdg, ccdg_node2));
+
+	if (NULL == edge)
+		edge = get_ccdg_edge_betw_nodes(ccdg_node1, ccdg_node2);
+	CL_ASSERT(edge && edge->to_ccdg_node == ccdg_node2
+		  && UNUSED == get_ccdg_edge_color(ccdg, edge));
+
+	edge->wet_paint = TRUE;
+	edge->color = ccdg_node1->color;
+}
+
+/* pull the unused node ccdg_node2 (and the edge leading to it) into the
+   colored subgraph of ccdg_node1; the paint of both stays wet */
+static inline void add_ccdg_node_to_colored_subccdg(const ccdg_t * ccdg,
+						     const ccdg_node_t *
+						     ccdg_node1,
+						     ccdg_node_t * ccdg_node2)
+{
+	CL_ASSERT(ccdg && ccdg_node1 && ccdg_node2);
+	CL_ASSERT(get_ccdg_node_color(ccdg, ccdg_node1) > UNUSED
+		  && get_ccdg_node_color(ccdg, ccdg_node2) == UNUSED);
+
+	ccdg_node2->wet_paint = TRUE;
+	ccdg_node2->color = ccdg_node1->color;
+
+	add_ccdg_edge_betw_nodes_to_colored_subccdg(ccdg, ccdg_node1,
+						    ccdg_node2, NULL);
+}
+
+/* merge the colored subgraph of ccdg_node2 into the one of ccdg_node1 by
+   redirecting the real color of ccdg_node2's color, then paint the edge
+   between both nodes; ccdg_node1 must have the larger color id */
+static inline void merge_two_colored_subccdg_by_nodes(const ccdg_t * ccdg,
+						       const ccdg_node_t *
+						       ccdg_node1,
+						       ccdg_node_t * ccdg_node2)
+{
+	CL_ASSERT(ccdg && ccdg_node1 && ccdg_node2);
+	CL_ASSERT(get_ccdg_node_color(ccdg, ccdg_node1) > UNUSED
+		  && get_ccdg_node_color(ccdg, ccdg_node2) > UNUSED
+		  && get_ccdg_node_color(ccdg, ccdg_node1) >
+		  get_ccdg_node_color(ccdg, ccdg_node2));
+
+	ccdg_node2->color->real_col = ccdg_node1->color;
+
+	add_ccdg_edge_betw_nodes_to_colored_subccdg(ccdg, ccdg_node1,
+						    ccdg_node2, NULL);
+}
+
+/* make the current color of a node permanent (clear wet_paint) */
+static inline void dry_ccdg_node_color(ccdg_node_t * ccdg_node)
+{
+	CL_ASSERT(ccdg_node);
+
+	if (ccdg_node->wet_paint) {
+		ccdg_node->wet_paint = FALSE;
+	}
+}
+
+/* make the current color of the edge ccdg_node1 -> ccdg_node2 permanent
+   (clear wet_paint) */
+static inline void dry_ccdg_edge_color_betw_nodes(const ccdg_t * ccdg,
+						   const ccdg_node_t *
+						   ccdg_node1,
+						   const ccdg_node_t *
+						   ccdg_node2)
+{
+	ccdg_edge_t *edge = NULL;
+
+	CL_ASSERT(ccdg && ccdg_node1 && ccdg_node2);
+	CL_ASSERT(get_ccdg_node_color(ccdg, ccdg_node1) ==
+		  get_ccdg_node_color(ccdg, ccdg_node2));
+
+	edge = get_ccdg_edge_betw_nodes(ccdg_node1, ccdg_node2);
+	/* node color and edge color are expected to match; this does not
+	   hold after a reset to escape paths in combination with the fake
+	   channels, hence fake channels (both ports 0) are filtered out
+	 */
+	CL_ASSERT(edge
+		  && (get_ccdg_node_color(ccdg, ccdg_node1) ==
+		      get_ccdg_edge_color(ccdg, edge)
+		      || 0 ==
+		      (ccdg_node1->channel_id.local_port |
+		       ccdg_node1->channel_id.remote_port)));
+
+	if (edge->wet_paint) {
+		edge->wet_paint = FALSE;
+	}
+}
+
+static inline void fix_ccdg_node_color(ccdg_node_t * ccdg_node)
+{
+	CL_ASSERT(ccdg_node && ccdg_node->color);
+	if (ccdg_node->color->real_col != ccdg_node->color)
+
ccdg_node->color = ccdg_node->color->real_col;
+}
+
+/* let the edge point directly to its real color, in case the color it
+   currently refers to has been redirected to another (real) color */
+static inline void fix_ccdg_edge_color(ccdg_edge_t * ccdg_edge)
+{
+	CL_ASSERT(ccdg_edge && ccdg_edge->color);
+	if (ccdg_edge->color->real_col != ccdg_edge->color)
+		ccdg_edge->color = ccdg_edge->color->real_col;
+}
+
+/* finalize the coloring of the ccdg w.r.t. the links recorded in the
+   used_link field of the network nodes:
+   1) dry the color of every ccdg node/edge which corresponds to a used link
+   2) reset everything which is still wet back to UNUSED
+   3) let all nodes/edges point directly to their real color
+   the order of these steps matters: resetting only touches wet paint, so
+   the used parts have to be dried first
+*/
+static void fix_ccdg_colors(const osm_ucast_mgr_t * mgr,
+			    const network_t * network,
+			    const network_node_t * source_netw_node,
+			    const ccdg_t * ccdg,
+			    const ccdg_node_t * source_ccdg_node)
+{
+	network_node_t *network_node = NULL, *netw_node_iter = NULL;
+	ccdg_node_t *ccdg_node = NULL, *pre_ccdg_node = NULL;
+	ccdg_node_t *ccdg_node_iter = NULL;
+	ccdg_edge_t *ccdg_edge_iter = NULL;
+	uint16_t i = 0;
+	uint32_t j = 0;
+	uint8_t k = 0;
+
+	CL_ASSERT(mgr && network && source_netw_node && ccdg
+		  && source_ccdg_node);
+
+	/* dry all colors of ccdg nodes/edges which are actually used */
+	for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes;
+	     i++, netw_node_iter++) {
+		/* only the source node is expected to have no used link */
+		if (!netw_node_iter->used_link) {
+			CL_ASSERT(netw_node_iter == source_netw_node);
+			continue;
+		}
+		ccdg_node = netw_node_iter->used_link->corresponding_ccdg_node;
+		CL_ASSERT(ccdg_node);
+		/* network node at the local end of the used channel */
+		network_node =
+		    get_network_node_by_lid(network,
+					    ccdg_node->channel_id.local_lid);
+		CL_ASSERT(network_node);
+
+		dry_ccdg_node_color(ccdg_node);
+		if (network_node == source_netw_node) {
+			/* predecessor in the ccdg is the source ccdg node */
+			dry_ccdg_edge_color_betw_nodes(ccdg, source_ccdg_node,
+						       ccdg_node);
+		} else {
+			/* predecessor is the channel used by that node */
+			CL_ASSERT(network_node->used_link);
+			pre_ccdg_node =
+			    network_node->used_link->corresponding_ccdg_node;
+			CL_ASSERT(pre_ccdg_node);
+			dry_ccdg_edge_color_betw_nodes(ccdg, pre_ccdg_node,
+						       ccdg_node);
+		}
+	}
+
+	/* everything else which is still wet now can be reset and afterwards
+	   we simply fix the colors, meaning subgraph merges are made official
+	 */
+	for (j = 0, ccdg_node_iter = ccdg->nodes; j < ccdg->num_nodes;
+	     j++, ccdg_node_iter++) {
+		reset_ccdg_node_color(ccdg, ccdg_node_iter);
+		for (k = 0, ccdg_edge_iter = ccdg_node_iter->edges;
+		     k < ccdg_node_iter->num_edges; k++, ccdg_edge_iter++) {
+			reset_ccdg_edge_color_betw_nodes(ccdg, ccdg_node_iter,
+							 ccdg_edge_iter->
+							 to_ccdg_node,
+							 ccdg_edge_iter);
+			fix_ccdg_edge_color(ccdg_edge_iter);
+		}
+		fix_ccdg_node_color(ccdg_node_iter);
+	}
+}
+
+/**********************************************************************
+ **********************************************************************/
+
+/****** debugging functions to print the network or complete cdg ******
+ ************* or whatever else we need to print **********************/
+/* log (level INFO) name, lid and guid of the node a link points to; nothing
+   is logged for links without a target node; i is the index shown in the
+   output */
+static inline void print_network_link(const osm_ucast_mgr_t * mgr,
+				      const network_link_t * link,
+				      const uint8_t i)
+{
+	network_node_t *adj_node = NULL;
+
+	CL_ASSERT(mgr && link);
+
+	adj_node = link->to_network_node;
+	if (adj_node) {
+		OSM_LOG(mgr->p_log, OSM_LOG_INFO,
+			" link[%" PRIu8 "][name, lid, guid]" " = [%s, %" PRIu16
+			", 0x%016" PRIx64 "]\n", i,
+			adj_node->sw->p_node->print_desc,
+			cl_ntoh16(adj_node->lid), cl_ntoh64(adj_node->guid));
+	}
+}
+
+/* log (level INFO) the basic information of a network node; if print_links
+   is set, all links of the node and (if present) the channel of its escape
+   path are logged as well; i is the index shown in the output */
+static inline void print_network_node(const osm_ucast_mgr_t * mgr,
+				      const network_node_t * node,
+				      const uint16_t i,
+				      const boolean_t print_links)
+{
+	channel_t *channel_id = NULL;
+	uint8_t j = 0;
+
+	CL_ASSERT(mgr && node);
+
+	OSM_LOG(mgr->p_log, OSM_LOG_INFO, "node[%" PRIu32 "] at %p:\n", i,
+		node);
+	OSM_LOG(mgr->p_log, OSM_LOG_INFO,
+		" [name, lid, guid, num_terminals, switch_pointer]" " = [%s, %"
+		PRIu16 ", 0x%016" PRIx64 ", %" PRIu8 ", %p]\n",
+		node->sw->p_node->print_desc, cl_ntoh16(node->lid),
+		cl_ntoh64(node->guid), node->num_terminals, node->sw);
+	if (print_links) {
+		for (j = 0; j < node->num_links; j++)
+			print_network_link(mgr, &(node->links[j]), j);
+	}
+	if (print_links && node->escape_path) {
+		channel_id =
+		    (channel_t *) & (node->escape_path->
+				     corresponding_ccdg_node->channel_id);
+		OSM_LOG(mgr->p_log, OSM_LOG_INFO,
+			" [escape_path] = [(%" PRIu16 ",%" PRIu8 ")->(%" PRIu16
+			",%" PRIu8 ")]\n", cl_ntoh16(channel_id->local_lid),
+			channel_id->local_port,
+			cl_ntoh16(channel_id->remote_lid),
+			channel_id->remote_port);
+	}
+}
+
+/* log all nodes of the network including their links */
+static void print_network(const osm_ucast_mgr_t * mgr,
+			  const network_t * network)
+{
+	uint32_t i = 0;
+	network_node_t *netw_node_iter = NULL;
+
+	CL_ASSERT(mgr && network);
+	OSM_LOG_ENTER(mgr->p_log);
+
+	/* NOTE(review): i is 32 bit here while print_network_node takes a
+	   16 bit index, i.e., the printed index is truncated for i > 65535 */
+	for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes;
+	     i++, netw_node_iter++)
+		print_network_node(mgr, netw_node_iter, i, TRUE);
+
+	OSM_LOG_EXIT(mgr->p_log);
+}
+
+static inline void print_ccdg_node(const osm_ucast_mgr_t * mgr,
+				   const ccdg_t * ccdg,
+				   const ccdg_node_t * ccdg_node,
+				   const uint32_t i,
+				   const boolean_t print_colors)
+{
+	uint8_t j = 0;
+	ccdg_node_t *adj_ccdg_node = NULL;
+	channel_t *channel_id = NULL;
+	ccdg_edge_t *ccdg_edge_iter = NULL;
+
+	CL_ASSERT(mgr && ccdg && ccdg_node);
+
+	channel_id = (channel_t *) & (ccdg_node->channel_id);
+	OSM_LOG(mgr->p_log, OSM_LOG_DEBUG, "ccdg[%" PRIu32 "] at %p:\n", i,
+		ccdg_node);
+	if (print_colors) {
+		OSM_LOG(mgr->p_log, OSM_LOG_DEBUG,
+			" [local_lid, local_port, remote_lid, remote_port, color]"
+			" = [%" PRIu16 ", %" PRIu8 ", %" PRIu16 ", %" PRIu8
+			", %" PRIi32 "]\n", cl_ntoh16(channel_id->local_lid),
+			channel_id->local_port,
+			cl_ntoh16(channel_id->remote_lid),
+			channel_id->remote_port, get_ccdg_node_color(ccdg,
+								     ccdg_node));
+	} else {
+		OSM_LOG(mgr->p_log, OSM_LOG_DEBUG,
+			" [local_lid, local_port, remote_lid, remote_port]"
+			" = [%" PRIu16 ", %" PRIu8 ", %" PRIu16 ", %" PRIu8
+			"]\n", cl_ntoh16(channel_id->local_lid),
+			channel_id->local_port,
+			cl_ntoh16(channel_id->remote_lid),
+			channel_id->remote_port);
+	}
+
+	for (j = 0, ccdg_edge_iter = ccdg_node->edges; j < ccdg_node->num_edges;
+	     j++, ccdg_edge_iter++) {
+		adj_ccdg_node = ccdg_edge_iter->to_ccdg_node;
+		CL_ASSERT(adj_ccdg_node);
+		if (print_colors) {
+			OSM_LOG(mgr->p_log, OSM_LOG_DEBUG,
+				" edge_towards[%" PRIu8
+				"][local_lid, local_port, remote_lid, remote_port, edge_color]"
+				" = [%" PRIu16 ", %" PRIu8 ", %"
PRIu16 ", %"
+				PRIu8 ", %" PRIi32 "]\n", j,
+				cl_ntoh16(adj_ccdg_node->channel_id.local_lid),
+				adj_ccdg_node->channel_id.local_port,
+				cl_ntoh16(adj_ccdg_node->channel_id.remote_lid),
+				adj_ccdg_node->channel_id.remote_port,
+				get_ccdg_edge_color(ccdg, ccdg_edge_iter));
+		} else {
+			OSM_LOG(mgr->p_log, OSM_LOG_DEBUG,
+				" edge_towards[%" PRIu8
+				"][local_lid, local_port, remote_lid, remote_port]"
+				" = [%" PRIu16 ", %" PRIu8 ", %" PRIu16 ", %"
+				PRIu8 "]\n", j,
+				cl_ntoh16(adj_ccdg_node->channel_id.local_lid),
+				adj_ccdg_node->channel_id.local_port,
+				cl_ntoh16(adj_ccdg_node->channel_id.remote_lid),
+				adj_ccdg_node->channel_id.remote_port);
+		}
+	}
+}
+
+/* log (level DEBUG) every node of the ccdg together with its outgoing
+   edges; colors are included in the output if print_colors is set */
+static void print_ccdg(const osm_ucast_mgr_t * mgr, const ccdg_t * ccdg,
+		       const boolean_t print_colors)
+{
+	uint32_t i = 0;
+	ccdg_node_t *ccdg_node_iter = NULL;
+
+	CL_ASSERT(mgr && ccdg);
+	OSM_LOG_ENTER(mgr->p_log);
+
+	for (i = 0, ccdg_node_iter = ccdg->nodes; i < ccdg->num_nodes;
+	     i++, ccdg_node_iter++)
+		print_ccdg_node(mgr, ccdg, ccdg_node_iter, i, print_colors);
+
+	OSM_LOG_EXIT(mgr->p_log);
+}
+
+/* log (level INFO) the destination base lids assigned to each virtual lane;
+   destinations[vl] is the lid array of lane vl (NULL entries are skipped)
+   and num_dest[vl] its number of elements */
+static void print_destination_distribution(const osm_ucast_mgr_t * mgr,
+					    ib_net16_t ** destinations,
+					    const uint16_t * num_dest)
+{
+	uint16_t i = 0;
+	uint8_t vl = 0;
+	ib_net16_t *dlid_iter = NULL;
+	osm_port_t *dest_port = NULL;
+	const char *dest_desc = NULL;
+
+	CL_ASSERT(mgr && destinations && num_dest);
+	OSM_LOG_ENTER(mgr->p_log);
+
+	for (vl = 0; vl < IB_MAX_NUM_VLS; vl++) {
+		if (!destinations[vl])
+			continue;
+		OSM_LOG(mgr->p_log, OSM_LOG_INFO,
+			"destination lids (base lid) for vl %" PRIu8 ":\n", vl);
+		dlid_iter = (ib_net16_t *) destinations[vl];
+		for (i = 0; i < num_dest[vl]; i++, dlid_iter++) {
+			dest_port =
+			    osm_get_port_by_lid(mgr->p_subn, *dlid_iter);
+			/* the lookup yields NULL if no port owns this lid (any
+			   more); a debugging helper must not crash then */
+			if (dest_port && dest_port->p_node)
+				dest_desc = dest_port->p_node->print_desc;
+			else
+				dest_desc = "unknown port";
+			OSM_LOG(mgr->p_log, OSM_LOG_INFO,
+				" %" PRIu16 " (%s)\n", cl_ntoh16(*dlid_iter),
+				dest_desc);
+		}
+	}
+
+	OSM_LOG_EXIT(mgr->p_log);
+}
+
+/* log the spanning tree which is stored in the escape_path field of the
+   network nodes: for each node the basic node information and the link
+   used to reach it; a node without escape_path is reported as the root */
+static void print_spanning_tree(const osm_ucast_mgr_t * mgr,
+				const network_t * network)
+{
+	uint32_t i = 0;
+	network_node_t *local_node = NULL, *netw_node_iter = NULL;
+	network_link_t *link = NULL;
+
+	CL_ASSERT(mgr && network);
+	OSM_LOG_ENTER(mgr->p_log);
+
+	for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes;
+	     i++, netw_node_iter++) {
+		print_network_node(mgr, netw_node_iter, i, FALSE);
+		link = netw_node_iter->escape_path;
+		if (!link) {
+			OSM_LOG(mgr->p_log, OSM_LOG_DEBUG,
+				" no link; is root of the spanning tree\n");
+			continue;
+		}
+		/* node at the local end of the link, needed for its name */
+		local_node =
+		    get_network_node_by_lid(network, link->link_info.local_lid);
+		CL_ASSERT(local_node);
+		OSM_LOG(mgr->p_log, OSM_LOG_DEBUG,
+			" link to get here: [name=%s, lid=%" PRIu16 ", port=%"
+			PRIu8 "]" " --> [name=%s, lid=%" PRIu16 ", port=%" PRIu8
+			"]\n", local_node->sw->p_node->print_desc,
+			cl_ntoh16(link->link_info.local_lid),
+			link->link_info.local_port,
+			netw_node_iter->sw->p_node->print_desc,
+			cl_ntoh16(link->link_info.remote_lid),
+			link->link_info.remote_port);
+	}
+
+	OSM_LOG_EXIT(mgr->p_log);
+}
+
+static void print_routes(const osm_ucast_mgr_t * mgr, const network_t * network,
+			 const osm_port_t * dest_port, const ib_net16_t dlid)
+{
+	network_node_t *curr_node = NULL, *netw_node_iter = NULL;
+	uint8_t rem_port = 0;
+	ib_net16_t r_lid = 0;
+	uint16_t i = 0;
+
+	CL_ASSERT(mgr && network && dest_port && dlid > 0);
+
+	for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes;
+	     i++, netw_node_iter++) {
+		curr_node = netw_node_iter;
+
+		OSM_LOG(mgr->p_log, OSM_LOG_DEBUG,
+			"Route from switch 0x%016" PRIx64
+			" (%s) to destination 0x%016" PRIx64 " (%s):\n",
+			curr_node->guid, curr_node->sw->p_node->print_desc,
+			cl_ntoh64(osm_node_get_node_guid(dest_port->p_node)),
+			dest_port->p_node->print_desc);
+
+		while (curr_node->used_link) {
+			OSM_LOG(mgr->p_log, OSM_LOG_DEBUG,
+				" 0x%016" PRIx64 " (%s) routes thru port %"
+				PRIu8 "\n", curr_node->guid,
+				curr_node->sw->p_node->print_desc,
+				curr_node->used_link->link_info.remote_port);
+
+			r_lid =
+			    (ib_net16_t) curr_node->used_link->link_info.
+ local_lid; + curr_node = get_network_node_by_lid(network, r_lid); + CL_ASSERT(curr_node); + } + if (osm_node_get_type(dest_port->p_node) == IB_NODE_TYPE_CA) { + (void)osm_node_get_remote_node(dest_port->p_node, + dest_port->p_physp-> + port_num, &rem_port); + OSM_LOG(mgr->p_log, OSM_LOG_DEBUG, + " 0x%016" PRIx64 " (%s) routes thru port %" + PRIu8 "\n", curr_node->guid, + curr_node->sw->p_node->print_desc, rem_port); + } + } +} + +static inline void print_channel_id(const osm_ucast_mgr_t * mgr, + const channel_t channel_id, + const boolean_t console) +{ + CL_ASSERT(mgr); + + if (console) { + printf("Channel Info [(LID,Port) -> (LID,Port)] = [(%" PRIu16 + ",%" PRIu8 ") -> (%" PRIu16 ",%" PRIu8 ")]\n", + channel_id.local_lid, channel_id.local_port, + channel_id.remote_lid, channel_id.remote_port); + } else { + OSM_LOG(mgr->p_log, OSM_LOG_INFO, + "Channel Info [(LID,Port) -> (LID,Port)] = [(%" PRIu16 + ",%" PRIu8 ") -> (%" PRIu16 ",%" PRIu8 ")]\n", + channel_id.local_lid, channel_id.local_port, + channel_id.remote_lid, channel_id.remote_port); + } +} + +/********************************************************************** + **********************************************************************/ + +/* get the larges number of virtual lanes which is supported by all nodes + in the subnet, or use user supplied number (if it is smaller) +*/ +static uint8_t get_max_num_vls(const osm_ucast_mgr_t * mgr) +{ + uint32_t i = 0; + uint8_t vls_avail = 0xFF, port_vls_avail = 0; + cl_qmap_t *switch_tbl = NULL; + cl_map_item_t *item = NULL; + osm_switch_t *sw = NULL; + + CL_ASSERT(mgr); + OSM_LOG_ENTER(mgr->p_log); + + /* traverse all switches to get the number of available virtual lanes + in the subnet + */ + switch_tbl = &(mgr->p_subn->sw_guid_tbl); + for (item = cl_qmap_head(switch_tbl); item != cl_qmap_end(switch_tbl); + item = cl_qmap_next(item)) { + sw = (osm_switch_t *) item; + + /* include management port 0 only in case a TCA is attached + (this assumes that 
p_physp->p_remote_physp is only valid with + TCA attached and NULL otherwise); it is neccessary because + without TCA the port only shows VL0 in VLCap/OperVLs + */ + for (i = 0; i < osm_node_get_num_physp(sw->p_node); i++) { + osm_physp_t *p_physp = + osm_node_get_physp_ptr(sw->p_node, i); + + if (p_physp && p_physp->p_remote_physp) { + port_vls_avail = + ib_port_info_get_op_vls(&p_physp-> + port_info); + if (port_vls_avail + && port_vls_avail < vls_avail) + vls_avail = port_vls_avail; + } + } + } + + /* ib_port_info_get_op_vls gives values 1 ... 5 (s. IBAS 14.2.5.6) */ + vls_avail = 1 << (vls_avail - 1); + + /* set boundaries (s. IBAS 3.5.7) */ + if (vls_avail > 15) + vls_avail = 15; + if (vls_avail < 1) + vls_avail = 1; + + /* now check if the user requested a different maximum #VLs */ + if (mgr->p_subn->opt.nue_max_num_vls) { + if (mgr->p_subn->opt.nue_max_num_vls <= vls_avail) + vls_avail = mgr->p_subn->opt.nue_max_num_vls; + else + OSM_LOG(mgr->p_log, OSM_LOG_INFO, + "WRN NUE47: user requested maximum #VLs is larger than supported #VLs\n"); + } + + OSM_LOG_EXIT(mgr->p_log); + return vls_avail; +} + +static int create_context(nue_context_t * nue_ctx) +{ + uint16_t max_lid_ho = 0; + + CL_ASSERT(nue_ctx); + + /* and now initialize internals, i.e. 
network and ccdg */ + construct_network(&(nue_ctx->network)); + construct_ccdg(&(nue_ctx->ccdg)); + + /* we also need an array of all lids to distribute across VLs */ + CL_ASSERT(IB_MAX_NUM_VLS); + max_lid_ho = nue_ctx->mgr->p_subn->max_ucast_lid_ho; + memset(nue_ctx->num_destinations, 0, IB_MAX_NUM_VLS * sizeof(uint16_t)); + memset(nue_ctx->destinations, 0, IB_MAX_NUM_VLS * sizeof(ib_net16_t *)); + nue_ctx->num_destinations[0] = max_lid_ho; + nue_ctx->destinations[0] = + (ib_net16_t *) malloc(max_lid_ho * sizeof(ib_net16_t)); + if (!nue_ctx->destinations[0]) { + OSM_LOG(nue_ctx->mgr->p_log, OSM_LOG_ERROR, + "ERR NUE07: cannot allocate dlid array\n"); + destroy_context(nue_ctx); + return -1; + } + + /* and an array for the mapping of src/dest path to VL */ + nue_ctx->dlid_to_vl_mapping = + (uint8_t *) malloc(max_lid_ho * sizeof(uint8_t)); + if (!nue_ctx->dlid_to_vl_mapping) { + OSM_LOG(nue_ctx->mgr->p_log, OSM_LOG_ERROR, + "ERR NUE06: cannot allocate dlid_to_vl_mapping\n"); + destroy_context(nue_ctx); + return -1; + } + memset(nue_ctx->dlid_to_vl_mapping, OSM_DEFAULT_SL, + max_lid_ho * sizeof(uint8_t)); + + return 0; +} + +static nue_context_t *nue_create_context(const osm_opensm_t * osm, + const osm_routing_engine_type_t + routing_type) +{ + nue_context_t *nue_ctx = NULL; + int err = 0; + + CL_ASSERT(osm); + + /* allocate memory for nue context */ + nue_ctx = (nue_context_t *) malloc(sizeof(nue_context_t)); + if (nue_ctx) { + /* set initial values with stuff provided by caller */ + nue_ctx->routing_type = routing_type; + nue_ctx->mgr = (osm_ucast_mgr_t *) & (osm->sm.ucast_mgr); + err = create_context(nue_ctx); + if (err) { + free(nue_ctx); + return NULL; + } + } else { + OSM_LOG(osm->sm.ucast_mgr.p_log, OSM_LOG_ERROR, + "ERR NUE01: cannot allocate memory for nue_ctx\n"); + return NULL; + } + + return nue_ctx; +} + +/* count the total number of Hca/Tca (or LIDs for lmc>0) in the fabric + (even include base/enhanced switch port 0; base SP0 will have lmc=0); + and 
while we are already on it, we save the base lids for later
+*/
+/* side effects: writes the base lids into nue_ctx->destinations[0] and
+   updates nue_ctx->num_destinations[0] and nue_ctx->max_lmc;
+   returns the sum of 2^lmc over all counted CA/switch ports
+*/
+static uint64_t get_base_lids_and_number_of_lids(nue_context_t * nue_ctx)
+{
+	cl_qmap_t *port_tbl = NULL;
+	cl_map_item_t *item = NULL;
+	osm_port_t *port = NULL;
+	uint64_t total_num_destination_lids = 0;
+	uint8_t lmc = 0, max_lmc = 0, ntype = 0;
+	uint16_t total_num_base_lids = 0;
+	ib_net16_t *dlid_iter = NULL;
+	ib_net64_t port_guid = 0;
+
+	CL_ASSERT(nue_ctx);
+	OSM_LOG_ENTER(nue_ctx->mgr->p_log);
+
+	dlid_iter = (ib_net16_t *) nue_ctx->destinations[0];
+	port_tbl = (cl_qmap_t *) & (nue_ctx->mgr->p_subn->port_guid_tbl);
+	for (item = cl_qmap_head(port_tbl); item != cl_qmap_end(port_tbl);
+	     item = cl_qmap_next(item)) {
+		port = (osm_port_t *) item;
+		ntype = osm_node_get_type(port->p_node);
+		/* check if link is healthy, otherwise ignore CA */
+		/* NOTE(review): only a warning is logged here, the port is
+		   still counted and stored below -- confirm whether the CA
+		   is filtered out at a later stage */
+		if (ntype == IB_NODE_TYPE_CA && port->p_physp &&
+		    !osm_link_is_healthy(port->p_physp)) {
+			port_guid = osm_node_get_node_guid(port->p_node);
+			OSM_LOG(nue_ctx->mgr->p_log, OSM_LOG_INFO,
+				"WRN NUE44: ignoring CA 0x%016" PRIx64
+				" due to unhealthy to/from adjacent switch\n",
+				cl_ntoh64(port_guid));
+		}
+		if (ntype == IB_NODE_TYPE_CA || ntype == IB_NODE_TYPE_SWITCH) {
+			/* count num destinations to get initial link weight */
+			lmc = osm_port_get_lmc(port);
+			total_num_destination_lids += (1 << lmc);
+			if (lmc > max_lmc)
+				max_lmc = lmc;
+
+			/* and store the base lids */
+			*dlid_iter++ = osm_port_get_base_lid(port);
+			total_num_base_lids++;
+		}
+	}
+	nue_ctx->num_destinations[0] = total_num_base_lids;
+	nue_ctx->max_lmc = max_lmc;
+
+	/* we skip a realloc of nue_ctx->destinations[0] since it will be done
+	   in the distribution function later anyways
+	 */
+
+	OSM_LOG_EXIT(nue_ctx->mgr->p_log);
+	return total_num_destination_lids;
+}
+
+/* build the complete channel dependency graph for the given network:
+   one ccdg node per switch-to-switch link (real channel) plus one fake
+   channel (local lid == remote lid, ports 0) per network node; the edges
+   of a real channel lead to all channels leaving the adjacent node, except
+   the reverse channel; the edges of a fake channel lead to all channels of
+   its node. afterwards the nodes are sorted by channel id, the edges are
+   resolved to node pointers and each network link is connected with its
+   ccdg node.
+   returns 0 on success and -1 if memory could not be allocated
+*/
+static int build_complete_cdg(const osm_ucast_mgr_t * mgr,
+			      const network_t * network, ccdg_t * ccdg,
+			      const uint32_t total_num_sw_to_sw_links)
+{
+	uint64_t i = 0, j = 0, k = 0;
+	channel_t channel_id;
+	network_node_t *adj_netw_node = NULL, *netw_node_iter = NULL;
+	network_link_t *link = NULL, *netw_link_iter = NULL;
+	ccdg_node_t *ccdg_node_iter = NULL;
+	ccdg_edge_t *ccdg_edge = NULL, *ccdg_edges = NULL;
+	ccdg_edge_t *ccdg_edge_iter = NULL;
+	ib_net16_t l_lid = 0, r_lid = 0;
+	uint8_t l_port = 0, r_port = 0, num_edges = 0;
+
+	CL_ASSERT(mgr && network && ccdg);
+	OSM_LOG_ENTER(mgr->p_log);
+
+	OSM_LOG(mgr->p_log, OSM_LOG_INFO,
+		"Building complete channel dependency graph for nue routing\n");
+
+	/* we have two types of ccdg nodes, real channels and fake entries,
+	   the fake entries are needed as source ccdg node for the routing
+	 */
+	ccdg->num_nodes = total_num_sw_to_sw_links + network->num_nodes;
+	ccdg->nodes =
+	    (ccdg_node_t *) malloc(ccdg->num_nodes * sizeof(ccdg_node_t));
+	if (!ccdg->nodes) {
+		OSM_LOG(mgr->p_log, OSM_LOG_ERROR,
+			"ERR NUE05: can't allocate memory for ccdg nodes\n");
+		return -1;
+	}
+	for (i = 0, ccdg_node_iter = ccdg->nodes; i < ccdg->num_nodes;
+	     i++, ccdg_node_iter++)
+		construct_ccdg_node(ccdg_node_iter);
+
+	ccdg_node_iter = (ccdg_node_t *) ccdg->nodes;
+	for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes;
+	     i++, netw_node_iter++) {
+		/* first we add the fake channel */
+		channel_id.local_lid = netw_node_iter->lid;
+		channel_id.local_port = 0;
+		channel_id.remote_lid = netw_node_iter->lid;
+		channel_id.remote_port = 0;
+
+		/* the fake channel connects to all real channels of
+		   this node
+		 */
+		num_edges = netw_node_iter->num_links;
+		ccdg_edges =
+		    (ccdg_edge_t *) malloc(num_edges * sizeof(ccdg_edge_t));
+		if (num_edges && !ccdg_edges) {
+			OSM_LOG(mgr->p_log, OSM_LOG_ERROR,
+				"ERR NUE35: cannot allocate memory for"
+				" ccdg edges of fake channel\n");
+			return -1;
+		}
+		for (k = 0, ccdg_edge_iter = ccdg_edges; k < num_edges;
+		     k++, ccdg_edge_iter++)
+			construct_ccdg_edge(ccdg_edge_iter);
+
+		/* init ccdg edges for this fake ccdg node */
+		for (k = 0, ccdg_edge_iter = ccdg_edges; k < num_edges;
+		     k++, ccdg_edge_iter++) {
+			link = (network_link_t *) & (netw_node_iter->links[k]);
+			init_ccdg_edge(ccdg_edge_iter, link->link_info);
+		}
+
+		init_ccdg_node(ccdg_node_iter++, channel_id, num_edges,
+			       ccdg_edges, NULL);
+
+		/* and afterwards the real channels */
+		for (j = 0, netw_link_iter = netw_node_iter->links;
+		     j < netw_node_iter->num_links;
+		     j++, netw_link_iter++, ccdg_node_iter++) {
+			channel_id = netw_link_iter->link_info;
+			l_lid = channel_id.local_lid;
+			l_port = channel_id.local_port;
+			adj_netw_node = netw_link_iter->to_network_node;
+			CL_ASSERT(adj_netw_node && adj_netw_node->num_links);
+
+			/* we can ignore reverse path, so it is #links - 1 */
+			num_edges = adj_netw_node->num_links - 1;
+			ccdg_edges =
+			    (ccdg_edge_t *) malloc(num_edges *
+						   sizeof(ccdg_edge_t));
+			/* an adjacent node with a single link yields zero
+			   edges and malloc(0) is allowed to return NULL, so
+			   NULL is only an error for a non-empty request
+			   (same check as for the fake channel above)
+			 */
+			if (num_edges && !ccdg_edges) {
+				OSM_LOG(mgr->p_log, OSM_LOG_ERROR,
+					"ERR NUE08: can't allocate memory for"
+					" ccdg edges\n");
+				return -1;
+			}
+			for (k = 0, ccdg_edge_iter = ccdg_edges; k < num_edges;
+			     k++, ccdg_edge_iter++)
+				construct_ccdg_edge(ccdg_edge_iter);
+
+			/* init ccdg edges for this ccdg node */
+			for (k = 0, ccdg_edge_iter = ccdg_edges;
+			     k < adj_netw_node->num_links; k++) {
+				/* filter the reverse path */
+				link =
+				    (network_link_t *) & (adj_netw_node->
+							  links[k]);
+				r_lid = link->link_info.remote_lid;
+				r_port = link->link_info.remote_port;
+				/* theoretically, we could ignore every reverse
+				   path (for multigraphs), not only the one with
+				   the same port => room for future optimization
+				 */
+				if (l_lid == r_lid && l_port == r_port)
+					continue;
+
+				init_ccdg_edge(ccdg_edge_iter++,
+					       link->link_info);
+			}
+
+			init_ccdg_node(ccdg_node_iter, channel_id, num_edges,
+				       ccdg_edges, netw_link_iter);
+		}
+	}
+
+	/* sort the node array to find individual nodes easier with bsearch */
+	sort_ccdg_nodes_by_channel_id(ccdg);
+
+	/* now we need to add the last piece of information to the ccdg edge
+	   and connect the ccdg_nodes and corresponding network links
+	 */
+	ccdg_node_iter = (ccdg_node_t *) ccdg->nodes;
+	for (i = 0; i < ccdg->num_nodes; i++, ccdg_node_iter++) {
+		for (j = 0; j < ccdg_node_iter->num_edges; j++) {
+			ccdg_edge =
+			    (ccdg_edge_t *) & (ccdg_node_iter->edges[j]);
+			channel_id = ccdg_edge->to_channel_id;
+			ccdg_edge->to_ccdg_node =
+			    get_ccdg_node_by_channel_id(ccdg, channel_id);
+			CL_ASSERT(ccdg_edge->to_ccdg_node);
+		}
+
+		if (ccdg_node_iter->corresponding_netw_link)
+			ccdg_node_iter->corresponding_netw_link->
+			    corresponding_ccdg_node = ccdg_node_iter;
+		else {
+			/* make sure it's a fake channel otherwise */
+			CL_ASSERT(ccdg_node_iter->channel_id.local_lid ==
+				  ccdg_node_iter->channel_id.remote_lid
+				  && (0 ==
+				      (ccdg_node_iter->channel_id.
+				       local_port | ccdg_node_iter->channel_id.
+				       remote_port)));
+		}
+	}
+
+	OSM_LOG_EXIT(mgr->p_log);
+	return 0;
+}
+
+/* traverse subnet to gather information about the connected switches */
+static int nue_discover_network(void *context)
+{
+	nue_context_t *nue_ctx = (nue_context_t *) context;
+	osm_ucast_mgr_t *mgr = NULL;
+	cl_qmap_t *switch_tbl = NULL;
+	cl_map_item_t *item = NULL;
+	osm_switch_t *sw = NULL;
+	osm_physp_t *physp_ptr = NULL;
+	network_t *network = NULL;
+	uint64_t i = 0, j = 0;
+	uint64_t total_num_destination_lids = 0, total_num_switches = 0;
+	uint32_t total_num_sw_to_sw_links = 0;
+	uint64_t init_weight = 0;
+	ib_net16_t lid = 0, r_lid = 0;
+	ib_net64_t guid = 0;
+	uint8_t lmc = 0;
+	uint8_t num_base_terminals = 0, num_terminals = 0;
+	uint8_t num_sw_to_sw_links = 0;
+	network_node_t *netw_node_iter = NULL;
+	network_link_t *link = NULL, *links = NULL, *realloc_links = NULL;
+	network_link_t *netw_link_iter = NULL;
+	osm_node_t *r_node = NULL;
+	uint8_t port = 0, r_port = 0;
+	boolean_t has_fdr10 = FALSE;
+	int err = 0;
+
+	if (nue_ctx)
+		mgr = (osm_ucast_mgr_t *) nue_ctx->mgr;
+	else
+		return -1;
+	has_fdr10 = (1 == mgr->p_subn->opt.fdr10) ? TRUE : FALSE;
+
+	OSM_LOG_ENTER(mgr->p_log);
+	OSM_LOG(mgr->p_log, OSM_LOG_INFO,
+		"Building network graph for nue routing\n");
+
+	/* if this pointer isn't NULL, this is a reroute step;
+	   old context will be destroyed and we set up a new/clean context
+	 */
+	if (nue_ctx->network.nodes) {
+		destroy_context(nue_ctx);
+		create_context(nue_ctx);
+	}
+
+	/* acquire basic information about the network */
+	nue_ctx->max_vl = get_max_num_vls(mgr);
+	if (nue_ctx->max_vl != 1 && !(mgr->p_subn->opt.qos)) {
+		OSM_LOG(mgr->p_log, OSM_LOG_INFO,
+			"WRN NUE48: Nue routing with nue_max_num_vls == %" PRIu8
+			" should enable QoS for valid SL2VL mapping, "
+			" using nue_max_num_vls 1\n",
+			nue_ctx->max_vl);
+		nue_ctx->max_vl = 1;
+	}
+	total_num_destination_lids = get_base_lids_and_number_of_lids(nue_ctx);
+	init_weight = total_num_destination_lids * total_num_destination_lids;
+
+	switch_tbl = &(mgr->p_subn->sw_guid_tbl);
+	total_num_switches = cl_qmap_count(switch_tbl);
+
+	network = (network_t *) & (nue_ctx->network);
+	network->num_nodes = total_num_switches;
+	network->nodes =
+	    (network_node_t *) malloc(total_num_switches *
+				      sizeof(network_node_t));
+	if (!network->nodes) {
+		OSM_LOG(mgr->p_log, OSM_LOG_ERROR,
+			"ERR NUE02: can't allocate memory for network nodes\n");
+		destroy_context(context);
+		return -1;
+	}
+	for (i = 0, netw_node_iter = network->nodes; i < total_num_switches;
+	     i++, netw_node_iter++)
+		construct_network_node(netw_node_iter);
+
+	netw_node_iter = (network_node_t *) network->nodes;
+	for (item = cl_qmap_head(switch_tbl); item != cl_qmap_end(switch_tbl);
+	     item = cl_qmap_next(item)) {
+		sw = (osm_switch_t *) item;
+		guid = osm_node_get_node_guid(sw->p_node);
+		OSM_LOG(mgr->p_log, OSM_LOG_DEBUG,
+			"Processing switch with GUID 0x%016" PRIx64 "\n",
+			cl_ntoh64(guid));
+
+		lid = osm_node_get_base_lid(sw->p_node, 0);
+		num_base_terminals = 0;
+
+		/* add SP0 to number of CA conneted to a switch */
+		lmc = osm_node_get_lmc(sw->p_node, 0);
+		num_terminals = (1 <<
lmc); + + /* we start with the maximum and resize the link array later */ + links = + (network_link_t *) malloc(sw->num_ports * + sizeof(network_link_t)); + if (!links) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE03: cannot allocate memory for link\n"); + destroy_context(context); + return -1; + } + for (i = 0, netw_link_iter = links; i < sw->num_ports; + i++, netw_link_iter++) + construct_network_link(netw_link_iter); + + /* iterate over all ports (including management port 0) */ + for (port = 0, i = 0; port < sw->num_ports; port++) { + /* get the remote node behind this port */ + r_node = + osm_node_get_remote_node(sw->p_node, port, &r_port); + /* if there is no remote node on this port or it is + the same switch, then try next port + */ + if (!r_node || r_node->sw == sw) + continue; + /* make sure the link is healthy */ + physp_ptr = osm_node_get_physp_ptr(sw->p_node, port); + if (!physp_ptr || !osm_link_is_healthy(physp_ptr)) + continue; + /* if there is a Hca connected, then count and cycle */ + if (!r_node->sw) { + num_base_terminals++; + lmc = + osm_node_get_lmc(r_node, (uint32_t) r_port); + num_terminals += (1 << lmc); + continue; + } + /* filter out throttled links to improve performance */ + if (mgr->p_subn->opt.avoid_throttled_links && + osm_link_is_throttled(physp_ptr, has_fdr10)) { + OSM_LOG(mgr->p_log, OSM_LOG_INFO, + "Detected and ignoring throttled link:" + " 0x%016" PRIx64 "/P%" PRIu8 + " <--> 0x%016" PRIx64 "/P%" PRIu8 "\n", + cl_ntoh64(osm_node_get_node_guid(sw->p_node)), + port, + cl_ntoh64(osm_node_get_node_guid(r_node)), + r_port); + continue; + } + /* initialize link with all we know right now */ + r_lid = osm_node_get_base_lid(r_node, 0); + init_network_link(&(links[i++]), lid, port, r_lid, + r_port, init_weight); + } + num_sw_to_sw_links = (uint8_t) i; + total_num_sw_to_sw_links += (uint32_t) num_sw_to_sw_links; + + /* we don't increase in size, so omit check of the return val */ + if (num_sw_to_sw_links < sw->num_ports) { + 
realloc_links = + realloc(links, + num_sw_to_sw_links * + sizeof(network_link_t)); + + if (num_sw_to_sw_links && !realloc_links) + OSM_LOG(mgr->p_log, OSM_LOG_INFO, + "WRN NUE42: cannot resize memory for links\n"); + else if (!num_sw_to_sw_links) + links = NULL; + else + links = realloc_links; + } + + /* initialize everything for the internal node representation */ + err = init_network_node(netw_node_iter++, lid, guid, + num_base_terminals, num_terminals, + num_sw_to_sw_links, links, sw); + if (err) { + OSM_LOG(mgr->p_log, OSM_LOG_INFO, + "ERR NUE46: cannot allocate memory for stack_used_links\n"); + destroy_context(context); + return -1; + } + } + + /* sort the node array to find individual nodes easier with bsearch */ + sort_network_nodes_by_lid(network); + + /* now we need to add the last piece of information to the links */ + for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes; + i++, netw_node_iter++) { + for (j = 0, netw_link_iter = netw_node_iter->links; + j < netw_node_iter->num_links; j++, netw_link_iter++) { + link = netw_link_iter; + lid = link->link_info.remote_lid; + link->to_network_node = + get_network_node_by_lid(network, lid); + CL_ASSERT(link->to_network_node); + } + } + + /* print the discovered network graph */ + if (OSM_LOG_IS_ACTIVE_V2(mgr->p_log, OSM_LOG_DEBUG)) + print_network(mgr, network); + + err = + build_complete_cdg(mgr, &(nue_ctx->network), &(nue_ctx->ccdg), + total_num_sw_to_sw_links); + if (err) { + destroy_context(context); + return -1; + } + + /* print the constructed complete channel dependency graph */ + if (OSM_LOG_IS_ACTIVE_V2(mgr->p_log, OSM_LOG_DEBUG)) { + OSM_LOG(mgr->p_log, OSM_LOG_DEBUG, + "Complete channel dependency graph of the discovered network\n"); + print_ccdg(mgr, &(nue_ctx->ccdg), FALSE); + } + + OSM_LOG_EXIT(mgr->p_log); + return 0; +} + +#if defined (ENABLE_METIS_FOR_NUE) +static int distribute_lids_with_metis(nue_context_t * nue_ctx, + const boolean_t include_sw) +{ + osm_switch_t *sw = NULL; + 
osm_node_t *r_node = NULL; + osm_port_t *port = NULL; + osm_physp_t *physp_ptr = NULL; + uint8_t ntype = 0; + ib_net16_t *desti_arr = NULL, *dlid_iter = NULL; + ib_net16_t *dlid_arr_iter[IB_MAX_NUM_VLS]; + ib_net16_t r_lid = 0; + uint16_t i = 0, num_desti = 0; + uint8_t num_adj = 0, l_port = 0, r_port = 0; + uint32_t total_num_adjnc = 0; + network_t *network = NULL; + network_node_t *netw_node_iter = NULL; + metis_context_t metis_ctx; + idx_t *xadj_iter = NULL, *adjncy_iter = NULL; + uint8_t partition = 0; + int ret = METIS_OK; + + CL_ASSERT(nue_ctx); + + desti_arr = (ib_net16_t *) nue_ctx->destinations[0]; + num_desti = nue_ctx->num_destinations[0]; + + construct_metis_context(&metis_ctx); + init_metis_context(&metis_ctx, (idx_t) num_desti, + (idx_t) nue_ctx->max_vl, (idx_t) - 1, (idx_t) 0); + + /* theoretically, sorting this array might be not the best idea for + later iterations over the destinations for each routing step with + Dijkstra's since we might lose temporal locality of HCAs; at least + for dfsssp processing all CAs at the same switch before jumping to + the next sw yields better results => room for future optimizations + */ + sort_destinations_by_lid(desti_arr, (uint32_t) num_desti); + + /* count the number links (sw<->sw and ca<->sw) in the subnet */ + network = (network_t *) & (nue_ctx->network); + for (i = 0, netw_node_iter = network->nodes, total_num_adjnc = 0; + i < network->num_nodes; i++, netw_node_iter++) + total_num_adjnc += + (netw_node_iter->num_base_terminals + + netw_node_iter->num_links); + + metis_ctx.xadj = + (idx_t *) malloc((*(metis_ctx.nvtxs) + 1) * sizeof(idx_t)); + if (!metis_ctx.xadj) { + OSM_LOG(nue_ctx->mgr->p_log, OSM_LOG_ERROR, + "ERR NUE10: can't allocate memory for xadj\n"); + destroy_metis_context(&metis_ctx); + return -1; + } + metis_ctx.xadj[0] = 0; + + metis_ctx.adjncy = + (idx_t *) malloc(2 * total_num_adjnc * sizeof(idx_t)); + if (total_num_adjnc && !metis_ctx.adjncy) { + OSM_LOG(nue_ctx->mgr->p_log, OSM_LOG_ERROR, + 
"ERR NUE11: can't allocate memory for adjncy\n"); + destroy_metis_context(&metis_ctx); + return -1; + } + + metis_ctx.part = (idx_t *) malloc(*(metis_ctx.nvtxs) * sizeof(idx_t)); + if (!metis_ctx.part) { + OSM_LOG(nue_ctx->mgr->p_log, OSM_LOG_ERROR, + "ERR NUE12: can't allocate memory for partition\n"); + destroy_metis_context(&metis_ctx); + return -1; + } + + /* fill up the xadj and adjncy arrays */ + dlid_iter = (ib_net16_t *) desti_arr; + xadj_iter = (idx_t *) metis_ctx.xadj; + adjncy_iter = (idx_t *) metis_ctx.adjncy; + for (i = 0; i < nue_ctx->num_destinations[0]; + i++, dlid_iter++, xadj_iter++) { + port = osm_get_port_by_lid(nue_ctx->mgr->p_subn, *dlid_iter); + ntype = osm_node_get_type(port->p_node); + /* if base dlid is a CA then adjncy is only a switch */ + if (ntype == IB_NODE_TYPE_CA) { + r_node = + osm_node_get_remote_node(port->p_node, + port->p_physp->port_num, + &r_port); + if (!r_node + || osm_node_get_type(r_node) != + IB_NODE_TYPE_SWITCH) { + OSM_LOG(nue_ctx->mgr->p_log, OSM_LOG_ERROR, + "ERR NUE13: found CA attached to" + " something other than a switch; nue" + " cannot handle this case\n"); + destroy_metis_context(&metis_ctx); + return -1; + } + r_lid = osm_node_get_base_lid(r_node, 0); + + xadj_iter[1] = xadj_iter[0] + 1; + *adjncy_iter++ = + get_lid(desti_arr, num_desti, r_lid) - desti_arr; + } /* otherwise we have to check a bunch of ports */ + else if (ntype == IB_NODE_TYPE_SWITCH) { + sw = port->p_node->sw; + num_adj = 0; + for (l_port = 0; l_port < sw->num_ports; l_port++) { + /* get the remote node behind this port */ + r_node = + osm_node_get_remote_node(sw->p_node, l_port, + &r_port); + /* if there is no remote node on this port + or it is the same switch, then try next port + */ + if (!r_node || r_node->sw == sw) + continue; + /* make sure the link is healthy */ + physp_ptr = + osm_node_get_physp_ptr(sw->p_node, l_port); + if (!physp_ptr + || !osm_link_is_healthy(physp_ptr)) + continue; + + ntype = osm_node_get_type(r_node); + if 
(ntype == IB_NODE_TYPE_CA) + r_lid = + osm_node_get_base_lid(r_node, + r_port); + else if (ntype == IB_NODE_TYPE_SWITCH) + r_lid = + osm_node_get_base_lid(r_node, 0); + *adjncy_iter++ = + get_lid(desti_arr, num_desti, + r_lid) - desti_arr; + num_adj++; + } + xadj_iter[1] = xadj_iter[0] + num_adj; + } + } + + /* metis doesnt like nparts == 1 so we fake it if needed */ + if (*(metis_ctx.nparts) == 1) + memset(metis_ctx.part, 0, *(metis_ctx.nvtxs) * sizeof(idx_t)); + else + ret = + METIS_PartGraphKway(metis_ctx.nvtxs, metis_ctx.ncon, + metis_ctx.xadj, metis_ctx.adjncy, NULL, + NULL, NULL, metis_ctx.nparts, NULL, + NULL, metis_ctx.options, + metis_ctx.objval, metis_ctx.part); + if (ret != METIS_OK) { + OSM_LOG(nue_ctx->mgr->p_log, OSM_LOG_ERROR, + "ERR NUE20: metis partitioning failed (ret=%d)\n", ret); + destroy_metis_context(&metis_ctx); + return -1; + } + + memset(nue_ctx->num_destinations, 0, IB_MAX_NUM_VLS * sizeof(uint16_t)); + for (i = 0; i < *(metis_ctx.nvtxs); i++) + nue_ctx->num_destinations[metis_ctx.part[i]]++; + + for (i = 0; i < *(metis_ctx.nparts); i++) { + nue_ctx->destinations[i] = + (ib_net16_t *) malloc(nue_ctx->num_destinations[i] * + sizeof(ib_net16_t)); + if (nue_ctx->num_destinations[i] && !nue_ctx->destinations[i]) { + OSM_LOG(nue_ctx->mgr->p_log, OSM_LOG_ERROR, + "ERR NUE14: cannot allocate dlid array\n"); + destroy_metis_context(&metis_ctx); + free(desti_arr); + return -1; + } + } + + memset(nue_ctx->num_destinations, 0, IB_MAX_NUM_VLS * sizeof(uint16_t)); + memcpy(dlid_arr_iter, nue_ctx->destinations, + IB_MAX_NUM_VLS * sizeof(ib_net16_t *)); + for (i = 0; i < *(metis_ctx.nvtxs); i++) { + if (!include_sw) { + port = + osm_get_port_by_lid(nue_ctx->mgr->p_subn, + desti_arr[i]); + ntype = osm_node_get_type(port->p_node); + if (ntype == IB_NODE_TYPE_SWITCH) + continue; + } + partition = (uint8_t) metis_ctx.part[i]; + *dlid_arr_iter[partition] = desti_arr[i]; + dlid_arr_iter[partition]++; + nue_ctx->num_destinations[partition]++; + } + + 
destroy_metis_context(&metis_ctx); + free(desti_arr); + return 0; +} +#else +static int distribute_lids_semi_randomly(nue_context_t * nue_ctx, + const boolean_t include_sw) +{ + uint8_t vl = 0; + uint16_t num_dest = 0, temp_sum = 0, i = 0, max_num_desti_per_layer = 0; + ib_net16_t *all_dest = NULL, *dlid_iter = NULL, *partition = NULL; + osm_port_t *dest_port = NULL; + uint8_t ntype = 0; + + CL_ASSERT(nue_ctx && nue_ctx->destinations[0]); + + all_dest = nue_ctx->destinations[0]; + num_dest = nue_ctx->num_destinations[0]; + + for (vl = nue_ctx->max_vl - 1; vl > 0; vl--) { + nue_ctx->num_destinations[vl] = num_dest / nue_ctx->max_vl; + temp_sum += nue_ctx->num_destinations[vl]; + if (max_num_desti_per_layer < nue_ctx->num_destinations[vl]) + max_num_desti_per_layer = nue_ctx->num_destinations[vl]; + } + nue_ctx->num_destinations[0] = num_dest - temp_sum; + if (max_num_desti_per_layer < nue_ctx->num_destinations[0]) + max_num_desti_per_layer = nue_ctx->num_destinations[0]; + + for (vl = 0; vl < nue_ctx->max_vl; vl++) { + nue_ctx->destinations[vl] = + (ib_net16_t *) malloc(max_num_desti_per_layer * + sizeof(ib_net16_t)); + if (!nue_ctx->destinations[vl]) { + OSM_LOG(nue_ctx->mgr->p_log, OSM_LOG_ERROR, + "ERR NUE09: cannot allocate memory for" + " destinations[%" PRIu8 "]\n", vl); + return -1; + } + } + + memset(nue_ctx->num_destinations, 0, IB_MAX_NUM_VLS * sizeof(uint16_t)); + dlid_iter = (ib_net16_t *) all_dest; + for (i = 0, vl = 0; i < num_dest; i++, dlid_iter++) { + if (!include_sw) { + dest_port = + osm_get_port_by_lid(nue_ctx->mgr->p_subn, + *dlid_iter); + ntype = osm_node_get_type(dest_port->p_node); + if (ntype == IB_NODE_TYPE_SWITCH) + continue; + } + partition = (ib_net16_t *) nue_ctx->destinations[vl]; + partition[nue_ctx->num_destinations[vl]] = *dlid_iter; + nue_ctx->num_destinations[vl]++; + vl = (vl + 1) % nue_ctx->max_vl; + } + free(all_dest); + + return 0; +} +#endif + +static inline int distribute_lids_onto_virtual_layers(nue_context_t * nue_ctx, + 
const boolean_t + include_sw) +{ + CL_ASSERT(nue_ctx); + OSM_LOG(nue_ctx->mgr->p_log, OSM_LOG_INFO, + "Distributing destination lids onto available VLs\n"); + +#if defined (ENABLE_METIS_FOR_NUE) + return distribute_lids_with_metis(nue_ctx, include_sw); +#else + return distribute_lids_semi_randomly(nue_ctx, include_sw); +#endif +} + +/* returns the input lid if it belongs to a switch or the lid of the adjacent + switch otherwise + */ +static inline ib_net16_t get_switch_lid(const osm_ucast_mgr_t * mgr, + const ib_net16_t lid) +{ + osm_node_t *o_rem_node = NULL; + osm_port_t *o_port = NULL; + uint8_t rem_port = 0; + ib_net16_t switch_lid = 0; + + CL_ASSERT(mgr && lid > 0); + + switch_lid = lid; + o_port = osm_get_port_by_lid(mgr->p_subn, lid); + CL_ASSERT(o_port); + if (osm_node_get_type(o_port->p_node) == IB_NODE_TYPE_CA) { + o_rem_node = + osm_node_get_remote_node(o_port->p_node, + o_port->p_physp->port_num, + &rem_port); + CL_ASSERT(o_rem_node); + switch_lid = osm_node_get_base_lid(o_rem_node, 0); + } + return switch_lid; +} + +/* the function determines the convex hull of a subset of nodes of the network, + this convex hull is the enclosure of all shortest paths between these nodes, + therefore we calculate a spanning tree from each node and which is traversed + in the opposite direction to collect all nodes along the shortest paths + */ +static int calculate_convex_subnetwork(const osm_ucast_mgr_t * mgr, + const network_t * network, + ib_net16_t * destinations, + const uint16_t num_destinations) +{ + network_node_t *netw_node_iter1 = NULL, *netw_node_iter2 = NULL; + network_node_t *network_node1 = NULL, *network_node2 = NULL; + network_node_t *nodeU = NULL, *nodeV = NULL; + network_node_t **fifoQ = NULL, **fifoQ_head = NULL, **fifoQ_tail = NULL; + network_link_t *netw_link_iter = NULL; + ib_net16_t *desti_iter = NULL; + ib_net16_t dlid = 0; + uint16_t i = 0, j = 0, k = 0; + + CL_ASSERT(mgr && network && destinations && num_destinations > 0); + 
OSM_LOG_ENTER(mgr->p_log); + + fifoQ = + (network_node_t **) calloc(network->num_nodes, + sizeof(network_node_t *)); + if (!fifoQ) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE21: cannot allocate memory for the fifo queue\n"); + return -1; + } + + for (i = 0, netw_node_iter1 = network->nodes; i < network->num_nodes; + i++, netw_node_iter1++) { + netw_node_iter1->in_convex_hull = FALSE; + netw_node_iter1->has_adj_destinations = FALSE; + } + + /* switches adjacent to terminals in the desti_array are definitely + in the convex hull as well + */ + for (i = 0, desti_iter = destinations; i < num_destinations; + i++, desti_iter++) { + dlid = get_switch_lid(mgr, *desti_iter); + network_node1 = get_network_node_by_lid(network, dlid); + CL_ASSERT(network_node1); + network_node1->in_convex_hull = TRUE; + network_node1->has_adj_destinations = TRUE; + } + + for (i = 0, netw_node_iter1 = network->nodes; i < network->num_nodes; + i++, netw_node_iter1++) { + network_node1 = netw_node_iter1; + + if (!network_node1->in_convex_hull) + continue; + + for (j = 0, netw_node_iter2 = network->nodes; + j < network->num_nodes; j++, netw_node_iter2++) { + netw_node_iter2->distance = INFINITY; + netw_node_iter2->processed = FALSE; + } + + network_node1->distance = 0; + network_node1->processed = TRUE; + + fifoQ_head = fifoQ_tail = fifoQ; + *fifoQ_tail++ = network_node1; + + while (fifoQ_tail - fifoQ_head > 0) { + nodeU = *fifoQ_head++; + for (j = 0, netw_link_iter = nodeU->links; + j < nodeU->num_links; j++, netw_link_iter++) { + nodeV = netw_link_iter->to_network_node; + if (nodeV->distance == INFINITY) { + nodeV->distance = nodeU->distance + 1; + *fifoQ_tail++ = nodeV; + } + } + } + + for (j = 0, netw_node_iter2 = network->nodes; + j < network->num_nodes; j++, netw_node_iter2++) { + network_node2 = netw_node_iter2; + + if (!network_node2->in_convex_hull + || network_node2->processed) + continue; + + network_node2->processed = TRUE; + + fifoQ_head = fifoQ_tail = fifoQ; + *fifoQ_tail++ = 
network_node2; + + while (fifoQ_tail - fifoQ_head > 0) { + nodeV = *fifoQ_head++; + for (k = 0, netw_link_iter = nodeV->links; + k < nodeV->num_links; + k++, netw_link_iter++) { + nodeU = netw_link_iter->to_network_node; + if (nodeU->processed) + continue; + if (nodeU->distance + 1 == + nodeV->distance) { + nodeU->in_convex_hull = TRUE; + nodeU->processed = TRUE; + *fifoQ_tail++ = nodeU; + } + } + } + } + } + + free(fifoQ); + + OSM_LOG_EXIT(mgr->p_log); + return 0; +} + +static void determine_num_adj_terminals_in_convex_hull(const osm_ucast_mgr_t * + mgr, + const network_t * + network, + ib_net16_t * + destinations, + const uint16_t + num_destinations) +{ + ib_net16_t dlid = 0, *desti_iter = NULL; + network_node_t *network_node = NULL; + uint16_t i = 0; + + CL_ASSERT(mgr && network && destinations && num_destinations > 0); + + for (i = 0, desti_iter = destinations; i < num_destinations; + i++, desti_iter++) { + dlid = get_switch_lid(mgr, *desti_iter); + if (dlid != *desti_iter) { + network_node = get_network_node_by_lid(network, dlid); + CL_ASSERT(network_node && network_node->in_convex_hull); + network_node->num_adj_terminals_in_convex_hull += 1; + } + } +} + +static void reset_delta_for_betw_centrality(const network_t * network) +{ + network_node_t *netw_node_iter = NULL; + uint16_t i = 0; + + CL_ASSERT(network); + + for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes; + i++, netw_node_iter++) + netw_node_iter->delta = 0.0; +} + +static void reset_sigma_distance_Ps_for_betw_centrality(const network_t * + network) +{ + network_node_t *netw_node_iter = NULL; + uint16_t i = 0; + + CL_ASSERT(network); + + for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes; + i++, netw_node_iter++) { + netw_node_iter->num_elem_in_Ps = 0; + netw_node_iter->sigma = 0; + netw_node_iter->distance = INFINITY; + } +} + +/* the function implements a slightly modified version of Brandes' algorithms + for betweenness centrality; we calculate this value only for 
switches, since + terminals shouldn't be the most central node w.r.t. a convex hull anyways + */ +static int get_central_node_wrt_subnetwork(const osm_ucast_mgr_t * mgr, + const network_t * network, + ib_net16_t * destinations, + const uint16_t num_destinations, + network_node_t ** central_node, + uint16_t * central_node_index) +{ + network_node_t *netw_node_iter = NULL; + network_node_t *network_node = NULL, *nodeU = NULL, *nodeV = NULL; + network_node_t **fifoQ = NULL, **fifoQ_head = NULL, **fifoQ_tail = NULL; + network_node_t **lifoQ = NULL, **lifoQ_head = + NULL, **restore_lifoQ_head = NULL; + network_link_t *netw_link_iter = NULL; + double max_betw_centrality = -1.0; + uint16_t i = 0, j = 0, k = 0, update_for_adj = 0; + + CL_ASSERT(mgr && network); + OSM_LOG_ENTER(mgr->p_log); + + fifoQ = + (network_node_t **) calloc(network->num_nodes, + sizeof(network_node_t *)); + if (!fifoQ) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE22: cannot allocate memory for the fifo queue\n"); + return -1; + } + lifoQ = + (network_node_t **) calloc(network->num_nodes, + sizeof(network_node_t *)); + if (!lifoQ) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE23: cannot allocate memory for the lifo queue\n"); + free(fifoQ); + return -1; + } + + for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes; + i++, netw_node_iter++) { + netw_node_iter->betw_centrality = 0.0; + + netw_node_iter->num_adj_terminals_in_convex_hull = 0; + netw_node_iter->num_elem_in_Ps = 0; + + /* Ps holds a list of all shortest paths thru this node */ + if (netw_node_iter->Ps) { + memset(netw_node_iter->Ps, 0, + netw_node_iter->num_links * + sizeof(network_node_t *)); + } else { + netw_node_iter->Ps = + (network_node_t **) calloc(netw_node_iter-> + num_links, + sizeof(network_node_t + *)); + if (netw_node_iter->num_links && !netw_node_iter->Ps) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE24: cannot allocate memory for Ps array\n"); + while (netw_node_iter != network->nodes) { + 
netw_node_iter--; + free(netw_node_iter->Ps); + } + free(fifoQ); + free(lifoQ); + return -1; + } + } + } + + determine_num_adj_terminals_in_convex_hull(mgr, network, destinations, + num_destinations); + + for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes; + i++, netw_node_iter++) { + network_node = netw_node_iter; + if (!network_node->in_convex_hull) + continue; + + reset_sigma_distance_Ps_for_betw_centrality(network); + + network_node->sigma = 1; + network_node->distance = 0; + + lifoQ_head = lifoQ; + fifoQ_head = fifoQ_tail = fifoQ; + *fifoQ_tail++ = network_node; + + while (fifoQ_tail - fifoQ_head > 0) { + nodeU = *fifoQ_head++; + *lifoQ_head++ = nodeU; + + for (k = 0, netw_link_iter = nodeU->links; + k < nodeU->num_links; k++, netw_link_iter++) { + nodeV = netw_link_iter->to_network_node; + if (!nodeV->in_convex_hull) + continue; + if (nodeV->distance == INFINITY) { + nodeV->distance = nodeU->distance + 1; + *fifoQ_tail++ = nodeV; + } + if (nodeV->distance == nodeU->distance + 1) { + nodeV->sigma += nodeU->sigma; + /* if it crashes here then nodeU gets + added multiple times => we have to + redesign Ps + */ + CL_ASSERT(nodeV->num_elem_in_Ps < + nodeV->num_links); + nodeV->Ps[nodeV->num_elem_in_Ps++] = + nodeU; + } + } + } + + /* since we don't have the terminals stored we have to execute + the following loop multipe times (1x for the switch and + 1x for each terminal which is in the convex hull) + */ + restore_lifoQ_head = lifoQ_head; + for (j = 0; + j < network_node->num_adj_terminals_in_convex_hull + 1; + j++) { + reset_delta_for_betw_centrality(network); + + lifoQ_head = restore_lifoQ_head; + while (lifoQ_head - lifoQ > 0) { + nodeV = *(--lifoQ_head); + + if (network_node != nodeV) + update_for_adj = + nodeV-> + num_adj_terminals_in_convex_hull; + else if (j == 0) + update_for_adj = + nodeV-> + num_adj_terminals_in_convex_hull; + else if (nodeV-> + num_adj_terminals_in_convex_hull > 0) + update_for_adj = + nodeV-> + 
num_adj_terminals_in_convex_hull - + 1; + + /* following not part of original Brandes' algo + but needed because we don't have terminals: + - delta(terminal)=0.0 => omit last term; + - sigma(terminal) is always sigma(adjSW) + */ + for (k = 0; k < update_for_adj; k++) + nodeV->delta += + (1.0 * nodeV->sigma) / nodeV->sigma; + + for (k = 0; k < nodeV->num_elem_in_Ps; k++) { + nodeU = nodeV->Ps[k]; + nodeU->delta += + (1.0 * nodeU->sigma) / + nodeV->sigma * (1 + nodeV->delta); + } + + /* if j>0 then we simulate an terminal => means + we have to update the betw_centrality of + it's adjacent switch + */ + if (j > 0 || network_node != nodeV) + nodeV->betw_centrality += nodeV->delta; + } + } + } + + *central_node = NULL; + *central_node_index = 0; + for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes; + i++, netw_node_iter++) { + if (!netw_node_iter->in_convex_hull) + continue; + + if (max_betw_centrality < netw_node_iter->betw_centrality) { + *central_node = netw_node_iter; + *central_node_index = i; + max_betw_centrality = netw_node_iter->betw_centrality; + } + } + + free(fifoQ); + free(lifoQ); + + OSM_LOG_EXIT(mgr->p_log); + return 0; +} + +/* callback function for the cl_heap to update the heap index */ +static void update_netw_heap_index(const void *context, const size_t new_index) +{ + network_node_t *heap_elem = (network_node_t *) context; + if (heap_elem) + heap_elem->heap_index = new_index; +} + +/* trivial spanning tree calculation for the network (similar to Dijkstra's + algorithm) which includes the link weights, too, so that we don't end up + with similar escape paths for each virtual layer + */ +static int calculate_spanning_tree_in_network(const osm_ucast_mgr_t * mgr, + network_t * network, + network_node_t * root_node) +{ + network_node_t *curr_node = NULL, *adj_node = NULL, *netw_node_iter = + NULL; + network_link_t *curr_link = NULL, *netw_link_iter = NULL; + cl_status_t ret = CL_SUCCESS; + uint64_t new_distance = 0; + uint16_t i = 0; + + 
CL_ASSERT(mgr && network && root_node); + OSM_LOG_ENTER(mgr->p_log); + + /* build an 4-ary heap to find the node with minimum distance */ + if (!cl_is_heap_inited(&network->heap)) + ret = + cl_heap_init(&network->heap, (size_t) network->num_nodes, 4, + &update_netw_heap_index, NULL); + else + ret = + cl_heap_resize(&network->heap, (size_t) network->num_nodes); + if (CL_SUCCESS != ret) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE15: cannot allocate memory or resize heap\n"); + return -1; + } + + for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes; + i++, netw_node_iter++) { + netw_node_iter->distance = INFINITY; + netw_node_iter->escape_path = NULL; + + ret = cl_heap_insert(&network->heap, INFINITY, netw_node_iter); + if (CL_SUCCESS != ret) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE16: cl_heap_insert failed\n"); + return -1; + } + } + + /* we use the root_node as source in Dijkstra's algorithm to caluclate + a spanning tree for the network + */ + root_node->distance = 0; + ret = + cl_heap_modify_key(&network->heap, root_node->distance, + root_node->heap_index); + if (CL_SUCCESS != ret) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE17: index out of bounds in cl_heap_modify_key\n"); + return -1; + } + + curr_node = (network_node_t *) cl_heap_extract_root(&network->heap); + while (curr_node) { + /* add/update nodes which aren't discovered but accessible */ + for (i = 0, netw_link_iter = curr_node->links; + i < curr_node->num_links; i++, netw_link_iter++) { + curr_link = netw_link_iter; + adj_node = curr_link->to_network_node; + new_distance = curr_node->distance + curr_link->weight; + if (new_distance < adj_node->distance) { + adj_node->escape_path = curr_link; + adj_node->distance = new_distance; + ret = + cl_heap_modify_key(&network->heap, + adj_node->distance, + adj_node->heap_index); + if (CL_SUCCESS != ret) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE18: index out of bounds in cl_heap_modify_key\n"); + return -1; + } + } + } + + 
curr_node = + (network_node_t *) cl_heap_extract_root(&network->heap); + } + + OSM_LOG_EXIT(mgr->p_log); + return 0; +} + +/* escape paths are initial channel dependencies which aren't to be 'broken', + meaning: they are virtual paths building a backbone in case Nue runs into + a impass and can't find all routes towards one destination + escape paths are derived from a spanning tree rooted at the most central + node w.r.t. the destination nodes in the current virtual layer + */ +static int mark_escape_paths(const osm_ucast_mgr_t * mgr, network_t * network, + const ccdg_t * ccdg, ib_net16_t * destinations, + const uint16_t num_destinations, + const boolean_t verify_network_integrity) +{ + network_node_t *central_node = NULL, *netw_node_iter = NULL; + network_node_t *network_node1 = NULL, *network_node2 = NULL; + network_link_t *curr_link = NULL, *next_link = NULL, *adj_link = NULL; + network_link_t **links_going_into_central_node = NULL; + channel_t channel_id; + ccdg_node_t *curr_ccdg_node = NULL, *next_ccdg_node = NULL; + ccdg_node_t *rev_curr_ccdg_node = NULL, *rev_next_ccdg_node = NULL; + ib_net16_t lid = 0; + uint16_t i = 0, j = 0, undiscovered = 0, central_node_index = 0; + int err = 0; + + CL_ASSERT(mgr && network && ccdg && destinations + && num_destinations > 0); + OSM_LOG_ENTER(mgr->p_log); + OSM_LOG(mgr->p_log, OSM_LOG_INFO, + "Initialize complete CDG with escape paths\n"); + + err = + calculate_convex_subnetwork(mgr, network, destinations, + num_destinations); + if (err) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE25: calculation of the convex subgraph failed;" + " unable to proceed\n"); + return -1; + } + + err = + get_central_node_wrt_subnetwork(mgr, network, destinations, + num_destinations, ¢ral_node, + ¢ral_node_index); + if (err) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE26: unable to find a central node; unable to" + " proceed\n"); + return -1; + } + OSM_LOG(mgr->p_log, OSM_LOG_INFO, "central node:\n"); + print_network_node(mgr, 
central_node, central_node_index, FALSE); + + err = calculate_spanning_tree_in_network(mgr, network, central_node); + if (err) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE27: spanning tree algorithm for the escape" + " paths failed; unable to proceed\n"); + return -1; + } else if (verify_network_integrity) { + /* sanity check to determine connectivity issues */ + for (i = 0, netw_node_iter = network->nodes; + i < network->num_nodes; i++, netw_node_iter++) + undiscovered += (netw_node_iter->escape_path) ? 0 : 1; + /* escape_path is not initialied for the central_node, but for + the rest it must be, or otherwise the network is bisected + */ + if (undiscovered > 1) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE45: unsupported network state (detached" + " and inaccessible switches found; gracefully" + " shutdown this routing engine)\n"); + return -1; + } + } + /* print the network after the spanning tree has been caluclated */ + if (OSM_LOG_IS_ACTIVE_V2(mgr->p_log, OSM_LOG_DEBUG)) + print_spanning_tree(mgr, network); + + links_going_into_central_node = + (network_link_t **) calloc(central_node->num_links, + sizeof(network_link_t *)); + if (central_node->num_links && !links_going_into_central_node) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE28: cannot allocate memory for" + " links_going_into_central_node array\n"); + return -1; + } + + /* mark the escape paths in the complete CDG towards/from the root */ + for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes; + i++, netw_node_iter++) { + network_node1 = netw_node_iter; + + curr_link = network_node1->escape_path; + while (curr_link) { + lid = curr_link->link_info.local_lid; + network_node2 = get_network_node_by_lid(network, lid); + CL_ASSERT(network_node2); + + /* color the curr_link in the ccdg (i.e., ccdg node) */ + curr_ccdg_node = curr_link->corresponding_ccdg_node; + CL_ASSERT(curr_ccdg_node); + init_ccdg_escape_path_node_color(ccdg, curr_ccdg_node); + + next_link = 
network_node2->escape_path; + if (!next_link) { + /* all nodes should have an escape path, except + the root node + */ + CL_ASSERT(lid == central_node->lid); + + /* a hashmap may be better to do this job of + tracking which links are used and which are + not, but for now it does the trick and isn't + performance-critical => room for future + optimizations + */ + for (j = 0; j < central_node->num_links; j++) { + if (!links_going_into_central_node[j]) { + links_going_into_central_node[j] + = curr_link; + break; + } else + if (links_going_into_central_node[j] + == curr_link) + break; + } + + break; + } + + /* color the next_link in the ccdg (i.e., ccdg node) */ + next_ccdg_node = next_link->corresponding_ccdg_node; + CL_ASSERT(next_ccdg_node); + init_ccdg_escape_path_node_color(ccdg, next_ccdg_node); + + /* and we have to color the edge between next & curr */ + init_ccdg_escape_path_edge_color_betw_nodes(ccdg, + next_ccdg_node, + curr_ccdg_node); + + /* check if we have to add the reverse path as well */ + if (network_node1->has_adj_destinations) { + channel_id = + get_inverted_channel_id(curr_link-> + link_info); + rev_curr_ccdg_node = + get_ccdg_node_by_channel_id(ccdg, + channel_id); + CL_ASSERT(rev_curr_ccdg_node); + channel_id = + get_inverted_channel_id(next_link-> + link_info); + rev_next_ccdg_node = + get_ccdg_node_by_channel_id(ccdg, + channel_id); + CL_ASSERT(rev_next_ccdg_node); + + /* set ccdg node color of the reverse path */ + init_ccdg_escape_path_node_color(ccdg, + rev_curr_ccdg_node); + init_ccdg_escape_path_node_color(ccdg, + rev_next_ccdg_node); + /* including ccdg edges */ + init_ccdg_escape_path_edge_color_betw_nodes + (ccdg, rev_curr_ccdg_node, + rev_next_ccdg_node); + + /* and even color turns if there are any */ + for (j = 0; j < network_node2->num_links; j++) { + adj_link = + network_node2->links[j]. 
+ to_network_node->escape_path; + if (!adj_link) + continue; + if (curr_link->link_info.local_lid == + adj_link->link_info.local_lid + && curr_link->link_info. + remote_lid != + adj_link->link_info.remote_lid) { + init_ccdg_escape_path_edge_color_betw_nodes + (ccdg, rev_curr_ccdg_node, + adj_link-> + corresponding_ccdg_node); + } + } + } + + curr_link = next_link; + } + } + + /* mark escape paths around central node */ + for (i = 0; + i < central_node->num_links && links_going_into_central_node[i]; + i++) { + curr_link = links_going_into_central_node[i]; + for (j = 0; + j < central_node->num_links + && links_going_into_central_node[j]; j++) { + next_link = links_going_into_central_node[j]; + if (curr_link == next_link) + continue; + + CL_ASSERT(curr_link && next_link); + curr_ccdg_node = curr_link->corresponding_ccdg_node; + rev_next_ccdg_node = + get_ccdg_node_by_channel_id(ccdg, + get_inverted_channel_id + (next_link->link_info)); + CL_ASSERT(curr_ccdg_node && rev_next_ccdg_node); + + /* set color of links going into/out of central node */ + init_ccdg_escape_path_node_color(ccdg, curr_ccdg_node); + init_ccdg_escape_path_node_color(ccdg, + rev_next_ccdg_node); + /* including ccdg edges */ + init_ccdg_escape_path_edge_color_betw_nodes(ccdg, + rev_next_ccdg_node, + curr_ccdg_node); + } + } + + free(links_going_into_central_node); + + OSM_LOG_EXIT(mgr->p_log); + return 0; +} + +#if defined (_DEBUG_) +static boolean_t deep_cpy_ccdg(const osm_ucast_mgr_t * mgr, + const ccdg_t * in_ccdg, ccdg_t * out_ccdg) +{ + ccdg_node_t *in_ccdg_node_iter = NULL, *out_ccdg_node_iter = NULL; + ccdg_node_t *ccdg_node_iter = NULL; + ccdg_edge_t *in_ccdg_edge_iter = NULL, *out_ccdg_edge_iter = NULL; + uint32_t i = 0, j = 0, k = 0; + + CL_ASSERT(in_ccdg && in_ccdg->nodes); + OSM_LOG_ENTER(mgr->p_log); + + if (!out_ccdg->nodes) { + out_ccdg->nodes = + (ccdg_node_t *) malloc(in_ccdg->num_nodes * + sizeof(ccdg_node_t)); + if (!out_ccdg->nodes) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR 
NUE29: cannot allocate memory for ccdg nodes\n"); + return FALSE; + } + } else { + for (i = 0, out_ccdg_node_iter = out_ccdg->nodes; + i < out_ccdg->num_nodes; i++, out_ccdg_node_iter++) { + if (out_ccdg_node_iter->edges) + free(out_ccdg_node_iter->edges); + } + } + memset(out_ccdg->nodes, 0, in_ccdg->num_nodes * sizeof(ccdg_node_t)); + out_ccdg->num_nodes = 0; + + for (i = 0, in_ccdg_node_iter = in_ccdg->nodes, out_ccdg_node_iter = + out_ccdg->nodes; i < in_ccdg->num_nodes; + i++, in_ccdg_node_iter++) { + if (get_ccdg_node_color(in_ccdg, in_ccdg_node_iter) < + ESCAPEPATHCOLOR) + continue; + + out_ccdg_node_iter->channel_id = in_ccdg_node_iter->channel_id; + out_ccdg_node_iter->status = WHITE; + out_ccdg_node_iter->edges = + (ccdg_edge_t *) calloc(in_ccdg_node_iter->num_edges, + sizeof(ccdg_edge_t)); + if (!out_ccdg_node_iter->edges) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE30: cannot allocate memory for ccdg edges\n"); + destroy_ccdg(out_ccdg); + return FALSE; + } + + for (j = 0, in_ccdg_edge_iter = + in_ccdg_node_iter->edges, out_ccdg_edge_iter = + out_ccdg_node_iter->edges; + j < in_ccdg_node_iter->num_edges; + j++, in_ccdg_edge_iter++) { + if (get_ccdg_edge_color(in_ccdg, in_ccdg_edge_iter) < + ESCAPEPATHCOLOR) + continue; + + out_ccdg_edge_iter->to_channel_id = + in_ccdg_edge_iter->to_channel_id; + + out_ccdg_edge_iter++; + out_ccdg_node_iter->num_edges++; + } + + out_ccdg_node_iter++; + out_ccdg->num_nodes++; + } + + for (i = 0, out_ccdg_node_iter = out_ccdg->nodes; + i < out_ccdg->num_nodes; i++, out_ccdg_node_iter++) { + for (j = 0, out_ccdg_edge_iter = out_ccdg_node_iter->edges; + j < out_ccdg_node_iter->num_edges; + j++, out_ccdg_edge_iter++) { + for (k = 0, ccdg_node_iter = out_ccdg->nodes; + k < out_ccdg->num_nodes; k++, ccdg_node_iter++) { + if (0 == + compare_two_channel_id(& + (out_ccdg_edge_iter-> + to_channel_id), + &(ccdg_node_iter-> + channel_id))) { + out_ccdg_edge_iter->to_ccdg_node = + ccdg_node_iter; + break; + } + } + } + } + + 
OSM_LOG_EXIT(mgr->p_log); + return TRUE; +} + +/* linear search through ccdg->nodes for the node whose channel_id matches channel_id (per compare_two_channel_id); on a match the node is stored in *out_ccdg_node and TRUE is returned, otherwise *out_ccdg_node is NULL and FALSE is returned */ static boolean_t is_channel_id_in_verify_ccdg_node_list(const ccdg_t * ccdg, + const channel_t * + channel_id, + ccdg_node_t ** + out_ccdg_node) +{ + ccdg_node_t *ccdg_node_iter = NULL; + uint32_t i = 0; + + CL_ASSERT(ccdg && ccdg->nodes && channel_id); + *out_ccdg_node = NULL; + + for (i = 0, ccdg_node_iter = ccdg->nodes; i < ccdg->num_nodes; + i++, ccdg_node_iter++) { + if (0 == + compare_two_channel_id(channel_id, + &(ccdg_node_iter->channel_id))) { + *out_ccdg_node = ccdg_node_iter; + return TRUE; + } + } + + return FALSE; +} + +/* linear search through the edges of ccdg_node for the edge whose to_channel_id matches channel_id; on a match the edge is stored in *out_ccdg_edge and TRUE is returned, otherwise *out_ccdg_edge is NULL and FALSE is returned */ static boolean_t is_channel_id_in_verify_ccdg_edge_list(const ccdg_node_t * + ccdg_node, + const channel_t * + channel_id, + ccdg_edge_t ** + out_ccdg_edge) +{ + ccdg_edge_t *ccdg_edge_iter = NULL; + uint8_t i = 0; + + CL_ASSERT(ccdg_node && ccdg_node->edges && channel_id); + *out_ccdg_edge = NULL; + for (i = 0, ccdg_edge_iter = ccdg_node->edges; i < ccdg_node->num_edges; + i++, ccdg_edge_iter++) { + if (0 == + compare_two_channel_id(channel_id, + &(ccdg_edge_iter->to_channel_id))) { + *out_ccdg_edge = ccdg_edge_iter; + return TRUE; + } + } + + return FALSE; +} + +/* for every network node other than desti: follow the used_link pointers until desti is reached, then insert each pair of consecutive channels of that route into verify_ccdg as nodes plus a connecting edge (unless already present); routes with fewer than 2 hops are skipped; returns FALSE (after destroying verify_ccdg) if an edge array cannot be allocated, TRUE otherwise */ static boolean_t add_paths_to_verify_ccdg(const osm_ucast_mgr_t * mgr, + const network_t * network, + const ib_net16_t desti, + const ccdg_t * ccdg, + ccdg_t * verify_ccdg, + const boolean_t + fallback_to_escape_paths) +{ + network_node_t *network_node = NULL, *netw_node_iter = NULL; + ccdg_node_t *ccdg_node = NULL; + ccdg_edge_t *ccdg_edge = NULL; + uint16_t i = 0; + channel_t *route[64]; /* can't have more than 64 hops, see IB specs; NOTE(review): the loop that fills route[] does not check num_hops against this size */ + uint8_t num_hops = 0, j = 0; + ib_net16_t curr_lid = 0; + channel_t *channel_id1 = NULL, *channel_id2 = NULL; + + CL_ASSERT(mgr && network && network->nodes && ccdg && ccdg->nodes + && verify_ccdg && verify_ccdg->nodes); + OSM_LOG_ENTER(mgr->p_log); + + for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes; + i++, netw_node_iter++) { + if (netw_node_iter->lid == desti) + continue; 
+ + num_hops = 0; + curr_lid = netw_node_iter->lid; + + /* desti is the switch's lid if original desti is a terminal */ + do { + network_node = + get_network_node_by_lid(network, curr_lid); + CL_ASSERT(network_node && network_node->used_link); + route[num_hops] = &(network_node->used_link->link_info); + curr_lid = route[num_hops]->local_lid; + num_hops++; + } while (curr_lid != desti); + if (num_hops < 2) + continue; + + for (j = num_hops - 1; j > 0; j--) { + channel_id1 = route[j]; + channel_id2 = route[j - 1]; + + if (!is_channel_id_in_verify_ccdg_node_list + (verify_ccdg, channel_id2, &ccdg_node)) { + CL_ASSERT(ccdg->num_nodes > + verify_ccdg->num_nodes); + verify_ccdg->nodes[verify_ccdg->num_nodes]. + channel_id = *channel_id2; + verify_ccdg->nodes[verify_ccdg->num_nodes]. + status = WHITE; + ccdg_node = + get_ccdg_node_by_channel_id(ccdg, + *channel_id2); + CL_ASSERT(ccdg_node); + verify_ccdg->nodes[verify_ccdg->num_nodes]. + edges = + (ccdg_edge_t *) calloc(ccdg_node->num_edges, + sizeof(ccdg_edge_t)); + if (!verify_ccdg->nodes[verify_ccdg->num_nodes]. + edges) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE31: cannot allocate memory for ccdg edges\n"); + destroy_ccdg(verify_ccdg); + return FALSE; + } + verify_ccdg->nodes[verify_ccdg->num_nodes]. + num_edges = 0; + verify_ccdg->num_nodes++; + } + + if (is_channel_id_in_verify_ccdg_node_list + (verify_ccdg, channel_id1, &ccdg_node)) { + if (!is_channel_id_in_verify_ccdg_edge_list + (ccdg_node, channel_id2, &ccdg_edge)) { + /* escape_paths must not add anything + to the verify_ccdg + */ + CL_ASSERT(FALSE == fallback_to_escape_paths); + + ccdg_node->edges[ccdg_node->num_edges]. + to_channel_id = *channel_id2; + CL_ASSERT + (is_channel_id_in_verify_ccdg_node_list + (verify_ccdg, channel_id2, + &(ccdg_node-> + edges[ccdg_node->num_edges]. + to_ccdg_node))); + ccdg_node->num_edges++; + } + } else { + CL_ASSERT(ccdg->num_nodes > + verify_ccdg->num_nodes); + verify_ccdg->nodes[verify_ccdg->num_nodes]. 
+ channel_id = *channel_id1; + verify_ccdg->nodes[verify_ccdg->num_nodes]. + status = WHITE; + ccdg_node = + get_ccdg_node_by_channel_id(ccdg, + *channel_id1); + CL_ASSERT(ccdg_node); + verify_ccdg->nodes[verify_ccdg->num_nodes]. + edges = + (ccdg_edge_t *) calloc(ccdg_node->num_edges, + sizeof(ccdg_edge_t)); + if (!verify_ccdg->nodes[verify_ccdg->num_nodes]. + edges) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE32: cannot allocate memory for ccdg edges\n"); + destroy_ccdg(verify_ccdg); + return FALSE; + } + verify_ccdg->nodes[verify_ccdg->num_nodes]. + num_edges = 0; + verify_ccdg->nodes[verify_ccdg->num_nodes]. + edges[0].to_channel_id = *channel_id2; + /* NOTE(review): the lookup inside this CL_ASSERT also stores its result in edges[0].to_ccdg_node; confirm CL_ASSERT is always active when _DEBUG_ is defined */ CL_ASSERT(is_channel_id_in_verify_ccdg_node_list + (verify_ccdg, channel_id2, + &(verify_ccdg-> + nodes[verify_ccdg->num_nodes]. + edges[0].to_ccdg_node))); + verify_ccdg->nodes[verify_ccdg->num_nodes]. + num_edges++; + verify_ccdg->num_nodes++; + } + } + } + + OSM_LOG_EXIT(mgr->p_log); + return TRUE; +} + +/* iterative depth-first search over all nodes of ccdg using the WHITE/GRAY/BLACK status and the pre pointer of each node; returns FALSE as soon as an edge leads to a GRAY node (i.e., a node on the current search path) and TRUE if no such edge exists; the FALSE return leaves without calling OSM_LOG_EXIT */ static boolean_t is_ccdg_cycle_free(const osm_ucast_mgr_t * mgr, + const ccdg_t * ccdg) +{ + ccdg_node_t *ccdg_node_iter = NULL, *curr_node = NULL, *next_node = + NULL; + ccdg_edge_t *ccdg_edge_iter = NULL; + uint32_t i = 0, j = 0; + + CL_ASSERT(mgr && ccdg && ccdg->nodes); + OSM_LOG_ENTER(mgr->p_log); + + for (i = 0, ccdg_node_iter = ccdg->nodes; i < ccdg->num_nodes; + i++, ccdg_node_iter++) { + ccdg_node_iter->status = WHITE; + ccdg_node_iter->pre = NULL; + } + + for (i = 0, ccdg_node_iter = ccdg->nodes; i < ccdg->num_nodes; + i++, ccdg_node_iter++) { + CL_ASSERT(ccdg_node_iter->status != GRAY); + if (ccdg_node_iter->status == BLACK) + continue; + + ccdg_node_iter->status = GRAY; + + curr_node = ccdg_node_iter; + while (curr_node) { + next_node = NULL; + for (j = 0, ccdg_edge_iter = curr_node->edges; + j < curr_node->num_edges; j++, ccdg_edge_iter++) { + if (ccdg_edge_iter->to_ccdg_node->status == + WHITE) { + next_node = + ccdg_edge_iter->to_ccdg_node; + next_node->status = GRAY; + next_node->pre = 
curr_node; + break; + } else if (ccdg_edge_iter->to_ccdg_node-> + status == GRAY) + return FALSE; + } + if (!next_node) { + curr_node->status = BLACK; + curr_node = curr_node->pre; + } else + curr_node = next_node; + } + } + + OSM_LOG_EXIT(mgr->p_log); + return TRUE; +} +#endif + +/* callback function for the cl_heap to update the heap index of a ccdg node */ +static void update_ccdg_heap_index(const void *context, const size_t new_index) +{ + ccdg_node_t *heap_elem = (ccdg_node_t *) context; + if (heap_elem) + heap_elem->heap_index = new_index; +} + +/* we reached an impasse and we have to use the escape paths as fallback to + have valid paths towards the current destination; + dest_port is not referenced in the function body + */ +static void use_escape_paths_to_solve_impass(const osm_ucast_mgr_t * mgr, + const network_t * network, + const osm_port_t * dest_port, + const ib_net16_t dlid) +{ + network_node_t *network_node1 = NULL, *network_node2 = NULL; + network_node_t *netw_node_iter = NULL; + network_link_t *curr_link = NULL, *reverse_link = NULL; + network_link_t *netw_link_iter = NULL; + ib_net16_t r_lid = 0; + channel_t reverse_channel_id; + uint16_t i = 0; + uint8_t j = 0; + + CL_ASSERT(mgr && network && dlid > 0); + OSM_LOG_ENTER(mgr->p_log); + + /* first let's copy all pre-computed escape paths into the used_links */ + for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes; + i++, netw_node_iter++) + netw_node_iter->used_link = netw_node_iter->escape_path; + + /* get the source node (or adj switch) of the current routing step */ + network_node1 = + get_network_node_by_lid(network, get_switch_lid(mgr, dlid)); + CL_ASSERT(network_node1); + /* if the used_link is NULL, then dlid (or adj switch) is also the + root of the spanning tree for the escape paths (and all links are + in the correct direction) + */ + if (network_node1->used_link) { + /* otherwise we have to reverse a few of the used_links */ + curr_link = network_node1->used_link; + network_node1->used_link = NULL; + /* we only have to 
reverse until the real spanning tree root */ + while (curr_link) { + r_lid = (ib_net16_t) curr_link->link_info.local_lid; + network_node2 = get_network_node_by_lid(network, r_lid); + CL_ASSERT(network_node2); + reverse_channel_id = + get_inverted_channel_id(curr_link->link_info); + /* search the reverse link */ + reverse_link = NULL; + for (j = 0, netw_link_iter = network_node1->links; + j < network_node1->num_links; + j++, netw_link_iter++) { + if (0 == + compare_two_channel_id(&reverse_channel_id, + &(netw_link_iter-> + link_info))) + reverse_link = netw_link_iter; + } + CL_ASSERT(reverse_link); + curr_link = network_node2->used_link; + network_node2->used_link = reverse_link; + network_node1 = network_node2; + } + } + + OSM_LOG_EXIT(mgr->p_log); +} + +// check if we find a way from the source to the target -> yes: cycle +static boolean_t found_path_between_ccdg_nodes_in_subgraph(const osm_ucast_mgr_t + * mgr, + const ccdg_t * ccdg, + ccdg_node_t * source, + const ccdg_node_t * + target, + const int32_t color) +{ + ccdg_node_t *curr_ccdg_node = NULL, *next_ccdg_node = NULL; + ccdg_node_t *ccdg_node_iter = NULL; + ccdg_edge_t *ccdg_edge_iter = NULL; + uint32_t i = 0; + uint8_t j = 0; + boolean_t found_path = FALSE; + + CL_ASSERT(mgr && ccdg && source && target && source != target); + OSM_LOG_ENTER(mgr->p_log); + + curr_ccdg_node = source; + curr_ccdg_node->next_edge_idx = 0; + curr_ccdg_node->pre = NULL; + + do { + next_ccdg_node = NULL; + for (j = curr_ccdg_node->next_edge_idx, ccdg_edge_iter = + curr_ccdg_node->edges + curr_ccdg_node->next_edge_idx; + j < curr_ccdg_node->num_edges; j++, ccdg_edge_iter++) { + CL_ASSERT(ccdg_edge_iter->to_ccdg_node); + if (BLACK == ccdg_edge_iter->to_ccdg_node->status + || get_ccdg_edge_color(ccdg, + ccdg_edge_iter) <= UNUSED) + continue; + CL_ASSERT(color == + get_ccdg_edge_color(ccdg, ccdg_edge_iter)); + + if (ccdg_edge_iter->to_ccdg_node == target) { + found_path = TRUE; + } else { + next_ccdg_node = 
ccdg_edge_iter->to_ccdg_node; + curr_ccdg_node->next_edge_idx = j + 1; + } + break; + } + + if (next_ccdg_node) { + next_ccdg_node->next_edge_idx = 0; + next_ccdg_node->pre = curr_ccdg_node; + } else { + curr_ccdg_node->status = BLACK; + next_ccdg_node = curr_ccdg_node->pre; + } + curr_ccdg_node = next_ccdg_node; + } while (!found_path && curr_ccdg_node); + + /* reset changed status fields */ + for (i = 0, ccdg_node_iter = ccdg->nodes; i < ccdg->num_nodes; + i++, ccdg_node_iter++) + if (ccdg_node_iter->status == BLACK) + ccdg_node_iter->status = WHITE; + + OSM_LOG_EXIT(mgr->p_log); + return found_path; +} + +/* decides whether ccdg_edge (going from head to ccdg_edge->to_ccdg_node) may be used for the given color; returns FALSE right away if the edge already has a color > UNUSED; otherwise, if the tail node already has the requested color, a path from tail back to head is searched: if one exists the edge is set into blocked state and TRUE is returned, else the edge is added to the colored subgraph; if the tail has a different color, the tail is added to the subgraph (when UNUSED) or both subgraphs are merged, and FALSE is returned */ static boolean_t using_edge_induces_cycle_in_ccdg(const osm_ucast_mgr_t * mgr, + const ccdg_t * ccdg, + const ccdg_node_t * head, + ccdg_edge_t * ccdg_edge, + const int32_t color) +{ + ccdg_node_t *tail = NULL; + boolean_t cycle_induced = TRUE; + + CL_ASSERT(mgr && ccdg && head && ccdg_edge && color > 0); + OSM_LOG_ENTER(mgr->p_log); + + tail = ccdg_edge->to_ccdg_node; + CL_ASSERT(tail && get_ccdg_edge_color(ccdg, ccdg_edge) != BLOCKED); + + if (get_ccdg_edge_color(ccdg, ccdg_edge) > UNUSED) { + cycle_induced = FALSE; + CL_ASSERT(get_ccdg_node_color(ccdg, head) == + get_ccdg_node_color(ccdg, tail) + && get_ccdg_node_color(ccdg, + head) == + get_ccdg_edge_color(ccdg, ccdg_edge)); + } else { + if (color == get_ccdg_node_color(ccdg, tail)) { + /* trying to add an edge to an acyclic subgraph */ + if (found_path_between_ccdg_nodes_in_subgraph + (mgr, ccdg, tail, head, color)) { + set_ccdg_edge_into_blocked_state(ccdg, + ccdg_edge); + cycle_induced = TRUE; + } else { + add_ccdg_edge_betw_nodes_to_colored_subccdg + (ccdg, head, tail, ccdg_edge); + cycle_induced = FALSE; + } + } else { + /* connecting two disjoint acyclic subgraphs */ + if (UNUSED == get_ccdg_node_color(ccdg, tail)) { + add_ccdg_node_to_colored_subccdg(ccdg, head, + tail); + } else { + merge_two_colored_subccdg_by_nodes(ccdg, head, + tail); + } + cycle_induced = FALSE; + } + } + + 
OSM_LOG_EXIT(mgr->p_log); + return cycle_induced; +} + +/* only add the link if it's not yet in the stack */ +static inline void add_link_to_stack_of_used_links(network_node_t * + network_node, + network_link_t * link) +{ + uint8_t i = 0; + + CL_ASSERT(network_node && link); + for (i = 0; i < network_node->num_elem_in_link_stack; i++) { + if (link == network_node->stack_used_links[i]) + break; + } + /* add link if not found in stack */ + if (i == network_node->num_elem_in_link_stack) + network_node->stack_used_links[network_node-> + num_elem_in_link_stack++] = link; +} + +/* check alternative paths within a small radius to find/use valid channel + dependencies which won't close a cycle in the CCDG + */ +static ccdg_node_t *attempt_local_backtracking(const osm_ucast_mgr_t * mgr, + const network_t * network, + const network_node_t * + source_netw_node, + const ccdg_t * ccdg, + const int32_t color) +{ + network_node_t *unreachable_netw_node = NULL, *network_node = NULL; + network_node_t *adj_netw_node = NULL, *netw_node_iter = NULL; + network_link_t *netw_link_iter1 = NULL, *netw_link_iter2 = NULL; + uint16_t i = 0; + uint8_t j = 0, k = 0, m = 0; + ccdg_node_t *depended_channels[UINT8_MAX]; /* max = switch radix */ + backtracking_candidate_t *potential_candidates = + NULL, *realloc_candidates = NULL; + uint32_t max_elem_in_candidates_array = (uint32_t) UINT8_MAX; + uint32_t num_potential_candidates = 0, n = 0; + uint8_t num_depended_channels = 0; + ccdg_node_t *ccdg_node = NULL, *depended_ccdg_node = NULL; + ccdg_node_t *pre_ccdg_node = NULL, *pre_pre_ccdg_node = NULL; + ccdg_edge_t *pre_ccdg_edge = NULL, *ccdg_edge = NULL; + boolean_t was_wet_before = FALSE; + ccdg_node_t *new_channel_to_unreachable_netw_node = NULL; + + CL_ASSERT(mgr && network && ccdg); + OSM_LOG_ENTER(mgr->p_log); + + potential_candidates = + (backtracking_candidate_t *) malloc(max_elem_in_candidates_array * + sizeof + (backtracking_candidate_t)); + if (!potential_candidates) { + 
OSM_LOG(mgr->p_log, OSM_LOG_INFO, + "WRN NUE40: cannot allocate memory for potential channel candidates; skipping local backtracking\n"); + return NULL; + } + + /* search for nodes which have not been found by the function + route_via_modified_dijkstra_on_ccdg yet + */ + for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes; + i++, netw_node_iter++) { + if (INFINITY != netw_node_iter->distance) + continue; + + unreachable_netw_node = netw_node_iter; + + num_potential_candidates = 0; + + for (j = 0, netw_link_iter1 = unreachable_netw_node->links; + j < unreachable_netw_node->num_links; + j++, netw_link_iter1++) { + adj_netw_node = netw_link_iter1->to_network_node; + if (INFINITY == adj_netw_node->distance) + continue; + + num_depended_channels = 0; + + /* search for depended nodes surrounding the + adj_netw_node, i.e., whether any of them receive + traffic from adj_netw_node or not + */ + for (k = 0, netw_link_iter2 = adj_netw_node->links; + k < adj_netw_node->num_links; + k++, netw_link_iter2++) { + if (!netw_link_iter2->to_network_node-> + used_link) + continue; + /* if true, then we found a 'dependent' node */ + if (netw_link_iter2->to_network_node-> + used_link->link_info.local_lid == + adj_netw_node->lid) { + depended_channels[num_depended_channels] + = + netw_link_iter2->to_network_node-> + used_link->corresponding_ccdg_node; + CL_ASSERT(depended_channels + [num_depended_channels]); + num_depended_channels++; + } + } + + /* check if any of the channels in stack of used_links + does not violate the current dependent condition, + meaning we can safely replace used_link of + adj_netw_node with one link stored in the stack + */ + for (k = 0; k < adj_netw_node->num_elem_in_link_stack; + k++) { + CL_ASSERT(adj_netw_node->stack_used_links + && adj_netw_node-> + stack_used_links[k]); + ccdg_node = + adj_netw_node->stack_used_links[k]-> + corresponding_ccdg_node; + CL_ASSERT(ccdg_node); + for (m = 0; m < num_depended_channels; m++) { + 
depended_ccdg_node = + depended_channels[m]; + + /* filter reverse channels */ + if (ccdg_node->channel_id.local_lid == + depended_ccdg_node->channel_id. + remote_lid + && ccdg_node->channel_id. + remote_lid == + depended_ccdg_node->channel_id. + local_lid) + break; + + /* we only want channels which have a + 'real' color (>= escape_path_color) + */ + if (get_ccdg_edge_color_betw_nodes + (ccdg, ccdg_node, + depended_ccdg_node, + NULL) < ESCAPEPATHCOLOR) + break; + } + /* if we checked all and nothing discards us + from switching to ccdg_node for adj_netw_node + then store ccdg_node as potential candidate + */ + if (m == num_depended_channels) { + if (num_potential_candidates == + max_elem_in_candidates_array) { + max_elem_in_candidates_array *= + 2; + CL_ASSERT + (max_elem_in_candidates_array); + realloc_candidates = + realloc + (potential_candidates, + max_elem_in_candidates_array + * + sizeof + (backtracking_candidate_t)); + if (!realloc_candidates) { + OSM_LOG(mgr->p_log, + OSM_LOG_INFO, + "WRN NUE41: cannot allocate memory for potential channel candidates; skipping local backtracking\n"); + free(potential_candidates); + return NULL; + } else { + potential_candidates = + realloc_candidates; + } + } + potential_candidates + [num_potential_candidates]. + link_to_adj_netw_node = + netw_link_iter1; + potential_candidates + [num_potential_candidates]. + orig_used_ccdg_node_for_adj_netw_node + = ccdg_node; + num_potential_candidates++; + } + } + } + + /* jump to next network node if this one has no candidates */ + if (!num_potential_candidates) + continue; + + /* sort the candidates by dijkstra's distance to prefer + the 'best' possible option for a replacement + */ + sort_backtracking_candidates_by_distance(potential_candidates, + (size_t) + num_potential_candidates); + + for (n = 0; n < num_potential_candidates; n++) { + pre_ccdg_node = + potential_candidates[n]. 
+ orig_used_ccdg_node_for_adj_netw_node; + CL_ASSERT(pre_ccdg_node); + ccdg_node = + get_ccdg_node_by_channel_id(ccdg, + get_inverted_channel_id + (potential_candidates + [n]. + link_to_adj_netw_node-> + link_info)); + CL_ASSERT(ccdg_node); + + /* check if using the node triple (x)->adj->unreachable + is possible or if the channel dep. is BLOCKED already + */ + if (BLOCKED == + get_ccdg_edge_color_betw_nodes(ccdg, pre_ccdg_node, + ccdg_node, NULL)) + continue; + + network_node = + get_network_node_by_lid(network, + pre_ccdg_node->channel_id. + local_lid); + CL_ASSERT(network_node); + if (network_node == source_netw_node) + continue; + CL_ASSERT(network_node->used_link); + pre_pre_ccdg_node = + network_node->used_link->corresponding_ccdg_node; + + /* still a slim chance for reverse channel -> filter */ + if (pre_ccdg_node->channel_id.local_lid == + pre_pre_ccdg_node->channel_id.remote_lid + && pre_ccdg_node->channel_id.remote_lid == + pre_pre_ccdg_node->channel_id.local_lid) + continue; + + pre_ccdg_edge = + get_ccdg_edge_betw_nodes(pre_pre_ccdg_node, + pre_ccdg_node); + CL_ASSERT(pre_ccdg_edge && pre_ccdg_edge->color); + + /* filter BLOCKED dependencies */ + if (BLOCKED == + get_ccdg_edge_color_betw_nodes(ccdg, + pre_pre_ccdg_node, + pre_ccdg_node, + pre_ccdg_edge)) + continue; + + /* check if we can use this pre_ccdg_edge, or start + over but leave the color as is + */ + was_wet_before = pre_ccdg_edge->wet_paint; + if (using_edge_induces_cycle_in_ccdg + (mgr, ccdg, pre_pre_ccdg_node, pre_ccdg_edge, + color)) { + continue; + } + + ccdg_edge = + get_ccdg_edge_betw_nodes(pre_ccdg_node, ccdg_node); + CL_ASSERT(ccdg_edge); + /* now check the next edge, but reset the previous if + the check for this one fails + */ + if (using_edge_induces_cycle_in_ccdg + (mgr, ccdg, pre_ccdg_node, ccdg_edge, color)) { + /* only reset if it was UNUSED before or colored + in a previous routing step (but not in the + current step for the current destination) + */ + if (!was_wet_before) { + 
reset_ccdg_edge_color(ccdg, + pre_ccdg_edge); + } + continue; + } + + /* if we came this far, then we have a viable candidate + and can update the dijkstra's distance and used_link + information for the previously unreachable node; + if we find multiple options then we add them to the + stack as usual + */ + if (!new_channel_to_unreachable_netw_node) { + ccdg_node->distance = + pre_ccdg_node->distance + + ccdg_node->corresponding_netw_link->weight; + unreachable_netw_node->used_link = + ccdg_node->corresponding_netw_link; + unreachable_netw_node->distance = + ccdg_node->distance; + + potential_candidates[n].link_to_adj_netw_node-> + to_network_node->used_link = + pre_ccdg_node->corresponding_netw_link; + potential_candidates[n].link_to_adj_netw_node-> + to_network_node->distance = + pre_ccdg_node->distance; + + new_channel_to_unreachable_netw_node = + ccdg_node; + } else { + ccdg_node->distance = + pre_ccdg_node->distance + + ccdg_node->corresponding_netw_link->weight; + add_link_to_stack_of_used_links + (unreachable_netw_node, + ccdg_node->corresponding_netw_link); + } + } + + /* leave early when we found a suitable way into an unreachable + network node, route_via_modified_dijkstra_on_ccdg handles + the rest and might call the backtracking again + */ + if (new_channel_to_unreachable_netw_node) + break; + } + + /* we don't need the potential candidates anymore */ + free(potential_candidates); + + if (new_channel_to_unreachable_netw_node) + OSM_LOG(mgr->p_log, OSM_LOG_DEBUG, + "backtracking worked; found path to LID %" PRIu16 + " (%s)\n", cl_ntoh16(unreachable_netw_node->lid), + unreachable_netw_node->sw->p_node->print_desc); + + OSM_LOG_EXIT(mgr->p_log); + return new_channel_to_unreachable_netw_node; +} + +/* checks whether potential_shortcut_ccdg_node can replace the channel currently stored as used_link of potential_shortcut_netw_node: every edge from the shortcut channel to the used_link channel of a neighbor that is reached via this node must be neither BLOCKED nor induce a cycle for the given color; if all edges pass, the colors of the dependencies around the old channel are reset and TRUE is returned; otherwise the edges handled before the failing one are reset (unless they are BLOCKED or had wet_paint set before) and FALSE is returned */ static boolean_t attempt_shortcut_discovery(const osm_ucast_mgr_t * mgr, + const network_t * network, + const network_node_t * + potential_shortcut_netw_node, + const ccdg_t * ccdg, + const ccdg_node_t * + potential_shortcut_ccdg_node, + const int32_t color) +{ 
+ network_node_t *network_node = NULL; + ccdg_node_t *pre_old_ccdg_node = NULL, *old_ccdg_node = NULL; + ccdg_edge_t *ccdg_edge = NULL; + ccdg_edge_t *dependent_edges[UINT8_MAX]; + boolean_t was_wet_before[UINT8_MAX]; + uint8_t num_dependent_edges = 0; + network_link_t *netw_link_iter = NULL; + uint8_t i = 0, reset_until_break_point = 0; + boolean_t valid_shortcut = TRUE; + + CL_ASSERT(mgr && network && potential_shortcut_netw_node && ccdg + && potential_shortcut_ccdg_node + && potential_shortcut_netw_node->used_link + && color > ESCAPEPATHCOLOR); + OSM_LOG_ENTER(mgr->p_log); + + old_ccdg_node = + potential_shortcut_netw_node->used_link->corresponding_ccdg_node; + CL_ASSERT(old_ccdg_node + && old_ccdg_node != potential_shortcut_ccdg_node); + + network_node = + get_network_node_by_lid(network, + old_ccdg_node->channel_id.local_lid); + CL_ASSERT(network_node && network_node->used_link); + pre_old_ccdg_node = network_node->used_link->corresponding_ccdg_node; + CL_ASSERT(pre_old_ccdg_node); + + /* find dependent netw nodes, meaning nodes which the potential_shortcut + will relay traffic to + */ + for (i = 0, netw_link_iter = potential_shortcut_netw_node->links; + i < potential_shortcut_netw_node->num_links; + i++, netw_link_iter++) { + if (!netw_link_iter->to_network_node->used_link) + continue; + if (potential_shortcut_netw_node->lid == + netw_link_iter->to_network_node->used_link->link_info. 
+ local_lid) { + dependent_edges[num_dependent_edges] = + get_ccdg_edge_betw_nodes + (potential_shortcut_ccdg_node, + netw_link_iter->to_network_node->used_link-> + corresponding_ccdg_node); + CL_ASSERT(dependent_edges[num_dependent_edges]); + num_dependent_edges++; + } + } + + /* save the wet_paint flags for later, in case we have to reset the colors */ + for (i = 0; i < num_dependent_edges; i++) { + was_wet_before[i] = dependent_edges[i]->wet_paint; + } + + /* verify that using potential_shortcut_ccdg_node doesn't induce + any cycles in the complete CDG in combination with existing paths; + otherwise we break out and reset to previous state + */ + for (i = 0; i < num_dependent_edges; i++) { + ccdg_edge = dependent_edges[i]; + if (BLOCKED == get_ccdg_edge_color(ccdg, ccdg_edge)) { + valid_shortcut = FALSE; + reset_until_break_point = i; + break; + } else + if (using_edge_induces_cycle_in_ccdg + (mgr, ccdg, potential_shortcut_ccdg_node, ccdg_edge, + color)) { + valid_shortcut = FALSE; + reset_until_break_point = i; + break; + } + } + + if (valid_shortcut) { + /* if the shortcut is valid and no new deadlock scenarios arise, + then we can reset the channel dependencies which were in + place before the shortcut was discovered for the path which + led to this node (essentially reverting a minor part of the + induced dependencies in the cCDG around the node whose + distance was shortened by the shortcut; see Section 6.2.6.3 + of reference [2] for more details) + */ + reset_ccdg_edge_color_betw_nodes(ccdg, pre_old_ccdg_node, + old_ccdg_node, NULL); + for (i = 0; i < num_dependent_edges; i++) { + reset_ccdg_edge_color_betw_nodes(ccdg, old_ccdg_node, + dependent_edges[i]-> + to_ccdg_node, + dependent_edges[i]); + } + } else { + /* due to some BLOCKED edge we can't use the potential shortcut, + and hence we have to reset the color to whatever it was + previously; however we don't reset the BLOCKED edge itself, + since it's very likely that it will "re-block" later again + */ + for (i = 0; i 
< reset_until_break_point; i++) { + if (BLOCKED != + get_ccdg_edge_color(ccdg, dependent_edges[i])) { + if (!was_wet_before[i]) { + reset_ccdg_edge_color(ccdg, + dependent_edges + [i]); + } + } + } + } + + OSM_LOG_EXIT(mgr->p_log); + return valid_shortcut; +} + +static int route_via_modified_dijkstra_on_ccdg(const osm_ucast_mgr_t * mgr, + const network_t * network, + ccdg_t * ccdg, + const osm_port_t * dest_port, + const ib_net16_t dlid, + const int32_t source_color, + boolean_t * + fallback_to_escape_paths) +{ + network_node_t *network_node = NULL, *source_netw_node = NULL; + network_node_t *new_discovered_netw_node = NULL, *netw_node_iter = NULL; + ccdg_node_t *source_ccdg_node = NULL; + ccdg_node_t *curr_ccdg_node = NULL, *next_ccdg_node = NULL; + ccdg_node_t *old_ccdg_node = NULL, *pre_old_ccdg_node = NULL; + ccdg_edge_t *ccdg_edge_iter = NULL; + ib_net16_t dijk_source_lid = dlid; + channel_t source_channel_id; + uint64_t new_distance = 0; + cl_status_t ret = CL_SUCCESS; + uint32_t i = 0; + uint8_t j = 0; + uint16_t num_netw_nodes_found = 0; + int32_t last_active_backtracking_step = 0; + boolean_t iterate_over_used = FALSE; + + CL_ASSERT(mgr && network && ccdg && dlid > 0); + OSM_LOG_ENTER(mgr->p_log); + + *fallback_to_escape_paths = FALSE; + + for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes; + i++, netw_node_iter++) { + netw_node_iter->distance = INFINITY; + netw_node_iter->used_link = NULL; + netw_node_iter->num_elem_in_link_stack = 0; + /* resetting stack of used_links not really necessary, but good + to sanitize memory for eventual debugging + */ + memset(netw_node_iter->stack_used_links, 0, + netw_node_iter->num_links * sizeof(network_link_t *)); + netw_node_iter->hops = 0; + netw_node_iter->found_after_backtracking_step = -1; + } + + /* build a 4-ary heap to find the ccdg node with minimum distance */ + if (!cl_is_heap_inited(&ccdg->heap)) + ret = + cl_heap_init(&ccdg->heap, (size_t) ccdg->num_nodes, 4, + &update_ccdg_heap_index, 
NULL); + else + ret = cl_heap_resize(&ccdg->heap, (size_t) ccdg->num_nodes); + if (CL_SUCCESS != ret) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE33: cannot allocate memory or resize heap\n"); + return -1; + } + + /* get the first switch, i.e., 'source' of the dijkstra step w.r.t. the + network, and initialize some values + */ + source_netw_node = + get_network_node_by_lid(network, get_switch_lid(mgr, dlid)); + CL_ASSERT(source_netw_node); + source_netw_node->distance = 0; + source_netw_node->hops = + (osm_node_get_type(dest_port->p_node) == + IB_NODE_TYPE_SWITCH) ? 0 : 1; + num_netw_nodes_found = 1; + + /* do the same for the real 'source', i.e., the ccdg node */ + source_channel_id.local_lid = source_netw_node->lid; + source_channel_id.local_port = 0; + source_channel_id.remote_lid = source_netw_node->lid; + source_channel_id.remote_port = 0; + source_ccdg_node = get_ccdg_node_by_channel_id(ccdg, source_channel_id); + CL_ASSERT(source_ccdg_node); + change_fake_ccdg_node_color(ccdg, source_ccdg_node, source_color); + source_ccdg_node->distance = 0; + + curr_ccdg_node = source_ccdg_node; + do { + /* first iterate over the used and then the unused edges */ + iterate_over_used = TRUE; + for (j = 0, ccdg_edge_iter = curr_ccdg_node->edges; + j < 2 * curr_ccdg_node->num_edges; j++, ccdg_edge_iter++) { + /* reset edge iterator and now check unused */ + if (j == curr_ccdg_node->num_edges) { + ccdg_edge_iter = curr_ccdg_node->edges; + iterate_over_used = FALSE; + } + + if (iterate_over_used) { + if (get_ccdg_edge_color(ccdg, ccdg_edge_iter) < + ESCAPEPATHCOLOR) + continue; + } else { + if (UNUSED != + get_ccdg_edge_color(ccdg, ccdg_edge_iter)) + continue; + } + + next_ccdg_node = ccdg_edge_iter->to_ccdg_node; + new_distance = + curr_ccdg_node->distance + + next_ccdg_node->corresponding_netw_link->weight; + new_discovered_netw_node = + next_ccdg_node->corresponding_netw_link-> + to_network_node; + + if (new_distance < new_discovered_netw_node->distance) { + /* verify, 
that the ccdg_edge will not close a + cycle, or block the edge indefinitely + */ + if (using_edge_induces_cycle_in_ccdg + (mgr, ccdg, curr_ccdg_node, ccdg_edge_iter, + source_color)) + continue; + + if (last_active_backtracking_step + && new_discovered_netw_node->used_link + && last_active_backtracking_step != + new_discovered_netw_node-> + found_after_backtracking_step) { + /* check for shortcuts only for nodes + we have discovered before the first + backtracking step + */ + if (attempt_shortcut_discovery + (mgr, network, + new_discovered_netw_node, ccdg, + next_ccdg_node, source_color)) { + OSM_LOG(mgr->p_log, + OSM_LOG_INFO, + "found new shortcut towards LID %" + PRIu16 + " (%s) after successful backtracking\n", + new_discovered_netw_node-> + lid, + new_discovered_netw_node-> + sw->p_node->print_desc); + } else { + continue; + } + } else { + + /* check if this node was discovered on + a different path, then clean up the + heap (-> remove outdated ccdg_node) + */ + if (new_discovered_netw_node->used_link) { + old_ccdg_node = + new_discovered_netw_node-> + used_link-> + corresponding_ccdg_node; + CL_ASSERT(cl_is_stored_in_heap + (&ccdg->heap, + old_ccdg_node, + old_ccdg_node-> + heap_index)); + old_ccdg_node = + cl_heap_delete(&ccdg->heap, + old_ccdg_node-> + heap_index); + network_node = + get_network_node_by_lid + (network, + old_ccdg_node->channel_id. 
+ local_lid); + CL_ASSERT(network_node); + if (network_node->used_link) { + pre_old_ccdg_node = + network_node-> + used_link-> + corresponding_ccdg_node; + CL_ASSERT + (pre_old_ccdg_node); + reset_ccdg_edge_color_betw_nodes + (ccdg, + pre_old_ccdg_node, + old_ccdg_node, + NULL); + } else { + CL_ASSERT(network_node-> + lid == + source_netw_node-> + lid); + } + } else { + num_netw_nodes_found++; + new_discovered_netw_node-> + found_after_backtracking_step + = + last_active_backtracking_step; + } + + /* update the heap with the new + ccdg_node (only for non-shortcuts) + */ + next_ccdg_node->distance = new_distance; + ret = + cl_heap_insert(&ccdg->heap, + next_ccdg_node-> + distance, + next_ccdg_node); + if (CL_SUCCESS != ret) { + OSM_LOG(mgr->p_log, + OSM_LOG_ERROR, + "ERR NUE34: cl_heap_insert failed\n"); + return -1; + } + } + + /* write new distance, used_link, hops, etc. */ + new_discovered_netw_node->distance = + new_distance; + new_discovered_netw_node->used_link = + next_ccdg_node->corresponding_netw_link; + if (source_ccdg_node != curr_ccdg_node) + new_discovered_netw_node->hops = + curr_ccdg_node-> + corresponding_netw_link-> + to_network_node->hops + 1; + else + new_discovered_netw_node->hops = + source_netw_node->hops + 1; + + } else if (get_ccdg_edge_color(ccdg, ccdg_edge_iter) > + UNUSED) { + if (last_active_backtracking_step + && new_discovered_netw_node->used_link) + continue; + next_ccdg_node->distance = new_distance; + add_link_to_stack_of_used_links + (new_discovered_netw_node, + next_ccdg_node->corresponding_netw_link); + } + } + CL_ASSERT((source_ccdg_node == + curr_ccdg_node) ? 
TRUE : (uint64_t) curr_ccdg_node-> + corresponding_netw_link->to_network_node->used_link); + + curr_ccdg_node = + (ccdg_node_t *) cl_heap_extract_root(&ccdg->heap); + + if (!curr_ccdg_node) { + /* verify that all netw_nodes have been discovered, + and if NOT, we have to perform a local backtracking + or even worse: fall back to the escape paths + */ + if (num_netw_nodes_found != network->num_nodes) { + curr_ccdg_node = + attempt_local_backtracking(mgr, network, + source_netw_node, + ccdg, + source_color); + if (curr_ccdg_node) { + num_netw_nodes_found++; + last_active_backtracking_step += 1; + } else { + OSM_LOG(mgr->p_log, OSM_LOG_INFO, + "unsolvable impass reached; fallback to escape paths for destination LID %" + PRIu16 " (%s)\n", + cl_ntoh16(dlid), + dest_port->p_node->print_desc); + use_escape_paths_to_solve_impass(mgr, + network, + dest_port, + dijk_source_lid); + *fallback_to_escape_paths = TRUE; + last_active_backtracking_step = 0; + } + } + } + } while (curr_ccdg_node); + + if (last_active_backtracking_step) { + OSM_LOG(mgr->p_log, OSM_LOG_INFO, + "backtracking worked for destination LID %" PRIu16 + " (%s)\n", cl_ntoh16(dlid), + dest_port->p_node->print_desc); + } + + /* fix the colors in the ccdg, i.e., overwrite real colors with the + backup colors, for the next iteration + */ + fix_ccdg_colors(mgr, network, source_netw_node, ccdg, source_ccdg_node); + + OSM_LOG_EXIT(mgr->p_log); + return 0; +} + +/* update the edge weights along the path towards the destination of the + current routing step; the parameter desti is assumed to be a switch lid even + if the real desti is a terminal (then desti is the adjacent switch's lid) + */ +static void update_network_link_weights(const osm_ucast_mgr_t * mgr, + const network_t * network, + const ib_net16_t desti) +{ + network_node_t *network_node = NULL, *netw_node_iter = NULL; + network_link_t *network_link = NULL; + ib_net16_t curr_lid = 0; + uint8_t additional_weight = 0; + uint16_t i = 0; + + CL_ASSERT(mgr && 
network && desti > 0); + OSM_LOG_ENTER(mgr->p_log); + + /* desti is the switch's lid if original desti is a terminal */ + for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes; + i++, netw_node_iter++) { + if (netw_node_iter->lid == desti) + continue; + + /* num_terminals already includes the switch itself */ + additional_weight = netw_node_iter->num_terminals; + curr_lid = netw_node_iter->lid; + + do { + network_node = + get_network_node_by_lid(network, curr_lid); + CL_ASSERT(network_node); + network_link = network_node->used_link; + network_link->weight += additional_weight; + curr_lid = network_link->link_info.local_lid; + } while (curr_lid != desti); + } + + OSM_LOG_EXIT(mgr->p_log); +} + +static void init_linear_forwarding_tables(const osm_ucast_mgr_t * mgr, + const network_t * network) +{ + network_node_t *netw_node_iter = NULL; + osm_switch_t *sw = NULL; + uint16_t i = 0, lid = 0, min_lid_ho = 0, max_lid_ho = 0; + + CL_ASSERT(mgr && network); + OSM_LOG_ENTER(mgr->p_log); + + for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes; + i++, netw_node_iter++) { + sw = (osm_switch_t *) netw_node_iter->sw; + /* initialize LIDs in buffer to invalid port number */ + memset(sw->new_lft, OSM_NO_PATH, sw->max_lid_ho + 1); + /* initialize LFT and hop count for bsp0/esp0 of the switch */ + min_lid_ho = cl_ntoh16(osm_node_get_base_lid(sw->p_node, 0)); + max_lid_ho = + min_lid_ho + (1 << osm_node_get_lmc(sw->p_node, 0)) - 1; + for (lid = min_lid_ho; lid <= max_lid_ho; lid++) { + /* for each switch the port to the 'self'lid is the + management port 0 + */ + sw->new_lft[lid] = 0; + /* and the hop count to the 'self'lid is 0 */ + osm_switch_set_hops(sw, lid, 0, 0); + } + } + + OSM_LOG_EXIT(mgr->p_log); +} + +/* update the linear forwarding tables of all switches with the information + from the last routing step performed with our modified dijkstra on the ccdg +*/ +static void update_linear_forwarding_tables(const osm_ucast_mgr_t * mgr, + const 
network_t * network, + const osm_port_t * dest_port, + const ib_net16_t dlid) +{ + network_node_t *netw_node_iter = NULL; + osm_switch_t *sw = NULL; + uint8_t hops = 0, exit_port = 0; + osm_physp_t *phys_port = NULL; + boolean_t is_ignored_by_port_prof = FALSE; + uint16_t i = 0; + cl_status_t ret = CL_SUCCESS; + + CL_ASSERT(mgr && network && dest_port && dlid > 0); + OSM_LOG_ENTER(mgr->p_log); + + for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes; + i++, netw_node_iter++) { + /* if no route goes thru this switch, then it must be the + adjacent switch for a terminal + */ + if (!netw_node_iter->used_link) { + CL_ASSERT(netw_node_iter->lid == + get_switch_lid(mgr, dlid)); + /* the 'route' to port 0 was configured already in + our init_linear_forwarding_tables function + */ + if (osm_node_get_type(dest_port->p_node) == + IB_NODE_TYPE_SWITCH) + continue; + + (void)osm_node_get_remote_node(dest_port->p_node, + dest_port->p_physp-> + port_num, &exit_port); + } else { + exit_port = + netw_node_iter->used_link->link_info.remote_port; + } + + sw = netw_node_iter->sw; + hops = netw_node_iter->hops; + /* the used_link is the link that was used in dijkstra to reach + this node, so the remote_port is the local port on this node + */ + + OSM_LOG(mgr->p_log, OSM_LOG_DEBUG, + "Routing LID %" PRIu16 " thru port %" PRIu8 + " for switch 0x%016" PRIx64 "\n", cl_ntoh16(dlid), + exit_port, + cl_ntoh64(osm_node_get_node_guid(sw->p_node))); + + phys_port = osm_node_get_physp_ptr(sw->p_node, exit_port); + + /* we would like to optionally ignore this port in equalization + as in the case of the Mellanox Anafa Internal PCI TCA port + */ + is_ignored_by_port_prof = phys_port->is_prof_ignored; + + /* We also would ignore this route if the target lid is of + a switch and the port_profile_switch_nodes option is not TRUE + */ + if (!mgr->p_subn->opt.port_profile_switch_nodes) { + is_ignored_by_port_prof |= + (osm_node_get_type(dest_port->p_node) == + IB_NODE_TYPE_SWITCH); + } + + /* set 
port in LFT, but switches use host byte order */ + sw->new_lft[cl_ntoh16(dlid)] = exit_port; + + /* update the number of path routing thru this port */ + if (!is_ignored_by_port_prof) + osm_switch_count_path(sw, exit_port); + + /* set the hop count from this switch to the dlid */ + ret = osm_switch_set_hops(sw, cl_ntoh16(dlid), exit_port, hops); + if (CL_SUCCESS != ret) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE04: cannot set hops for LID %" PRIu16 + " for switch 0x%016" PRIx64 "\n", + cl_ntoh16(dlid), + cl_ntoh64(osm_node_get_node_guid(sw->p_node))); + } + } + + OSM_LOG_EXIT(mgr->p_log); +} + +static inline void update_dlid_to_vl_mapping(uint8_t * dlid_to_vl_mapping, + const ib_net16_t dlid, + const uint8_t virtual_layer) +{ + CL_ASSERT(dlid_to_vl_mapping && dlid > 0); + dlid_to_vl_mapping[cl_ntoh16(dlid)] = virtual_layer; +} + +static int nue_do_ucast_routing(void *context) +{ + nue_context_t *nue_ctx = (nue_context_t *) context; + osm_ucast_mgr_t *mgr = NULL; + osm_port_t *dest_port = NULL; + boolean_t include_switches = FALSE; + ib_net16_t *dlid_iter = NULL; + uint16_t lid = 0, min_lid_ho = 0, max_lid_ho = 0; + uint16_t i = 0; + uint8_t vl = 0; + uint8_t ntype = 0; + int err = 0; + int32_t color = 0; + boolean_t process_sw = FALSE, fallback_to_escape_paths = FALSE; + network_node_t *netw_node_iter = NULL; +#if defined (_DEBUG_) + ccdg_t verify_ccdg = {.num_nodes = 0, .nodes = NULL, .num_colors = 0, + .color_array = NULL}; +#endif + + if (nue_ctx) + mgr = (osm_ucast_mgr_t *) nue_ctx->mgr; + else + return -1; + + OSM_LOG_ENTER(mgr->p_log); + OSM_LOG(mgr->p_log, OSM_LOG_INFO, "Start routing process with nue\n"); + + init_linear_forwarding_tables(mgr, &(nue_ctx->network)); + + if (mgr->p_subn->opt.nue_include_switches) { + OSM_LOG(mgr->p_log, OSM_LOG_INFO, + " ...and consider switches as traffic sinks\n"); + include_switches = mgr->p_subn->opt.nue_include_switches; + } + + /* assign destination lids to different virtual layers */ + err = 
distribute_lids_onto_virtual_layers(nue_ctx, include_switches); + if (err) { + destroy_context(nue_ctx); + return -1; + } + if (OSM_LOG_IS_ACTIVE_V2(mgr->p_log, OSM_LOG_DEBUG)) + print_destination_distribution(mgr, nue_ctx->destinations, + nue_ctx->num_destinations); + + for (vl = 0; vl < nue_ctx->max_vl; vl++) { + OSM_LOG(mgr->p_log, OSM_LOG_DEBUG, + "Processing virtual layer %" PRIu8 "\n", vl); + + if (!nue_ctx->num_destinations[vl]) { + OSM_LOG(mgr->p_log, OSM_LOG_INFO, + "WRN NUE43: no desti in this VL; skipping\n"); + continue; + } + + color = ESCAPEPATHCOLOR + 1; + err = + reset_ccdg_color_array(mgr, &(nue_ctx->ccdg), + nue_ctx->num_destinations, + nue_ctx->max_vl, nue_ctx->max_lmc); + if (err) { + destroy_context(nue_ctx); + return -1; + } + init_ccdg_colors(&(nue_ctx->ccdg)); + + err = + mark_escape_paths(mgr, &(nue_ctx->network), + &(nue_ctx->ccdg), + nue_ctx->destinations[vl], + nue_ctx->num_destinations[vl], + (0 == vl) ? TRUE : FALSE); + if (err) { + destroy_context(nue_ctx); + return -1; + } + if (OSM_LOG_IS_ACTIVE_V2(mgr->p_log, OSM_LOG_DEBUG)) { + OSM_LOG(mgr->p_log, OSM_LOG_DEBUG, + "Complete CDG including escape paths for" + " virtual layer %" PRIu8 "\n", vl); + print_ccdg(mgr, &(nue_ctx->ccdg), TRUE); + } + + /* in the debug mode we monitor the correctness more closely */ + CL_ASSERT(deep_cpy_ccdg(mgr, &(nue_ctx->ccdg), &verify_ccdg)); + + process_sw = FALSE; + do { + dlid_iter = (ib_net16_t *) nue_ctx->destinations[vl]; + for (i = 0; i < nue_ctx->num_destinations[vl]; + i++, dlid_iter++) { + dest_port = + osm_get_port_by_lid(mgr->p_subn, + *dlid_iter); + ntype = osm_node_get_type(dest_port->p_node); + if (ntype == IB_NODE_TYPE_CA) { + if (process_sw) + continue; + OSM_LOG(mgr->p_log, OSM_LOG_DEBUG, + "Processing Hca with GUID" + " 0x%016" PRIx64 "\n", + cl_ntoh64(osm_node_get_node_guid + (dest_port->p_node))); + } else if (ntype == IB_NODE_TYPE_SWITCH) { + if (!process_sw) + continue; + OSM_LOG(mgr->p_log, OSM_LOG_DEBUG, + "Processing switch with 
GUID" + " 0x%016" PRIx64 "\n", + cl_ntoh64(osm_node_get_node_guid + (dest_port->p_node))); + } + + /* distribute the LID range across the ports + that can reach those LIDs to have disjoint + paths for one destination port with lmc>0; + for switches with bsp0: min=max; with esp0: + max>min if lmc>0 + */ + osm_port_get_lid_range_ho(dest_port, + &min_lid_ho, + &max_lid_ho); + for (lid = min_lid_ho; lid <= max_lid_ho; lid++) { + /* search a path from all nodes to dlid + without closing a cycle in the ccdg + */ + err = + route_via_modified_dijkstra_on_ccdg + (mgr, &(nue_ctx->network), + &(nue_ctx->ccdg), dest_port, + cl_hton16(lid), color++, + &fallback_to_escape_paths); + if (err) { + destroy_context(nue_ctx); + return -1; + } + /* check intermediate steps for cycles + in the complete cdg + */ + CL_ASSERT(add_paths_to_verify_ccdg + (mgr, &(nue_ctx->network), + get_switch_lid(mgr, + cl_hton16 + (lid)), + &(nue_ctx->ccdg), + &verify_ccdg, + fallback_to_escape_paths)); + CL_ASSERT(is_ccdg_cycle_free + (mgr, &verify_ccdg)); + /* print the updated complete cdg after + the routing for this desti is done + */ + if (OSM_LOG_IS_ACTIVE_V2 + (mgr->p_log, OSM_LOG_DEBUG)) { + OSM_LOG(mgr->p_log, + OSM_LOG_DEBUG, + "Complete CDG after routing destination LID %" + PRIu16 + " for virtual layer %" + PRIu8 "\n", lid, vl); + print_ccdg(mgr, + &(nue_ctx->ccdg), + TRUE); + } + + /* and print the calculated routes */ + if (OSM_LOG_IS_ACTIVE_V2 + (mgr->p_log, OSM_LOG_DEBUG)) { + OSM_LOG(mgr->p_log, + OSM_LOG_DEBUG, + "Calculated paths towards destination LID %" + PRIu16 "\n", lid); + print_routes(mgr, + &(nue_ctx-> + network), + dest_port, + cl_hton16(lid)); + } + + /* update linear forwarding tables of + all switches towards this desti + */ + update_linear_forwarding_tables(mgr, + & + (nue_ctx-> + network), + dest_port, + cl_hton16 + (lid)); + + /* traverse the calculated paths and + update link weights for the next + step to increase the path balancing + */ + 
update_network_link_weights(mgr, + &(nue_ctx-> + network), + get_switch_lid + (mgr, + cl_hton16 + (lid))); + + /* and finally update the mapping of + 'destination to virtual layer' + */ + update_dlid_to_vl_mapping(nue_ctx-> + dlid_to_vl_mapping, + cl_hton16 + (lid), vl); + } + } + if (!process_sw && include_switches) + process_sw = TRUE; + else + break; + } while (TRUE); + + /* do a final check if ccdg is acyclic after processing all */ + CL_ASSERT(is_ccdg_cycle_free(mgr, &verify_ccdg)); + } + + /* if switches haven't been included in the original destinations set + then it's only because they send no real data traffic and therefore + aren't considered for deadlock-free routing, meaning we have to add + switch<->switch paths separately but can use a simpler version + of Dijkstra's algo on the network, and don't have to use the + route_via_modified_dijkstra_on_ccdg function on the complete CDG + */ + if (!include_switches) { + for (i = 0, netw_node_iter = nue_ctx->network.nodes; + i < nue_ctx->network.num_nodes; i++, netw_node_iter++) { + dest_port = + osm_get_port_by_lid(mgr->p_subn, + netw_node_iter->lid); + OSM_LOG(mgr->p_log, OSM_LOG_DEBUG, + "Processing switch with GUID 0x%016" PRIx64 + "\n", + cl_ntoh64(osm_node_get_node_guid + (dest_port->p_node))); + + osm_port_get_lid_range_ho(dest_port, &min_lid_ho, + &max_lid_ho); + for (lid = min_lid_ho; lid <= max_lid_ho; lid++) { + /* use our simple multi-graph Dijkstra's algo */ + err = + calculate_spanning_tree_in_network(mgr, + & + (nue_ctx-> + network), + netw_node_iter); + if (err) { + destroy_context(nue_ctx); + return -1; + } + + /* the previous function uses the escape path + variable to store the actual path, so we + have to copy it to the used_link variable + */ + use_escape_paths_to_solve_impass(mgr, + &(nue_ctx-> + network), + dest_port, + cl_hton16 + (lid)); + + /* and now we can proceed with the usual, i.e., + updating link weights and forwarding tables + */ + update_linear_forwarding_tables(mgr, + 
&(nue_ctx-> + network), + dest_port, + cl_hton16(lid)); + update_network_link_weights(mgr, + &(nue_ctx->network), + cl_hton16(lid)); + + /* and we add them to VL0 */ + update_dlid_to_vl_mapping(nue_ctx-> + dlid_to_vl_mapping, + cl_hton16(lid), 0); + } + } + } + + OSM_LOG_EXIT(mgr->p_log); + return 0; +} + +/* reset is_mc_member and num_of_mcm for future computations */ +static void reset_mgrp_membership(const network_t * network) +{ + network_node_t *netw_node_iter = NULL; + uint16_t i = 0; + + CL_ASSERT(network && network->nodes); + for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes; + i++, netw_node_iter++) { + CL_ASSERT(netw_node_iter->sw); + + if (netw_node_iter->dropped) + continue; + + netw_node_iter->sw->is_mc_member = 0; + netw_node_iter->sw->num_of_mcm = 0; + } +} + +static inline void mcast_cleanup(const network_t * network, + cl_qlist_t * mcastgrp_port_list) +{ + reset_mgrp_membership(network); + osm_mcast_drop_port_list(mcastgrp_port_list); +} + +/* the function updates the multicast group membership information + similar to create_mgrp_switch_map (see osm_mcast_mgr.c) + => with it we can identify whether a switch needs to be processed or not + in our update_mcft function + */ +static void update_mgrp_membership(cl_qlist_t * port_list) +{ + osm_mcast_work_obj_t *work_obj = NULL; + osm_port_t *osm_port = NULL; + osm_node_t *rem_node = NULL; + cl_list_item_t *item = NULL; + uint8_t rem_port = 0; + + CL_ASSERT(port_list); + for (item = cl_qlist_head(port_list); item != cl_qlist_end(port_list); + item = cl_qlist_next(item)) { + work_obj = cl_item_obj(item, work_obj, list_item); + osm_port = work_obj->p_port; + if (IB_NODE_TYPE_CA == osm_node_get_type(osm_port->p_node)) { + rem_node = + osm_node_get_remote_node(osm_port->p_node, + osm_port->p_physp-> + port_num, &rem_port); + CL_ASSERT(rem_node && rem_node->sw); + rem_node->sw->num_of_mcm++; + } else { + CL_ASSERT(osm_port->p_node->sw); + osm_port->p_node->sw->is_mc_member = 1; + } + } +} + 
+/* update the multicast forwarding tables of all switches with the information + from the previous mcast routing step for the current mlid + */ +static void update_mcast_forwarding_tables(const osm_ucast_mgr_t * mgr, + const network_t * network, + const uint16_t mlid_ho, + const cl_qmap_t * port_map, + const network_node_t * root_node) +{ + network_node_t *network_node = NULL, *netw_node_iter = NULL; + osm_switch_t *sw = NULL; + osm_mcast_tbl_t *mcast_tbl = NULL; + uint16_t i = 0; + osm_node_t *rem_node = NULL; + uint8_t port = 0, rem_port = 0, upstream_port = 0, downstream_port = 0; + osm_physp_t *osm_physp = NULL; + ib_net64_t guid = 0; + + CL_ASSERT(mgr && network && network->nodes && port_map && root_node); + OSM_LOG_ENTER(mgr->p_log); + + for (i = 0, netw_node_iter = network->nodes; i < network->num_nodes; + i++, netw_node_iter++) { + CL_ASSERT(netw_node_iter->sw); + + network_node = netw_node_iter; + sw = network_node->sw; + + if (network_node->dropped) + continue; + + OSM_LOG(mgr->p_log, OSM_LOG_VERBOSE, + "Processing switch 0x%016" PRIx64 " (%s) for MLID 0x%" + PRIX16 "\n", cl_ntoh64(network_node->guid), + sw->p_node->print_desc, mlid_ho); + + /* if switch does not support mcast or no ports of this switch + are part of the mcast group, then jump to the next switch + */ + if (FALSE == osm_switch_supports_mcast(sw) + || (0 == sw->num_of_mcm && !(sw->is_mc_member))) + continue; + + mcast_tbl = osm_switch_get_mcast_tbl_ptr(network_node->sw); + + /* add all ports of this switch to the mcast table, + if these are part of the mcast group + */ + if (sw->is_mc_member) + osm_mcast_tbl_set(mcast_tbl, mlid_ho, 0); + for (port = 1; port < sw->num_ports; port++) { + /* get the node behind the port */ + rem_node = + osm_node_get_remote_node(sw->p_node, port, + &rem_port); + /* check if connected and it's not the same switch */ + if (!rem_node || sw == rem_node->sw) + continue; + /* make sure the link is healthy */ + osm_physp = osm_node_get_physp_ptr(sw->p_node, port); + 
if (!osm_physp || !osm_link_is_healthy(osm_physp)) + continue; + /* we do not add upstream ports in this step */ + if (IB_NODE_TYPE_CA != osm_node_get_type(rem_node)) + continue; + + /* add the exit port to the mcast forwarding table */ + guid = + osm_physp_get_port_guid(osm_node_get_physp_ptr + (rem_node, rem_port)); + if (cl_qmap_get(port_map, guid) != + cl_qmap_end(port_map)) + osm_mcast_tbl_set(mcast_tbl, mlid_ho, port); + } + + /* now we have to add the upstream port of 'this' switch and + the downstream port of the next switch to the mcast table + until we reach the root_sw + */ + while (network_node != root_node) { + /* the escape_path variable holds the link that was + used in the spanning tree calculation to reach + this node, so remote_port in link_info is the + local (upstream) port on network_node->sw + */ + upstream_port = + network_node->escape_path->link_info.remote_port; + osm_mcast_tbl_set(mcast_tbl, mlid_ho, upstream_port); + + /* now we go one step in direction root_sw and add the + downstream port for the spanning tree + */ + downstream_port = + network_node->escape_path->link_info.local_port; + network_node = + get_network_node_by_lid(network, network_node-> + escape_path->link_info. + local_lid); + CL_ASSERT(network_node); + mcast_tbl = + osm_switch_get_mcast_tbl_ptr(network_node->sw); + osm_mcast_tbl_set(mcast_tbl, mlid_ho, downstream_port); + } + } + + OSM_LOG_EXIT(mgr->p_log); +} + +/* Nue configures multicast forwarding tables by utilizing a spanning tree + calculation routed at a subnet switch suggested by OpenSM's internal + osm_mcast_mgr_find_root_switch(...) 
fn; however, Nue routing currently does + not guarantee deadlock-freedom for the set of multicast routes on all + topologies, nor for the combination of deadlock-free unicast routes with + the additional multicast routes + */ +static ib_api_status_t nue_do_mcast_routing(void *context, + osm_mgrp_box_t * mbox) +{ + nue_context_t *nue_ctx = (nue_context_t *) context; + osm_ucast_mgr_t *mgr = NULL; + cl_qlist_t mcastgrp_port_list; + cl_qmap_t mcastgrp_port_map; + uint16_t num_mcast_ports = 0, i = 0; + osm_switch_t *root_sw = NULL, *osm_sw = NULL; + network_t *network = NULL; + network_node_t *root_node = NULL, *network_node = NULL; + network_node_t *netw_node_iter = NULL; + ib_net64_t guid = 0; + int err = 0; + + if (nue_ctx) + mgr = (osm_ucast_mgr_t *) nue_ctx->mgr; + else + return IB_ERROR; + + CL_ASSERT(mgr && mbox); + OSM_LOG_ENTER(mgr->p_log); + + /* using the ucast cache feature with nue might mean that a leaf sw + got removed (and got back) without calling nue_discover_network and + therefore the stored netw (and pointers to osm's internal switches) + could be outdated (here we have no knowledge if it has happened, so + unfortunately a check is necessary... 
still better than rebuilding + nue_ctx->network every time we arrive here) + */ + if (mgr->p_subn->opt.use_ucast_cache && mgr->cache_valid) { + network = (network_t *) & (nue_ctx->network); + for (i = 0, netw_node_iter = network->nodes; + i < network->num_nodes; i++, netw_node_iter++) { + CL_ASSERT(netw_node_iter->sw); + + network_node = netw_node_iter; + guid = network_node->guid; + osm_sw = osm_get_switch_by_guid(mgr->p_subn, guid); + if (osm_sw) { + /* check if switch came back from the dead */ + if (network_node->dropped) + network_node->dropped = FALSE; + + /* verify that sw object has not been moved + (this can happen for a leaf switch, if it + was dropped and came back later without a + rerouting), otherwise we have to update + nue's internal switch pointer with the new + sw pointer + */ + if (osm_sw == network_node->sw) + continue; + else + network_node->sw = osm_sw; + } else { + /* if a switch from adj_list is not in the + sw_guid_tbl anymore, then the only reason is + that it was a leaf switch and opensm dropped + it without calling a rerouting + -> calling calculate_spanning_tree_in_network + is no problem, since it is a leaf and + different from root_sw + -> only update_mcast_forwarding_tables and + reset_mgrp_membership need to be aware of + these dropped switches + */ + if (!network_node->dropped) + network_node->dropped = TRUE; + } + } + } + + /* create a map and a list of all ports which are members of the mcast + group (a map to search elements and a list for iteration) + */ + err = + osm_mcast_make_port_list_and_map(&mcastgrp_port_list, + &mcastgrp_port_map, mbox); + if (err) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE36: insufficient memory to make port list for" + " MLID 0x%" PRIX16 "\n", mbox->mlid); + mcast_cleanup(&(nue_ctx->network), &mcastgrp_port_list); + return IB_ERROR; + } + + num_mcast_ports = (uint16_t) cl_qlist_count(&mcastgrp_port_list); + if (num_mcast_ports < 2) { + OSM_LOG(mgr->p_log, OSM_LOG_VERBOSE, + "MLID 0x%" PRIX16 " 
has %" PRIu16 + " member; nothing to do\n", mbox->mlid, + num_mcast_ports); + mcast_cleanup(&(nue_ctx->network), &mcastgrp_port_list); + return IB_SUCCESS; + } + + /* find the root switch for the spanning tree, which has the smallest + hops count to all LIDs in the mcast group + */ + root_sw = osm_mcast_mgr_find_root_switch(mgr->sm, &mcastgrp_port_list); + if (!root_sw) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE37: unable to locate a suitable root switch for" + " MLID 0x%" PRIX16 "\n", mbox->mlid); + mcast_cleanup(&(nue_ctx->network), &mcastgrp_port_list); + return IB_ERROR; + } + + /* find the root_sw in Nue's internal network node list */ + root_node = + get_network_node_by_lid(&(nue_ctx->network), + osm_node_get_base_lid(root_sw->p_node, 0)); + if (!root_node) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE38: cannot find root_sw with LID %" PRIu16 + " in network node list while processing MLID 0x%" PRIX16 + "\n", + cl_ntoh16(osm_node_get_base_lid(root_sw->p_node, 0)), + mbox->mlid); + mcast_cleanup(&(nue_ctx->network), &mcastgrp_port_list); + return IB_ERROR; + } + + /* calculate_spanning_tree_in_network does a bit more than needed + for the current problem, since we potentially only need a spanning + tree for a subgraph of the fabric, but performing the simple + Dijkstra's algorithm from the root_sw does not take too long; + we can reuse the subnet structure from the ucast routing, and + do not even have to reset the link weights (therefore the mcast + spanning tree will use less 'crowded' links in the network); + only issue: calculate_spanning_tree_in_network encodes the tree + in the escape_path variable and not in the used_link of the nodes, + which is important for update_mcast_forwarding_tables function + */ + err = + calculate_spanning_tree_in_network(mgr, &(nue_ctx->network), + root_node); + if (err) { + OSM_LOG(mgr->p_log, OSM_LOG_ERROR, + "ERR NUE39: failed to calculate spanning tree for" + " MLID 0x%" PRIX16 "\n", mbox->mlid); + 
mcast_cleanup(&(nue_ctx->network), &mcastgrp_port_list); + return IB_ERROR; + } + + /* set mcast group membership again for update_mcft, because for + some reason it has been reset by osm_mcast_mgr_find_root_switch fn + */ + update_mgrp_membership(&mcastgrp_port_list); + + /* update the mcast forwarding tables of the switches in the fabric */ + update_mcast_forwarding_tables(mgr, &(nue_ctx->network), mbox->mlid, + &mcastgrp_port_map, root_node); + + mcast_cleanup(&(nue_ctx->network), &mcastgrp_port_list); + OSM_LOG_EXIT(mgr->p_log); + return IB_SUCCESS; +} + +static uint8_t nue_get_vl_for_path(void *context, + const uint8_t hint_for_default_sl, + const ib_net16_t slid, const ib_net16_t dlid) +{ + nue_context_t *nue_ctx = (nue_context_t *) context; + osm_port_t *dest_port = NULL; + osm_ucast_mgr_t *mgr = NULL; + + if (nue_ctx && nue_ctx->routing_type == OSM_ROUTING_ENGINE_TYPE_NUE) + mgr = (osm_ucast_mgr_t *) nue_ctx->mgr; + else + return hint_for_default_sl; + + /* Assuming Nue was only allowed to use one virtual layer, then the + actual path-to-vl mapping is irrelevant, since all paths can be + assigned to any VL without creating credit loops. Hence, we can just + return the suggested/hinted SL to support various QoS levels. 
+ */ + if (1 == nue_ctx->max_vl) + return hint_for_default_sl; + + dest_port = osm_get_port_by_lid(mgr->p_subn, dlid); + if (!dest_port) + return hint_for_default_sl; + + if (!nue_ctx->dlid_to_vl_mapping) + return hint_for_default_sl; + + return nue_ctx->dlid_to_vl_mapping[cl_ntoh16(dlid)]; +} + +static void destroy_context(nue_context_t * nue_ctx) +{ + uint8_t i = 0; + + destroy_network(&(nue_ctx->network)); + destroy_ccdg(&(nue_ctx->ccdg)); + + for (i = 0; i < IB_MAX_NUM_VLS; i++) { + if (nue_ctx->destinations[i]) { + free(nue_ctx->destinations[i]); + nue_ctx->destinations[i] = NULL; + } + } + + if (nue_ctx->dlid_to_vl_mapping) { + free(nue_ctx->dlid_to_vl_mapping); + nue_ctx->dlid_to_vl_mapping = NULL; + } +} + +static void nue_destroy_context(void *context) +{ + nue_context_t *nue_ctx = (nue_context_t *) context; + if (!nue_ctx) + return; + destroy_context(nue_ctx); + free(context); +} + +int osm_ucast_nue_setup(struct osm_routing_engine *r, osm_opensm_t * osm) +{ + /* create context container and add ucast management object */ + nue_context_t *nue_context = + nue_create_context(osm, OSM_ROUTING_ENGINE_TYPE_NUE); + if (!nue_context) + return 1; /* alloc failed -> skip this routing */ + + /* reset function pointers to nue routines */ + r->context = (void *)nue_context; + r->build_lid_matrices = nue_discover_network; + r->ucast_build_fwd_tables = nue_do_ucast_routing; + r->ucast_dump_tables = NULL; + r->update_sl2vl = NULL; + r->update_vlarb = NULL; + r->path_sl = nue_get_vl_for_path; + r->mcast_build_stree = nue_do_mcast_routing; + r->destroy = nue_destroy_context; + + return 0; +} diff --git a/opensm/osm_ucast_updn.c b/opensm/osm_ucast_updn.c new file mode 100644 index 0000000..d29f12a --- /dev/null +++ b/opensm/osm_ucast_updn.c @@ -0,0 +1,677 @@ +/* + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2007,2009 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 
+ * Copyright (c) 2009 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +/* + * Abstract: + * Implementation of Up Down Algorithm using ranking & Min Hop + * Calculation functions + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_UCAST_UPDN_C +#include +#include +#include + +/* //////////////////////////// */ +/* Local types */ +/* //////////////////////////// */ + +/* direction */ +typedef enum updn_switch_dir { + UP = 0, + DOWN +} updn_switch_dir_t; + +/* updn structure */ +typedef struct updn { + unsigned num_roots; + osm_opensm_t *p_osm; +} updn_t; + +struct updn_node { + cl_list_item_t list; + osm_switch_t *sw; + uint64_t id; + updn_switch_dir_t dir; + unsigned rank; + unsigned visited; +}; + +/* This function returns direction based on rank and guid info of current & + remote ports */ +static updn_switch_dir_t updn_get_dir(unsigned cur_rank, unsigned rem_rank, + uint64_t cur_id, uint64_t rem_id) +{ + /* HACK: comes to solve root nodes connection, in a classic subnet root nodes do not connect + directly, but in case they are we assign to root node an UP direction to allow UPDN to discover + the subnet correctly (and not from the point of view of the last root node). + */ + if (!cur_rank && !rem_rank) + return UP; + + if (cur_rank < rem_rank) + return DOWN; + else if (cur_rank > rem_rank) + return UP; + else { + /* Equal rank, decide by id number, bigger == UP direction */ + if (cur_id > rem_id) + return UP; + else + return DOWN; + } +} + +/********************************************************************** + * This function does the bfs of min hop table calculation by guid index + * as a starting point. 
+ **********************************************************************/ +static int updn_bfs_by_node(IN osm_log_t * p_log, IN osm_subn_t * p_subn, + IN osm_switch_t * p_sw) +{ + uint8_t pn, pn_rem; + cl_qlist_t list; + uint16_t lid; + struct updn_node *u; + updn_switch_dir_t next_dir, current_dir; + + OSM_LOG_ENTER(p_log); + + lid = osm_node_get_base_lid(p_sw->p_node, 0); + lid = cl_ntoh16(lid); + osm_switch_set_hops(p_sw, lid, 0, 0); + + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Starting from switch - port GUID 0x%" PRIx64 " lid %u\n", + cl_ntoh64(p_sw->p_node->node_info.port_guid), lid); + + u = p_sw->priv; + u->dir = UP; + + /* Update list with the new element */ + cl_qlist_init(&list); + cl_qlist_insert_tail(&list, &u->list); + + /* BFS the list till no next element */ + while (!cl_is_qlist_empty(&list)) { + u = (struct updn_node *)cl_qlist_remove_head(&list); + u->visited = 0; /* cleanup */ + current_dir = u->dir; + /* Go over all ports of the switch and find unvisited remote nodes */ + for (pn = 1; pn < u->sw->num_ports; pn++) { + osm_node_t *p_remote_node; + struct updn_node *rem_u; + uint8_t current_min_hop, remote_min_hop, + set_hop_return_value; + osm_switch_t *p_remote_sw; + + p_remote_node = + osm_node_get_remote_node(u->sw->p_node, pn, + &pn_rem); + /* If no remote node OR remote node is not a SWITCH + continue to next pn */ + if (!p_remote_node || !p_remote_node->sw) + continue; + /* Fetch remote guid only after validation of remote node */ + p_remote_sw = p_remote_node->sw; + rem_u = p_remote_sw->priv; + /* Decide which direction to mark it (UP/DOWN) */ + next_dir = updn_get_dir(u->rank, rem_u->rank, + u->id, rem_u->id); + + /* Check if this is a legal step : the only illegal step is going + from DOWN to UP */ + if ((current_dir == DOWN) && (next_dir == UP)) { + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Avoiding move from 0x%016" PRIx64 + " to 0x%016" PRIx64 "\n", + cl_ntoh64(osm_node_get_node_guid(u->sw->p_node)), + 
cl_ntoh64(osm_node_get_node_guid(p_remote_node))); + /* Illegal step */ + continue; + } + /* Set MinHop value for the current lid */ + current_min_hop = osm_switch_get_least_hops(u->sw, lid); + /* Check hop count if better insert into list && update + the remote node Min Hop Table */ + remote_min_hop = + osm_switch_get_hop_count(p_remote_sw, lid, pn_rem); + if (current_min_hop + 1 < remote_min_hop) { + set_hop_return_value = + osm_switch_set_hops(p_remote_sw, lid, + pn_rem, + current_min_hop + 1); + if (set_hop_return_value) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AA01: " + "Invalid value returned from set min hop is: %d\n", + set_hop_return_value); + } + /* Check if remote port has already been visited */ + if (!rem_u->visited) { + /* Insert updn_switch item into the list */ + rem_u->dir = next_dir; + rem_u->visited = 1; + cl_qlist_insert_tail(&list, + &rem_u->list); + } + } + } + } + + OSM_LOG_EXIT(p_log); + return 0; +} + +/* NOTE : PLS check if we need to decide that the first */ +/* rank is a SWITCH for BFS purpose */ +static int updn_subn_rank(IN updn_t * p_updn) +{ + osm_switch_t *p_sw; + osm_physp_t *p_physp, *p_remote_physp; + cl_qlist_t list; + cl_map_item_t *item; + struct updn_node *u, *remote_u; + uint8_t num_ports, port_num; + osm_log_t *p_log = &p_updn->p_osm->log; + unsigned max_rank = 0; + + OSM_LOG_ENTER(p_log); + cl_qlist_init(&list); + + /* add all roots to the list */ + for (item = cl_qmap_head(&p_updn->p_osm->subn.sw_guid_tbl); + item != cl_qmap_end(&p_updn->p_osm->subn.sw_guid_tbl); + item = cl_qmap_next(item)) { + p_sw = (osm_switch_t *)item; + u = p_sw->priv; + if (!u->rank) + cl_qlist_insert_tail(&list, &u->list); + } + + /* BFS the list till it's empty */ + while (!cl_is_qlist_empty(&list)) { + u = (struct updn_node *)cl_qlist_remove_head(&list); + /* Go over all remote nodes and rank them (if not already visited) */ + p_sw = u->sw; + num_ports = p_sw->num_ports; + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Handling switch GUID 0x%" PRIx64 "\n", + 
cl_ntoh64(osm_node_get_node_guid(p_sw->p_node))); + for (port_num = 1; port_num < num_ports; port_num++) { + ib_net64_t port_guid; + + /* Current port fetched in order to get remote side */ + p_physp = + osm_node_get_physp_ptr(p_sw->p_node, port_num); + + if (!p_physp) + continue; + + p_remote_physp = p_physp->p_remote_physp; + + /* + make sure that all the following occur on p_remote_physp: + 1. The port isn't NULL + 2. It is a switch + */ + if (p_remote_physp && p_remote_physp->p_node->sw) { + remote_u = p_remote_physp->p_node->sw->priv; + port_guid = p_remote_physp->port_guid; + + if (remote_u->rank > u->rank + 1) { + remote_u->rank = u->rank + 1; + max_rank = remote_u->rank; + cl_qlist_insert_tail(&list, + &remote_u->list); + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Rank of port GUID 0x%" PRIx64 + " = %u\n", cl_ntoh64(port_guid), + remote_u->rank); + } + } + } + } + + /* Print Summary of ranking */ + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Subnet ranking completed. Max Node Rank = %d\n", max_rank); + OSM_LOG_EXIT(p_log); + return 0; +} + +/* hack: preserve min hops entries to any other root switches */ +static void updn_clear_non_root_hops(updn_t * updn, osm_switch_t * sw) +{ + osm_port_t *port; + unsigned i; + + for (i = 0; i < sw->num_hops; i++) + if (sw->hops[i]) { + port = osm_get_port_by_lid_ho(&updn->p_osm->subn, i); + if (!port || !port->p_node->sw + || ((struct updn_node *)port->p_node->sw->priv)-> + rank != 0) + memset(sw->hops[i], 0xff, sw->num_ports); + } +} + +static int updn_set_min_hop_table(IN updn_t * p_updn) +{ + osm_subn_t *p_subn = &p_updn->p_osm->subn; + osm_log_t *p_log = &p_updn->p_osm->log; + osm_switch_t *p_sw; + cl_map_item_t *item; + + OSM_LOG_ENTER(p_log); + + /* Go over all the switches in the subnet - for each init their Min Hop + Table */ + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Init Min Hop Table of all switches [\n"); + + for (item = cl_qmap_head(&p_updn->p_osm->subn.sw_guid_tbl); + item != cl_qmap_end(&p_updn->p_osm->subn.sw_guid_tbl); + item = 
cl_qmap_next(item)) { + p_sw = (osm_switch_t *)item; + /* Clear Min Hop Table */ + if (p_subn->opt.connect_roots) + updn_clear_non_root_hops(p_updn, p_sw); + else + osm_switch_clear_hops(p_sw); + } + + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "Init Min Hop Table of all switches ]\n"); + + /* Now do the BFS for each port in the subnet */ + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "BFS through all port guids in the subnet [\n"); + + for (item = cl_qmap_head(&p_updn->p_osm->subn.sw_guid_tbl); + item != cl_qmap_end(&p_updn->p_osm->subn.sw_guid_tbl); + item = cl_qmap_next(item)) { + p_sw = (osm_switch_t *)item; + updn_bfs_by_node(p_log, p_subn, p_sw); + } + + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "BFS through all port guids in the subnet ]\n"); + /* Cleanup */ + OSM_LOG_EXIT(p_log); + return 0; +} + +static int updn_build_lid_matrices(IN updn_t * p_updn) +{ + int status; + + OSM_LOG_ENTER(&p_updn->p_osm->log); + + OSM_LOG(&p_updn->p_osm->log, OSM_LOG_VERBOSE, + "Ranking all port guids in the list\n"); + if (!p_updn->num_roots) { + OSM_LOG(&p_updn->p_osm->log, OSM_LOG_ERROR, "ERR AA0A: " + "No guids were provided or number of guids is 0\n"); + status = -1; + goto _exit; + } + + /* Check if it's not a switched subnet */ + if (cl_is_qmap_empty(&p_updn->p_osm->subn.sw_guid_tbl)) { + OSM_LOG(&p_updn->p_osm->log, OSM_LOG_ERROR, "ERR AA0B: " + "This is not a switched subnet, cannot perform UPDN algorithm\n"); + status = -1; + goto _exit; + } + + /* Rank the subnet switches */ + if (updn_subn_rank(p_updn)) { + OSM_LOG(&p_updn->p_osm->log, OSM_LOG_ERROR, "ERR AA0E: " + "Failed to assign ranks\n"); + status = -1; + goto _exit; + } + + /* After multiple ranking need to set Min Hop Table by UpDn algorithm */ + OSM_LOG(&p_updn->p_osm->log, OSM_LOG_VERBOSE, + "Setting all switches' Min Hop Table\n"); + status = updn_set_min_hop_table(p_updn); + +_exit: + OSM_LOG_EXIT(&p_updn->p_osm->log); + return status; +} + +static struct updn_node *create_updn_node(osm_switch_t * sw) +{ + struct updn_node *u; + + 
u = malloc(sizeof(*u)); + if (!u) + return NULL; + memset(u, 0, sizeof(*u)); + u->sw = sw; + u->id = cl_ntoh64(osm_node_get_node_guid(sw->p_node)); + u->rank = 0xffffffff; + return u; +} + +static void delete_updn_node(struct updn_node *u) +{ + u->sw->priv = NULL; + free(u); +} + +/* Find Root nodes automatically by Min Hop Table info */ +static void updn_find_root_nodes_by_min_hop(OUT updn_t * p_updn) +{ + osm_opensm_t *p_osm = p_updn->p_osm; + osm_switch_t *p_sw; + osm_port_t *p_port; + osm_physp_t *p_physp; + cl_map_item_t *item; + double thd1, thd2; + unsigned i, cas_num = 0; + unsigned *cas_per_sw; + uint16_t lid_ho; + + OSM_LOG_ENTER(&p_osm->log); + + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, + "Current number of ports in the subnet is %d\n", + cl_qmap_count(&p_osm->subn.port_guid_tbl)); + + lid_ho = (uint16_t) cl_ptr_vector_get_size(&p_updn->p_osm->subn.port_lid_tbl) + 1; + cas_per_sw = malloc(lid_ho * sizeof(*cas_per_sw)); + if (!cas_per_sw) { + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "ERR AA14: " + "cannot alloc mem for CAs per switch counter array\n"); + goto _exit; + } + memset(cas_per_sw, 0, lid_ho * sizeof(*cas_per_sw)); + + /* Find the Maximum number of CAs (and routers) for histogram normalization */ + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "Finding the number of CAs and storing them in cl_map\n"); + for (item = cl_qmap_head(&p_updn->p_osm->subn.port_guid_tbl); + item != cl_qmap_end(&p_updn->p_osm->subn.port_guid_tbl); + item = cl_qmap_next(item)) { + p_port = (osm_port_t *)item; + if (!p_port->p_node->sw) { + p_physp = p_port->p_physp->p_remote_physp; + if (!p_physp || !p_physp->p_node->sw) + continue; + lid_ho = osm_node_get_base_lid(p_physp->p_node, 0); + lid_ho = cl_ntoh16(lid_ho); + cas_per_sw[lid_ho]++; + cas_num++; + } + } + + thd1 = cas_num * 0.9; + thd2 = cas_num * 0.05; + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, + "Found %u CAs and RTRs, %u SWs in the subnet. 
" + "Thresholds are thd1 = %f && thd2 = %f\n", + cas_num, cl_qmap_count(&p_osm->subn.sw_guid_tbl), thd1, thd2); + + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "Passing through all switches to collect Min Hop info\n"); + for (item = cl_qmap_head(&p_updn->p_osm->subn.sw_guid_tbl); + item != cl_qmap_end(&p_updn->p_osm->subn.sw_guid_tbl); + item = cl_qmap_next(item)) { + unsigned hop_hist[IB_SUBNET_PATH_HOPS_MAX]; + uint16_t max_lid_ho; + uint8_t hop_val; + uint16_t numHopBarsOverThd1 = 0; + uint16_t numHopBarsOverThd2 = 0; + + p_sw = (osm_switch_t *) item; + + memset(hop_hist, 0, sizeof(hop_hist)); + + max_lid_ho = p_sw->max_lid_ho; + for (lid_ho = 1; lid_ho <= max_lid_ho; lid_ho++) + if (cas_per_sw[lid_ho]) { + hop_val = + osm_switch_get_least_hops(p_sw, lid_ho); + if (hop_val >= IB_SUBNET_PATH_HOPS_MAX) + continue; + + hop_hist[hop_val] += cas_per_sw[lid_ho]; + } + + /* Now recognize the spines by requiring one bar to be + above 90% of the number of CAs and RTRs */ + for (i = 0; i < IB_SUBNET_PATH_HOPS_MAX; i++) { + if (hop_hist[i] > thd1) + numHopBarsOverThd1++; + if (hop_hist[i] > thd2) + numHopBarsOverThd2++; + } + + /* If thd conditions are valid - rank the root node */ + if (numHopBarsOverThd1 == 1 && numHopBarsOverThd2 == 1) { + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, + "Ranking GUID 0x%" PRIx64 " as root node\n", + cl_ntoh64(osm_node_get_node_guid(p_sw->p_node))); + ((struct updn_node *)p_sw->priv)->rank = 0; + p_updn->num_roots++; + } + } + + free(cas_per_sw); +_exit: + OSM_LOG_EXIT(&p_osm->log); + return; +} + +static void dump_roots(cl_map_item_t *item, FILE *file, void *cxt) +{ + osm_switch_t *sw = (osm_switch_t *)item; + if (!((struct updn_node *)sw->priv)->rank) + fprintf(file, "0x%" PRIx64 "\n", + cl_ntoh64(osm_node_get_node_guid(sw->p_node))); +} + +static int update_id(void *cxt, uint64_t guid, char *p) +{ + osm_opensm_t *osm = cxt; + osm_switch_t *sw; + uint64_t id; + char *e; + + sw = osm_get_switch_by_guid(&osm->subn, cl_hton64(guid)); + if (!sw) { + 
OSM_LOG(&osm->log, OSM_LOG_VERBOSE, + "switch with guid 0x%" PRIx64 " is not found\n", guid); + return 0; + } + + id = strtoull(p, &e, 0); + if (*e && !isspace(*e)) { + OSM_LOG(&osm->log, OSM_LOG_ERROR, + "ERR AA05: cannot parse node id \'%s\'", p); + return -1; + } + + OSM_LOG(&osm->log, OSM_LOG_DEBUG, + "update node 0x%" PRIx64 " id to 0x%" PRIx64 "\n", guid, id); + + ((struct updn_node *)sw->priv)->id = id; + + return 0; +} + +static int rank_root_node(void *cxt, uint64_t guid, char *p) +{ + updn_t *updn = cxt; + osm_switch_t *sw; + + sw = osm_get_switch_by_guid(&updn->p_osm->subn, cl_hton64(guid)); + if (!sw) { + OSM_LOG(&updn->p_osm->log, OSM_LOG_VERBOSE, + "switch with guid 0x%" PRIx64 " is not found\n", guid); + return 0; + } + + OSM_LOG(&updn->p_osm->log, OSM_LOG_DEBUG, + "Ranking root port GUID 0x%" PRIx64 "\n", guid); + + ((struct updn_node *)sw->priv)->rank = 0; + updn->num_roots++; + + return 0; +} + +/* UPDN callback function */ +static int updn_lid_matrices(void *ctx) +{ + updn_t *p_updn = ctx; + cl_map_item_t *item; + osm_switch_t *p_sw; + int ret = 0; + + OSM_LOG_ENTER(&p_updn->p_osm->log); + + for (item = cl_qmap_head(&p_updn->p_osm->subn.sw_guid_tbl); + item != cl_qmap_end(&p_updn->p_osm->subn.sw_guid_tbl); + item = cl_qmap_next(item)) { + p_sw = (osm_switch_t *)item; + p_sw->priv = create_updn_node(p_sw); + if (!p_sw->priv) { + OSM_LOG(&(p_updn->p_osm->log), OSM_LOG_ERROR, "ERR AA0C: " + "cannot create updn node\n"); + OSM_LOG_EXIT(&p_updn->p_osm->log); + return -1; + } + } + + /* First setup root nodes */ + p_updn->num_roots = 0; + + if (p_updn->p_osm->subn.opt.root_guid_file) { + OSM_LOG(&p_updn->p_osm->log, OSM_LOG_DEBUG, + "UPDN - Fetching root nodes from file \'%s\'\n", + p_updn->p_osm->subn.opt.root_guid_file); + + ret = parse_node_map(p_updn->p_osm->subn.opt.root_guid_file, + rank_root_node, p_updn); + if (ret) { + OSM_LOG(&p_updn->p_osm->log, OSM_LOG_ERROR, "ERR AA02: " + "cannot parse root guids file \'%s\'\n", + 
p_updn->p_osm->subn.opt.root_guid_file); + osm_ucast_mgr_build_lid_matrices(&p_updn->p_osm->sm.ucast_mgr); + updn_find_root_nodes_by_min_hop(p_updn); + } else if (p_updn->p_osm->subn.opt.connect_roots && + p_updn->num_roots > 1) + osm_ucast_mgr_build_lid_matrices(&p_updn->p_osm->sm.ucast_mgr); + } else { + osm_ucast_mgr_build_lid_matrices(&p_updn->p_osm->sm.ucast_mgr); + updn_find_root_nodes_by_min_hop(p_updn); + } + + if (p_updn->p_osm->subn.opt.ids_guid_file) { + OSM_LOG(&p_updn->p_osm->log, OSM_LOG_DEBUG, + "UPDN - update node ids from file \'%s\'\n", + p_updn->p_osm->subn.opt.ids_guid_file); + + ret = parse_node_map(p_updn->p_osm->subn.opt.ids_guid_file, + update_id, p_updn->p_osm); + if (ret) + OSM_LOG(&p_updn->p_osm->log, OSM_LOG_ERROR, "ERR AA03: " + "cannot parse node ids file \'%s\'\n", + p_updn->p_osm->subn.opt.ids_guid_file); + } + + /* Only if there are assigned root nodes do the algorithm, otherwise perform do nothing */ + if (p_updn->num_roots) { + OSM_LOG(&p_updn->p_osm->log, OSM_LOG_DEBUG, + "activating UPDN algorithm\n"); + ret = updn_build_lid_matrices(p_updn); + } else { + OSM_LOG(&p_updn->p_osm->log, OSM_LOG_INFO, + "disabling UPDN algorithm, no root nodes were found\n"); + ret = -1; + } + + if (OSM_LOG_IS_ACTIVE_V2(&p_updn->p_osm->log, OSM_LOG_ROUTING)) + osm_dump_qmap_to_file(p_updn->p_osm, "opensm-updn-roots.dump", + &p_updn->p_osm->subn.sw_guid_tbl, + dump_roots, NULL); + + for (item = cl_qmap_head(&p_updn->p_osm->subn.sw_guid_tbl); + item != cl_qmap_end(&p_updn->p_osm->subn.sw_guid_tbl); + item = cl_qmap_next(item)) { + p_sw = (osm_switch_t *) item; + delete_updn_node(p_sw->priv); + } + + OSM_LOG_EXIT(&p_updn->p_osm->log); + return ret; +} + +static void updn_delete(void *context) +{ + free(context); +} + +int osm_ucast_updn_setup(struct osm_routing_engine *r, osm_opensm_t *osm) +{ + updn_t *updn; + + updn = malloc(sizeof(updn_t)); + if (!updn) + return -1; + memset(updn, 0, sizeof(updn_t)); + + updn->p_osm = osm; + + r->context = updn; + 
r->destroy = updn_delete; + r->build_lid_matrices = updn_lid_matrices; + + return 0; +} diff --git a/opensm/osm_vl15intf.c b/opensm/osm_vl15intf.c new file mode 100644 index 0000000..06a5001 --- /dev/null +++ b/opensm/osm_vl15intf.c @@ -0,0 +1,401 @@ +/* + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2010 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_vl15_t. + * This object represents the VL15 Interface object. 
+ * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#define FILE_ID OSM_FILE_VL15INTF_C +#include +#include +#include +#include +#include + +static void vl15_send_mad(osm_vl15_t * p_vl, osm_madw_t * p_madw) +{ + ib_api_status_t status; + boolean_t resp_expected = p_madw->resp_expected; + ib_smp_t * p_smp; + ib_net16_t attr_id; + uint8_t method; + + p_smp = osm_madw_get_smp_ptr(p_madw); + method = p_smp->method; + attr_id = p_smp->attr_id; + + /* + Non-response-expected mads are not throttled on the wire + since we can have no confirmation that they arrived + at their destination. + */ + if (resp_expected) + /* + Note that other threads may not see the response MAD + arrive before send() even returns. + In that case, the wire count would temporarily go negative. + To avoid this confusion, preincrement the counts on the + assumption that send() will succeed. + */ + cl_atomic_inc(&p_vl->p_stats->qp0_mads_outstanding_on_wire); + else + cl_atomic_inc(&p_vl->p_stats->qp0_unicasts_sent); + + cl_atomic_inc(&p_vl->p_stats->qp0_mads_sent); + + status = osm_vendor_send(osm_madw_get_bind_handle(p_madw), + p_madw, p_madw->resp_expected); + + if (status == IB_SUCCESS) { + OSM_LOG(p_vl->p_log, OSM_LOG_DEBUG, + "%u QP0 MADs on wire, %u outstanding, " + "%u unicasts sent, %u total sent\n", + p_vl->p_stats->qp0_mads_outstanding_on_wire, + p_vl->p_stats->qp0_mads_outstanding, + p_vl->p_stats->qp0_unicasts_sent, + p_vl->p_stats->qp0_mads_sent); + return; + } + + OSM_LOG(p_vl->p_log, OSM_LOG_ERROR, "ERR 3E03: " + "MAD send failed (%s)\n", ib_get_err_str(status)); + + /* + The MAD was never successfully sent, so + fix up the pre-incremented count values. + */ + + /* Decrement qp0_mads_sent that were incremented in the code above. 
+ qp0_mads_outstanding will be decremented by send error callback + (called by osm_vendor_send() */ + cl_atomic_dec(&p_vl->p_stats->qp0_mads_sent); + if (!resp_expected) { + cl_atomic_dec(&p_vl->p_stats->qp0_unicasts_sent); + return; + } + + /* need to cause heavy-sweep if resp_expected MAD sending failed */ + OSM_LOG(p_vl->p_log, OSM_LOG_ERROR, "ERR 3E04: " + "%s method failed for attribute 0x%X (%s)\n", + method == IB_MAD_METHOD_SET ? "SET" : "GET", + cl_ntoh16(attr_id), ib_get_sm_attr_str(attr_id)); + + p_vl->p_subn->subnet_initialization_error = TRUE; + +} + +static void vl15_poller(IN void *p_ptr) +{ + ib_api_status_t status; + osm_madw_t *p_madw; + osm_vl15_t *p_vl = p_ptr; + cl_qlist_t *p_fifo; + int32_t max_smps = p_vl->max_wire_smps; + int32_t max_smps2 = p_vl->max_wire_smps2; + + OSM_LOG_ENTER(p_vl->p_log); + + if (p_vl->thread_state == OSM_THREAD_STATE_NONE) + p_vl->thread_state = OSM_THREAD_STATE_RUN; + + while (p_vl->thread_state == OSM_THREAD_STATE_RUN) { + /* + Start servicing the FIFOs by pulling off MAD wrappers + and passing them to the transport interface. + There are lots of corner cases here so tread carefully. + + The unicast FIFO has priority, since somebody is waiting + for a timely response. + */ + cl_spinlock_acquire(&p_vl->lock); + + if (cl_qlist_count(&p_vl->ufifo) != 0) + p_fifo = &p_vl->ufifo; + else + p_fifo = &p_vl->rfifo; + + p_madw = (osm_madw_t *) cl_qlist_remove_head(p_fifo); + + cl_spinlock_release(&p_vl->lock); + + if (p_madw != (osm_madw_t *) cl_qlist_end(p_fifo)) { + OSM_LOG(p_vl->p_log, OSM_LOG_DEBUG, + "Servicing p_madw = %p\n", p_madw); + if (OSM_LOG_IS_ACTIVE_V2(p_vl->p_log, OSM_LOG_FRAMES)) + osm_dump_dr_smp_v2(p_vl->p_log, + osm_madw_get_smp_ptr(p_madw), + FILE_ID, OSM_LOG_FRAMES); + + vl15_send_mad(p_vl, p_madw); + } else + /* + The VL15 FIFO is empty, so we have nothing left to do. 
+ */ + status = cl_event_wait_on(&p_vl->signal, + EVENT_NO_TIMEOUT, TRUE); + + while (p_vl->p_stats->qp0_mads_outstanding_on_wire >= max_smps && + p_vl->thread_state == OSM_THREAD_STATE_RUN) { + status = cl_event_wait_on(&p_vl->signal, + p_vl->max_smps_timeout, + TRUE); + if (status == CL_TIMEOUT) { + if (max_smps < max_smps2) + max_smps++; + break; + } else if (status != CL_SUCCESS) { + OSM_LOG(p_vl->p_log, OSM_LOG_ERROR, "ERR 3E02: " + "Event wait failed (%s)\n", + CL_STATUS_MSG(status)); + break; + } + max_smps = p_vl->max_wire_smps; + } + } + + /* + since we abort immediately when the state != OSM_THREAD_STATE_RUN + we might have some mads on the queues. After the thread exits + the vl15 destroy routine should put these mads back... + */ + + OSM_LOG_EXIT(p_vl->p_log); +} + +void osm_vl15_construct(IN osm_vl15_t * p_vl) +{ + memset(p_vl, 0, sizeof(*p_vl)); + p_vl->state = OSM_VL15_STATE_INIT; + p_vl->thread_state = OSM_THREAD_STATE_NONE; + cl_event_construct(&p_vl->signal); + cl_spinlock_construct(&p_vl->lock); + cl_qlist_init(&p_vl->rfifo); + cl_qlist_init(&p_vl->ufifo); + cl_thread_construct(&p_vl->poller); +} + +void osm_vl15_destroy(IN osm_vl15_t * p_vl, IN struct osm_mad_pool *p_pool) +{ + osm_madw_t *p_madw; + + OSM_LOG_ENTER(p_vl->p_log); + + /* + Signal our threads that we're leaving. + */ + p_vl->thread_state = OSM_THREAD_STATE_EXIT; + + /* + Don't trigger unless event has been initialized. + Destroy the thread before we tear down the other objects. 
+ */ + if (p_vl->state != OSM_VL15_STATE_INIT) + cl_event_signal(&p_vl->signal); + + cl_thread_destroy(&p_vl->poller); + + /* + Return the outstanding messages to the pool + */ + + cl_spinlock_acquire(&p_vl->lock); + + while (!cl_is_qlist_empty(&p_vl->rfifo)) { + p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_vl->rfifo); + osm_mad_pool_put(p_pool, p_madw); + } + while (!cl_is_qlist_empty(&p_vl->ufifo)) { + p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_vl->ufifo); + osm_mad_pool_put(p_pool, p_madw); + } + + cl_spinlock_release(&p_vl->lock); + + cl_event_destroy(&p_vl->signal); + p_vl->state = OSM_VL15_STATE_INIT; + cl_spinlock_destroy(&p_vl->lock); + + OSM_LOG_EXIT(p_vl->p_log); +} + +ib_api_status_t osm_vl15_init(IN osm_vl15_t * p_vl, IN osm_vendor_t * p_vend, + IN osm_log_t * p_log, IN osm_stats_t * p_stats, + IN osm_subn_t * p_subn, + IN int32_t max_wire_smps, + IN int32_t max_wire_smps2, + IN uint32_t max_smps_timeout) +{ + ib_api_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(p_log); + + p_vl->p_vend = p_vend; + p_vl->p_log = p_log; + p_vl->p_stats = p_stats; + p_vl->p_subn = p_subn; + p_vl->max_wire_smps = max_wire_smps; + p_vl->max_wire_smps2 = max_wire_smps2; + p_vl->max_smps_timeout = max_wire_smps < max_wire_smps2 ? + max_smps_timeout : EVENT_NO_TIMEOUT; + + status = cl_event_init(&p_vl->signal, FALSE); + if (status != IB_SUCCESS) + goto Exit; + + p_vl->state = OSM_VL15_STATE_READY; + + status = cl_spinlock_init(&p_vl->lock); + if (status != IB_SUCCESS) + goto Exit; + + /* + Initialize the thread after all other dependent objects + have been initialized. + */ + status = cl_thread_init(&p_vl->poller, vl15_poller, p_vl, + "opensm poller"); +Exit: + OSM_LOG_EXIT(p_log); + return status; +} + +void osm_vl15_poll(IN osm_vl15_t * p_vl) +{ + OSM_LOG_ENTER(p_vl->p_log); + + CL_ASSERT(p_vl->state == OSM_VL15_STATE_READY); + + /* + If we have room for more VL15 MADs on the wire, + then signal the poller thread. 
+ + This is not an airtight check, since the poller thread + could be just about to send another MAD as we signal + the event here. To cover this rare case, the poller + thread checks for a spurious wake-up. + */ + if (p_vl->p_stats->qp0_mads_outstanding_on_wire < + (int32_t) p_vl->max_wire_smps) { + OSM_LOG(p_vl->p_log, OSM_LOG_DEBUG, + "Signalling poller thread\n"); + cl_event_signal(&p_vl->signal); + } + + OSM_LOG_EXIT(p_vl->p_log); +} + +void osm_vl15_post(IN osm_vl15_t * p_vl, IN osm_madw_t * p_madw) +{ + OSM_LOG_ENTER(p_vl->p_log); + + CL_ASSERT(p_vl->state == OSM_VL15_STATE_READY); + + OSM_LOG(p_vl->p_log, OSM_LOG_DEBUG, "Posting p_madw = %p\n", p_madw); + + /* + Determine in which fifo to place the pending madw. + */ + cl_spinlock_acquire(&p_vl->lock); + if (p_madw->resp_expected == TRUE) { + cl_qlist_insert_tail(&p_vl->rfifo, &p_madw->list_item); + osm_stats_inc_qp0_outstanding(p_vl->p_stats); + } else + cl_qlist_insert_tail(&p_vl->ufifo, &p_madw->list_item); + cl_spinlock_release(&p_vl->lock); + + OSM_LOG(p_vl->p_log, OSM_LOG_DEBUG, + "%u QP0 MADs on wire, %u QP0 MADs outstanding\n", + p_vl->p_stats->qp0_mads_outstanding_on_wire, + p_vl->p_stats->qp0_mads_outstanding); + + osm_vl15_poll(p_vl); + + OSM_LOG_EXIT(p_vl->p_log); +} + +void osm_vl15_shutdown(IN osm_vl15_t * p_vl, IN osm_mad_pool_t * p_mad_pool) +{ + osm_madw_t *p_madw; + + OSM_LOG_ENTER(p_vl->p_log); + + /* we only should get here after the VL15 interface was initialized */ + CL_ASSERT(p_vl->state == OSM_VL15_STATE_READY); + + /* grab a lock on the object */ + cl_spinlock_acquire(&p_vl->lock); + + /* go over all outstanding MADs and retire their transactions */ + + /* first we handle the list of response MADs */ + p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_vl->ufifo); + while (p_madw != (osm_madw_t *) cl_qlist_end(&p_vl->ufifo)) { + OSM_LOG(p_vl->p_log, OSM_LOG_DEBUG, + "Releasing Response p_madw = %p\n", p_madw); + + osm_mad_pool_put(p_mad_pool, p_madw); + + p_madw = (osm_madw_t *) 
cl_qlist_remove_head(&p_vl->ufifo); + } + + /* Request MADs we send out */ + p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_vl->rfifo); + while (p_madw != (osm_madw_t *) cl_qlist_end(&p_vl->rfifo)) { + OSM_LOG(p_vl->p_log, OSM_LOG_DEBUG, + "Releasing Request p_madw = %p\n", p_madw); + + osm_mad_pool_put(p_mad_pool, p_madw); + osm_stats_dec_qp0_outstanding(p_vl->p_stats); + + p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_vl->rfifo); + } + + /* free the lock */ + cl_spinlock_release(&p_vl->lock); + + OSM_LOG_EXIT(p_vl->p_log); +} diff --git a/opensm/osm_vl_arb_rcv.c b/opensm/osm_vl_arb_rcv.c new file mode 100644 index 0000000..fe6b8c7 --- /dev/null +++ b/opensm/osm_vl_arb_rcv.c @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2010 HNR Consulting. All rights reserved. + * Copyright (c) 2010 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of osm_vla_rcv_t. + * This object represents the Vl Arbitration Receiver object. + * This object is part of the opensm family of objects. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#define FILE_ID OSM_FILE_VL_ARB_RCV_C +#include +#include +#include +#include +#include +#include + +/* + * osm_vla_rcv_process - process a received VLArbitration SMP response. + * + * WE ONLY RECEIVE GET or SET responses + * + * context is used as the osm_sm_t and data as the osm_madw_t wrapping the + * SMP. A response with a non-zero status is only logged at debug level. + * Otherwise the port is looked up by the GUID stored in the MAD's VLA + * context and, for block numbers 1..4, the payload is stored on the + * physical port with osm_physp_set_vla_tbl(). + */ +void osm_vla_rcv_process(IN void *context, IN void *data) +{ + osm_sm_t *sm = context; + osm_madw_t *p_madw = data; + ib_vl_arb_table_t *p_vla_tbl; + ib_smp_t *p_smp; + osm_port_t *p_port; + osm_physp_t *p_physp; + osm_node_t *p_node; + osm_vla_context_t *p_context; + ib_net64_t port_guid; + ib_net64_t node_guid; + uint8_t port_num, block_num; + + CL_ASSERT(sm); + + OSM_LOG_ENTER(sm->p_log); + + CL_ASSERT(p_madw); + + p_smp = osm_madw_get_smp_ptr(p_madw); + + p_context = osm_madw_get_vla_context_ptr(p_madw); + p_vla_tbl = ib_smp_get_payload_ptr(p_smp); + + port_guid = p_context->port_guid; + node_guid = p_context->node_guid; + + CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_VL_ARBITRATION); + + if (ib_smp_get_status(p_smp)) { + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "MAD status 0x%x received\n", + cl_ntoh16(ib_smp_get_status(p_smp))); + goto Exit2; /* lock not taken yet: skip the release at Exit */ + } + /* Each goto Exit below runs with this lock held; the Exit label releases it. */ + cl_plock_excl_acquire(sm->p_lock); + p_port = osm_get_port_by_guid(sm->p_subn, port_guid); + if (!p_port) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3F06: " + "No port object for port
with GUID 0x%" PRIx64 + "\n\t\t\t\tfor parent node GUID 0x%" PRIx64 + ", TID 0x%" PRIx64 "\n", cl_ntoh64(port_guid), + cl_ntoh64(node_guid), cl_ntoh64(p_smp->trans_id)); + goto Exit; + } + + p_node = p_port->p_node; + CL_ASSERT(p_node); + /* Attribute modifier: block number is taken from bits 16 and up, the switch port number from the low byte. */ + block_num = (uint8_t) (cl_ntoh32(p_smp->attr_mod) >> 16); + /* in case of a non switch node the attr modifier should be ignored */ + if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) { + port_num = (uint8_t) (cl_ntoh32(p_smp->attr_mod) & 0x000000FF); + p_physp = osm_node_get_physp_ptr(p_node, port_num); + } else { + p_physp = p_port->p_physp; + port_num = p_physp->port_num; + } + + /* + We do not care if this is a result of a set or get - + all we want is to update the subnet. + */ + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Got GetResp(VLArb) block:%u port_num %u with GUID 0x%" + PRIx64 " for parent node GUID 0x%" PRIx64 ", TID 0x%" + PRIx64 "\n", block_num, port_num, cl_ntoh64(port_guid), + cl_ntoh64(node_guid), cl_ntoh64(p_smp->trans_id)); + + /* + No physical port object exists for this port number: + log the error and ignore the response. + */ + if (!p_physp) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "Got invalid port number %u\n", port_num); + goto Exit; + } + /* Only blocks 1..4 are accepted; any other block number is rejected with an error. */ + if ((block_num < 1) || (block_num > 4)) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "Got invalid block number 0x%X\n", block_num); + goto Exit; + } + + osm_dump_vl_arb_table_v2(sm->p_log, port_guid, block_num, port_num, + p_vla_tbl, FILE_ID, OSM_LOG_DEBUG); + osm_physp_set_vla_tbl(p_physp, p_vla_tbl, block_num); + +Exit: + cl_plock_release(sm->p_lock); + +Exit2: + OSM_LOG_EXIT(sm->p_log); +} diff --git a/opensm/st.c b/opensm/st.c new file mode 100644 index 0000000..c2ee014 --- /dev/null +++ b/opensm/st.c @@ -0,0 +1,591 @@ +/* + * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#define FILE_ID OSM_FILE_ST_C +#include + +typedef struct st_table_entry st_table_entry; +/* One chained entry of a bin: the hash value stored for the key, the key, its record, and the next entry in the same bin. */ +struct st_table_entry { + unsigned int hash; + st_data_t key; + st_data_t record; + st_table_entry *next; +}; + +#define ST_DEFAULT_MAX_DENSITY 5 +#define ST_DEFAULT_INIT_TABLE_SIZE 11 + +/* + * ST_DEFAULT_MAX_DENSITY is the default for the largest we allow the + * average number of items per bin before increasing the number of + * bins + * + * ST_DEFAULT_INIT_TABLE_SIZE is the default for the number of bins + * allocated initially + * + */ +static int numcmp(void *, void *); +static st_ptr_t numhash(void *); +static struct st_hash_type type_numhash = { + numcmp, + numhash, +}; + +/* extern int strcmp(const char *, const char *); */ +static int strhash(const char *); +/* void*-signature adapters around strhash() and strcmp(); they populate type_strhash below. */ +static inline st_ptr_t st_strhash(void *key) +{ + return strhash((const char *)key); +} + +static inline int st_strcmp(void *key1, void *key2) +{ + return strcmp((const char *)key1, (const char *)key2); +} + +static struct st_hash_type type_strhash = { + st_strcmp, + st_strhash +}; + +#define xmalloc malloc +#define xcalloc calloc +#define xrealloc realloc +#define xfree free + +static void rehash(st_table *); + +#define alloc(type) (type*)xmalloc(sizeof(type)) +#define Calloc(n,s) (char*)xcalloc((n), (s)) + +#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)(((void*)x),((void *)y)) == 0) + +#define do_hash(key,table) (unsigned int)(*(table)->type->hash)(((void*)key)) +#define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins) + +/* + * MINSIZE is the minimum size of a dictionary. + */ + +#define MINSIZE 8 + +/* + Table of prime numbers 2^n+a, 3<=n<=30.
+*/ +static long primes[] = { + 8 + 3, + 16 + 3, + 32 + 5, + 64 + 3, + 128 + 3, + 256 + 27, + 512 + 9, + 1024 + 9, + 2048 + 5, + 4096 + 3, + 8192 + 27, + 16384 + 43, + 32768 + 3, + 65536 + 45, + 131072 + 29, + 262144 + 3, + 524288 + 21, + 1048576 + 7, + 2097152 + 17, + 4194304 + 15, + 8388608 + 9, + 16777216 + 43, + 33554432 + 35, + 67108864 + 15, + 134217728 + 29, + 268435456 + 3, + 536870912 + 11, + 1073741824 + 85, + 0 +}; +/* Step newsize through MINSIZE, 2*MINSIZE, ... alongside primes[] and return the prime at the first step where newsize exceeds size; returns 0 when the table is exhausted. */ +static int new_size(int size) +{ + int i; + +#if 0 + for (i = 3; i < 31; i++) { + if ((1 << i) > size) + return 1 << i; + } + return -1; +#else + int newsize; + + for (i = 0, newsize = MINSIZE; + i < sizeof(primes) / sizeof(primes[0]); i++, newsize <<= 1) { + if (newsize > size) + return primes[i]; + } + /* Ran out of polynomials */ + return 0; /* should raise exception */ +#endif +} + +#ifdef HASH_LOG +static int collision = 0; +static int init_st = 0; +/* atexit hook: write the collision counter to the log file; does nothing if the file cannot be opened. */ +static void stat_col() +{ + FILE *f = fopen("/var/log/osm_st_col", "w"); + if (!f) + return; + fprintf(f, "collision: %d\n", collision); + fclose(f); +} +#endif +/* Create an empty table whose bin count is size rounded up by new_size(). Returns NULL when new_size() yields 0 or when either allocation fails. */ +st_table *st_init_table_with_size(type, size) +struct st_hash_type *type; +size_t size; +{ + st_table *tbl; + +#ifdef HASH_LOG + if (init_st == 0) { + init_st = 1; + atexit(stat_col); + } +#endif + + size = new_size(size); /* round up to prime number */ + if (!size) + return NULL; + + tbl = alloc(st_table); + if (!tbl) + return NULL; + tbl->type = type; + tbl->num_entries = 0; + tbl->num_bins = size; + tbl->bins = (st_table_entry **) Calloc(size, sizeof(st_table_entry *)); + if (!tbl->bins) { + free(tbl); + return NULL; + } + + return tbl; +} + +st_table *st_init_table(type) +struct st_hash_type *type; +{ + return st_init_table_with_size(type, 0); +} + +st_table *st_init_numtable(void) +{ + return st_init_table(&type_numhash); +} + +st_table *st_init_numtable_with_size(size) +size_t size; +{ + return st_init_table_with_size(&type_numhash, size); +} + +st_table *st_init_strtable(void) +{ + return st_init_table(&type_strhash); +} + +st_table *st_init_strtable_with_size(size) +size_t size; +{ + return
st_init_table_with_size(&type_strhash, size); +} +/* Free every entry of every bin, then the bin array and the table itself. */ +void st_free_table(table) +st_table *table; +{ + register st_table_entry *ptr, *next; + int i; + + for (i = 0; i < table->num_bins; i++) { + ptr = table->bins[i]; + while (ptr != 0) { + next = ptr->next; + free(ptr); + ptr = next; + } + } + free(table->bins); + free(table); +} + +#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \ +((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (key), (ptr)->key))) + +#ifdef HASH_LOG +#define COLLISION collision++ +#else +#define COLLISION +#endif +/* Set bin_pos and leave ptr on the matching entry or 0. NOTE: the macro reads a variable named key from the scope it is expanded in. */ +#define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\ + bin_pos = hash_val%(table)->num_bins;\ + ptr = (table)->bins[bin_pos];\ + if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) \ + {\ + COLLISION;\ + while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\ + ptr = ptr->next;\ + }\ + ptr = ptr->next;\ + }\ +} while (0) +/* Return 1 and store the record through value (when non-NULL) if key is present, else return 0. */ +int st_lookup(table, key, value) +st_table *table; +register st_data_t key; +st_data_t *value; +{ + unsigned int hash_val, bin_pos; + register st_table_entry *ptr; + + hash_val = do_hash(key, table); + FIND_ENTRY(table, ptr, hash_val, bin_pos); + + if (ptr == 0) { + return 0; + } else { + if (value != 0) + *value = ptr->record; + return 1; + } +} +/* Prepend a new entry to bin bin_pos, calling rehash() first when entries per bin exceed ST_DEFAULT_MAX_DENSITY. The macro carries no trailing semicolon: call sites supply their own. NOTE(review): the alloc() result is not checked here. */ +#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\ +do {\ + st_table_entry *entry;\ + if (table->num_entries/(table->num_bins) > ST_DEFAULT_MAX_DENSITY) \ + {\ + rehash(table);\ + bin_pos = hash_val % table->num_bins;\ + }\ + \ + entry = alloc(st_table_entry);\ + \ + entry->hash = hash_val;\ + entry->key = key;\ + entry->record = value;\ + entry->next = table->bins[bin_pos];\ + table->bins[bin_pos] = entry;\ + table->num_entries++;\ +} while (0) +/* Insert or overwrite: returns 0 when a new entry was added, 1 when an existing record was replaced. */ +int st_insert(table, key, value) +register st_table *table; +register st_data_t key; +st_data_t value; +{ + unsigned int hash_val, bin_pos; + register st_table_entry *ptr; + + hash_val = do_hash(key, table); + FIND_ENTRY(table, ptr, hash_val, bin_pos); + + if (ptr == 0) { + ADD_DIRECT(table, key, value,
hash_val, bin_pos); + return 0; + } else { + ptr->record = value; + return 1; + } +} +/* Add an entry without first looking for an existing equal key. */ +void st_add_direct(table, key, value) +st_table *table; +st_data_t key; +st_data_t value; +{ + unsigned int hash_val, bin_pos; + + hash_val = do_hash(key, table); + bin_pos = hash_val % table->num_bins; + ADD_DIRECT(table, key, value, hash_val, bin_pos); +} +/* Move every entry into a larger bin array sized by new_size(); the table is left untouched when no larger size exists or the allocation fails. */ +static void rehash(table) +register st_table *table; +{ + register st_table_entry *ptr, *next, **new_bins; + int i, old_num_bins = table->num_bins, new_num_bins; + unsigned int hash_val; + + new_num_bins = new_size(old_num_bins + 1); + if (!new_num_bins) + return; + + new_bins = + (st_table_entry **) Calloc(new_num_bins, sizeof(st_table_entry *)); + if (!new_bins) + return; /* keep the current bins on allocation failure */ + + for (i = 0; i < old_num_bins; i++) { + ptr = table->bins[i]; + while (ptr != 0) { + next = ptr->next; + hash_val = ptr->hash % new_num_bins; + ptr->next = new_bins[hash_val]; + new_bins[hash_val] = ptr; + ptr = next; + } + } + free(table->bins); + table->num_bins = new_num_bins; + table->bins = new_bins; +} +/* Deep-copy the table; returns 0 on allocation failure. Entries within a bin end up in reverse order because each copy is prepended. */ +st_table *st_copy(old_table) +st_table *old_table; +{ + st_table *new_table; + st_table_entry *ptr, *entry; + size_t i, num_bins = old_table->num_bins; + + new_table = alloc(st_table); + if (new_table == 0) { + return 0; + } + + *new_table = *old_table; + new_table->bins = (st_table_entry **) + Calloc(num_bins, sizeof(st_table_entry *)); + + if (new_table->bins == 0) { + free(new_table); + return 0; + } + + for (i = 0; i < num_bins; i++) { + new_table->bins[i] = 0; + ptr = old_table->bins[i]; + while (ptr != 0) { + entry = alloc(st_table_entry); + if (entry == 0) { + /* releases the entries copied so far as well as the bins and the table */ + st_free_table(new_table); + return 0; + } + *entry = *ptr; + entry->next = new_table->bins[i]; + new_table->bins[i] = entry; + ptr = ptr->next; + } + } + return new_table; +} +/* Unlink and free the entry equal to *key; on success *key and *value (when non-NULL) receive the stored key and record and 1 is returned, otherwise 0. */ +int st_delete(table, key, value) +register st_table *table; +register st_data_t *key; +st_data_t *value; +{ + unsigned int hash_val; + st_table_entry *tmp; + register st_table_entry *ptr; + + hash_val =
do_hash_bin(*key, table); + ptr = table->bins[hash_val]; + + if (ptr == 0) { + if (value != 0) + *value = 0; + return 0; + } + + if (EQUAL(table, *key, ptr->key)) { + table->bins[hash_val] = ptr->next; + table->num_entries--; + if (value != 0) + *value = ptr->record; + *key = ptr->key; + free(ptr); + return 1; + } + + for (; ptr->next != 0; ptr = ptr->next) { + if (EQUAL(table, ptr->next->key, *key)) { + tmp = ptr->next; + ptr->next = ptr->next->next; + table->num_entries--; + if (value != 0) + *value = tmp->record; + *key = tmp->key; + free(tmp); + return 1; + } + } + + return 0; +} +/* Like st_delete() but the entry stays linked: its key and record are overwritten with never and num_entries is decremented. st_cleanup_safe() frees such entries later. */ +int st_delete_safe(table, key, value, never) +register st_table *table; +register st_data_t *key; +st_data_t *value; +st_data_t never; +{ + unsigned int hash_val; + register st_table_entry *ptr; + + hash_val = do_hash_bin(*key, table); + ptr = table->bins[hash_val]; + + if (ptr == 0) { + if (value != 0) + *value = 0; + return 0; + } + + for (; ptr != 0; ptr = ptr->next) { + if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) { + table->num_entries--; + *key = ptr->key; + if (value != 0) + *value = ptr->record; + ptr->key = ptr->record = never; + return 1; + } + } + + return 0; +} +/* st_foreach() callback: request deletion of entries whose record equals never. */ +static int delete_never(st_data_t key, st_data_t value, st_data_t never) +{ + if (value == never) + return ST_DELETE; + return ST_CONTINUE; +} +/* Free the entries marked by st_delete_safe(); num_entries is restored afterwards because st_delete_safe() already decremented it for them. */ +void st_cleanup_safe(table, never) +st_table *table; +st_data_t never; +{ + int num_entries = table->num_entries; + + st_foreach(table, delete_never, never); + table->num_entries = num_entries; +} +/* Call func on every entry: ST_CONTINUE advances, ST_STOP returns immediately, ST_DELETE unlinks and frees the current entry. */ +void st_foreach(table, func, arg) +st_table *table; +int (*func) (st_data_t key, st_data_t val, st_data_t arg); +st_data_t arg; +{ + st_table_entry *ptr, *last, *tmp; + enum st_retval retval; + int i; + + for (i = 0; i < table->num_bins; i++) { + last = 0; + for (ptr = table->bins[i]; ptr != 0;) { + retval = (*func) (ptr->key, ptr->record, arg); + switch (retval) { + case ST_CONTINUE: + last = ptr; + ptr = ptr->next; + break; + case ST_STOP: + return; + case
ST_DELETE: + tmp = ptr; + if (last == 0) { + table->bins[i] = ptr->next; + } else { + last->next = ptr->next; + } + ptr = ptr->next; + free(tmp); + table->num_entries--; + } + } + } +} +/* String hash; the variant is selected at compile time by HASH_ELFHASH or HASH_PERL, otherwise the multiplier 997 is used. */ +static int strhash(string) +register const char *string; +{ + register int c; + +#ifdef HASH_ELFHASH + register unsigned int h = 0, g; + + while ((c = *string++) != '\0') { + h = (h << 4) + c; + if (g = h & 0xF0000000) + h ^= g >> 24; + h &= ~g; + } + return h; +#elif HASH_PERL + register int val = 0; + + while ((c = *string++) != '\0') { + val = val * 33 + c; + } + + return val + (val >> 5); +#else + register int val = 0; + + while ((c = *string++) != '\0') { + val = val * 997 + c; + } + + return val + (val >> 5); +#endif +} +/* Compare callback for pointer-valued keys: 0 when equal, non-zero otherwise. */ +static int numcmp(x, y) +void *x, *y; +{ + return (st_ptr_t) x != (st_ptr_t) y; +} + +static st_ptr_t numhash(n) +void *n; +{ + return (st_ptr_t) n; +} diff --git a/osmeventplugin/Makefile.am b/osmeventplugin/Makefile.am new file mode 100644 index 0000000..a314c9c --- /dev/null +++ b/osmeventplugin/Makefile.am @@ -0,0 +1,34 @@ + +AM_CPPFLAGS = -I$(srcdir)/../include \ + -I$(includedir)/infiniband + +lib_LTLIBRARIES = libosmeventplugin.la + +if DEBUG +DBGFLAGS = -ggdb -D_DEBUG_ +else +DBGFLAGS = -g +endif + +libosmeventplugin_la_CFLAGS = -Wall -Wwrite-strings $(DBGFLAGS) -D_XOPEN_SOURCE=600 -D_GNU_SOURCE=1 + +if HAVE_LD_VERSION_SCRIPT + libosmeventplugin_version_script = -Wl,--version-script=$(srcdir)/libosmeventplugin.map +else + libosmeventplugin_version_script = +endif + +osmeventplugin_api_version=$(shell grep LIBVERSION= $(srcdir)/libosmeventplugin.ver | sed 's/LIBVERSION=//') + +libosmeventplugin_la_SOURCES = src/osmeventplugin.c +libosmeventplugin_la_LDFLAGS = -version-info $(osmeventplugin_api_version) \ + -export-dynamic $(libosmeventplugin_version_script) +libosmeventplugin_la_LIBADD = -L../complib -losmcomp -L../libopensm -lopensm $(OSMV_LDADD) +libosmeventplugin_la_DEPENDENCIES = $(srcdir)/libosmeventplugin.map + +libosmeventpluginincludedir =
$(includedir)/infiniband/complib + +libosmeventplugininclude_HEADERS = + +# headers are distributed as part of the include dir +EXTRA_DIST = $(srcdir)/libosmeventplugin.map $(srcdir)/libosmeventplugin.ver diff --git a/osmeventplugin/libosmeventplugin.map b/osmeventplugin/libosmeventplugin.map new file mode 100644 index 0000000..346d1f3 --- /dev/null +++ b/osmeventplugin/libosmeventplugin.map @@ -0,0 +1,5 @@ +OSMPMDB_1.0 { + global: + osm_event_plugin; + local: *; +}; diff --git a/osmeventplugin/libosmeventplugin.ver b/osmeventplugin/libosmeventplugin.ver new file mode 100644 index 0000000..777bf15 --- /dev/null +++ b/osmeventplugin/libosmeventplugin.ver @@ -0,0 +1,9 @@ +# In this file we track the current API version +# of the vendor interface (and libraries) +# The version is built of the following +# three numbers: +# API_REV:RUNNING_REV:AGE +# API_REV - advance on any added API +# RUNNING_REV - advance on any change to the vendor files +# AGE - number of backward versions the API still supports +LIBVERSION=1:2:0 diff --git a/osmeventplugin/src/osmeventplugin.c b/osmeventplugin/src/osmeventplugin.c new file mode 100644 index 0000000..cba69ea --- /dev/null +++ b/osmeventplugin/src/osmeventplugin.c @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2013 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2007 The Regents of the University of California. + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** ========================================================================= + * This is a simple example plugin which logs some of the events the OSM + * generates to this interface. 
+ */ +#define SAMPLE_PLUGIN_OUTPUT_FILE "/tmp/osm_sample_event_plugin_output" +typedef struct _log_events { + FILE *log_file; + osm_log_t *osmlog; +} _log_events_t; + +/** ========================================================================= + */ +static void *construct(osm_opensm_t *osm) +{ + _log_events_t *log = malloc(sizeof(*log)); + if (!log) + return (NULL); + + log->log_file = fopen(SAMPLE_PLUGIN_OUTPUT_FILE, "a+"); + + if (!(log->log_file)) { + osm_log(&osm->log, OSM_LOG_ERROR, + "Sample Event Plugin: Failed to open output file \"%s\"\n", + SAMPLE_PLUGIN_OUTPUT_FILE); + free(log); + return (NULL); + } + + log->osmlog = &osm->log; + return ((void *)log); +} + +/** ========================================================================= + */ +static void destroy(void *_log) +{ + _log_events_t *log = (_log_events_t *) _log; + fclose(log->log_file); + free(log); +} + +/** ========================================================================= + */ +static void handle_port_counter(_log_events_t * log, osm_epi_pe_event_t * pc) +{ + if (pc->symbol_err_cnt > 0 + || pc->link_err_recover > 0 + || pc->link_downed > 0 + || pc->rcv_err > 0 + || pc->rcv_rem_phys_err > 0 + || pc->rcv_switch_relay_err > 0 + || pc->xmit_discards > 0 + || pc->xmit_constraint_err > 0 + || pc->rcv_constraint_err > 0 + || pc->link_integrity > 0 + || pc->buffer_overrun > 0 + || pc->vl15_dropped > 0 + || pc->xmit_wait > 0) { + fprintf(log->log_file, + "Port counter errors for node 0x%" PRIx64 + " (%s) port %d\n", pc->port_id.node_guid, + pc->port_id.node_name, pc->port_id.port_num); + } +} + +/** ========================================================================= + */ +static void +handle_port_counter_ext(_log_events_t * log, osm_epi_dc_event_t * epc) +{ + fprintf(log->log_file, + "Recieved Data counters for node 0x%" PRIx64 " (%s) port %d\n", + epc->port_id.node_guid, + epc->port_id.node_name, epc->port_id.port_num); +} + +/** 
========================================================================= + */ +static void handle_port_select(_log_events_t * log, osm_epi_ps_event_t * ps) +{ + if (ps->xmit_wait > 0) { + fprintf(log->log_file, + "Port select Xmit Wait counts for node 0x%" PRIx64 + " (%s) port %d\n", ps->port_id.node_guid, + ps->port_id.node_name, ps->port_id.port_num); + } +} + +/** ========================================================================= + */ +static void handle_trap_event(_log_events_t *log, ib_mad_notice_attr_t *p_ntc) +{ + if (ib_notice_is_generic(p_ntc)) { + fprintf(log->log_file, + "Generic trap type %d; event %d; from LID %u\n", + ib_notice_get_type(p_ntc), + cl_ntoh16(p_ntc->g_or_v.generic.trap_num), + cl_ntoh16(p_ntc->issuer_lid)); + } else { + fprintf(log->log_file, + "Vendor trap type %d; from LID %u\n", + ib_notice_get_type(p_ntc), + cl_ntoh16(p_ntc->issuer_lid)); + } + +} + +/** ========================================================================= + */ +static void handle_lft_change_event(_log_events_t *log, + osm_epi_lft_change_event_t *lft_change) +{ + fprintf(log->log_file, + "LFT changed for switch 0x%" PRIx64 " flags 0x%x LFTTop %u block %d\n", + cl_ntoh64(osm_node_get_node_guid(lft_change->p_sw->p_node)), + lft_change->flags, lft_change->lft_top, lft_change->block_num); +} + +/** ========================================================================= + */ +static void report(void *_log, osm_epi_event_id_t event_id, void *event_data) +{ + _log_events_t *log = (_log_events_t *) _log; + + switch (event_id) { + case OSM_EVENT_ID_PORT_ERRORS: + handle_port_counter(log, (osm_epi_pe_event_t *) event_data); + break; + case OSM_EVENT_ID_PORT_DATA_COUNTERS: + handle_port_counter_ext(log, (osm_epi_dc_event_t *) event_data); + break; + case OSM_EVENT_ID_PORT_SELECT: + handle_port_select(log, (osm_epi_ps_event_t *) event_data); + break; + case OSM_EVENT_ID_TRAP: + handle_trap_event(log, (ib_mad_notice_attr_t *) event_data); + break; + case 
OSM_EVENT_ID_SUBNET_UP: + fprintf(log->log_file, "Subnet up reported\n"); + break; + case OSM_EVENT_ID_HEAVY_SWEEP_START: + fprintf(log->log_file, "Heavy sweep started\n"); + break; + case OSM_EVENT_ID_HEAVY_SWEEP_DONE: + fprintf(log->log_file, "Heavy sweep completed\n"); + break; + case OSM_EVENT_ID_UCAST_ROUTING_DONE: + fprintf(log->log_file, "Unicast routing completed %d\n", + (osm_epi_ucast_routing_flags_t) event_data); + break; + case OSM_EVENT_ID_STATE_CHANGE: + fprintf(log->log_file, "SM state changed\n"); + break; + case OSM_EVENT_ID_SA_DB_DUMPED: + fprintf(log->log_file, "SA DB dump file updated\n"); + break; + case OSM_EVENT_ID_LFT_CHANGE: + handle_lft_change_event(log, (osm_epi_lft_change_event_t *) event_data); + break; + case OSM_EVENT_ID_MAX: + default: + osm_log(log->osmlog, OSM_LOG_ERROR, + "Unknown event (%d) reported to plugin\n", event_id); + } + fflush(log->log_file); +} + +/** ========================================================================= + * Define the object symbol for loading + */ + +#if OSM_EVENT_PLUGIN_INTERFACE_VER != 2 +#error OpenSM plugin interface version missmatch +#endif + +osm_event_plugin_t osm_event_plugin = { + OSM_VERSION, + construct, + destroy, + report +}; diff --git a/osmroutingplugin/Makefile.am b/osmroutingplugin/Makefile.am new file mode 100644 index 0000000..4cfceac --- /dev/null +++ b/osmroutingplugin/Makefile.am @@ -0,0 +1,34 @@ + +AM_CPPFLAGS = -I$(srcdir)/../include \ + -I$(includedir)/infiniband + +lib_LTLIBRARIES = libosmroutingplugin.la + +if DEBUG +DBGFLAGS = -ggdb -D_DEBUG_ +else +DBGFLAGS = -g +endif + +libosmroutingplugin_la_CFLAGS = -Wall -Wwrite-strings $(DBGFLAGS) -D_XOPEN_SOURCE=600 -D_GNU_SOURCE=1 + +if HAVE_LD_VERSION_SCRIPT + libosmroutingplugin_version_script = -Wl,--version-script=$(srcdir)/libosmroutingplugin.map +else + libosmroutingplugin_version_script = +endif + +osmroutingplugin_api_version=$(shell grep LIBVERSION= $(srcdir)/libosmroutingplugin.ver | sed 's/LIBVERSION=//') + 
+libosmroutingplugin_la_SOURCES = src/osmroutingplugin.c +libosmroutingplugin_la_LDFLAGS = -version-info $(osmroutingplugin_api_version) \ + -export-dynamic $(libosmroutingplugin_version_script) +libosmroutingplugin_la_LIBADD = -L../complib -losmcomp -L../libopensm -lopensm $(OSMV_LDADD) +libosmroutingplugin_la_DEPENDENCIES = $(srcdir)/libosmroutingplugin.map + +libosmroutingpluginincludedir = $(includedir)/infiniband/complib + +libosmroutingplugininclude_HEADERS = + +# headers are distributed as part of the include dir +EXTRA_DIST = $(srcdir)/libosmroutingplugin.map $(srcdir)/libosmroutingplugin.ver diff --git a/osmroutingplugin/libosmroutingplugin.map b/osmroutingplugin/libosmroutingplugin.map new file mode 100644 index 0000000..346d1f3 --- /dev/null +++ b/osmroutingplugin/libosmroutingplugin.map @@ -0,0 +1,5 @@ +OSMPMDB_1.0 { + global: + osm_event_plugin; + local: *; +}; diff --git a/osmroutingplugin/libosmroutingplugin.ver b/osmroutingplugin/libosmroutingplugin.ver new file mode 100644 index 0000000..f755ff6 --- /dev/null +++ b/osmroutingplugin/libosmroutingplugin.ver @@ -0,0 +1,9 @@ +# In this file we track the current API version +# of the vendor interface (and libraries) +# The version is built of the following +# three numbers: +# API_REV:RUNNING_REV:AGE +# API_REV - advance on any added API +# RUNNING_REV - advance on any change to the vendor files +# AGE - number of backward versions the API still supports +LIBVERSION=1:0:0 diff --git a/osmroutingplugin/src/osmroutingplugin.c b/osmroutingplugin/src/osmroutingplugin.c new file mode 100644 index 0000000..aa89975 --- /dev/null +++ b/osmroutingplugin/src/osmroutingplugin.c @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2019 Fabriscale Technologies AS. All rights reserved. + * Copyright (c) 2013 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2007 The Regents of the University of California.
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * This is a simple routing engine plugin + * which implements the routing engine API + */ +/* Plugin context: holds the osm_opensm_t pointer that every callback uses to reach the log. */ +struct plugin_t { + osm_opensm_t *osm; +}; + +/* + * Forward declarations + */ +static void *construct(osm_opensm_t *osm); + +static void destroy(void *context); + +static int plugin_build_lid_matrices( + IN void *context); + +static int plugin_ucast_build_fwd_tables( + IN void *context); + +static void plugin_ucast_dump_tables( + IN void *context); + +static void plugin_update_sl2vl( + void *context, + IN osm_physp_t *port, + IN uint8_t in_port_num, + IN uint8_t out_port_num, + IN OUT ib_slvl_table_t *t); + +static void plugin_update_vlarb( + void *context, + IN osm_physp_t *port, + IN uint8_t port_num, + IN OUT ib_vl_arb_table_t *block, + unsigned int block_length, + unsigned int block_num); + +static uint8_t plugin_path_sl( + IN void *context, + IN uint8_t path_sl_hint, + IN const ib_net16_t slid, + IN const ib_net16_t dlid); + +static ib_api_status_t plugin_mcast_build_stree( + IN void *context, + IN OUT osm_mgrp_box_t *mgb); + +static void plugin_destroy_routing_engine( + IN void *context); + +static int routing_engine_setup( + osm_routing_engine_t *engine, + osm_opensm_t *osm); + +/* + * Implement plugin functions + * + * construct() allocates the context, registers an engine named + * routing_engine_plugin with routing_engine_setup as its setup hook, and + * returns the context, or NULL when allocation or registration fails. + * NOTE(review): plugin_routing_engine_module lives on this stack frame; + * confirm osm_register_external_routing_engine() copies it rather than + * keeping the pointer. + */ +static void *construct(osm_opensm_t *osm) +{ + struct plugin_t *plugin; + cl_status_t status; + + plugin = (struct plugin_t *) calloc(1, sizeof(struct plugin_t)); + if (!plugin) + return NULL; + + plugin->osm = osm; + external_routing_engine_module_t plugin_routing_engine_module = { + "routing_engine_plugin", + routing_engine_setup, + plugin, + }; + + status = osm_register_external_routing_engine( + osm, &plugin_routing_engine_module, plugin); + if (status != CL_SUCCESS) { + destroy(plugin); + return NULL; + } + + OSM_LOG(&plugin->osm->log,
OSM_LOG_INFO, + "External routing engine '%s' has been registered with type '%d'\n", + plugin_routing_engine_module.name, + osm_routing_engine_type(plugin_routing_engine_module.name)); + + return ((void *)plugin); +} +/* Log and free the context; a NULL context is ignored. */ +static void destroy(void *context) +{ + struct plugin_t *plugin = (struct plugin_t *) context; + + if (plugin) { + OSM_LOG(&plugin->osm->log, OSM_LOG_INFO, + "Destroying plugin...\n"); + + free(plugin); + } +} + +/* + * Implement routing engine functions + * + * routing_engine_setup() installs the plugin callbacks on the engine and + * always returns 0. It is static, matching its forward declaration. + */ +static int routing_engine_setup( + osm_routing_engine_t *engine, + osm_opensm_t *osm) +{ + struct plugin_t *plugin = (struct plugin_t *) engine->context; + + OSM_LOG(&plugin->osm->log, OSM_LOG_INFO, + "Setting up the plugin as a new routing engine...\n"); + + engine->build_lid_matrices = plugin_build_lid_matrices; + engine->ucast_build_fwd_tables = plugin_ucast_build_fwd_tables; + engine->ucast_dump_tables = plugin_ucast_dump_tables; + engine->update_sl2vl = plugin_update_sl2vl; + engine->update_vlarb = plugin_update_vlarb; + engine->path_sl = plugin_path_sl; + engine->mcast_build_stree = plugin_mcast_build_stree; + engine->destroy = plugin_destroy_routing_engine; + + return 0; +} +/* Stub callback: logs a progress message and reports success. */ +static int plugin_build_lid_matrices( + IN void *context) +{ + struct plugin_t *plugin = (struct plugin_t *) context; + + OSM_LOG(&plugin->osm->log, OSM_LOG_INFO, + "Building LID matrices...\n"); + + return 0; +} + +static int plugin_ucast_build_fwd_tables( + IN void *context) +{ + struct plugin_t *plugin = (struct plugin_t *) context; + + OSM_LOG(&plugin->osm->log, OSM_LOG_INFO, + "Building Forwarding tables...\n"); + return 0; +} + +static void plugin_ucast_dump_tables( + IN void *context) +{ + struct plugin_t *plugin = (struct plugin_t *) context; + + OSM_LOG(&plugin->osm->log, OSM_LOG_INFO, + "Dumping Unicast forwarding tables...\n"); +} + +static void plugin_update_sl2vl( + void *context, + IN osm_physp_t *port, + IN uint8_t in_port_num, IN uint8_t out_port_num, + IN OUT ib_slvl_table_t *t) +{ + struct plugin_t
*plugin = (struct plugin_t *) context; + + OSM_LOG(&plugin->osm->log, OSM_LOG_INFO, + "Update Service Layer to Virtual Lanes mapping...\n"); +} + +static void plugin_update_vlarb( + void *context, + IN osm_physp_t *port, + IN uint8_t port_num, + IN OUT ib_vl_arb_table_t *block, + unsigned int block_length, + unsigned int block_num) +{ + struct plugin_t *plugin = (struct plugin_t *) context; + + OSM_LOG(&plugin->osm->log, OSM_LOG_INFO, + "Update Virtual Lane arbritration...\n"); +} +/* Stub callback: logs the request and always returns SL 0. NOTE(review): slid and dlid are ib_net16_t but are printed with %d unconverted - confirm whether cl_ntoh16() is wanted here. */ +static uint8_t plugin_path_sl( + IN void *context, + IN uint8_t path_sl_hint, + IN const ib_net16_t slid, + IN const ib_net16_t dlid) +{ + struct plugin_t *plugin = (struct plugin_t *) context; + + OSM_LOG(&plugin->osm->log, OSM_LOG_INFO, + "Computing Service Layer for the path LID %d -> LID %d with hint: %d...\n", + slid, dlid, path_sl_hint); + return 0; +} + +static ib_api_status_t plugin_mcast_build_stree( + IN void *context, + IN OUT osm_mgrp_box_t *mgb) +{ + struct plugin_t *plugin = (struct plugin_t *) context; + + OSM_LOG(&plugin->osm->log, OSM_LOG_INFO, + "Building spanning tree for MLID: %d\n", + mgb->mlid); + return IB_SUCCESS; +} +/* Engine destroy hook: only logs; the context itself is freed by destroy(). */ +static void plugin_destroy_routing_engine( + IN void *context) +{ + struct plugin_t *plugin = (struct plugin_t *) context; + + OSM_LOG(&plugin->osm->log, OSM_LOG_INFO, + "Destroying plugin routing engine\n"); +} + +/* + * Define the object symbol for loading + */ + +#if OSM_EVENT_PLUGIN_INTERFACE_VER != 2 +#error OpenSM plugin interface version mismatch +#endif + +osm_event_plugin_t osm_event_plugin = { + OSM_VERSION, + construct, + destroy +}; diff --git a/osmtest/Makefile.am b/osmtest/Makefile.am new file mode 100644 index 0000000..360d6ed --- /dev/null +++ b/osmtest/Makefile.am @@ -0,0 +1,23 @@ + +if DEBUG +DBGFLAGS = -ggdb -D_DEBUG_ +else +DBGFLAGS = -g +endif + +AM_CPPFLAGS = -I$(srcdir)/include $(OSMV_INCLUDES) + +sbin_PROGRAMS = osmtest +osmtest_SOURCES = main.c osmtest.c osmt_service.c osmt_slvl_vl_arb.c \ + osmt_multicast.c osmt_inform.c
+if OSMV_VAPI +osmtest_SOURCES += osmt_mtl_regular_qp.c +endif +osmtest_CFLAGS = -Wall -Wwrite-strings $(DBGFLAGS) +osmtest_LDADD = -L../complib -losmcomp -L../libopensm -lopensm -L../libvendor -losmvendor $(OSMV_LDADD) + +EXTRA_DIST = $(srcdir)/include/osmt_inform.h \ + $(srcdir)/include/osmtest_subnet.h \ + $(srcdir)/include/osmtest.h \ + $(srcdir)/include/osmt_mtl_regular_qp.h \ + $(srcdir)/include/osmtest_base.h diff --git a/osmtest/include/osmt_inform.h b/osmtest/include/osmt_inform.h new file mode 100644 index 0000000..bc796cc --- /dev/null +++ b/osmtest/include/osmt_inform.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __OSMT_INFORM__ +#define __OSMT_INFORM__ + +#ifdef OSM_VENDOR_INTF_MTL +#include +#include +#include "osmt_mtl_regular_qp.h" +#endif + +typedef struct _osmt_qp_ctx { +#ifdef OSM_VENDOR_INTF_MTL + osmt_mtl_mad_res_t qp_bind_hndl; +#endif + uint8_t *p_send_buf; + uint8_t *p_recv_buf; +#ifdef OSM_VENDOR_INTF_MTL + IB_MGT_mad_hndl_t ib_mgt_qp0_handle; +#endif +} osmt_qp_ctx_t; + +ib_api_status_t +osmt_bind_inform_qp(IN osmtest_t * const p_osmt, OUT osmt_qp_ctx_t * p_qp_ctx); + +void +osmt_unbind_inform_qp(IN osmtest_t * const p_osmt, IN osmt_qp_ctx_t * p_qp_ctx); + +ib_api_status_t +osmt_reg_unreg_inform_info(IN osmtest_t * p_osmt, + IN osmt_qp_ctx_t * p_qp_ctx, + IN ib_inform_info_t * p_inform_info, + IN uint8_t reg_flag); + +ib_api_status_t +osmt_trap_wait(IN osmtest_t * const p_osmt, IN osmt_qp_ctx_t * p_qp_ctx); + +ib_api_status_t +osmt_init_inform_info(IN osmtest_t * const p_osmt, OUT ib_inform_info_t * p_ii); + +ib_api_status_t +osmt_init_inform_info_by_trap(IN osmtest_t * const p_osmt, + IN ib_net16_t trap_num, + OUT ib_inform_info_t * p_ii); + +#endif /* __OSMT_INFORM__ */ diff --git a/osmtest/include/osmt_mtl_regular_qp.h b/osmtest/include/osmt_mtl_regular_qp.h new file mode 100644 index 0000000..3fd6e9d --- /dev/null +++ b/osmtest/include/osmt_mtl_regular_qp.h @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2001-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * mad.h - + * Header file for common special QP resources creation code. + * + * Creation date: + * + * Version: osmt_mtl_regular_qp.h,v 1.2 2003/03/20 16:05:10 eitan + * + * Authors: + * Elazar Raab + * + * Changes: + */ + +#ifndef H_MAD_H +#define H_MAD_H + +#include +#include +#include +#include + +#if defined(MAD_IN) || defined(MAD_OUT) +#error MACROS MAD_IN and MAD_OUT are in use, do not override +#endif +#define MAD_IN +#define MAD_OUT + +/* HCA Constants */ +#define HCA_ID "mt21108_pci0" +#define GRH_LEN 40 +#define KNOWN_QP1_QKEY 0x80010000 + +#define MAX_OUTS_SQ 2 /* Max. buffers posted for requests in SQ */ +#define MAX_OUTS_RQ 5 /* Max. 
buffers posted for responses in RQ */ + +#define MAX_POLL_CNT 300 +#define POLL_SLEEP 1 /* for usleep */ + +#define MAD_SIZE 256 /* MADs are always 256B */ +#define MAD_ATTR_OFFSET 16 +#define MAD_TID_OFFSET 8 + +/* Verbs SQP resources handles */ +typedef struct { + VAPI_hca_id_t hca_id; /*id of HCA */ + u_int8_t port_num; /* the port num to use */ + VAPI_hca_hndl_t hca_hndl; /*handle of HCA */ + VAPI_qp_hndl_t qp_hndl; /*handle of QP I use */ + VAPI_mr_hndl_t mr_hndl; /*handle of memory region */ + VAPI_cq_hndl_t rq_cq_hndl, sq_cq_hndl; /*handle of send & receive completion Queues */ + VAPI_pd_hndl_t pd_hndl; /*handle of Partition Domain */ + /* VAPI_ud_av_hndl_t av_hndl; */ + IB_lid_t slid; + /*LID*/ void *buf_ptr; /*mem buffer for outstanding pkts */ + MT_size_t buf_size; /*size of mem buffer for outstanding pkts */ + + u_int32_t max_outs_sq; /*max # of outstanding pkts in send queue */ + u_int32_t max_outs_rq; /*max # of outstanding pkts in receive queue */ + + IB_rkey_t l_key; /*my l_key for memory regions */ + VAPI_qkey_t qkey; /*my qkey */ + + EVAPI_compl_handler_hndl_t rq_cq_eventh, sq_cq_eventh; /* event handlers for polling */ + + bool is_sqp; /* relate to union below - my QP */ + union { + VAPI_special_qp_t sqp_type; + VAPI_qp_num_t qp_num; + } qp_id; + void *wait_q; +} osmt_mtl_mad_res_t; + +/* init an osmt_mtl_mad_res_t with all resources initialized (use functions below) */ +VAPI_ret_t osmt_mtl_init(osmt_mtl_mad_res_t * res /*pointer to res (resources) struct */ + ); +VAPI_ret_t osmt_mtl_init_opened_hca(osmt_mtl_mad_res_t * res /*pointer to res (resources) struct */ + ); + +/* Cleanup all resources of (which are valid) in res */ +VAPI_ret_t osmt_mtl_mad_cleanup(osmt_mtl_mad_res_t * res /*pointer to res (resources) struct */ + ); + +/* create CQs and QP as given in res->is_sqp (if TRUE, get special QP) */ +VAPI_ret_t osmt_mtl_get_qp_resources(osmt_mtl_mad_res_t * res /*pointer to res (resources) struct */ + ); + +/* move QP to RTS state */ +VAPI_ret_t 
osmt_mtl_mad_qp_init(osmt_mtl_mad_res_t * res /*pointer to res (resources) struct */ + ); + +/* create and register res->buf_ptr */ +VAPI_ret_t osmt_mtl_mad_create_mr(osmt_mtl_mad_res_t * res /*pointer to res (resources) struct */ + ); + +VAPI_ret_t osmt_mtl_create_av(osmt_mtl_mad_res_t * res, /* pointer to res (resources) struct */ + int16_t dlid, /*destination lid */ + VAPI_ud_av_hndl_t * avh_p /* address vector handle to update */ + ); + +/* Send MAD to given dest QP*/ +VAPI_ret_t osmt_mtl_mad_send(osmt_mtl_mad_res_t * res, /*pointer to res (resources) struct */ + VAPI_wr_id_t id, /*wqe ID */ + void *mad, /*mad buffer to send */ + VAPI_qp_num_t dest_qp, /*destination QP */ + IB_sl_t sl, /*Service Level */ + u_int32_t dest_qkey, /*Destination QP KEY */ + VAPI_ud_av_hndl_t avh /* address vector handle to use */ + ); + +/* post buffers to RQ. returns num of buffers actually posted */ +int osmt_mtl_mad_post_recv_bufs(osmt_mtl_mad_res_t * res, /*pointer to res (resources) struct */ + void *buf_array, /*array of receive buffers */ + u_int32_t num_o_bufs, /*number of receive buffers */ + u_int32_t size, /* size of expected receive packet - MAD */ + VAPI_wr_id_t start_id /* start id for receive buffers */ + ); + +/* Poll given CQ for completion max_poll times (POLL_SLEEP [usec] delays). result in wc_desc_p. */ +VAPI_ret_t osmt_mtl_mad_poll4cqe(VAPI_hca_hndl_t hca, /*handle for HCA */ + VAPI_cq_hndl_t cq, /*handle for Completion Queue - Rcv/Send */ + VAPI_wc_desc_t * wc_desc_p, /*handle of cqe */ + u_int32_t max_poll, /*number of polling iterations */ + u_int32_t poll_sleep, /*timeout for each polling */ + VAPI_ud_av_hndl_t * avh_p /* address vector handle to cleanup */ + ); + +#endif diff --git a/osmtest/include/osmtest.h b/osmtest/include/osmtest.h new file mode 100644 index 0000000..f66d3a8 --- /dev/null +++ b/osmtest/include/osmtest.h @@ -0,0 +1,514 @@ +/* + * Copyright (c) 2006 Voltaire, Inc. All rights reserved. 
+ * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osmtest_t. + * This object represents the OSMTest Test object. 
+ * + */ + +#ifndef _OSMTEST_H_ +#define _OSMTEST_H_ + +#include +#include +#include +#include +#include +#include +#include "osmtest_base.h" +#include "osmtest_subnet.h" + +enum OSMT_FLOWS { + OSMT_FLOW_ALL = 0, + OSMT_FLOW_CREATE_INVENTORY, + OSMT_FLOW_VALIDATE_INVENTORY, + OSMT_FLOW_SERVICE_REGISTRATION, + OSMT_FLOW_EVENT_FORWARDING, + OSMT_FLOW_STRESS_SA, + OSMT_FLOW_MULTICAST, + OSMT_FLOW_QOS, + OSMT_FLOW_TRAP, +}; + +/****s* OpenSM: Subnet/osmtest_opt_t + * NAME + * osmtest_opt_t + * + * DESCRIPTION + * Subnet options structure. This structure contains the various + * site specific configuration parameters for osmtest. + * + * SYNOPSIS + */ +typedef struct _osmtest_opt { + uint32_t transaction_timeout; + boolean_t force_log_flush; + boolean_t create; + boolean_t with_grh; + uint32_t retry_count; + uint32_t stress; + uint32_t mmode; + char file_name[OSMTEST_FILE_PATH_MAX]; + uint8_t flow; + uint8_t wait_time; + char *log_file; + boolean_t ignore_path_records; + boolean_t full_world_path_recs; +} osmtest_opt_t; + +/* + * FIELDS + * + * SEE ALSO + *********/ + +/****h* OSMTest/OSMTest + * NAME + * OSMTest + * + * DESCRIPTION + * The OSMTest object tests an SM/SA for conformance to a known + * set of data about an Infiniband subnet. + * + * AUTHOR + * Steve King, Intel + * + *********/ + +/****s* OSMTest/osmtest_t + * NAME + * osmtest_t + * + * DESCRIPTION + * OSMTest structure. + * + * This object should be treated as opaque and should + * be manipulated only through the provided functions. + * + * SYNOPSIS + */ +typedef struct _osmtest { + osm_log_t log; + struct _osm_vendor *p_vendor; + osm_bind_handle_t h_bind; + osm_mad_pool_t mad_pool; + + osmtest_opt_t opt; + ib_port_attr_t local_port; + ib_gid_t local_port_gid; + ib_gid_t sm_port_gid; + subnet_t exp_subn; + cl_qpool_t node_pool; + cl_qpool_t port_pool; + cl_qpool_t link_pool; + + uint16_t max_lid; +} osmtest_t; + +/* + * FIELDS + * log + * Log facility used by all OSMTest components. 
+ * + * p_vendor + * Pointer to the vendor transport layer. + * + * h_bind + * The bind handle obtained by osm_vendor_sa_api/osmv_bind_sa + * + * mad_pool + * The mad pool provided for the vendor layer to allocate mad wrappers in + * + * opt + * osmtest options structure + * + * local_port + * Port attributes for the port over which osmtest is running. + * + * exp_subn + * Subnet object representing the expected subnet + * + * node_pool + * Pool of objects for use in populating the subnet databases. + * + * port_pool + * Pool of objects for use in populating the subnet databases. + * + * link_pool + * Pool of objects for use in populating the subnet databases. + * + * SEE ALSO + *********/ + +/****s* OpenSM: Subnet/osmtest_req_context_t + * NAME + * osmtest_req_context_t + * + * DESCRIPTION + * Query context for ib_query callback function. + * + * SYNOPSIS + */ +typedef struct _osmtest_req_context { + osmtest_t *p_osmt; + osmv_query_res_t result; +} osmtest_req_context_t; + +typedef struct _osmtest_mgrp_t { + cl_map_item_t map_item; + ib_member_rec_t mcmember_rec; +} osmtest_mgrp_t; + +/* + * FIELDS + * + * SEE ALSO + *********/ + +/****f* OSMTest/osmtest_construct + * NAME + * osmtest_construct + * + * DESCRIPTION + * This function constructs an OSMTest object. + * + * SYNOPSIS + */ +void osmtest_construct(IN osmtest_t * const p_osmt); + +/* + * PARAMETERS + * p_osmt + * [in] Pointer to an OSMTest object to construct. + * + * RETURN VALUE + * This function does not return a value. + * + * NOTES + * Allows calling osmtest_init, osmtest_destroy. + * + * Calling osmtest_construct is a prerequisite to calling any other + * method except osmtest_init. + * + * SEE ALSO + * SM object, osmtest_init, osmtest_destroy + *********/ + +/****f* OSMTest/osmtest_destroy + * NAME + * osmtest_destroy + * + * DESCRIPTION + * The osmtest_destroy function destroys an osmtest object, releasing + * all resources. 
+ * + * SYNOPSIS + */ +void osmtest_destroy(IN osmtest_t * const p_osmt); + +/* + * PARAMETERS + * p_osmt + * [in] Pointer to a OSMTest object to destroy. + * + * RETURN VALUE + * This function does not return a value. + * + * NOTES + * Performs any necessary cleanup of the specified OSMTest object. + * Further operations should not be attempted on the destroyed object. + * This function should only be called after a call to osmtest_construct or + * osmtest_init. + * + * SEE ALSO + * SM object, osmtest_construct, osmtest_init + *********/ + +/****f* OSMTest/osmtest_init + * NAME + * osmtest_init + * + * DESCRIPTION + * The osmtest_init function initializes a OSMTest object for use. + * + * SYNOPSIS + */ +ib_api_status_t osmtest_init(IN osmtest_t * const p_osmt, + IN const osmtest_opt_t * const p_opt, + IN const osm_log_level_t log_flags); + +/* + * PARAMETERS + * p_osmt + * [in] Pointer to an osmtest_t object to initialize. + * + * p_opt + * [in] Pointer to the options structure. + * + * log_flags + * [in] Log level flags to set. + * + * RETURN VALUES + * IB_SUCCESS if the OSMTest object was initialized successfully. + * + * NOTES + * Allows calling other OSMTest methods. + * + * SEE ALSO + * SM object, osmtest_construct, osmtest_destroy + *********/ + +/****f* OSMTest/osmtest_run + * NAME + * osmtest_run + * + * DESCRIPTION + * Runs the osmtest suite. + * + * SYNOPSIS + */ +ib_api_status_t osmtest_run(IN osmtest_t * const p_osmt); + +/* + * PARAMETERS + * p_osmt + * [in] Pointer to an osmtest_t object. + * + * guid + * [in] Port GUID over which to run the test suite. + * + * RETURN VALUES + * IB_SUCCESS + * + * NOTES + * + * SEE ALSO + *********/ + +/****f* OSMTest/osmtest_bind + * NAME + * osmtest_bind + * + * DESCRIPTION + * Binds osmtest to a local port. + * + * SYNOPSIS + */ +ib_api_status_t osmtest_bind(IN osmtest_t * p_osmt, + IN uint16_t max_lid, IN ib_net64_t guid OPTIONAL); + +/* + * PARAMETERS + * p_osmt + * [in] Pointer to an osmtest_t object. 
+ * + * max_lid + * [in] The maximal lid to query about (if RMPP is not supported) + * + * guid + * [in] Port GUID over which to run the test suite. + * If zero, the bind function will display a menu of local + * port guids and wait for user input. + * + * RETURN VALUES + * IB_SUCCESS + * + * NOTES + * + * SEE ALSO + *********/ + +/****f* OSMTest/osmtest_query_res_cb + * NAME + * osmtest_query_res_cb + * + * DESCRIPTION + * A Callback for the query to invoke on completion + * + * SYNOPSIS + */ +void osmtest_query_res_cb(IN osmv_query_res_t * p_rec); +/* + * PARAMETERS + * p_rec + * [in] Pointer to an osmv_query_res_t object holding the query result. + * + * RETURN VALUES + * NONE + * + * NOTES + * + * SEE ALSO + *********/ + +/****f* OSMTest/ib_get_mad_status_str + * NAME + * ib_get_mad_status_str + * + * DESCRIPTION + * return the string representing the given mad status + * + * SYNOPSIS + */ +const char *ib_get_mad_status_str(IN const ib_mad_t * const p_mad); +/* + * PARAMETERS + * p_mad + * [in] Pointer to the mad payload + * + * RETURN VALUES + * Pointer to a string representing the status of the given mad + * + * NOTES + * + * SEE ALSO + *********/ + +/****f* OSMTest/osmt_run_service_records_flow + * NAME + * osmt_run_service_records_flow + * + * DESCRIPTION + * Run the service record testing flow. + * + * SYNOPSIS + */ +ib_api_status_t osmt_run_service_records_flow(IN osmtest_t * const p_osmt); +/* + * PARAMETERS + * p_osmt + * [in] Pointer to the osmtest obj + * + * RETURN VALUES + * IB_SUCCESS if PASS + * + * NOTES + * + * SEE ALSO + *********/ + +ib_api_status_t osmt_run_inform_info_flow(IN osmtest_t * const p_osmt); + +/****f* OSMTest/osmt_run_slvl_and_vlarb_records_flow + * NAME + * osmt_run_slvl_and_vlarb_records_flow + * + * DESCRIPTION + * Run the sl2vl and vlarb tables testing flow. 
+ * + * SYNOPSIS + */ +ib_api_status_t +osmt_run_slvl_and_vlarb_records_flow(IN osmtest_t * const p_osmt); +/* + * PARAMETERS + * p_osmt + * [in] Pointer to the osmtest obj + * + * RETURN VALUES + * IB_SUCCESS if PASS + * + * NOTES + * + * SEE ALSO + *********/ + +/****f* OSMTest/osmt_run_mcast_flow + * NAME + * osmt_run_mcast_flow + * + * DESCRIPTION + * Run the multicast test flow + * + * SYNOPSIS + */ +ib_api_status_t osmt_run_mcast_flow(IN osmtest_t * const p_osmt); +/* + * PARAMETERS + * p_osmt + * [in] Pointer to the osmtest obj + * + * RETURN VALUES + * IB_SUCCESS if PASS + * + * NOTES + * + * SEE ALSO + *********/ + +/****f* OSMTest/osmt_run_trap64_65_flow + * NAME + * osmt_run_trap64_65_flow + * + * DESCRIPTION + * Run the trap 64/65 test flow. This test is run with + * an outside tool. + * + * SYNOPSIS + */ +ib_api_status_t osmt_run_trap64_65_flow(IN osmtest_t * const p_osmt); +/* + * PARAMETERS + * p_osmt + * [in] Pointer to the osmtest obj + * + * RETURN VALUES + * IB_SUCCESS if PASS + * + * NOTES + * + * SEE ALSO + *********/ + +ib_api_status_t +osmtest_get_all_recs(IN osmtest_t * const p_osmt, + IN ib_net16_t const attr_id, + IN size_t const attr_size, + IN OUT osmtest_req_context_t * const p_context); + +ib_api_status_t +osmtest_get_local_port_lmc(IN osmtest_t * const p_osmt, + IN ib_net16_t lid, OUT uint8_t * const p_lmc); + +/* + * A few auxiliary macros for logging + */ + +#define EXPECTING_ERRORS_START "[[ ===== Expecting Errors - START ===== " +#define EXPECTING_ERRORS_END " ===== Expecting Errors - END ===== ]]" + +#endif /* _OSMTEST_H_ */ diff --git a/osmtest/include/osmtest_base.h b/osmtest/include/osmtest_base.h new file mode 100644 index 0000000..6c980a3 --- /dev/null +++ b/osmtest/include/osmtest_base.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osmtest_t. + * This object represents the OSMTest Test object. 
+ * + */ +#ifndef _OSMTEST_BASE_H_ +#define _OSMTEST_BASE_H_ + +#ifndef __WIN__ +#include +#else +#include +#endif + +#define OSMTEST_MAX_LINE_LEN 120 +#define OSMTEST_FILE_PATH_MAX PATH_MAX + +#define STRESS_SMALL_RMPP_THR 100000 +/* + Take long times when querying big clusters (over 40 nodes), an average of : 0.25 sec for query + each query receives 1000 records +*/ +#define STRESS_LARGE_RMPP_THR 4000 +#define STRESS_LARGE_PR_RMPP_THR 20000 +#define STRESS_GET_PR 100000 + +extern const char *const p_file; + +#endif /* _OSMTEST_BASE_H_ */ diff --git a/osmtest/include/osmtest_subnet.h b/osmtest/include/osmtest_subnet.h new file mode 100644 index 0000000..09063dd --- /dev/null +++ b/osmtest/include/osmtest_subnet.h @@ -0,0 +1,326 @@ +/* + * Copyright (c) 2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osmtest_t. + * This object represents the OSMTest Test object. + * + */ + +#ifndef _OSMTEST_SUBNET_H_ +#define _OSMTEST_SUBNET_H_ + +#include +#include +#include +#include +#include +#include + +/****s* Subnet Database/generic_t +* NAME +* generic_t +* +* DESCRIPTION +* Subnet database object for fields common to all record types. +* All other database types must be castable to this type. +* +* SYNOPSIS +*/ +typedef struct _generic { + cl_map_item_t map_item; /* must be first element! */ + uint32_t count; /* must be second element! */ +} generic_t; + +/* +* FIELDS +* +* SEE ALSO +*********/ + +/****s* Subnet Database/node_t +* NAME +* node_t +* +* DESCRIPTION +* Subnet database object for nodes. +* Must be castable to generic_t. +* +* SYNOPSIS +*/ +typedef struct _node { + cl_map_item_t map_item; /* must be first element! */ + uint32_t count; /* must be second element! */ + ib_node_record_t rec; + ib_node_record_t comp; +} node_t; + +/* +* FIELDS +* map_item +* Provides linkage for the qmap container. +* +* rec +* NodeRecord for this node as read from the database file. +* +* comp +* NodeRecord indicating which fields should be compared against rec. +* Bits set in the comp NodeRecord indicate that bit in the rec structure +* should be compared against real-time data from the SA. +* +* count +* Utility counter used by the validation logic. 
Typically used +* to indicate the number of times a matching node was received from +* the SA. +* +* SEE ALSO +*********/ + +static inline node_t *node_new(void) +{ + node_t *p_obj; + + p_obj = malloc(sizeof(*p_obj)); + if (p_obj) + memset(p_obj, 0, sizeof(*p_obj)); + return (p_obj); +} + +static inline void node_delete(IN node_t * p_obj) +{ + free(p_obj); +} + +/****s* Subnet Database/port_t +* NAME +* port_t +* +* DESCRIPTION +* Subnet database object for ports. +* Must be castable to generic_t. +* +* SYNOPSIS +*/ +typedef struct _port { + cl_map_item_t map_item; /* must be first element! */ + uint32_t count; /* must be second element! */ + /* Since there is no unique identifier for all ports we + must be able to have such a key by the lid and port num */ + uint64_t port_id; + ib_portinfo_record_t rec; + ib_portinfo_record_t comp; +} port_t; + +/* +* FIELDS +* +* map_item +* Provides linkage for the qmap container. +* +* rec +* PortInfoRecord for this port as read from the database file. +* +* comp +* PortInfoRecord indicating which fields should be compared against rec. +* Bits set in the comp PortInfoRecord indicate that bit in the rec structure +* should be compared against real-time data from the SA. +* +* count +* Utility counter used by the validation logic. Typically used +* to indicate the number of times a matching port was received from +* the SA. 
+* +* SEE ALSO +*********/ + +static inline port_t *port_new(void) +{ + port_t *p_obj; + + p_obj = malloc(sizeof(*p_obj)); + if (p_obj) + memset(p_obj, 0, sizeof(*p_obj)); + return (p_obj); +} + +static inline void port_delete(IN port_t * p_obj) +{ + free(p_obj); +} + +static inline uint64_t +port_gen_id(IN ib_net16_t const lid, IN uint8_t const port_num) +{ + return (lid << 8 | port_num); +} + +static inline void +port_ext_id(IN uint64_t id, IN ib_net16_t * p_lid, IN uint8_t * p_port_num) +{ + CL_ASSERT((id & 0xFF) < 0x100); + *p_port_num = (uint8_t) (id & 0xFF); + CL_ASSERT(((id >> 8) & 0xFFFF) < 0x10000); + *p_lid = (uint16_t) ((id >> 8) & 0xFFFF); +} + +static inline void +port_set_id(IN port_t * p_obj, + IN ib_net16_t const lid, IN uint8_t const port_num) +{ + p_obj->port_id = port_gen_id(lid, port_num); +} + +static inline void +port_get_id(IN port_t * p_obj, IN ib_net16_t * p_lid, IN uint8_t * p_port_num) +{ + port_ext_id(p_obj->port_id, p_lid, p_port_num); +} + +/****s* Subnet Database/path_t +* NAME +* path_t +* +* DESCRIPTION +* Subnet database object for paths. +* Must be castable to generic_t. +* +* SYNOPSIS +*/ +typedef struct _path { + cl_map_item_t map_item; /* must be first element! */ + uint32_t count; /* must be second element! */ + ib_path_rec_t rec; + ib_path_rec_t comp; +} path_t; + +/* +* FIELDS +* map_item +* Provides linkage for the qmap container. +* +* rec +* PathRecord for this path as read from the database file. +* +* comp +* PathRecord indicating which fields should be compared against rec. +* Bits set in the comp PathRecord indicate that bit in the rec structure +* should be compared against real-time data from the SA. +* +* count +* Utility counter used by the validation logic. Typically used +* to indicate the number of times a matching path was received from +* the SA. 
+* +* SEE ALSO +*********/ + +static inline path_t *path_new(void) +{ + path_t *p_obj; + + p_obj = malloc(sizeof(*p_obj)); + if (p_obj) + memset(p_obj, 0, sizeof(*p_obj)); + return (p_obj); +} + +static inline void path_delete(IN path_t * p_obj) +{ + free(p_obj); +} + +/****s* Subnet Database/subnet_t +* NAME +* subnet_t +* +* DESCRIPTION +* Subnet database object. +* +* SYNOPSIS +*/ +typedef struct _subnet { + cl_qmap_t node_lid_tbl; + cl_qmap_t node_guid_tbl; + cl_qmap_t mgrp_mlid_tbl; + /* cl_qmap_t port_lid_tbl; */ + /* cl_qmap_t port_guid_tbl; */ + cl_qmap_t port_key_tbl; + cl_qmap_t link_tbl; + cl_qmap_t path_tbl; +} subnet_t; + +/* +* FIELDS +* +* SEE ALSO +*********/ + +/****f* Subnet Database/subnet_construct +* NAME +* subnet_construct +* +* DESCRIPTION +* This function constructs an subnet database object. +* This function cannot fail. +* +* SYNOPSIS +*/ +void subnet_construct(IN subnet_t * const p_subn); + +/* +* FIELDS +* +* SEE ALSO +*********/ + +/****f* Subnet Database/subnet_init +* NAME +* subnet_init +* +* DESCRIPTION +* This function initializes an subnet database object. +* +* SYNOPSIS +*/ +cl_status_t subnet_init(IN subnet_t * const p_subn); + +/* +* FIELDS +* +* SEE ALSO +*********/ + +#endif diff --git a/osmtest/main.c b/osmtest/main.c new file mode 100644 index 0000000..c9f53a2 --- /dev/null +++ b/osmtest/main.c @@ -0,0 +1,632 @@ +/* + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 2009 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Command line interface for osmtest. + * + */ + +#include +#include +#include +#include +#include "osmtest.h" + +/******************************************************************** + D E F I N E G L O B A L V A R I A B L E S +*********************************************************************/ + +/* + This is the global osmtest object. + One osmtest object is required per subnet. + Future versions could support multiple subnets by + instantiating more than one osmtest object. 
+*/ +#define MAX_LOCAL_IBPORTS 64 +#define OSMT_DEFAULT_RETRY_COUNT 3 +#define OSMT_DEFAULT_TRANS_TIMEOUT_MILLISEC 1000 +#define OSMT_DEFAULT_TRAP_WAIT_TIMEOUT_SEC 10 +#define INVALID_GUID (0xFFFFFFFFFFFFFFFFULL) + +static boolean_t osmt_is_debug(void) +{ +#if defined( _DEBUG_ ) + return TRUE; +#else + return FALSE; +#endif /* defined( _DEBUG_ ) */ +} + +static void show_usage(void) +{ + printf + ("\n------- osmtest - Usage and options ----------------------\n"); + printf("Usage: osmtest [options]\n"); + printf("Options:\n"); + printf("-f \n" + "--flow \n" + " This option directs osmtest to run a specific flow:\n" + " FLOW DESCRIPTION\n" + " c = create an inventory file with all nodes, ports and paths\n" + " a = run all validation tests (expecting an input inventory)\n" + " v = only validate the given inventory file\n" + " s = run service registration, deregistration, and lease test\n" + " e = run event forwarding test\n" + " f = flood the SA with queries according to the stress mode\n" + " m = multicast flow\n" + " q = QoS info: dump VLArb and SLtoVL tables\n" + " t = run trap 64/65 flow (this flow requires running of external tool)\n" + " (default is all flows except QoS)\n\n"); + + printf("-w \n" + "--wait \n" + " This option specifies the wait time for trap 64/65 in seconds\n" + " It is used only when running -f t - the trap 64/65 flow\n" + " (default to 10 sec)\n\n"); + printf("-d \n" + "--debug \n" + " This option specifies a debug option\n" + " These options are not normally needed\n" + " The number following -d selects the debug\n" + " option to enable as follows:\n" + " OPT Description\n" + " --- -----------------\n" + " -d0 - Unused.\n" + " -d1 - Do not scan/compare path records.\n" + " -d2 - Force log flushing after each log message.\n" + " -d4 - Use full world path record queries.\n" + " Without -d, no debug options are enabled\n\n"); + printf("-m \n" + "--max_lid \n" + " This option specifies the maximal LID number to be searched\n" + " for during 
inventory file build (default to 100)\n\n"); + printf("-g \n" + "--guid \n" + " This option specifies the local port GUID value\n" + " with which osmtest should bind. osmtest may be\n" + " bound to 1 port at a time\n\n"); + printf("-p \n" + "--port\n" + " This option displays a menu of possible local port GUID values\n" + " with which osmtest could bind\n\n"); + printf("-h\n" + "--help\n" " Display this usage info then exit\n\n"); + printf("-i \n" + "--inventory \n" + " This option specifies the name of the inventory file\n" + " Normally, osmtest expects to find an inventory file,\n" + " which osmtest uses to validate real-time information\n" + " received from the SA during testing\n" + " If -i is not specified, osmtest defaults to the file\n" + " 'osmtest.dat'\n" + " See -c option for related information\n\n"); + printf("-s\n" + "--stress\n" + " This option runs the specified stress test instead\n" + " of the normal test suite\n" + " Stress test options are as follows:\n" + " OPT Description\n" + " --- -----------------\n" + " -s1 - Single-MAD (RMPP) response SA queries\n" + " -s2 - Multi-MAD (RMPP) response SA queries\n" + " -s3 - Multi-MAD (RMPP) Path Record SA queries\n" + " -s4 - Single-MAD (non RMPP) get Path Record SA queries\n" + " Without -s, stress testing is not performed\n\n"); + printf("-G\n" + "--grh\n" + " sends all SA queries with GRH with exception of\n" + " GetMulti MultiPathRecord\n\n"); + printf("-M\n" + "--Multicast_Mode\n" + " This option specify length of Multicast test:\n" + " OPT Description\n" + " --- -----------------\n" + " -M1 - Short Multicast Flow (default) - single mode\n" + " -M2 - Short Multicast Flow - multiple mode\n" + " -M3 - Long Multicast Flow - single mode\n" + " -M4 - Long Multicast Flow - multiple mode\n" + " Single mode - Osmtest is tested alone, with no other\n" + " apps that interact with OpenSM MC\n" + " Multiple mode - Could be run with other apps using MC with\n" + " OpenSM." 
+ " Without -M, default flow testing is performed\n\n"); + + printf("-t \n" + " This option specifies the time in milliseconds\n" + " used for transaction timeouts\n" + " Specifying -t 0 disables timeouts\n" + " Without -t, osmtest defaults to a timeout value of\n" + " 1 second\n\n"); + printf("-l\n" + "--log_file\n" + " This option defines the log to be the given file\n" + " By default the log goes to stdout\n\n"); + printf("-v\n" + " This option increases the log verbosity level\n" + " The -v option may be specified multiple times\n" + " to further increase the verbosity level\n" + " See the -vf option for more information about.\n" + " log verbosity\n\n"); + printf("-V\n" + " This option sets the maximum verbosity level and\n" + " forces log flushing\n" + " The -V is equivalent to '-vf 0xFF -d 2'\n" + " See the -vf option for more information about.\n" + " log verbosity\n\n"); + printf("-vf \n" + " This option sets the log verbosity level\n" + " A flags field must follow the -vf option\n" + " A bit set/clear in the flags enables/disables a\n" + " specific log level as follows:\n" + " BIT LOG LEVEL ENABLED\n" + " ---- -----------------\n" + " 0x01 - ERROR (error messages)\n" + " 0x02 - INFO (basic messages, low volume)\n" + " 0x04 - VERBOSE (interesting stuff, moderate volume)\n" + " 0x08 - DEBUG (diagnostic, high volume)\n" + " 0x10 - FUNCS (function entry/exit, very high volume)\n" + " 0x20 - FRAMES (dumps all SMP and GMP frames)\n" + " 0x40 - currently unused\n" + " 0x80 - currently unused\n" + " Without -vf, osmtest defaults to ERROR + INFO (0x3)\n" + " Specifying -vf 0 disables all messages\n" + " Specifying -vf 0xFF enables all messages (see -V)\n" + " High verbosity levels may require increasing\n" + " the transaction timeout with the -t option\n\n"); +} + +static void print_all_guids(IN osmtest_t * p_osmt) +{ + ib_api_status_t status; + uint32_t num_ports = MAX_LOCAL_IBPORTS; + ib_port_attr_t attr_array[MAX_LOCAL_IBPORTS] = { {0} }; + uint32_t i; + + /* + 
Call the transport layer for a list of local port + GUID values. + */ + status = + osm_vendor_get_all_port_attr(p_osmt->p_vendor, attr_array, + &num_ports); + if (status != IB_SUCCESS) { + printf("\nError from osm_vendor_get_all_port_attr (%x)\n", + status); + return; + } + + printf("\nListing GUIDs:\n"); + for (i = 0; i < num_ports; i++) + printf("Port %i: 0x%" PRIx64 "\n", i, + cl_hton64(attr_array[i].port_guid)); +} + +static ib_net64_t get_port_guid(IN osmtest_t * p_osmt, uint64_t port_guid) +{ + ib_api_status_t status; + uint32_t num_ports = MAX_LOCAL_IBPORTS; + ib_port_attr_t attr_array[MAX_LOCAL_IBPORTS] = { {0} }; + uint32_t i; + + /* + Call the transport layer for a list of local port + GUID values. + */ +/* "local ports" is(?) phys, shouldn't this exclude port 0 then ? */ + status = + osm_vendor_get_all_port_attr(p_osmt->p_vendor, attr_array, + &num_ports); + if (status != IB_SUCCESS) { + printf("\nError from osm_vendor_get_all_port_attr (%x)\n", + status); + return (0); + } + + if (num_ports == 1) { + printf("using default guid 0x%" PRIx64 "\n", + cl_hton64(attr_array[0].port_guid)); + return (attr_array[0].port_guid); + } + + for (i = 0; i < num_ports; i++) { + if (attr_array[i].port_guid == port_guid || + (!port_guid && attr_array[i].link_state > IB_LINK_DOWN)) + return attr_array[i].port_guid; + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + static osmtest_t osm_test; + osmtest_opt_t opt = { 0 }; + ib_net64_t guid = 0; + uint16_t max_lid = 100; + ib_api_status_t status; + uint32_t log_flags = OSM_LOG_ERROR | OSM_LOG_INFO; + int32_t vendor_debug = 0; + char flow_name[64]; + int next_option; + const char *const short_option = "f:l:m:M:d:g:s:t:i:pcvGVh"; + + /* + * In the array below, the 2nd parameter specified the number + * of arguments as follows: + * 0: no arguments + * 1: argument + * 2: optional + */ + const struct option long_option[] = { + {"create", 0, NULL, 'c'}, + {"debug", 1, NULL, 'd'}, + {"flow", 1, NULL, 'f'}, + {"wait", 1, 
NULL, 'w'}, + {"inventory", 1, NULL, 'i'}, + {"max_lid", 1, NULL, 'm'}, + {"guid", 1, NULL, 'g'}, + {"port", 0, NULL, 'p'}, + {"help", 0, NULL, 'h'}, + {"stress", 1, NULL, 's'}, + {"grh", 0, NULL, 'G'}, + {"Multicast_Mode", 1, NULL, 'M'}, + {"timeout", 1, NULL, 't'}, + {"verbose", 0, NULL, 'v'}, + {"log_file", 1, NULL, 'l'}, + {"vf", 1, NULL, 'x'}, + {"V", 0, NULL, 'V'}, + + {NULL, 0, NULL, 0} /* Required at end of array */ + }; + + /* Make sure that the opensm, complib and osmtest were compiled using + same modes (debug/free) */ + if (osm_is_debug() != cl_is_debug() || osm_is_debug() != osmt_is_debug() + || osmt_is_debug() != cl_is_debug()) { + fprintf(stderr, + "-E- OpenSM, Complib and OsmTest were compiled using different modes\n"); + fprintf(stderr, + "-E- OpenSM debug:%d Complib debug:%d OsmTest debug:%d \n", + osm_is_debug(), cl_is_debug(), osmt_is_debug()); + exit(1); + } + + opt.transaction_timeout = OSMT_DEFAULT_TRANS_TIMEOUT_MILLISEC; + opt.wait_time = OSMT_DEFAULT_TRAP_WAIT_TIMEOUT_SEC; + opt.retry_count = OSMT_DEFAULT_RETRY_COUNT; + opt.force_log_flush = FALSE; + opt.stress = 0; + opt.log_file = NULL; + opt.create = FALSE; + opt.mmode = 1; + opt.ignore_path_records = FALSE; /* Do path Records too */ + opt.full_world_path_recs = FALSE; + opt.flow = OSMT_FLOW_ALL; /* run all validation tests */ + opt.with_grh = FALSE; + strcpy(flow_name, "All Validations"); + strcpy(opt.file_name, "osmtest.dat"); + + printf("\nCommand Line Arguments\n"); + do { + next_option = getopt_long_only(argc, argv, short_option, + long_option, NULL); + switch (next_option) { + case 'c': + /* + * Create the inventory file. + */ + opt.create = TRUE; + printf("\tCreating inventory file\n"); + break; + + case 'i': + /* + * Specifies inventory file name. 
+ */ + if (strlen(optarg) > OSMTEST_FILE_PATH_MAX) + printf + ("\nError: path name too long (ignored)\n"); + else + strcpy(opt.file_name, optarg); + + printf("\tFile = %s\n", opt.file_name); + break; + + case 'f': + /* + * Specifies Flow. The argument is copied into flow_name only when it fits that fixed-size local buffer; an over-long argument is left in optarg and rejected below as an unknown flow. + */ + if (strlen(optarg) >= sizeof(flow_name)) + printf + ("\nError: flow name too long (ignored)\n"); + else + strcpy(flow_name, optarg); + + if (!strcmp("c", optarg)) { + strcpy(flow_name, "Create Inventory"); + opt.flow = OSMT_FLOW_CREATE_INVENTORY; + } else if (!strcmp("v", optarg)) { + strcpy(flow_name, "Validate Inventory"); + opt.flow = OSMT_FLOW_VALIDATE_INVENTORY; + } else if (!strcmp("s", optarg)) { + strcpy(flow_name, "Services Registration"); + opt.flow = OSMT_FLOW_SERVICE_REGISTRATION; + } else if (!strcmp("e", optarg)) { + strcpy(flow_name, "Event Forwarding"); + opt.flow = OSMT_FLOW_EVENT_FORWARDING; + } else if (!strcmp("f", optarg)) { + strcpy(flow_name, "Stress SA"); + opt.flow = OSMT_FLOW_STRESS_SA; + } else if (!strcmp("m", optarg)) { + strcpy(flow_name, "Multicast"); + opt.flow = OSMT_FLOW_MULTICAST; + } else if (!strcmp("q", optarg)) { + strcpy(flow_name, "QoS: VLArb and SLtoVL"); + opt.flow = OSMT_FLOW_QOS; + } else if (!strcmp("t", optarg)) { + strcpy(flow_name, "Trap 64/65"); + opt.flow = OSMT_FLOW_TRAP; + } else if (!strcmp("a", optarg)) { + strcpy(flow_name, "All Validations"); + opt.flow = OSMT_FLOW_ALL; + } else { + printf("\nError: unknown flow %s\n", optarg); /* optarg, since flow_name may still hold the default */ + exit(2); + } + break; + + case 'w': + /* + * Specifies trap 64/65 wait time + */ + CL_ASSERT(strtol(optarg, NULL, 0) < 0x100); + opt.wait_time = (uint8_t) strtol(optarg, NULL, 0); + printf("\tTrap 64/65 wait time = %d\n", opt.wait_time); + break; + + case 'm': + /* + * Specifies the max LID to search for during exploration. + */ + max_lid = (uint16_t) atoi(optarg); + printf("\tMAX-LID %u\n", max_lid); + break; + + case 'g': + /* + * Specifies port guid with which to bind. 
+ */ + guid = cl_hton64(strtoull(optarg, NULL, 16)); + printf(" Guid <0x%" PRIx64 ">\n", cl_hton64(guid)); + break; + + case 'p': + /* + * Display current port guids + */ + guid = INVALID_GUID; + break; + + case 't': + /* + * Specifies transaction timeout. + */ + opt.transaction_timeout = strtol(optarg, NULL, 0); + printf("\tTransaction timeout = %d\n", + opt.transaction_timeout); + break; + + case 'l': + opt.log_file = optarg; + printf("\tLog File: %s\n", opt.log_file); + break; + + case 'v': + /* + * Increases log verbosity. + */ + log_flags = (log_flags << 1) | 1; + printf("\tVerbose option -v (log flags = 0x%X)\n", + log_flags); + break; + + case 'V': + /* + * Specifies maximum log verbosity. + */ + log_flags = 0xFFFFFFFF; + opt.force_log_flush = TRUE; + printf("\tEnabling maximum log verbosity\n"); + break; + + case 's': + /* + * Perform stress test. + */ + opt.stress = strtol(optarg, NULL, 0); + printf("\tStress test enabled: "); + switch (opt.stress) { + case 1: + printf("Small SA queries\n"); + break; + case 2: + printf("Large SA queries\n"); + break; + case 3: + printf("Large Path Record SA queries\n"); + break; + case 4: + printf("SA Get Path Record queries\n"); + break; + default: + printf("Unknown value %u (ignored)\n", + opt.stress); + opt.stress = 0; + break; + } + break; + + case 'M': + /* + * Perform multicast test. 
+ */ + opt.mmode = strtol(optarg, NULL, 0); + printf("\tMulticast test enabled: "); + switch (opt.mmode) { + case 1: + printf + ("Short MC Flow - single mode (default)\n"); + break; + case 2: + printf("Short MC Flow - multiple mode\n"); + break; + case 3: + printf("Long MC Flow - single mode\n"); + break; + case 4: + printf("Long MC Flow - multiple mode\n"); + break; + default: + printf("Unknown value %u (ignored)\n", + opt.mmode); /* report the rejected multicast mode, not the stress option */ + opt.mmode = 0; + break; + } + break; + + case 'd': + /* + * Debug Options + */ + printf("\tDebug Option: "); + switch (strtol(optarg, NULL, 0)) { + case 1: + printf("Ignore Path Records\n"); + opt.ignore_path_records = TRUE; + break; + case 2: + printf("Force Log Flush\n"); + opt.force_log_flush = TRUE; + break; + case 4: + printf("Use Full World Path Record Queries\n"); + opt.full_world_path_recs = TRUE; + break; + case 3: + /* Used to be memory tracking */ + default: + printf("Unknown value %ld (ignored)\n", + strtol(optarg, NULL, 0)); + break; + } + break; + + case 'h': + show_usage(); + return 0; + + case 'x': + log_flags = strtol(optarg, NULL, 0); + printf("\tVerbose option -vf (log flags = 0x%X)\n", + log_flags); + break; + case 'G': + opt.with_grh = TRUE; + break; + case -1: + printf("Done with args\n"); + break; + + default: /* something wrong */ + abort(); + } + + } + while (next_option != -1); + + printf("\tFlow = %s\n", flow_name); + + if (vendor_debug) + osm_vendor_set_debug(osm_test.p_vendor, vendor_debug); + + if (complib_init_v2() != CL_SUCCESS) { + printf("\ncomplib_init_v2 failed\n"); + status = IB_ERROR; + goto Exit; + } + + status = osmtest_init(&osm_test, &opt, (osm_log_level_t) log_flags); + if (status != IB_SUCCESS) { + printf("\nError from osmtest_init: %s\n", + ib_get_err_str(status)); + goto Exit; + } + if (cl_hton64(guid) == cl_hton64(INVALID_GUID)) { + print_all_guids(&osm_test); + complib_exit(); + return (status); + } + + /* + If the user didn't specify a GUID on the command line, + then get a port GUID 
value with which to bind. + */ + if (guid == 0 && !(guid = get_port_guid(&osm_test, guid))) { + printf("\nError: port guid 0x%" PRIx64 " not found\n", guid); + return (IB_ERROR); /* status still holds IB_SUCCESS from osmtest_init here, so 'goto Exit' would report success */ + } + + /* + * Guid may be zero going into this function if the user + * hasn't specified a binding port on the command line. + */ + status = osmtest_bind(&osm_test, max_lid, guid); + if (status != IB_SUCCESS) + exit(status); + + status = osmtest_run(&osm_test); + if (status != IB_SUCCESS) { + printf("OSMTEST: TEST \"%s\" FAIL\n", flow_name); + } else { + printf("OSMTEST: TEST \"%s\" PASS\n", flow_name); + } + osmtest_destroy(&osm_test); + + complib_exit(); + +Exit: + return (status); +} diff --git a/osmtest/osmt_inform.c b/osmtest/osmt_inform.c new file mode 100644 index 0000000..f06ec3f --- /dev/null +++ b/osmtest/osmt_inform.c @@ -0,0 +1,767 @@ +/* + * Copyright (c) 2006-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifdef OSM_VENDOR_INTF_MTL +/* + * Abstract: + * Implementation of InformInfo testing flow.. + * Top level is osmt_run_inform_info_flow: + * osmt_bind_inform_qp + * osmt_reg_unreg_inform_info + * osmt_send_trap_wait_for_forward + * + */ + +#include +#include +#include +#include +#include +#include +#include "osmtest.h" +#include "osmt_inform.h" + +/* + * Prepare an asynchronous QP (rcv) for sending inform info and + * handling the incoming reports. + * + */ +ib_api_status_t +osmt_bind_inform_qp(IN osmtest_t * const p_osmt, OUT osmt_qp_ctx_t * p_qp_ctx) +{ + ib_net64_t port_guid; + VAPI_hca_hndl_t hca_hndl; + VAPI_hca_id_t hca_id; + uint32_t port_num; + VAPI_ret_t vapi_ret; + IB_MGT_ret_t mgt_ret; + uint8_t hca_index; + osm_log_t *p_log = &p_osmt->log; + ib_api_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(p_log); + + port_guid = p_osmt->local_port.port_guid; + + OSM_LOG(p_log, OSM_LOG_DEBUG, "Binding to port 0x%" PRIx64 "\n", + cl_ntoh64(port_guid)); + + /* obtain the hca name and port num from the guid */ + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Finding CA and Port that owns port guid 0x%" PRIx64 "\n", + port_guid); + + mgt_ret = + osm_vendor_get_guid_ca_and_port(p_osmt->p_vendor, + port_guid, + &hca_hndl, + &hca_id[0], &hca_index, &port_num); + if (mgt_ret != IB_MGT_OK) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0109: " + "Unable to obtain CA and port (%d).\n"); + status = IB_ERROR; + goto Exit; + } +#define OSMT_MTL_REVERSE_QP1_WELL_KNOWN_Q_KEY 0x80010000 + + 
strncpy(p_qp_ctx->qp_bind_hndl.hca_id, hca_id, sizeof(hca_id)); + p_qp_ctx->qp_bind_hndl.hca_hndl = hca_hndl; + p_qp_ctx->qp_bind_hndl.port_num = port_num; + p_qp_ctx->qp_bind_hndl.max_outs_sq = 10; + p_qp_ctx->qp_bind_hndl.max_outs_rq = 10; + p_qp_ctx->qp_bind_hndl.qkey = OSMT_MTL_REVERSE_QP1_WELL_KNOWN_Q_KEY; + + vapi_ret = osmt_mtl_init_opened_hca(&p_qp_ctx->qp_bind_hndl); + if (vapi_ret != VAPI_OK) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0114: " + "Error initializing QP.\n"); + status = IB_ERROR; + goto Exit; + } + + /* we use the pre-allocated buffers for send and receive : + send from buf[0] + receive from buf[2] + */ + p_qp_ctx->p_send_buf = + (uint8_t *) p_qp_ctx->qp_bind_hndl.buf_ptr + GRH_LEN; + p_qp_ctx->p_recv_buf = + (uint8_t *) p_qp_ctx->qp_bind_hndl.buf_ptr + 2 * (GRH_LEN + + MAD_BLOCK_SIZE); + + /* Need to clear assigned memory of p_send_buf - before using it to send any data */ + memset(p_qp_ctx->p_send_buf, 0, MAD_BLOCK_SIZE); + + status = IB_SUCCESS; + OSM_LOG(p_log, OSM_LOG_DEBUG, "Initialized QP:0x%X in VAPI Mode\n", + p_qp_ctx->qp_bind_hndl.qp_id); + + OSM_LOG(p_log, OSM_LOG_DEBUG, "Binding to IB_MGT SMI\n"); + + /* we also need a QP0 handle for sending packets */ + mgt_ret = IB_MGT_get_handle(hca_id, port_num, IB_MGT_SMI, + &(p_qp_ctx->ib_mgt_qp0_handle)); + if (IB_MGT_OK != mgt_ret) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0115: " + "Error obtaining IB_MGT handle to SMI\n"); + status = IB_ERROR; + goto Exit; + } + +Exit: + OSM_LOG_EXIT(p_log); + return status; +} + +/* + * Close the QP + */ +void +osmt_unbind_inform_qp(IN osmtest_t * const p_osmt, IN osmt_qp_ctx_t * p_qp_ctx) +{ + osm_log_t *p_log = &p_osmt->log; + + OSM_LOG_ENTER(p_log); + + osmt_mtl_mad_cleanup(&p_qp_ctx->qp_bind_hndl); + + IB_MGT_release_handle(p_qp_ctx->ib_mgt_qp0_handle); + + OSM_LOG(p_log, OSM_LOG_DEBUG, "Unbind QP handles\n"); + OSM_LOG_EXIT(&p_osmt->log); +} + +/* + * Register/Unregister to receive the given InformInfo + * + * Uses the qp context to send the inform info 
mad. + * Wait for GetResp(InformInfoResp) + * + */ +ib_api_status_t +osmt_reg_unreg_inform_info(IN osmtest_t * p_osmt, + IN osmt_qp_ctx_t * p_qp_ctx, + IN ib_inform_info_t * p_inform_info, + IN uint8_t reg_flag) +{ + ib_sa_mad_t *p_sa_mad = (ib_sa_mad_t *) (p_qp_ctx->p_send_buf); + ib_inform_info_t *p_ii = ib_sa_mad_get_payload_ptr(p_sa_mad); /* SA Payload */ + VAPI_ret_t vapi_ret; + VAPI_wc_desc_t wc_desc; + VAPI_ud_av_hndl_t avh; + static VAPI_wr_id_t wrid = 16198; + osm_log_t *p_log = &p_osmt->log; + ib_api_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(&p_osmt->log); + + /* init the MAD */ + ib_mad_init_new((ib_mad_t *) p_sa_mad, + IB_MCLASS_SUBN_ADM, + (uint8_t) 2, + IB_MAD_METHOD_SET, cl_hton64(wrid), (ib_net16_t) 0, 0); + wrid++; + p_sa_mad->attr_id = IB_MAD_ATTR_INFORM_INFO; + + /* copy the reference inform info */ + memcpy(p_ii, p_inform_info, sizeof(ib_inform_info_t)); + + if (reg_flag) { + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Subscribing InformInfo: Traps from lid:0x%X to 0x%X, trap num :0x%X\n", + p_ii->lid_range_begin, p_ii->lid_range_end, + p_ii->g_or_v.generic.trap_num); + } else { + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "UnSubscribing InformInfo: Traps from lid:0x%X to 0x%X\n", + p_ii->lid_range_begin, p_ii->lid_range_end); + } + + /* set the subscribe bit */ + if (reg_flag) { + p_ii->subscribe = 1; + } else { + p_ii->subscribe = 0; + /* + * we need to set the QPN on the mad if we unsubscribe: + * o13-2.1.1 - QPN Field need to be set when unsubscribing. + */ + ib_inform_info_set_qpn(p_ii, + cl_hton32(p_qp_ctx->qp_bind_hndl.qp_id. 
+ qp_num)); + } + + osm_dump_inform_info(&p_osmt->log, p_ii, OSM_LOG_DEBUG); + + /* --------------------- PREP ------------------------- */ + if (osmt_mtl_mad_post_recv_bufs(&p_qp_ctx->qp_bind_hndl, p_qp_ctx->p_recv_buf, 1, /* but we need only one mad at a time */ + GRH_LEN + MAD_BLOCK_SIZE, wrid) != 1) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0120: " + "Error posting recv bufs\n"); + status = IB_ERROR; + goto Exit; + } + OSM_LOG(p_log, OSM_LOG_DEBUG, "Posted recv bufs\n"); + + vapi_ret = + osmt_mtl_create_av(&p_qp_ctx->qp_bind_hndl, + p_osmt->local_port.sm_lid, &avh); + if (vapi_ret != VAPI_OK) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0121: " + "Error Preparing AVH (%s)\n", + VAPI_strerror_sym(vapi_ret)); + status = IB_ERROR; + goto Exit; + } + OSM_LOG(p_log, OSM_LOG_DEBUG, "Prepared AVH\n"); + + if (osm_log_is_active(p_log, OSM_LOG_DEBUG)) { + osm_dump_sa_mad(p_log, (ib_sa_mad_t *) (p_qp_ctx->p_send_buf), + OSM_LOG_DEBUG); +#if 0 + for (i = 56; i < 253; i++) { + if (i % 8 == 0) { + printf("\n %d : ", i); + } + printf("0x%02X ", p_qp_ctx->p_send_buf[i]); + } +#endif + printf("\n"); + } + + /* --------------------- SEND ------------------------- */ + vapi_ret = osmt_mtl_mad_send(&p_qp_ctx->qp_bind_hndl, wrid, p_qp_ctx->p_send_buf, 1, /* SA is QP1 */ + 0, /* SL is 0 */ + OSMT_MTL_REVERSE_QP1_WELL_KNOWN_Q_KEY, + avh); + if (vapi_ret != VAPI_OK) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0122: " + "Error sending mad (%s)\n", + VAPI_strerror_sym(vapi_ret)); + status = IB_ERROR; + goto Exit; + } + + vapi_ret = osmt_mtl_mad_poll4cqe(p_qp_ctx->qp_bind_hndl.hca_hndl, + p_qp_ctx->qp_bind_hndl.sq_cq_hndl, + &wc_desc, 20, 10000, NULL); + if (vapi_ret != VAPI_OK) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0123: " + "Error getting send completion (%s)\n", + VAPI_strerror_sym(vapi_ret)); + status = IB_ERROR; + goto Exit; + } + + if (wc_desc.status != VAPI_SUCCESS) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0124: " + "Error on send completion (%s) (%d)\n", + VAPI_strerror_sym(wc_desc.status), 
wc_desc.status); + status = IB_ERROR; + goto Exit; + } + OSM_LOG(p_log, OSM_LOG_DEBUG, "Sent MAD\n"); + + /* --------------------- RECV ------------------------- */ + vapi_ret = osmt_mtl_mad_poll4cqe(p_qp_ctx->qp_bind_hndl.hca_hndl, + p_qp_ctx->qp_bind_hndl.rq_cq_hndl, + &wc_desc, 20, 10000, &avh); + if (vapi_ret != VAPI_SUCCESS) { + if (vapi_ret == VAPI_CQ_EMPTY) { + status = IB_TIMEOUT; + } else { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0125: " + "Error receiving mad (%s)\n", + VAPI_strerror_sym(vapi_ret)); + status = IB_ERROR; + } + goto Exit; + } + + /* check to see if successful - by examination of the subscribe bit */ + p_sa_mad = (ib_sa_mad_t *) (p_qp_ctx->p_recv_buf + GRH_LEN); + + if (p_sa_mad->status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "Remote error = %s\n", + ib_get_mad_status_str((ib_mad_t *) p_sa_mad)); + status = IB_REMOTE_ERROR; + goto Exit; + } + + if (p_sa_mad->method != IB_MAD_METHOD_GET_RESP) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Expected IB_MAD_METHOD_GET_RESP but got:(%X)\n", + p_sa_mad->method); + status = IB_REMOTE_ERROR; + goto Exit; + } + + if (p_sa_mad->attr_id != IB_MAD_ATTR_INFORM_INFO) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Expected IB_MAD_ATTR_INFORM_INFO but got:(%X)\n", + cl_ntoh16(p_sa_mad->attr_id)); + status = IB_REMOTE_ERROR; + goto Exit; + } + + p_ii = ib_sa_mad_get_payload_ptr(p_sa_mad); + if (!p_ii->subscribe) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0126: " + "Subscribe/Unsubscribe Failed\n"); + status = IB_REMOTE_ERROR; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return status; +} + +/* + * Send a trap (Subn LID Route) Trap(Notice) through the regular + * connection QP connection (targeted at QP0) + * + * Wait for the trap repress + */ +ib_api_status_t +osmt_send_trap_wait_for_forward(IN osmtest_t * const p_osmt, + IN osmt_qp_ctx_t * p_qp_ctx) +{ + ib_smp_t *p_smp = (ib_smp_t *) (p_qp_ctx->p_send_buf); + ib_mad_notice_attr_t *p_ntc = ib_smp_get_payload_ptr(p_smp); + ib_sa_mad_t *p_sa_mad; + 
IB_MGT_ret_t mgt_res; + VAPI_ret_t vapi_ret; + VAPI_wc_desc_t wc_desc; + VAPI_ud_av_hndl_t avh; + IB_ud_av_t av; + static VAPI_wr_id_t wrid = 2222; + osm_log_t *p_log = &p_osmt->log; + ib_api_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(p_log); + + OSM_LOG(p_log, OSM_LOG_INFO, + "Sending Traps to QP0 of SA LID:0x%X\n", + p_osmt->local_port.sm_lid); + + /* init the MAD */ + memset(p_smp, 0, sizeof(ib_smp_t)); + ib_mad_init_new((ib_mad_t *) p_smp, + IB_MCLASS_SUBN_LID, + (uint8_t) 2, + IB_MAD_METHOD_TRAP, cl_hton64(wrid), (ib_net16_t) 0, 0); + + wrid++; + p_smp->attr_id = IB_MAD_ATTR_NOTICE; + + /* prepare the notice */ + p_ntc->generic_type = 0x82; /* generic, type = 2 */ + ib_notice_set_prod_type_ho(p_ntc, 1); + p_ntc->g_or_v.generic.trap_num = cl_hton16(0x26); + p_ntc->issuer_lid = cl_hton16(2); + + /* --------------------- PREP ------------------------- */ + if (osmt_mtl_mad_post_recv_bufs(&p_qp_ctx->qp_bind_hndl, p_qp_ctx->p_recv_buf, 1, /* we need to receive both trap repress and report */ + GRH_LEN + MAD_BLOCK_SIZE, wrid) != 1) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0127: " + "Error posting recv bufs\n"); + status = IB_ERROR; + goto Exit; + } + OSM_LOG(p_log, OSM_LOG_DEBUG, "Posted recv bufs\n"); + + av.dlid = p_osmt->local_port.sm_lid; + av.grh_flag = FALSE; + + /* EZ: returned in HACK: use constants */ + av.static_rate = 0; /* p_mad_addr->static_rate; */ + av.src_path_bits = 1; /* p_mad_addr->path_bits; */ + av.sl = 0; /* p_mad_addr->addr_type.gsi.service_level; */ + + OSM_LOG(p_log, OSM_LOG_DEBUG, + "av.dlid 0x%X, av.static_rate %d, av.path_bits %d\n", + cl_ntoh16(av.dlid), av.static_rate, av.src_path_bits); + + /* send it */ + mgt_res = IB_MGT_send_mad(p_qp_ctx->ib_mgt_qp0_handle, p_smp, /* actual payload */ + &av, /* address vector */ + wrid, /* casting the mad wrapper pointer for err cb */ + p_osmt->opt.transaction_timeout); + if (mgt_res != IB_MGT_OK) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0128: " + "Error sending mad (%d)\n", mgt_res); + status = 
IB_ERROR; + goto Exit; + } + + vapi_ret = + osmt_mtl_create_av(&p_qp_ctx->qp_bind_hndl, + p_osmt->local_port.sm_lid, &avh); + if (vapi_ret != VAPI_OK) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0129: " + "Error Preparing AVH (%s)\n", + VAPI_strerror_sym(vapi_ret)); + status = IB_ERROR; + goto Exit; + } + OSM_LOG(p_log, OSM_LOG_DEBUG, "Prepared AVH\n"); + + OSM_LOG(p_log, OSM_LOG_DEBUG, "Trap MAD Sent\n"); + + /* --------------------- RECV ------------------------- */ + vapi_ret = osmt_mtl_mad_poll4cqe(p_qp_ctx->qp_bind_hndl.hca_hndl, + p_qp_ctx->qp_bind_hndl.rq_cq_hndl, + &wc_desc, 200, 10000, &avh); + if (vapi_ret != VAPI_SUCCESS) { + if (vapi_ret == VAPI_CQ_EMPTY) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0130: " + "Timeout receiving mad (%s)\n", + VAPI_strerror_sym(vapi_ret)); + status = IB_TIMEOUT; + } else { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0131: " + "Error receiving mad (%s)\n", + VAPI_strerror_sym(vapi_ret)); + status = IB_ERROR; + } + goto Exit; + } + + /* check to see if successful - by examination of the subscribe bit */ + p_sa_mad = (ib_sa_mad_t *) (p_qp_ctx->p_recv_buf + GRH_LEN); + + if (p_sa_mad->method == IB_MAD_METHOD_REPORT) { + if (p_sa_mad->attr_id == IB_MAD_ATTR_NOTICE) { + OSM_LOG(p_log, OSM_LOG_INFO, "Received the Report!\n"); + status = IB_SUCCESS; + } else { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 1020" + "Did not receive a Report(Notice) but attr:%d\n", + cl_ntoh16(p_sa_mad->attr_id)); + status = IB_ERROR; + } + } else { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 1020" + "Received an Unexpected Method:%d\n", p_smp->method); + status = IB_ERROR; + } + +Exit: + OSM_LOG_EXIT(p_log); + return status; +} + +/* + * Wait for a trap on QPn + * + */ +ib_api_status_t +osmt_trap_wait(IN osmtest_t * const p_osmt, IN osmt_qp_ctx_t * p_qp_ctx) +{ + ib_smp_t *p_smp = (ib_smp_t *) (p_qp_ctx->p_send_buf); + ib_sa_mad_t *p_sa_mad; + VAPI_ret_t vapi_ret; + VAPI_wc_desc_t wc_desc; + osm_log_t *p_log = &p_osmt->log; + ib_api_status_t status = IB_SUCCESS; + + 
OSM_LOG_ENTER(p_log); + + OSM_LOG(p_log, OSM_LOG_INFO, + "Waiting for Traps under QP:0x%X of SA LID:0x%X\n", + cl_ntoh16(p_osmt->local_port.sm_lid)); + + /* --------------------- RECV ------------------------- */ + vapi_ret = osmt_mtl_mad_poll4cqe(p_qp_ctx->qp_bind_hndl.hca_hndl, + p_qp_ctx->qp_bind_hndl.rq_cq_hndl, + &wc_desc, + // 200, + p_osmt->opt.wait_time * 100, + 10000, NULL); + if (vapi_ret != VAPI_SUCCESS) { + if (vapi_ret == VAPI_CQ_EMPTY) { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0130: " + "Timeout receiving mad (%s)\n", + VAPI_strerror_sym(vapi_ret)); + status = IB_TIMEOUT; + } else { + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0131: " + "Error receiving mad (%s)\n", + VAPI_strerror_sym(vapi_ret)); + status = IB_ERROR; + } + goto Exit; + } + + /* check to see if successful - by examination of the subscribe bit */ + p_sa_mad = (ib_sa_mad_t *) (p_qp_ctx->p_recv_buf + GRH_LEN); + + if (p_sa_mad->method == IB_MAD_METHOD_REPORT) { + if (p_sa_mad->attr_id == IB_MAD_ATTR_NOTICE) { + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Received the Report!\n"); + status = IB_SUCCESS; + } else { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 1020" + "Did not receive a Report(Notice) but attr:%d\n", + cl_ntoh16(p_sa_mad->attr_id)); + status = IB_ERROR; + } + } else { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 1020" + "Received an Unexpected Method:%d\n", p_smp->method); + status = IB_ERROR; + } + +Exit: + OSM_LOG_EXIT(p_log); + return status; +} + +/* + * Initialize an inform info attribute: + * Catch all traps in the lid range of the p_osmt + * + */ +ib_api_status_t +osmt_init_inform_info(IN osmtest_t * const p_osmt, OUT ib_inform_info_t * p_ii) +{ + + memset(p_ii, 0, sizeof(ib_inform_info_t)); + /* p_ii->lid_range_begin = cl_hton16(1); */ + p_ii->lid_range_begin = 0xFFFF; + p_ii->lid_range_end = cl_hton16(p_osmt->max_lid); + p_ii->is_generic = 1; /* have to choose */ + p_ii->trap_type = 0xFFFF; /* ALL */ + p_ii->g_or_v.generic.trap_num = 0xFFFF; /* ALL */ + 
p_ii->g_or_v.generic.node_type_lsb = 0xFFFF; /* ALL */ + p_ii->g_or_v.generic.node_type_msb = 0xFF; /* ALL */ + return IB_SUCCESS; +} +/* Same as osmt_init_inform_info, but catch only the given trap number (trap_num is in network order) */ +ib_api_status_t +osmt_init_inform_info_by_trap(IN osmtest_t * const p_osmt, + IN ib_net16_t trap_num, + OUT ib_inform_info_t * p_ii) +{ + + memset(p_ii, 0, sizeof(ib_inform_info_t)); + /* p_ii->lid_range_begin = cl_hton16(1); */ + p_ii->lid_range_begin = 0xFFFF; + p_ii->lid_range_end = cl_hton16(p_osmt->max_lid); + p_ii->is_generic = 1; /* have to choose */ + p_ii->trap_type = 0xFFFF; /* ALL */ + p_ii->g_or_v.generic.trap_num = trap_num; /* the requested trap only */ + p_ii->g_or_v.generic.node_type_lsb = 0xFFFF; /* ALL */ + p_ii->g_or_v.generic.node_type_msb = 0xFF; /* ALL */ + return IB_SUCCESS; +} + +/* + * Run a complete inform info test flow: + * - bind a QP for the test + * - try to unregister inform info (should fail with IB_REMOTE_ERROR) + * - register an inform info + * - send a trap and wait for the forwarded Report(Notice) + * - unregister the inform info again (cleanup, should succeed) + * - unbind the QP (done on every exit path) + * + */ +ib_api_status_t osmt_run_inform_info_flow(IN osmtest_t * const p_osmt) +{ + ib_inform_info_t inform_info; + ib_api_status_t status; + osmt_qp_ctx_t qp_ctx; + + OSM_LOG_ENTER(&p_osmt->log); + + /* bind the QP */ + status = osmt_bind_inform_qp(p_osmt, &qp_ctx); + if (status != IB_SUCCESS) { + goto Exit; + } + + /* init the inform info */ + osmt_init_inform_info(p_osmt, &inform_info); + + /* first try to unsubscribe */ + status = osmt_reg_unreg_inform_info(p_osmt, &qp_ctx, &inform_info, 0); + /* WAS IB_REMOTE_ERROR */ + if (status != IB_REMOTE_ERROR) { + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Error during UnSubscribe: (%s)\n", + ib_get_err_str(status)); + goto Exit; + } else { + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Expected Failure to UnSubscribe non existing InformInfo\n"); + status = IB_ERROR; + goto Exit; + } + } + + /* send the inform info registration */ + status = osmt_reg_unreg_inform_info(p_osmt, &qp_ctx, 
&inform_info, 1); + if (status != IB_SUCCESS) { + goto Exit; + } + + /* send a trap through QP0 and wait on QPN */ + status = osmt_send_trap_wait_for_forward(p_osmt, &qp_ctx); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Error during Send Trap and Wait For Report: (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + + /* try to unsubscribe for cleanup */ + status = osmt_reg_unreg_inform_info(p_osmt, &qp_ctx, &inform_info, 0); + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Error during UnSubscribe: (%s)\n", + ib_get_err_str(status)); + goto Exit; + } else { /* NOTE(review): status is IB_SUCCESS here, so the check below can never be true */ + if (status == IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Remote Error during UnSubscribe\n"); + status = IB_ERROR; + goto Exit; + } + } + +Exit: + osmt_unbind_inform_qp(p_osmt, &qp_ctx); + OSM_LOG_EXIT(&p_osmt->log); + return status; +} + +/* + * Run the trap 64/65 test flow: + * - bind a QP for the test + * - register an inform info for trap 64 and post a receive buffer + * - register an inform info for trap 65 and post a receive buffer + * - wait for a Report(Notice) to arrive on the QP; no trap is sent + * from here (a comment below mentions external script trap generation) + * - unregister the last (trap 65) inform info and unbind the QP + * + */ +ib_api_status_t osmt_run_trap64_65_flow(IN osmtest_t * const p_osmt) +{ + ib_inform_info_t inform_info; + ib_api_status_t status; + osmt_qp_ctx_t qp_ctx; + + OSM_LOG_ENTER(&p_osmt->log); + + /* bind the QP */ + status = osmt_bind_inform_qp(p_osmt, &qp_ctx); + if (status != IB_SUCCESS) { + goto Exit; + } + + /* init the inform info */ + osmt_init_inform_info_by_trap(p_osmt, cl_hton16(64), &inform_info); + + /* send the inform info registration */ + status = osmt_reg_unreg_inform_info(p_osmt, &qp_ctx, &inform_info, 1); + if (status != IB_SUCCESS) { + goto Exit; + } + + /*--------------------- PREP -------------------------*/ + if (osmt_mtl_mad_post_recv_bufs(&qp_ctx.qp_bind_hndl, qp_ctx.p_recv_buf, 1, /* we need to receive the report */ + GRH_LEN + MAD_BLOCK_SIZE, 1) != 1) { + OSM_LOG(&p_osmt->log, 
OSM_LOG_ERROR, "ERR 0127: " + "Error posting recv bufs for trap 64\n"); + status = IB_ERROR; + goto Exit; + } + + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, "Posted recv bufs for trap 64\n"); + + /* init the inform info */ + osmt_init_inform_info_by_trap(p_osmt, cl_hton16(65), &inform_info); + + /* send the inform info registration */ + status = osmt_reg_unreg_inform_info(p_osmt, &qp_ctx, &inform_info, 1); + if (status != IB_SUCCESS) { + goto Exit; + } + + /*--------------------- PREP -------------------------*/ + if (osmt_mtl_mad_post_recv_bufs(&qp_ctx.qp_bind_hndl, qp_ctx.p_recv_buf, 1, /* we need to receive the report */ + GRH_LEN + MAD_BLOCK_SIZE, 1) != 1) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0127: " + "Error posting recv bufs for trap 65\n"); + status = IB_ERROR; + goto Exit; + } + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, "Posted recv bufs for trap 65\n"); + + /* Sleep for x seconds in order to allow external script trap generation */ +#if 0 + sleep(p_osmt->opt.wait_time); +#endif + + /* wait for a trap on QPN */ + status = osmt_trap_wait(p_osmt, &qp_ctx); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Error during Send Trap and Wait For Report: (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + + /* try to unsubscribe for cleanup */ + status = osmt_reg_unreg_inform_info(p_osmt, &qp_ctx, &inform_info, 0); + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Error during UnSubscribe: (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + +Exit: + osmt_unbind_inform_qp(p_osmt, &qp_ctx); + OSM_LOG_EXIT(&p_osmt->log); + return status; +} + +#endif /* OSM_VENDOR_INTF_MTL */ diff --git a/osmtest/osmt_mtl_regular_qp.c b/osmtest/osmt_mtl_regular_qp.c new file mode 100644 index 0000000..72015bf --- /dev/null +++ b/osmtest/osmt_mtl_regular_qp.c @@ -0,0 +1,470 @@ +/* + * Copyright (c) 2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. 
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifdef OSM_VENDOR_INTF_MTL + +/* - Mellanox Confidential and Proprietary - + * + * Copyright (C) Jul. 2001, Mellanox Technologies Ltd. ALL RIGHTS RESERVED. + * + * Except as specifically permitted herein, no portion of the information, + * including but not limited to object code and source code, may be reproduced, + * modified, distributed, republished or otherwise exploited in any form or by + * any means for any purpose without the prior written permission of Mellanox + * Technologies Ltd. Use of software subject to the terms and conditions + * detailed in the file "LICENSE.txt". 
+ * + * End of legal section ...................................................... + * + * osmt_mtl_regular_qp.c - + * Provide Simple Interface for Sending and Receiving MADS through a regular QP + * + * Creation date: + * + * Version: $Id$ + * + * Authors: + * Eitan Zahavi + * + * Changes: + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +/* + * Initialize the QP etc. + * Given in res: port_num, max_outs_sq, max_outs_rq + */ +VAPI_ret_t osmt_mtl_get_qp_resources(IN OUT osmt_mtl_mad_res_t * res) +{ + VAPI_ret_t ret; + VAPI_hca_port_t hca_port_info; + VAPI_qp_init_attr_t qp_init_attr; + VAPI_qp_prop_t qp_prop; + VAPI_cqe_num_t act_num; + + /* Get HCA LID */ + ret = + VAPI_query_hca_port_prop(res->hca_hndl, res->port_num, + &hca_port_info); + VAPI_CHECK_RET; + res->slid = hca_port_info.lid; + + /* Get a PD */ + ret = VAPI_alloc_pd(res->hca_hndl, &(res->pd_hndl)); + VAPI_CHECK_RET; + + /* Create CQ for RQ and SQ *//* TBD - Check we have enough act nums */ + ret = + VAPI_create_cq(res->hca_hndl, res->max_outs_sq + 1, + &(res->sq_cq_hndl), &act_num); + VAPI_CHECK_RET; + ret = + VAPI_create_cq(res->hca_hndl, res->max_outs_rq + 1, + &(res->rq_cq_hndl), &act_num); + VAPI_CHECK_RET; + + /* register event handlers for polling(block mode) internal use */ + /* ret= EVAPI_set_comp_eventh(res->hca_hndl,res->rq_cq_hndl, */ + /* EVAPI_POLL_CQ_UNBLOCK_HANDLER,NULL,&(res->rq_cq_eventh)); */ + /* VAPI_CHECK_RET; */ + /* ret= EVAPI_set_comp_eventh(res->hca_hndl,res->sq_cq_hndl, */ + /* EVAPI_POLL_CQ_UNBLOCK_HANDLER,NULL,&(res->sq_cq_eventh)); */ + /* VAPI_CHECK_RET; */ + + /* Create QP */ + qp_init_attr.cap.max_oust_wr_sq = res->max_outs_sq + 1; + qp_init_attr.cap.max_oust_wr_rq = res->max_outs_rq + 1; + qp_init_attr.cap.max_sg_size_sq = 4; + qp_init_attr.cap.max_sg_size_rq = 4; + + qp_init_attr.pd_hndl = res->pd_hndl; + qp_init_attr.rdd_hndl = 0; + qp_init_attr.rq_cq_hndl = res->rq_cq_hndl; + 
qp_init_attr.rq_sig_type = VAPI_SIGNAL_ALL_WR; /* That's default for IB */ + qp_init_attr.sq_cq_hndl = res->sq_cq_hndl; + qp_init_attr.sq_sig_type = VAPI_SIGNAL_REQ_WR; + qp_init_attr.ts_type = VAPI_TS_UD; + + ret = + VAPI_create_qp(res->hca_hndl, &qp_init_attr, &(res->qp_hndl), + &qp_prop); + VAPI_CHECK_RET; + res->qp_id.qp_num = qp_prop.qp_num; + + return (VAPI_OK); +} + +VAPI_ret_t osmt_mtl_qp_init(osmt_mtl_mad_res_t * res) +{ + VAPI_ret_t ret; + + VAPI_qp_attr_t qp_attr; + VAPI_qp_attr_mask_t qp_attr_mask; + VAPI_qp_cap_t qp_cap; + + /* + * Change QP to INIT + * + */ + QP_ATTR_MASK_CLR_ALL(qp_attr_mask); + qp_attr.qp_state = VAPI_INIT; + QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_QP_STATE); + qp_attr.pkey_ix = 0; + QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_PKEY_IX); + qp_attr.port = res->port_num; + QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_PORT); + qp_attr.qkey = res->qkey; + QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_QKEY); + + /* If I do not set this mask, I get an error from HH. QPM should catch it */ + ret = + VAPI_modify_qp(res->hca_hndl, res->qp_hndl, &qp_attr, &qp_attr_mask, + &qp_cap); + VAPI_CHECK_RET; + + return (ret); + +} + +VAPI_ret_t osmt_mtl_qp_2_rtr_rts(osmt_mtl_mad_res_t * res) +{ + VAPI_ret_t ret; + + VAPI_qp_attr_t qp_attr; + VAPI_qp_attr_mask_t qp_attr_mask; + VAPI_qp_cap_t qp_cap; + + /* + * Change QP to RTR + * + */ + QP_ATTR_MASK_CLR_ALL(qp_attr_mask); + qp_attr.qp_state = VAPI_RTR; + QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_QP_STATE); + /* qp_attr.rq_psn = 0; */ + /* QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_RQ_PSN); */ + + ret = + VAPI_modify_qp(res->hca_hndl, res->qp_hndl, &qp_attr, &qp_attr_mask, + &qp_cap); + VAPI_CHECK_RET; + + /* + * Change QP to RTS + * + */ + QP_ATTR_MASK_CLR_ALL(qp_attr_mask); + qp_attr.qp_state = VAPI_RTS; + QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_QP_STATE); + qp_attr.sq_psn = 0; + QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_SQ_PSN); + + ret = + VAPI_modify_qp(res->hca_hndl, res->qp_hndl, &qp_attr, &qp_attr_mask, + &qp_cap); + 
VAPI_CHECK_RET; + + return (ret); +} + +VAPI_ret_t osmt_mtl_mad_create_mr(osmt_mtl_mad_res_t * res) +{ + + VAPI_ret_t ret; + + VAPI_mrw_t mr_in, mr_out; + + res->buf_size = + (MAD_SIZE + GRH_LEN) * (res->max_outs_sq + res->max_outs_rq + 1); + + /* Register single memory address region for all buffers */ + res->buf_ptr = VMALLOC(res->buf_size); + + if (res->buf_ptr == ((VAPI_virt_addr_t) NULL)) { + ret = VAPI_EAGAIN; + VAPI_CHECK_RET; + } + + /* Enable local and remote access to memory region */ + mr_in.acl = VAPI_EN_LOCAL_WRITE | VAPI_EN_REMOTE_WRITE; + mr_in.l_key = 0; + mr_in.pd_hndl = res->pd_hndl; + mr_in.r_key = 0; + mr_in.size = res->buf_size; + ASSERT_VOIDP2UINTN(res->buf_ptr); + mr_in.start = (VAPI_virt_addr_t) (res->buf_ptr); + mr_in.type = VAPI_MR; + + ret = VAPI_register_mr(res->hca_hndl, &mr_in, &(res->mr_hndl), &mr_out); + VAPI_CHECK_RET; + + res->l_key = mr_out.l_key; + + return (ret); +} + +VAPI_ret_t osmt_mtl_init_opened_hca(osmt_mtl_mad_res_t * res) +{ + VAPI_ret_t ret; + + res->pd_hndl = VAPI_INVAL_HNDL; + res->rq_cq_hndl = VAPI_INVAL_HNDL; + res->sq_cq_hndl = VAPI_INVAL_HNDL; + res->sq_cq_eventh = VAPI_INVAL_HNDL; + res->rq_cq_eventh = VAPI_INVAL_HNDL; + res->qp_hndl = VAPI_INVAL_HNDL; + res->mr_hndl = VAPI_INVAL_HNDL; + + /* + * Create QP + * + */ + ret = osmt_mtl_get_qp_resources(res); + if (ret != VAPI_OK) { + return ret; + } + + /* + * Move to init + * + */ + ret = osmt_mtl_qp_init(res); + if (ret != VAPI_OK) { + return ret; + } + + /* + * Initialize memory regions + * + */ + ret = osmt_mtl_mad_create_mr(res); + if (ret != VAPI_OK) { + return ret; + } + + /* only now move to RTR and RTS */ + ret = osmt_mtl_qp_2_rtr_rts(res); + if (ret != VAPI_OK) { + return ret; + } + + return VAPI_OK; +} + +VAPI_ret_t osmt_mtl_mad_cleanup(osmt_mtl_mad_res_t * res) +{ + if (res->qp_hndl != VAPI_INVAL_HNDL) { + VAPI_destroy_qp(res->hca_hndl, res->qp_hndl); + } + if (res->sq_cq_eventh != VAPI_INVAL_HNDL) { + EVAPI_clear_comp_eventh(res->hca_hndl, 
res->sq_cq_eventh); + } + if (res->rq_cq_eventh != VAPI_INVAL_HNDL) { + EVAPI_clear_comp_eventh(res->hca_hndl, res->rq_cq_eventh); + } + if (res->rq_cq_hndl != VAPI_INVAL_HNDL) { + VAPI_destroy_cq(res->hca_hndl, res->rq_cq_hndl); + } + if (res->sq_cq_hndl != VAPI_INVAL_HNDL) { + VAPI_destroy_cq(res->hca_hndl, res->sq_cq_hndl); + } + if (res->mr_hndl != VAPI_INVAL_HNDL) { + VAPI_deregister_mr(res->hca_hndl, res->mr_hndl); + } + if (res->pd_hndl != VAPI_INVAL_HNDL) { + VAPI_dealloc_pd(res->hca_hndl, res->pd_hndl); + } +#if 0 + /* open/close of HCA should be done system wide - not per application */ + if (res->hca_hndl != VAPI_INVAL_HNDL) { + VAPI_close_hca(res->hca_hndl); /* TBD: HCA_open/close should be done on a system wide basis */ + } +#endif + return VAPI_OK; +} + +VAPI_ret_t osmt_mtl_create_av(osmt_mtl_mad_res_t * res, int16_t dlid, + VAPI_ud_av_hndl_t * avh_p) +{ + VAPI_ud_av_t av; + VAPI_ret_t ret; + + av.dlid = dlid; + av.port = res->port_num; + av.sl = 0; /* dest->sl; */ + av.src_path_bits = 0; /* dest->ee_dlid.dst_path_bits; */ + av.static_rate = 0; + /* GRH ? 
*/ + av.grh_flag = 0; + + ret = VAPI_create_addr_hndl(res->hca_hndl, res->pd_hndl, &av, avh_p); + if (ret != VAPI_OK) { + MTL_ERROR1("%s: failed VAPI_create_addr_hndl (%s)\n", __func__, + VAPI_strerror_sym(ret)); + return ret; + } + return VAPI_OK; +} +/* Post one signaled send of the MAD_SIZE bytes at mad to dest_qp/dest_qkey through the address handle avh */ +VAPI_ret_t osmt_mtl_mad_send(osmt_mtl_mad_res_t * res, VAPI_wr_id_t id, + void *mad, VAPI_qp_num_t dest_qp, IB_sl_t sl, + u_int32_t dest_qkey, VAPI_ud_av_hndl_t avh) +{ + VAPI_sr_desc_t sr; + VAPI_sg_lst_entry_t sg_entry; + VAPI_ret_t ret; + + /* building SEND request */ + sr.opcode = VAPI_SEND; + sr.remote_ah = avh; + sr.remote_qp = dest_qp; + sr.remote_qkey = dest_qkey; + + sr.id = id; + sr.set_se = FALSE; + sr.fence = FALSE; + sr.comp_type = VAPI_SIGNALED; + sr.sg_lst_len = 1; + sr.sg_lst_p = &sg_entry; + ASSERT_VOIDP2UINTN(mad); + sg_entry.addr = (VAPI_virt_addr_t) (mad); + sg_entry.len = MAD_SIZE; + sg_entry.lkey = res->l_key; + + ret = VAPI_post_sr(res->hca_hndl, res->qp_hndl, &sr); + if (ret != VAPI_OK) { + MTL_ERROR1("%s: failed VAPI_post_sr (%s)\n", __func__, + VAPI_strerror_sym(ret)); + return ret; + } + + return VAPI_OK; +} +/* Post num_o_bufs receive buffers of size bytes each, laid out back to back from buf_array; returns how many were posted */ +int osmt_mtl_mad_post_recv_bufs(osmt_mtl_mad_res_t * res, void *buf_array, + u_int32_t num_o_bufs, u_int32_t size, + VAPI_wr_id_t start_id) +{ + uint32_t i; + void *cur_buf; + VAPI_rr_desc_t rr; + VAPI_sg_lst_entry_t sg_entry; + VAPI_ret_t ret; + + rr.opcode = VAPI_RECEIVE; + rr.comp_type = VAPI_SIGNALED; /* All with CQE (IB compliant) */ + rr.sg_lst_len = 1; /* single buffers */ + rr.sg_lst_p = &sg_entry; + sg_entry.lkey = res->l_key; + cur_buf = buf_array; + for (i = 0; i < num_o_bufs; i++) { + rr.id = start_id + i; /* WQE id used is the index to buffers ptr array */ + ASSERT_VOIDP2UINTN(cur_buf); + sg_entry.addr = (VAPI_virt_addr_t) cur_buf; + sg_entry.len = size; + memset(cur_buf, 0x00, size); /* fill with 0 */ + ret = VAPI_post_rr(res->hca_hndl, res->qp_hndl, &rr); + if (ret != VAPI_OK) { + MTL_ERROR1("%s: failed posting RQ WQE (%s)\n", + __func__, + VAPI_strerror_sym(ret)); 
+ return i; + } + MTL_DEBUG4("%s: posted buf at %p\n", __func__, cur_buf); + cur_buf = (char *)cur_buf + size; /* no arithmetic on void * in standard C */ + } + + return i; /* num of buffers posted */ +} +/* Poll cq up to max_poll times, sleeping poll_sleep usec between polls; *avh_p (when avh_p is given) is destroyed in any case */ +VAPI_ret_t osmt_mtl_mad_poll4cqe(VAPI_hca_hndl_t hca, VAPI_cq_hndl_t cq, + VAPI_wc_desc_t * wc_desc_p, + u_int32_t max_poll, u_int32_t poll_sleep, + VAPI_ud_av_hndl_t * avh_p) +{ + VAPI_ret_t ret = VAPI_CQ_EMPTY; + u_int32_t poll_cnt = 0; + + /* wait for something to arrive */ + while ((ret == VAPI_CQ_EMPTY) && (poll_cnt < max_poll)) { + ret = VAPI_poll_cq(hca, cq, wc_desc_p); + /* don't sleep if the CQ was not empty (completion or error) */ + if (ret != VAPI_CQ_EMPTY) { + break; + } + usleep(poll_sleep); + poll_cnt++; + } + + /* if passed an AVH to destroy - do it */ + if (avh_p != NULL) { + VAPI_destroy_addr_hndl(hca, *avh_p); + } + + if ((poll_cnt == max_poll) && (ret == VAPI_CQ_EMPTY)) { + MTL_DEBUG1("%s: Failed to get completion on wq after %u polls.\n", + __func__, + max_poll); + return VAPI_CQ_EMPTY; + } + + if (ret != VAPI_OK) { + MTL_DEBUG1("%s: VAPI_poll_cq failed with ret=%s on sq_cq\n", + __func__, + mtl_strerror_sym(ret)); + return ret; + } + + if (wc_desc_p->status != VAPI_SUCCESS) { + MTL_DEBUG1("%s: completion error (%d) detected\n", __func__, + wc_desc_p->status); + } + + return VAPI_OK; +} + +#endif /* OSM_VENDOR_INTF_MTL */ diff --git a/osmtest/osmt_multicast.c b/osmtest/osmt_multicast.c new file mode 100644 index 0000000..663c17a --- /dev/null +++ b/osmtest/osmt_multicast.c @@ -0,0 +1,2619 @@ +/* + * Copyright (c) 2006-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2010 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of Multicast Member testing flow.. 
+ * + */ + +#ifndef __WIN__ +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include "osmtest.h" + +static void __osmt_print_all_multicast_records(IN osmtest_t * const p_osmt) +{ + uint32_t i; + ib_api_status_t status; + osmv_query_req_t req; + osmv_user_query_t user; + osmtest_req_context_t context; + ib_member_rec_t *mcast_record; + + memset(&context, 0, sizeof(context)); + memset(&req, 0, sizeof(req)); + memset(&user, 0, sizeof(user)); + + user.attr_id = IB_MAD_ATTR_MCMEMBER_RECORD; + + req.query_type = OSMV_QUERY_USER_DEFINED; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = 1; + req.flags = OSM_SA_FLAGS_SYNC; + context.p_osmt = p_osmt; + req.query_context = &context; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + /* UnTrusted (SMKey of 0) - get the multicast groups */ + status = osmv_query_sa(p_osmt->h_bind, &req); + + if (status != IB_SUCCESS || context.result.status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02B5: " + "Failed getting the multicast groups records - %s/%s\n", + ib_get_err_str(status), + ib_get_err_str(context.result.status)); + return; + } + + osm_log(&p_osmt->log, OSM_LOG_INFO, + "\n |------------------------------------------|" + "\n | Remaining Multicast Groups |" + "\n |------------------------------------------|\n"); + + for (i = 0; i < context.result.result_cnt; i++) { + mcast_record = + osmv_get_query_mc_rec(context.result.p_result_madw, i); + osm_dump_mc_record(&p_osmt->log, mcast_record, OSM_LOG_INFO); + } + + /* Trusted - now get the multicast group members */ + req.sm_key = OSM_DEFAULT_SM_KEY; + status = osmv_query_sa(p_osmt->h_bind, &req); + + if (status != IB_SUCCESS || context.result.status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02B6: " + "Failed getting the multicast group members records - %s/%s\n", + 
ib_get_err_str(status), + ib_get_err_str(context.result.status)); + return; + } + + osm_log(&p_osmt->log, OSM_LOG_INFO, + "\n |--------------------------------------------------|" + "\n | Remaining Multicast Group Members |" + "\n |--------------------------------------------------|\n"); + + for (i = 0; i < context.result.result_cnt; i++) { + mcast_record = + osmv_get_query_mc_rec(context.result.p_result_madw, i); + osm_dump_mc_record(&p_osmt->log, mcast_record, OSM_LOG_INFO); + } + +} + +static cl_status_t +__match_mgids(IN const void *const p_object, IN void *context) +{ + ib_gid_t *p_mgid_context = (ib_gid_t *) context; + ib_gid_t *p_mgid_list_item = (ib_gid_t *) p_object; + int32_t count; + + count = memcmp(p_mgid_context, p_mgid_list_item, sizeof(ib_gid_t)); + if (count == 0) + return CL_SUCCESS; + else + return CL_NOT_FOUND; +} + +ib_api_status_t osmt_query_mcast(IN osmtest_t * const p_osmt) +{ + ib_api_status_t status = IB_SUCCESS; + osmv_user_query_t user; + osmv_query_req_t req; + osmtest_req_context_t context; + ib_member_rec_t *p_rec; + uint32_t i, num_recs = 0; + cl_list_t mgids_list; + cl_list_t *p_mgids_list; + cl_list_iterator_t p_mgids_res; + cl_status_t cl_status; + cl_map_item_t *p_item, *p_next_item; + osmtest_mgrp_t *p_mgrp; + + OSM_LOG_ENTER(&p_osmt->log); + + /* + * Do a blocking query for all Multicast Records in the subnet. + * The result is returned in the result field of the caller's + * context structure. + * + * The query structures are locals. 
+ */ + + memset(&req, 0, sizeof(req)); + memset(&user, 0, sizeof(user)); + + context.p_osmt = p_osmt; + user.attr_id = IB_MAD_ATTR_MCMEMBER_RECORD; + + req.query_type = OSMV_QUERY_USER_DEFINED; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = &context; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0203: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = context.result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0264: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + if (status == IB_REMOTE_ERROR) + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s.\n", + ib_get_mad_status_str(osm_madw_get_mad_ptr + (context.result. 
+ p_result_madw))); + goto Exit; + } + + /* ok we have got something */ + /* First Delete the old MGID Table */ + p_next_item = cl_qmap_head(&p_osmt->exp_subn.mgrp_mlid_tbl); + while (p_next_item != cl_qmap_end(&p_osmt->exp_subn.mgrp_mlid_tbl)) { + p_item = p_next_item; + p_next_item = cl_qmap_next(p_item); + cl_qmap_remove_item(&p_osmt->exp_subn.mgrp_mlid_tbl, p_item); + free(p_item); + } + + cl_list_construct(&mgids_list); + cl_list_init(&mgids_list, num_recs); + p_mgids_list = &mgids_list; + num_recs = context.result.result_cnt; + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Received %u records\n", + num_recs); + + for (i = 0; i < num_recs; i++) { + p_rec = osmv_get_query_result(context.result.p_result_madw, i); + p_mgids_res = + cl_list_find_from_head(p_mgids_list, __match_mgids, + &(p_rec->mgid)); + /* If returns iterator other than end of list, same mgid exists already */ + if (p_mgids_res != cl_list_end(p_mgids_list)) { + char gid_str[INET6_ADDRSTRLEN]; + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0265: " + "MCG MGIDs are the same - invalid MGID : %s\n", + inet_ntop(AF_INET6, p_rec->mgid.raw, gid_str, + sizeof gid_str)); + status = IB_ERROR; + goto Exit; + + } + osm_dump_mc_record(&p_osmt->log, p_rec, OSM_LOG_VERBOSE); + cl_status = cl_list_insert_head(p_mgids_list, &(p_rec->mgid)); + if (cl_status) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0205: " + "Could not add MGID to cl_list\n"); + status = IB_ERROR; + goto Exit; + } + p_mgrp = (osmtest_mgrp_t *) malloc(sizeof(*p_mgrp)); + if (!p_mgrp) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0204: " + "Could not allocate new MCG\n"); + status = IB_ERROR; + goto Exit; + } + memcpy(&p_mgrp->mcmember_rec, p_rec, + sizeof(p_mgrp->mcmember_rec)); + cl_qmap_insert(&p_osmt->exp_subn.mgrp_mlid_tbl, + cl_ntoh16(p_rec->mlid), &p_mgrp->map_item); + } + +Exit: + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + 
OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +/* given a multicast request send and wait for response. is_set selects the query: 1 = OSMV_QUERY_UD_MULTICAST_SET, 0 = OSMV_QUERY_UD_MULTICAST_DELETE, 0xee = user defined Get, 0xff = user defined Set; sizeof(ib_sa_mad_t) bytes of the response MAD are copied to *p_res (left zeroed if there is none) */ +ib_api_status_t +osmt_send_mcast_request(IN osmtest_t * const p_osmt, + IN uint8_t is_set, + IN ib_member_rec_t * p_mc_req, + IN uint64_t comp_mask, OUT ib_sa_mad_t * p_res) +{ + osmtest_req_context_t context; + ib_api_status_t status = IB_SUCCESS; + osmv_user_query_t user; + osmv_query_req_t req; + + OSM_LOG_ENTER(&p_osmt->log); + + /* + * Do a blocking query for this record in the subnet. + * + * The query structures are locals. + */ + memset(&req, 0, sizeof(req)); + memset(&user, 0, sizeof(user)); + memset(&context, 0, sizeof(context)); + memset(p_res, 0, sizeof(ib_sa_mad_t)); + + context.p_osmt = p_osmt; + + user.p_attr = p_mc_req; + user.comp_mask = comp_mask; + + if (is_set == 1) + req.query_type = OSMV_QUERY_UD_MULTICAST_SET; + else if (is_set == 0) + req.query_type = OSMV_QUERY_UD_MULTICAST_DELETE; + else if (is_set == 0xee) { + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Set USER DEFINED QUERY\n"); + req.query_type = OSMV_QUERY_USER_DEFINED; + user.method = IB_MAD_METHOD_GET; + user.attr_id = IB_MAD_ATTR_MCMEMBER_RECORD; + } else if (is_set == 0xff) { + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Set USER DEFINED QUERY\n"); + req.query_type = OSMV_QUERY_USER_DEFINED; + user.method = IB_MAD_METHOD_SET; + user.attr_id = IB_MAD_ATTR_MCMEMBER_RECORD; + } + + /* TODO : Check the validity of all user fields in order to use + OSMV_QUERY_USER_DEFINED + p_user_query = ( osmv_user_query_t * ) p_query_req->p_query_input; + if (p_user_query->method) sa_mad_data.method = p_user_query->method; + sa_mad_data.attr_offset = p_user_query->attr_offset; + sa_mad_data.attr_id = p_user_query->attr_id; + sa_mad_data.comp_mask = p_user_query->comp_mask; + sa_mad_data.p_attr = p_user_query->p_attr; + */ + + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = &context; + 
req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0206: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + /* ok it worked - copy the response, but only if one was returned (Exit allows for a NULL result MAD too) */ + if (context.result.p_result_madw != NULL) + memcpy(p_res, osm_madw_get_mad_ptr(context.result.p_result_madw), sizeof(ib_sa_mad_t)); + + status = context.result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0224: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + if (status == IB_REMOTE_ERROR) + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(osm_madw_get_mad_ptr + (context.result. + p_result_madw))); + } + +Exit: + /* + * Return the IB query MAD to the pool as necessary. + */ + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +void osmt_init_mc_query_rec(IN osmtest_t * const p_osmt, + IN OUT ib_member_rec_t * p_mc_req) +{ + /* use default values so we can change only what we want later */ + memset(p_mc_req, 0, sizeof(ib_member_rec_t)); + + /* we leave the MGID to the user */ + memcpy(&p_mc_req->port_gid.unicast.interface_id, + &p_osmt->local_port.port_guid, + sizeof(p_osmt->local_port.port_guid)); + + /* use our own subnet prefix: */ + p_mc_req->port_gid.unicast.prefix = cl_hton64(p_osmt->local_port_gid.unicast.prefix); + + /* ib_net32_t qkey; */ + /* ib_net16_t mlid; - we keep it zero for upper level to decide. */ + /* uint8_t mtu; - keep it zero means - anything you have please. 
*/ + /* uint8_t tclass; can leave as zero for now (between subnets) */ + /* ib_net16_t pkey; leave as zero */ + p_mc_req->rate = IB_PATH_RECORD_RATE_2_5_GBS; + /* uint8_t pkt_life; zero means greater than zero ... */ + /* ib_net32_t sl_flow_hop; keep it all zeros */ + /* we want to use a link local scope: 0x02 */ + p_mc_req->scope_state = ib_member_set_scope_state(0x02, 0); +} + +/*********************************************************************** + * UD Multicast testing flow: + * o15.0.1.3: + * - Request new MCG with not enough components in comp_mask : + * ERR_INSUFFICIENT_COMPONENTS + * o15.0.1.8: + * - Request a join with irrelevant RATE and get a ERR_INVALID_REQ + * o15.0.1.4: + * - Create an MGID by asking for a join with MGID = 0 + * providing P_Key, Q_Key, SL, FlowLabel, Tclass. + * o15.0.1.5: + * - Check the returned MGID is valid. (p 804) + * o15.0.1.6: + * - Create a new MCG with valid requested MGID. + * - Try to create a new MCG with invalid MGID : get back ERR_REQ_INVALID + * - Try again with MGID prefix = 0xA01B (maybe 0x1BA0 little or big ?) + * - Try to create again the already created group: ERR_REQ_INVALID + * o15.0.1.7 - implicitly checked during the prev steps. + * o15.0.1.9 + * - Create MCG with Invalid JoinState.FullMember != 1 : get ERR_REQ_INVALID + * o15.0.1.10 - can't check on a single client . + * o15.0.1.11: + * - Try to join into a MGID that exists with JoinState=SendOnlyMember - + * see that it updates JoinState. What is the routing change? + * - We can not check simple join since we have only one tester (for now) + * o15.0.1.12: + * - The last join should have a special treatment in the SA (sender only) + * but what is it ? + * o15.0.1.13: + * - Try joining with wrong rate - ERR_REQ_INVALID + * o15.0.1.14: + * - Try partial delete - actually updating the join state. check it. + * - Register by InformInfo flow to receive trap 67 on MCG delete. + * - Try full delete (JoinState and should be 0) + * - Wait for trap 67. 
+ *   - Try joining (not full mem) again to see the group was deleted.
+ *     (should fail - o15.0.1.13)
+ * o15.0.1.15:
+ *   - Try deletion of the IPoIB MCG and get: ERR_REQ_INVALID
+ * o15.0.1.16:
+ *   - Try GetTable with PortGUID wildcarded and get back some groups.
+ ***********************************************************************/
+/*
+ * Masks and signatures compared against the 64-bit MGID prefix.
+ * Both masks have the same value (0xff10ffff00000000): the low 32 bits
+ * are ignored and, of the second byte, only bit 0x10 takes part in the
+ * comparison. The two signatures differ only in the 16-bit field that
+ * follows: 0x401b is tested by is_ipv4_mgid() and 0x601b by
+ * is_ipv6_mgid(). All four constants are built with CL_HTON64 so they
+ * can be compared directly with mgid->unicast.prefix.
+ */
+#define PREFIX_MASK CL_HTON64(0xff10ffff00000000ULL)
+#define PREFIX_SIGNATURE CL_HTON64(0xff10601b00000000ULL)
+#define IPV4_PREFIX_MASK CL_HTON64(0xff10ffff00000000ULL)
+#define PREFIX_SIGNATURE_IPV4 CL_HTON64(0xff10401b00000000ULL)
+/*
+ * is_ipv4_mgid - returns non-zero when the masked prefix of mgid equals
+ * PREFIX_SIGNATURE_IPV4, zero otherwise. mgid is only read.
+ */
+static unsigned is_ipv4_mgid(ib_gid_t * mgid)
+{
+	return ((mgid->unicast.prefix & IPV4_PREFIX_MASK) == PREFIX_SIGNATURE_IPV4);
+}
+/*
+ * is_ipv6_mgid - returns non-zero when the masked prefix of mgid equals
+ * PREFIX_SIGNATURE, zero otherwise. mgid is only read.
+ */
+static unsigned is_ipv6_mgid(ib_gid_t * mgid)
+{
+	return ((mgid->unicast.prefix & PREFIX_MASK) == PREFIX_SIGNATURE);
+}
+
+/*
+ * IS_IPOIB_MGID(p_mgid) - non-zero when p_mgid matches either signature
+ * tested by is_ipv4_mgid() or is_ipv6_mgid().
+ * The original note said: the following macro can be used only within the
+ * osmt_run_mcast_flow() function.
+ * NOTE(review): as written the macro expands only to calls of the two
+ * static helpers above (its argument is evaluated once or twice) -
+ * confirm whether that restriction still applies.
+ */
+#define IS_IPOIB_MGID(p_mgid) (is_ipv4_mgid(p_mgid) || is_ipv6_mgid(p_mgid))
+
+ib_api_status_t osmt_run_mcast_flow(IN osmtest_t * const p_osmt)
+{
+	char gid_str[INET6_ADDRSTRLEN];
+	char gid_str2[INET6_ADDRSTRLEN];
+	ib_api_status_t status;
+	ib_member_rec_t mc_req_rec;
+	union {
+		ib_sa_mad_t sa_mad;
+		ib_member_rec_t mcmr;
+	} res;
+	ib_sa_mad_t *sa_mad;
+	ib_member_rec_t *p_mc_res;
+	uint64_t comp_mask = 0;
+	ib_net64_t remote_port_guid = 0x0;
+	cl_qmap_t *p_mgrp_mlid_tbl;
+	osmtest_mgrp_t *p_mgrp;
+	ib_gid_t special_mgid, tmp_mgid, proxy_mgid;
+	ib_net16_t invalid_mlid = 0x0;
+	ib_net16_t max_mlid = cl_hton16(0xFFFE), tmp_mlid;
+	int start_cnt = 0, cnt, middle_cnt = 0, end_cnt = 0;
+	int start_ipoib_cnt = 0, end_ipoib_cnt = 0;
+	int mcg_outside_test_cnt = 0, fail_to_delete_mcg = 0;
+	osmtest_req_context_t context;
+	ib_node_record_t *p_rec;
+	uint32_t num_recs = 0, i;
+	uint8_t mtu_phys = 0, rate_phys = 0;
+	cl_map_t test_created_mlids;	/* List of all mlids created in this test */
+	boolean_t got_error = FALSE;
+
+	static ib_gid_t good_mgid = {
+		{
0xFF, 0x12, 0xA0, 0x1C, + 0xFE, 0x80, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x12, 0x34, 0x56, 0x78} + }; + static ib_gid_t osm_ipoib_mgid = { + { + 0xff, /* multicast field */ + 0x12, /* scope */ + 0x40, 0x1b, /* IPv4 signature */ + 0xff, 0xff, /* 16 bits of P_Key (to be filled in) */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 48 bits of zeros */ + 0xff, 0xff, 0xff, 0xee, /* 32 bit IPv4 broadcast address */ + }, + }; +#if 0 + static ib_gid_t osm_ts_ipoib_good_mgid = { + { + 0xff, /* multicast field */ + 0x12, /* non-permanent bit,scope */ + 0x40, 0x1b, /* IPv4 signature */ + 0xff, 0xff, /* 16 bits of P_Key (to be filled in) */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 48 bits of zeros */ + 0x00, 0x00, 0x00, 0x01, /* 32 bit IPv4 broadcast address */ + }, + }; +#endif + static ib_gid_t osm_ipoib_good_mgid = { + { + 0xff, /* multicast field */ + 0x12, /* non-permanent bit,scope */ + 0x40, 0x1b, /* IPv4 signature */ + 0xff, 0xff, /* 16 bits of P_Key (to be filled in) */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 48 bits of zeros */ + 0xff, 0xff, 0xff, 0xff, /* 32 bit IPv4 broadcast address */ + }, + }; + static ib_gid_t osm_link_local_mgid = { + { + 0xFF, 0x02, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01}, + }; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, "GetTable of all current MCGs...\n"); + status = osmt_query_mcast(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02FF " + "GetTable of all records has failed!\n"); + goto Exit; + } + + /* Initialize the test_created_mgrps map */ + cl_map_construct(&test_created_mlids); + cl_map_init(&test_created_mlids, 1000); + + p_mgrp_mlid_tbl = &p_osmt->exp_subn.mgrp_mlid_tbl; + osmt_init_mc_query_rec(p_osmt, &mc_req_rec); + + sa_mad = &res.sa_mad; + p_mc_res = ib_sa_mad_get_payload_ptr(sa_mad); + + /* Only when we are on single mode check flow - do the count comparison, otherwise skip */ + if (p_osmt->opt.mmode == 1 || 
p_osmt->opt.mmode == 3) { + start_cnt = cl_qmap_count(p_mgrp_mlid_tbl); + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, "(start): " + "Number of MC Records found in SA DB is %d\n", + start_cnt); + } + + /* This flow is being added due to bug discovered using SilverStorm stack - + The bug was initializing MCast with MTU & RATE min values that do + not match the subnet capability, even though that OpenSM + reponds with the correct value it does not store it in the MCG. + We want the check a join request to already existing group (ipoib) + without using MTU or RATE then getting response from OpenSM with + the correct values then join again with them and get IB_SUCCESS + all the way + */ + + /* First validate IPoIB exist in the SA DB */ + p_mgrp = (osmtest_mgrp_t *) cl_qmap_head(p_mgrp_mlid_tbl); + /* scan all available multicast groups in the DB and fill in the table */ + while (p_mgrp != (osmtest_mgrp_t *) cl_qmap_end(p_mgrp_mlid_tbl)) { + /* search for ipoib mgid */ + if (IS_IPOIB_MGID(&p_mgrp->mcmember_rec.mgid)) + start_ipoib_cnt++; + else { + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Non-IPoIB MC Groups exist: mgid=%s\n", + inet_ntop(AF_INET6, + p_mgrp->mcmember_rec.mgid.raw, + gid_str, sizeof gid_str)); + mcg_outside_test_cnt++; + } + + p_mgrp = (osmtest_mgrp_t *) cl_qmap_next(&p_mgrp->map_item); + } + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Found %d non-IPoIB MC Groups\n", mcg_outside_test_cnt); + + if (start_ipoib_cnt) { + /* o15-0.2.4 - Check a join request to already created MCG */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Found IPoIB MC Group, so we run SilverStorm Bug Flow...\n"); + /* Try to join first like IPoIB of SilverStorm */ + memcpy(&mc_req_rec.mgid, &osm_ipoib_good_mgid, + sizeof(ib_gid_t)); + /* Request Join */ + ib_member_set_join_state(&mc_req_rec, + IB_MC_REC_STATE_FULL_MEMBER); + comp_mask = + IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | + IB_MCR_COMPMASK_JOIN_STATE; + + status = osmt_send_mcast_request(p_osmt, 0xff, /* User Defined query Set */ + 
&mc_req_rec, comp_mask, + sa_mad); + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Joining an existing IPoIB multicast group\n"); + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Sent Join request with :\n\t\tport_gid=%s, mgid=%s\n" + "\t\tjoin state= 0x%x, response is : %s\n", + inet_ntop(AF_INET6, mc_req_rec.port_gid.raw, + gid_str, sizeof gid_str), + inet_ntop(AF_INET6, mc_req_rec.mgid.raw, + gid_str2, sizeof gid_str2), + (mc_req_rec.scope_state & 0x0F), + ib_get_err_str(status)); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02B3: " + "Failed joining existing IPoIB MCGroup - got %s\n", + ib_get_err_str(status)); + goto Exit; + } + /* Check MTU & Rate Value and resend with SA suggested values */ + + /* Prepare the mc_req_rec for the rest of the flow */ + osmt_init_mc_query_rec(p_osmt, &mc_req_rec); + /* + We simulate the same situation as in SilverStorm - a response with the + exact RATE & MTU as the SA responded with. Actually the query + has included some more fields but we know that problem was + genereated by the RATE + */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Received attributes of MCG : \n\t\tMTU=0x%02X, RATE=0x%02X\n", + p_mc_res->mtu, p_mc_res->rate); + + mc_req_rec.mtu = p_mc_res->mtu; + mc_req_rec.rate = p_mc_res->rate; + /* Set feasible mtu & rate that will allow check the + exact statement of OpenSM */ + mtu_phys = p_mc_res->mtu; + rate_phys = p_mc_res->rate; + + memcpy(&mc_req_rec.mgid, &osm_ipoib_good_mgid, + sizeof(ib_gid_t)); + /* Request Join */ + ib_member_set_join_state(&mc_req_rec, + IB_MC_REC_STATE_FULL_MEMBER); + comp_mask = + IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | + IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_MTU_SEL | + IB_MCR_COMPMASK_MTU | IB_MCR_COMPMASK_RATE_SEL | + IB_MCR_COMPMASK_RATE; + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Sending attributes of MCG : \n\t\tMTU=0x%02X, RATE=0x%02X\n", + mc_req_rec.mtu, mc_req_rec.rate); + status = osmt_send_mcast_request(p_osmt, 0xff, /* User Defined query */ + 
&mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Sent Join request using response values, response is : %s\n", + ib_get_err_str(status)); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02EF: " + "Query as Full Member of already existing " + "ipoib group gid %s has failed\n", + inet_ntop(AF_INET6, mc_req_rec.mgid.raw, + gid_str, sizeof gid_str)); + goto Exit; + } + /* We do not want to leave the MCG since its IPoIB */ + } + + /**************************************************************************/ + /* Check Get with invalid mlid */ + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Get with invalid mlid...\n"); + /* Request Get */ + ib_member_set_join_state(&mc_req_rec, IB_MC_REC_STATE_FULL_MEMBER); + mc_req_rec.mlid = invalid_mlid; + comp_mask = IB_MCR_COMPMASK_MLID; + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 0xee, /* User Defined query Get */ + &mc_req_rec, comp_mask, sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (status == IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02E0 " + "SubnAdmGet with invalid mlid 0x%x succeeded\n", + cl_ntoh16(mc_req_rec.mlid)); + status = IB_ERROR; + goto Exit; + } + + /* Prepare the mc_req_rec for the rest of the flow */ + osmt_init_mc_query_rec(p_osmt, &mc_req_rec); + /**************************************************************************/ + /* Check Get with invalid port guid */ + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Get with invalid port guid (0x0) but valid interface ID : 0x%" + PRIx64 "...\n", + cl_ntoh64(mc_req_rec.port_gid.unicast.interface_id)); + + /* Request Get */ + ib_member_set_join_state(&mc_req_rec, IB_MC_REC_STATE_FULL_MEMBER); + memset(&mc_req_rec.port_gid.unicast.interface_id, 0, + sizeof(ib_net64_t)); + comp_mask = IB_MCR_COMPMASK_GID; + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = 
osmt_send_mcast_request(p_osmt, 0xee, /* User Defined query Get */ + &mc_req_rec, comp_mask, sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (status == IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02E4 " + "SubnAdmGet with invalid port guid succeeded\n"); + status = IB_ERROR; + goto Exit; + } + + /* Prepare the mc_req_rec for the rest of the flow */ + osmt_init_mc_query_rec(p_osmt, &mc_req_rec); + /**************************************************************************/ + + /* o15.0.1.3: */ + /* - Request Join with insufficient comp_mask */ + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Join with insufficient comp mask qkey & pkey (o15.0.1.3)...\n"); + + /* no MGID */ + memset(&mc_req_rec.mgid, 0, sizeof(ib_gid_t)); + /* Request Join */ + ib_member_set_join_state(&mc_req_rec, IB_MC_REC_STATE_FULL_MEMBER); + + comp_mask = IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | + /* IB_MCR_COMPMASK_QKEY | */ + /* IB_MCR_COMPMASK_PKEY | intentionally missed to raise the error */ + IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_RATE_SEL | IB_MCR_COMPMASK_RATE; + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (((ib_net16_t) (sa_mad->status & IB_SMP_STATUS_MASK)) != + IB_SA_MAD_STATUS_INSUF_COMPS) + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Expected IB_SA_MAD_STATUS_INSUF_COMPS got:%s\n", + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + if (status != IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02EE: " + "Expected REMOTE ERROR got:%s\n", + ib_get_err_str(status)); + status = IB_ERROR; + goto Exit; + } + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Join with insufficient comp mask - sl (15.0.1.3)...\n"); + + /* no MGID */ + 
memset(&mc_req_rec.mgid, 0, sizeof(ib_gid_t)); + /* Request Join */ + mc_req_rec.pkey = IB_DEFAULT_PKEY; + ib_member_set_join_state(&mc_req_rec, IB_MC_REC_STATE_FULL_MEMBER); + + comp_mask = + IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | + IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | + /* IB_MCR_COMPMASK_SL | */ + IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_RATE_SEL | IB_MCR_COMPMASK_RATE; + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (((ib_net16_t) (sa_mad->status & IB_SMP_STATUS_MASK)) != + IB_SA_MAD_STATUS_INSUF_COMPS) + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Expected IB_SA_MAD_STATUS_INSUF_COMPS got:%s\n", + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + if (status != IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02ED: " + "Expected REMOTE ERROR got:%s\n", + ib_get_err_str(status)); + status = IB_ERROR; + goto Exit; + } + + osmt_init_mc_query_rec(p_osmt, &mc_req_rec); + /* no MGID */ + memset(&mc_req_rec.mgid, 0, sizeof(ib_gid_t)); + + mc_req_rec.mgid.raw[15] = 0x01; + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Join with insufficient comp mask - flow label (o15.0.1.3)...\n"); + + /* Request Join */ + mc_req_rec.pkey = IB_DEFAULT_PKEY; + ib_member_set_join_state(&mc_req_rec, IB_MC_REC_STATE_FULL_MEMBER); + + comp_mask = + IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | + IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | + /* IB_MCR_COMPMASK_FLOW | intentionally missed to raise the error */ + IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_RATE_SEL | IB_MCR_COMPMASK_RATE; + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, 
comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (((ib_net16_t) (sa_mad->status & IB_SMP_STATUS_MASK)) != + IB_SA_MAD_STATUS_INSUF_COMPS) + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Expected IB_SA_MAD_STATUS_INSUF_COMPS got:%s\n", + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + if (status != IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02EC: " + "Expected REMOTE ERROR got:%s\n", + ib_get_err_str(status)); + status = IB_ERROR; + goto Exit; + } + + osmt_init_mc_query_rec(p_osmt, &mc_req_rec); + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Join with insufficient comp mask - tclass (o15.0.1.3)...\n"); + + /* Request Join */ + mc_req_rec.pkey = IB_DEFAULT_PKEY; + ib_member_set_join_state(&mc_req_rec, IB_MC_REC_STATE_FULL_MEMBER); + + comp_mask = + IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | + IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | + IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | + /* IB_MCR_COMPMASK_TCLASS | Intentionally missed to raise an error */ + IB_MCR_COMPMASK_RATE_SEL | IB_MCR_COMPMASK_RATE; + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (((ib_net16_t) (sa_mad->status & IB_SMP_STATUS_MASK)) != + IB_SA_MAD_STATUS_INSUF_COMPS) + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Expected IB_SA_MAD_STATUS_INSUF_COMPS got:%s\n", + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + if (status != IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02EA: " + "Expected REMOTE ERROR got:%s\n", + ib_get_err_str(status)); + status = IB_ERROR; + goto Exit; + } + + osmt_init_mc_query_rec(p_osmt, &mc_req_rec); + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Join with insufficient comp mask - tclass qkey (o15.0.1.3)...\n"); + + /* no MGID */ + /* memset(&mc_req_rec.mgid, 0, sizeof(ib_gid_t)); */ + 
/* Request Join */ + mc_req_rec.pkey = IB_DEFAULT_PKEY; + ib_member_set_join_state(&mc_req_rec, IB_MC_REC_STATE_FULL_MEMBER); + + comp_mask = IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | + /* IB_MCR_COMPMASK_QKEY | intentionally missed to raise the error */ + IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | + IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | + /* IB_MCR_COMPMASK_TCLASS | intentionally missed to raise the error */ + IB_MCR_COMPMASK_RATE_SEL | IB_MCR_COMPMASK_RATE; + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (((ib_net16_t) (sa_mad->status & IB_SMP_STATUS_MASK)) != + IB_SA_MAD_STATUS_INSUF_COMPS) + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Expected IB_SA_MAD_STATUS_INSUF_COMPS got:%s\n", + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + if (status != IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02E9: " + "Expected REMOTE ERROR got:%s\n", + ib_get_err_str(status)); + status = IB_ERROR; + goto Exit; + } + + /* o15.0.1.8: */ + /* - Request join with unrealistic RATE : get REQ INVALID status */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Join with unrealistic rate (o15.0.1.8)...\n"); + + /* impossible requested rate */ + mc_req_rec.rate = + IB_RATE_MAX | IB_PATH_SELECTOR_GREATER_THAN << 6; + + comp_mask = IB_MCR_COMPMASK_GID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_RATE_SEL | IB_MCR_COMPMASK_RATE; + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (status != IB_REMOTE_ERROR || + sa_mad->status != 
IB_SA_MAD_STATUS_REQ_INVALID) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0207: " + "Expected REMOTE ERROR IB_SA_MAD_STATUS_REQ_INVALID got:%s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + /* Check Valid value which is unreasonable now */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Join with unrealistic rate 300GB (o15.0.1.8)...\n"); + + /* impossible requested rate */ + mc_req_rec.rate = + IB_PATH_RECORD_RATE_300_GBS | IB_PATH_SELECTOR_GREATER_THAN << 6; + + comp_mask = IB_MCR_COMPMASK_GID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_RATE_SEL | IB_MCR_COMPMASK_RATE; + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (status != IB_REMOTE_ERROR || + sa_mad->status != IB_SA_MAD_STATUS_REQ_INVALID) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0208: " + "Expected REMOTE ERROR IB_SA_MAD_STATUS_REQ_INVALID got:%s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + /* Check Valid value which is unreasonable now */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Join with less than min rate 2.5GB (o15.0.1.8)...\n"); + + /* impossible requested rate */ + mc_req_rec.rate = + IB_PATH_RECORD_RATE_2_5_GBS | IB_PATH_SELECTOR_LESS_THAN << 6; + + comp_mask = IB_MCR_COMPMASK_GID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_RATE_SEL | IB_MCR_COMPMASK_RATE; + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, 
EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (status != IB_REMOTE_ERROR || + sa_mad->status != IB_SA_MAD_STATUS_REQ_INVALID) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02AB: " + "Expected REMOTE ERROR IB_SA_MAD_STATUS_REQ_INVALID got:%s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + /* Checking above max value of MTU which is impossible */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Join with unrealistic mtu : \n\t\tmore than 4096 -" + " max (o15.0.1.8)...\n"); + + /* impossible requested mtu */ + mc_req_rec.mtu = IB_MTU_LEN_4096 | IB_PATH_SELECTOR_GREATER_THAN << 6; + + comp_mask = IB_MCR_COMPMASK_GID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_MTU_SEL | IB_MCR_COMPMASK_MTU; + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (status != IB_REMOTE_ERROR || + sa_mad->status != IB_SA_MAD_STATUS_REQ_INVALID) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02AC: " + "Expected REMOTE ERROR IB_SA_MAD_STATUS_REQ_INVALID got:%s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + /* Checking below min value of MTU which is impossible */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Join with unrealistic mtu : \n\t\tless than 256 -" + " min (o15.0.1.8)...\n"); + + /* impossible requested mtu */ + mc_req_rec.mtu = IB_MTU_LEN_256 | IB_PATH_SELECTOR_LESS_THAN << 6; + + comp_mask = IB_MCR_COMPMASK_GID | IB_MCR_COMPMASK_PORT_GID | 
IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_MTU_SEL | IB_MCR_COMPMASK_MTU; + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (status != IB_REMOTE_ERROR || + sa_mad->status != IB_SA_MAD_STATUS_REQ_INVALID) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02AD: " + "Expected REMOTE ERROR IB_SA_MAD_STATUS_REQ_INVALID got:%s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Join with unrealistic mtu (o15.0.1.8)...\n"); + + /* impossible requested mtu */ + mc_req_rec.mtu = 0x6 | IB_PATH_SELECTOR_GREATER_THAN << 6; + + comp_mask = IB_MCR_COMPMASK_GID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_MTU_SEL | IB_MCR_COMPMASK_MTU; + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (status != IB_REMOTE_ERROR || + sa_mad->status != IB_SA_MAD_STATUS_REQ_INVALID) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02AE: " + "Expected REMOTE ERROR IB_SA_MAD_STATUS_REQ_INVALID got:%s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } +#if 0 + /* Currently PacketLifeTime isn't checked in opensm */ + /* Check PacketLifeTime as 0 */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Create with unrealistic packet life value less than 0 
(o15.0.1.8)...\n"); + + /* impossible requested packet life */ + mc_req_rec.pkt_life = 0 | IB_PATH_SELECTOR_LESS_THAN << 6; + + comp_mask = IB_MCR_COMPMASK_GID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_LIFE | IB_MCR_COMPMASK_LIFE_SEL; + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (status != IB_REMOTE_ERROR || + sa_mad->status != IB_SA_MAD_STATUS_REQ_INVALID) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02AF: " + "Expected REMOTE ERROR IB_SA_MAD_STATUS_REQ_INVALID got:%s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } +#endif + + /* o15.0.1.4: */ + /* - Create an MGID by asking for a join with MGID = 0 */ + /* providing P_Key, Q_Key, SL, FlowLabel, Tclass. 
*/ + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Create given MGID=0 skip service level (o15.0.1.4)...\n"); + + osmt_init_mc_query_rec(p_osmt, &mc_req_rec); + + /* no MGID */ + memset(&mc_req_rec.mgid, 0, sizeof(ib_gid_t)); + /* Request Join */ + mc_req_rec.pkey = IB_DEFAULT_PKEY; + ib_member_set_join_state(&mc_req_rec, IB_MC_REC_STATE_FULL_MEMBER); + + comp_mask = + IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | + IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | + /* IB_MCR_COMPMASK_SL | Intentionally missed */ + IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_RATE_SEL | IB_MCR_COMPMASK_RATE; + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (((ib_net16_t) (sa_mad->status & IB_SMP_STATUS_MASK)) != + IB_SA_MAD_STATUS_INSUF_COMPS) + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Expected IB_SA_MAD_STATUS_INSUF_COMPS got:%s\n", + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + if (status != IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02A8: " + "Expected REMOTE ERROR got:%s\n", + ib_get_err_str(status)); + status = IB_ERROR; + goto Exit; + } + + /* Check that no same MCG in the SMDB */ + status = osmt_query_mcast(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02AA: " + "Could not get all MC Records in subnet, got:%s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + + /* Only when we are on single mode check flow - do the count comparison, otherwise skip */ + if (p_osmt->opt.mmode == 1 || p_osmt->opt.mmode == 3) { + middle_cnt = cl_qmap_count(&p_osmt->exp_subn.mgrp_mlid_tbl); + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, "(post false create): " + "Number of MC Records found in SA DB is %d\n", + middle_cnt); + if (middle_cnt != 
start_cnt) + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Got different number of records stored in SA DB (before any creation)\n" + "Instead of %d got %d\n", start_cnt, + middle_cnt); + } + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Create given MGID=0 skip Qkey and Pkey (o15.0.1.4)...\n"); + + osmt_init_mc_query_rec(p_osmt, &mc_req_rec); + + /* no MGID */ + memset(&mc_req_rec.mgid, 0, sizeof(ib_gid_t)); + /* Request Join */ + ib_member_set_join_state(&mc_req_rec, IB_MC_REC_STATE_FULL_MEMBER); + + comp_mask = IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | + /* IB_MCR_COMPMASK_QKEY | */ + /* IB_MCR_COMPMASK_PKEY | Intentionally missed */ + IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_RATE_SEL | IB_MCR_COMPMASK_RATE; + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (((ib_net16_t) (sa_mad->status & IB_SMP_STATUS_MASK)) != + IB_SA_MAD_STATUS_INSUF_COMPS) + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Expected IB_SA_MAD_STATUS_INSUF_COMPS got:%s\n", + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + if (status != IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02A7: " + "Expected REMOTE ERROR got:%s\n", + ib_get_err_str(status)); + status = IB_ERROR; + goto Exit; + } + + /* Bad Query o15.0.1.4 */ + + status = osmt_query_mcast(p_osmt); + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Create given MGID=0 skip TClass (o15.0.1.4)...\n"); + + osmt_init_mc_query_rec(p_osmt, &mc_req_rec); + + /* no MGID */ + memset(&mc_req_rec.mgid, 0, sizeof(ib_gid_t)); + /* Request Join */ + mc_req_rec.pkey = IB_DEFAULT_PKEY; + ib_member_set_join_state(&mc_req_rec, IB_MC_REC_STATE_FULL_MEMBER); + + comp_mask = + IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | + IB_MCR_COMPMASK_QKEY | 
IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | + IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | + /* IB_MCR_COMPMASK_TCLASS | Intentionally missed */ + /* all above are required */ + IB_MCR_COMPMASK_RATE_SEL | IB_MCR_COMPMASK_RATE; + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (((ib_net16_t) (sa_mad->status & IB_SMP_STATUS_MASK)) != + IB_SA_MAD_STATUS_INSUF_COMPS) + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Expected IB_SA_MAD_STATUS_INSUF_COMPS got:%s\n", + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + if (status != IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02A6: " + "Expected REMOTE ERROR got:%s\n", + ib_get_err_str(status)); + status = IB_ERROR; + goto Exit; + } + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Create given MGID=0 valid Set several options :\n\t\t" + "First any RATE, Second less than max RATE\n\t\t" + "Third above min MTU, Fourth less than max MTU\n\t\t" + "Fifth exact MTU & RATE feasible, Sixth exact RATE feasible\n\t\t" + "Seventh exact MTU feasible (o15.0.1.4)...\n"); + + /* Good Flow - mgid is 0 while giving all required fields for join : P_Key, Q_Key, SL, FlowLabel, Tclass */ + + comp_mask = IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS; /* all above are required */ + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02A5: " + "Failed to create MCG for MGID=0 with higher than minimum RATE - got %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + + /* Save the mlid created in test_created_mlids map */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, 
"created MGID:%s MLID:0x%04X\n", + inet_ntop(AF_INET6, p_mc_res->mgid.raw, gid_str, + sizeof gid_str), cl_ntoh16(p_mc_res->mlid)); + cl_map_insert(&test_created_mlids, cl_ntoh16(p_mc_res->mlid), p_mc_res); + + /* Good Flow - mgid is 0 while giving all required fields for join : P_Key, Q_Key, SL, FlowLabel, Tclass */ + + mc_req_rec.rate = + IB_PATH_RECORD_RATE_60_GBS | IB_PATH_SELECTOR_LESS_THAN << 6; + + comp_mask = IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_RATE_SEL | IB_MCR_COMPMASK_RATE; + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0211: " + "Failed to create MCG for MGID=0 with less than highest RATE - got %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + + /* Save the mlid created in test_created_mlids map */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Created MGID:%s MLID:0x%04X\n", + inet_ntop(AF_INET6, p_mc_res->mgid.raw, gid_str, + sizeof gid_str), cl_ntoh16(p_mc_res->mlid)); + cl_map_insert(&test_created_mlids, cl_ntoh16(p_mc_res->mlid), p_mc_res); + + /* Good Flow - mgid is 0 while giving all required fields for join : P_Key, Q_Key, SL, FlowLabel, Tclass */ + + mc_req_rec.mtu = IB_MTU_LEN_4096 | IB_PATH_SELECTOR_LESS_THAN << 6; + + comp_mask = IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_MTU_SEL | IB_MCR_COMPMASK_MTU; + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0238: " + "Failed to create MCG for 
MGID=0 with less than highest MTU - got %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + + /* Save the mlid created in test_created_mlids map */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Created MGID:%s MLID:0x%04X\n", + inet_ntop(AF_INET6, p_mc_res->mgid.raw, gid_str, + sizeof gid_str), cl_ntoh16(p_mc_res->mlid)); + cl_map_insert(&test_created_mlids, cl_ntoh16(p_mc_res->mlid), p_mc_res); + + /* Good Flow - mgid is 0 while giving all required fields for join : P_Key, Q_Key, SL, FlowLabel, Tclass */ + mc_req_rec.mtu = IB_MTU_LEN_256 | IB_PATH_SELECTOR_GREATER_THAN << 6; + + comp_mask = IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_MTU_SEL | IB_MCR_COMPMASK_MTU; + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0239: " + "Failed to create MCG for MGID=0 with higher than lowest MTU - got %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + + /* Save the mlid created in test_created_mlids map */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Created MGID:%s MLID:0x%04X\n", + inet_ntop(AF_INET6, p_mc_res->mgid.raw, gid_str, + sizeof gid_str), cl_ntoh16(p_mc_res->mlid)); + cl_map_insert(&test_created_mlids, cl_ntoh16(p_mc_res->mlid), p_mc_res); + + /* Good Flow - mgid is 0 while giving all required fields for join : P_Key, Q_Key, SL, FlowLabel, Tclass */ + /* Using Exact feasible MTU & RATE */ + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Using Exact feasible MTU & RATE: " + "MTU = 0x%02X, RATE = 0x%02X\n", mtu_phys, rate_phys); + + mc_req_rec.mtu = mtu_phys; + mc_req_rec.rate = rate_phys; + + comp_mask = IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | 
IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_MTU_SEL | + IB_MCR_COMPMASK_MTU | + IB_MCR_COMPMASK_RATE_SEL | IB_MCR_COMPMASK_RATE; + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0240: " + "Failed to create MCG for MGID=0 with exact MTU & RATE - got %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + + /* Save the mlid created in test_created_mlids map */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Created MGID:%s MLID:0x%04X\n", + inet_ntop(AF_INET6, p_mc_res->mgid.raw, gid_str, + sizeof gid_str), cl_ntoh16(p_mc_res->mlid)); + cl_map_insert(&test_created_mlids, cl_ntoh16(p_mc_res->mlid), p_mc_res); + + /* Good Flow - mgid is 0 while giving all required fields for join : P_Key, Q_Key, SL, FlowLabel, Tclass */ + /* Using Exact feasible RATE */ + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Using Exact feasible RATE: 0x%02X\n", rate_phys); + + mc_req_rec.rate = rate_phys; + + comp_mask = IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_RATE_SEL | IB_MCR_COMPMASK_RATE; + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0241: " + "Failed to create MCG for MGID=0 with exact RATE - got %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + + /* Save the mlid created in test_created_mlids map */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Created MGID:%s MLID:0x%04X\n", + inet_ntop(AF_INET6, p_mc_res->mgid.raw, gid_str, + sizeof gid_str), 
cl_ntoh16(p_mc_res->mlid)); + cl_map_insert(&test_created_mlids, cl_ntoh16(p_mc_res->mlid), p_mc_res); + + /* Good Flow - mgid is 0 while giving all required fields for join : P_Key, Q_Key, SL, FlowLabel, Tclass */ + /* Using Exact feasible MTU */ + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Using Exact feasible MTU: 0x%02X\n", mtu_phys); + + mc_req_rec.mtu = mtu_phys; + + comp_mask = IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_MTU_SEL | IB_MCR_COMPMASK_MTU; + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0242: " + "Failed to create MCG for MGID=0 with exact MTU - got %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + + /* Save the mlid created in test_created_mlids map */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Created MGID:%s MLID:0x%04X\n", + inet_ntop(AF_INET6, p_mc_res->mgid.raw, gid_str, + sizeof gid_str), cl_ntoh16(p_mc_res->mlid)); + cl_map_insert(&test_created_mlids, cl_ntoh16(p_mc_res->mlid), p_mc_res); + + /* o15.0.1.5: */ + /* - Check the returned MGID is valid. (p 804) */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Validating resulting MGID (o15.0.1.5)...\n"); + /* prefix 0xFF1 Scope 0xA01B */ + /* Since we did not directly specified SCOPE in comp mask + we should get the comp mask that is link-local scope */ + if ((p_mc_res->mgid.multicast.header[0] != 0xFF) || + (p_mc_res->mgid.multicast.header[1] != 0x12) || + (p_mc_res->mgid.multicast.raw_group_id[0] != 0xA0) || + (p_mc_res->mgid.multicast.raw_group_id[1] != 0x1B)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0209: " + "Validating MGID failed. 
MGID:%s\n", + inet_ntop(AF_INET6, p_mc_res->mgid.raw, gid_str, + sizeof gid_str)); + status = IB_ERROR; + goto Exit; + } + + /* Good Flow - mgid is 0 while giving all required fields for join : P_Key, Q_Key, SL, FlowLabel, Tclass */ + /* Using feasible GREATER_THAN 0 packet lifitime */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Create given MGID=0 (o15.0.1.4)...\n"); + + status = osmt_query_mcast(p_osmt); + + osmt_init_mc_query_rec(p_osmt, &mc_req_rec); + + /* no MGID */ + memset(&mc_req_rec.mgid, 0, sizeof(ib_gid_t)); + /* Request Join */ + mc_req_rec.pkey = IB_DEFAULT_PKEY; + ib_member_set_join_state(&mc_req_rec, IB_MC_REC_STATE_FULL_MEMBER); + + mc_req_rec.pkt_life = 0 | IB_PATH_SELECTOR_GREATER_THAN << 6; + + comp_mask = IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_LIFE | IB_MCR_COMPMASK_LIFE_SEL; + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0210: " + "Failed to create MCG for MGID=0 - got %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + + /* Save the mlid created in test_created_mlids map */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Created MGID:%s MLID:0x%04X\n", + inet_ntop(AF_INET6, p_mc_res->mgid.raw, gid_str, + sizeof gid_str), cl_ntoh16(p_mc_res->mlid)); + cl_map_insert(&test_created_mlids, cl_ntoh16(p_mc_res->mlid), p_mc_res); + + /* o15.0.1.6: */ + /* - Create a new MCG with valid requested MGID. 
*/ + osmt_init_mc_query_rec(p_osmt, &mc_req_rec); + mc_req_rec.pkey = IB_DEFAULT_PKEY; + mc_req_rec.mgid = good_mgid; + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Create given valid MGID=%s (o15.0.1.6)...\n", + inet_ntop(AF_INET6, mc_req_rec.mgid.raw, gid_str, + sizeof gid_str)); + + /* Before creation, need to check that this group doesn't exist */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Verifying that MCGroup with this MGID doesn't exist by trying to Join it (o15.0.1.13)...\n"); + + ib_member_set_join_state(&mc_req_rec, IB_MC_REC_STATE_NON_MEMBER); + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 1, /* join */ + &mc_req_rec, comp_mask, sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if ((status != IB_REMOTE_ERROR) || + (sa_mad->status != IB_SA_MAD_STATUS_REQ_INVALID)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0301: " + "Tried joining group that shouldn't have existed - got %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + /* Set State to full member to allow group creation */ + ib_member_set_join_state(&mc_req_rec, IB_MC_REC_STATE_FULL_MEMBER); + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Now creating group with given valid MGID=%s (o15.0.1.6)...\n", + inet_ntop(AF_INET6, mc_req_rec.mgid.raw, gid_str, + sizeof gid_str)); + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0211: " + "Failed to create MCG for MGID=%s (o15.0.1.6) - got %s/%s\n", + inet_ntop(AF_INET6, good_mgid.raw, gid_str, + sizeof gid_str), ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + + /* Save the mlid created in test_created_mlids map */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Created MGID:%s MLID:0x%04X\n", + inet_ntop(AF_INET6, p_mc_res->mgid.raw, gid_str, + sizeof 
gid_str), cl_ntoh16(p_mc_res->mlid)); + cl_map_insert(&test_created_mlids, cl_ntoh16(p_mc_res->mlid), p_mc_res); + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Validating resulting MGID (o15.0.1.6)...\n"); + /* prefix 0xFF1 Scope 0xA01B */ + if ((p_mc_res->mgid.multicast.header[0] != 0xFF) || (p_mc_res->mgid.multicast.header[1] != 0x12) || /* HACK hardcoded scope = 0x02 */ + (p_mc_res->mgid.multicast.raw_group_id[0] != 0xA0) || + (p_mc_res->mgid.multicast.raw_group_id[1] != 0x1C)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0212: " + "Validating MGID failed. MGID:%s\n", + inet_ntop(AF_INET6, p_mc_res->mgid.raw, gid_str, + sizeof gid_str)); + status = IB_ERROR; + goto Exit; + } + + /* - Try to create a new MCG with invalid MGID : get back ERR_REQ_INVALID */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking BAD MGID=0xFA..... (o15.0.1.6)...\n"); + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + + mc_req_rec.mgid.raw[0] = 0xFA; + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if ((status != IB_REMOTE_ERROR) || + (sa_mad->status != IB_SA_MAD_STATUS_REQ_INVALID)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0213: " + "Failed to recognize MGID error for MGID=0xFA - got %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + /* - Try again with MGID prefix = 0xA01B (maybe 0x1BA0 little or big ?) */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking BAD MGID=0xFF12A01B..... 
with link-local scope (o15.0.1.6)...\n"); + + mc_req_rec.mgid.raw[0] = 0xFF; + mc_req_rec.mgid.raw[3] = 0x1B; + comp_mask = comp_mask | IB_MCR_COMPMASK_SCOPE; + mc_req_rec.scope_state = mc_req_rec.scope_state & 0x2F; /* local scope */ + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if ((status != IB_REMOTE_ERROR) || + (sa_mad->status != IB_SA_MAD_STATUS_REQ_INVALID)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0214: " + "Failed to recognize MGID error for A01B with link-local bit (status %s) (rem status %s)\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + /* Change the mgid prefix - get back ERR_REQ_INVALID */ + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking BAD MGID PREFIX=0xEF... (o15.0.1.6)...\n"); + + mc_req_rec.mgid = good_mgid; + mc_req_rec.mgid.raw[0] = 0xEF; + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if ((status != IB_REMOTE_ERROR) || + (sa_mad->status != IB_SA_MAD_STATUS_REQ_INVALID)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0215: " + "Failed to recognize MGID PREFIX error for MGID=0xEF - got %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + /* Change the scope to reserved - get back VALID REQ */ + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking local scope with full member \n\t\tand valid mgid %s" + " ... 
(o15.0.1.6)...\n", + inet_ntop(AF_INET6, mc_req_rec.mgid.raw, gid_str, + sizeof gid_str)); + + mc_req_rec.mgid = good_mgid; + mc_req_rec.mgid.raw[1] = 0x1F; + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0216: " + "Failed to create MCG for MGID=%s - got %s/%s\n", + inet_ntop(AF_INET6, good_mgid.raw, gid_str, + sizeof gid_str), ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + + /* Save the mlid created in test_created_mlids map */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Created MGID:%s MLID:0x%04X\n", + inet_ntop(AF_INET6, p_mc_res->mgid.raw, gid_str, + sizeof gid_str), cl_ntoh16(p_mc_res->mlid)); + cl_map_insert(&test_created_mlids, cl_ntoh16(p_mc_res->mlid), p_mc_res); + + /* Change the flags to invalid value 0x2 - get back INVALID REQ */ + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking invalid flags=0xFF 22 ... (o15.0.1.6)...\n"); + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + + mc_req_rec.mgid = good_mgid; + mc_req_rec.mgid.raw[1] = 0x22; + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if ((status != IB_REMOTE_ERROR) || + (sa_mad->status != IB_SA_MAD_STATUS_REQ_INVALID)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0217: " + "Failed to recognize create with invalid flags value 0x2 - got %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + /* Change the MGID to link local MGID - get back VALID REQ */ + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking link local MGID 0xFF02:0:0:0:0:0:0:1 (o15.0.1.6)...\n"); + + mc_req_rec.mgid = osm_link_local_mgid; + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 
0218: " + "Failed to create MCG for MGID=0xFF02:0:0:0:0:0:0:1 - got %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + + /* Save the mlid created in test_created_mlids map */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Created MGID:%s MLID:0x%04X\n", + inet_ntop(AF_INET6, p_mc_res->mgid.raw, gid_str, + sizeof gid_str), cl_ntoh16(p_mc_res->mlid)); + cl_map_insert(&test_created_mlids, cl_ntoh16(p_mc_res->mlid), p_mc_res); + + /* o15.0.1.7 - implicitly checked during the prev steps */ + /* o15.0.1.8 - implicitly checked during the prev steps */ + + /* o15.0.1.9 */ + /* - Create MCG with Invalid JoinState.FullMember != 1 : get ERR_REQ_INVALID */ + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking new MGID with invalid join state (o15.0.1.9)...\n"); + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + + mc_req_rec.mgid = good_mgid; + mc_req_rec.mgid.raw[12] = 0xFF; + mc_req_rec.scope_state = 0x22; /* link-local scope, non-member state */ + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if ((status != IB_REMOTE_ERROR) || + (sa_mad->status != IB_SA_MAD_STATUS_REQ_INVALID)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0219: " + "Failed to recognize create with JoinState != FullMember - got %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + /* Lets try a valid join scope state */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking new MGID with valid join state (o15.0.1.9)...\n"); + + mc_req_rec.mgid = good_mgid; + mc_req_rec.scope_state = 0x23; /* link-local scope, non member and full member */ + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0220: " + "Failed to create MCG with valid join state 0x3 - got %s/%s\n", + 
ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + + /* Save the mlid created in test_created_mlids map */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Created MGID:%s MLID:0x%04X\n", + inet_ntop(AF_INET6, p_mc_res->mgid.raw, gid_str, + sizeof gid_str), cl_ntoh16(p_mc_res->mlid)); + cl_map_insert(&test_created_mlids, cl_ntoh16(p_mc_res->mlid), p_mc_res); + + /* Lets try another invalid join scope state */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking new MGID with invalid join state (o15.0.1.9)...\n"); + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + + /* We have created a new MCG so now we need different mgid when creating group otherwise it will be counted as join request */ + mc_req_rec.mgid = good_mgid; + mc_req_rec.mgid.raw[12] = 0xFC; + mc_req_rec.scope_state = 0x24; /* link-local scope, send only member */ + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if ((status != IB_REMOTE_ERROR) || + (sa_mad->status != IB_SA_MAD_STATUS_REQ_INVALID)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0221: " + "Failed to recognize create with JoinState != FullMember - got %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + /* Lets try another valid join scope state */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking new MGID creation with valid join state (o15.0.2.3)...\n"); + + mc_req_rec.mgid = good_mgid; + mc_req_rec.mgid.raw[12] = 0xFB; + memcpy(&special_mgid, &mc_req_rec.mgid, sizeof(ib_gid_t)); + mc_req_rec.scope_state = 0x2F; /* link-local scope, Full member with all other bits turned on */ + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0222: " + "Failed to create MCG with valid join state 0xF - got %s/%s\n", + 
ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + + /* Save the mlid created in test_created_mlids map */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Created MGID:%s MLID:0x%04X\n", + inet_ntop(AF_INET6, p_mc_res->mgid.raw, gid_str, + sizeof gid_str), cl_ntoh16(p_mc_res->mlid)); + cl_map_insert(&test_created_mlids, cl_ntoh16(p_mc_res->mlid), p_mc_res); + + /* o15.0.1.10 - can't check on a single client .-- obsolete - + checked by SilverStorm bug o15-0.2.4, never the less recheck */ + /* o15-0.2.4 - Check a join request to already created MCG */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, "Check o15-0.2.4 statement...\n"); + /* Try to join */ + memcpy(&mc_req_rec.mgid, &p_mc_res->mgid, sizeof(ib_gid_t)); + /* Request Join */ + ib_member_set_join_state(&mc_req_rec, IB_MC_REC_STATE_NON_MEMBER); + comp_mask = + IB_MCR_COMPMASK_MGID | + IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_JOIN_STATE; + + status = osmt_send_mcast_request(p_osmt, 0x1, /* SubnAdmSet */ + &mc_req_rec, comp_mask, sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02CC: " + "Failed to join MCG with valid req, returned status = %s\n", + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + + if ((p_mc_res->scope_state & 0x7) != 0x7) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02D0: " + "Validating JoinState update failed. " + "Expected 0x27 got 0x%02X\n", p_mc_res->scope_state); + status = IB_ERROR; + goto Exit; + } + + /* o15.0.1.11: */ + /* - Try to join into a MGID that exists with JoinState=SendOnlyMember - */ + /* see that it updates JoinState. What is the routing change? 
*/ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Retry of existing MGID - See JoinState update (o15.0.1.11)...\n"); + + mc_req_rec.mgid = good_mgid; + + /* first, make sure that the group exists */ + mc_req_rec.scope_state = 0x21; + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02CD: " + "Failed to create/join as full member - got %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + + mc_req_rec.scope_state = 0x22; /* link-local scope, non-member */ + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02D1: " + "Failed to update existing MGID - got %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Validating Join State update with NonMember (o15.0.1.11)...\n"); + + if (p_mc_res->scope_state != 0x23) { /* scope is LSB */ + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02CE: " + "Validating JoinState update failed. 
Expected 0x23 got: 0x%02X\n", + p_mc_res->scope_state); + status = IB_ERROR; + goto Exit; + } + + /* Try delete current join state then update it with another value */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking JoinState update request should return 0x22 (o15.0.1.11)...\n"); + + mc_req_rec.rate = + IB_LINK_WIDTH_ACTIVE_1X | IB_PATH_SELECTOR_GREATER_THAN << 6; + mc_req_rec.mgid = good_mgid; + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Partially delete JoinState (o15.0.1.14)...\n"); + + /* link-local scope, both non-member bits, + so we should not be able to delete) */ + mc_req_rec.scope_state = 0x26; + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 0, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (status != IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02CF: " + "Expected to fail partially update JoinState, " + "but got %s\n", ib_get_err_str(status)); + status = IB_ERROR; + goto Exit; + } + + /* link-local scope, NonMember bit, the FullMember bit should stay */ + mc_req_rec.scope_state = 0x22; + status = osmt_send_mcast_request(p_osmt, 0, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02D3: " + "Failed to partially update JoinState : %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + if (p_mc_res->scope_state != 0x21) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02D4: " + "Failed to partially update JoinState : " + "JoinState = 0x%02X, expected 0x%02X\n", + p_mc_res->scope_state, 0x21); + status = IB_ERROR; + goto Exit; + } + + /* So far successfully delete state - Now change it */ + mc_req_rec.mgid = good_mgid; + mc_req_rec.scope_state = 0x24; /* link-local scope, send only member */ + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + if 
(status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02C0: " + "Failed to update existing MCG - got %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Validating Join State update with Send Only Member (o15.0.1.11)...\n"); + + if (p_mc_res->scope_state != 0x25) { /* scope is MSB */ + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02C1: " + "Validating JoinState update failed. Expected 0x25 got: 0x%02X\n", + p_mc_res->scope_state); + status = IB_ERROR; + goto Exit; + } + /* Now try to update value of join state */ + mc_req_rec.scope_state = 0x21; /* link-local scope, full member */ + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02C2: " + "Failed to update existing MGID - got %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Validating Join State update with Full Member\n\t\t" + "to an existing 0x5 state MCG (o15.0.1.11)...\n"); + + if (p_mc_res->scope_state != 0x25) { /* scope is LSB */ + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02C3: " + "Validating JoinState update failed. 
Expected 0x25 got: 0x%02X\n", + p_mc_res->scope_state); + status = IB_ERROR; + goto Exit; + } + + /* Now try to update value of join state */ + mc_req_rec.scope_state = 0x22; /* link-local scope, non member */ + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02C4: " + "Failed to update existing MGID - got %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Validating Join State update with Non Member\n\t\t" + "to an existing 0x5 state MCG (o15.0.1.11)...\n"); + + if (p_mc_res->scope_state != 0x27) { /* scope is LSB */ + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02C5: " + "Validating JoinState update failed. Expected 0x27 got: 0x%02X\n", + p_mc_res->scope_state); + status = IB_ERROR; + goto Exit; + } + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "DEBUG - Current scope_state value : 0x%02X...\n", + p_mc_res->scope_state); + + /* - We can not check simple join since we have only one tester (for now) */ + + /* o15.0.1.12: Not Supported */ + /* - The SendOnlyNonMem join should have a special treatment in the + SA but what is it ? 
*/ + + /* o15.0.1.13: */ + /* - Try joining with rate that does not exist in any MCG - + ERR_REQ_INVALID */ + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking BAD RATE when connecting to existing MGID (o15.0.1.13)...\n"); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + + mc_req_rec.mgid = good_mgid; + mc_req_rec.rate = + IB_PATH_RECORD_RATE_2_5_GBS | IB_PATH_SELECTOR_LESS_THAN << 6; + comp_mask = IB_MCR_COMPMASK_GID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_RATE_SEL | IB_MCR_COMPMASK_RATE; + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if ((status != IB_REMOTE_ERROR) || + (sa_mad->status != IB_SA_MAD_STATUS_REQ_INVALID)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02C6: " + "Failed to catch BAD RATE joining an existing MGID: %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + /* Try MTU that does not exist in any MCG */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking BAD MTU (higher than max) when connecting to " + "existing MGID (o15.0.1.13)...\n"); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + + mc_req_rec.mgid = osm_ipoib_mgid; + mc_req_rec.mtu = IB_MTU_LEN_4096 | IB_PATH_SELECTOR_GREATER_THAN << 6; + comp_mask = IB_MCR_COMPMASK_GID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_MTU_SEL | IB_MCR_COMPMASK_MTU; + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if ((status != 
IB_REMOTE_ERROR) || + (sa_mad->status != IB_SA_MAD_STATUS_REQ_INVALID)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02C7: " + "Failed to catch BAD RATE (higher than max) joining an existing MGID: %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + /* Try another MTU that does not exist in any MCG */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking BAD MTU (less than min) when connecting " + "to existing MGID (o15.0.1.13)...\n"); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + + mc_req_rec.mgid = osm_ipoib_mgid; + mc_req_rec.mtu = IB_MTU_LEN_256 | IB_PATH_SELECTOR_LESS_THAN << 6; + comp_mask = IB_MCR_COMPMASK_GID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_MTU_SEL | IB_MCR_COMPMASK_MTU; + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if ((status != IB_REMOTE_ERROR) || + (sa_mad->status != IB_SA_MAD_STATUS_REQ_INVALID)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02C8: " + "Failed to catch BAD RATE (less than min) joining an existing MGID: %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + /* o15.0.1.14: */ + /* - Try partial delete - actually updating the join state. check it. 
*/ + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking partial JoinState delete request - removing NonMember (o15.0.1.14)...\n"); + + mc_req_rec.mgid = good_mgid; + comp_mask = IB_MCR_COMPMASK_GID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS; /* all above are required */ + /* link-local scope, non member (so we should not be able to delete) */ + /* but the NonMember bit should be gone */ + mc_req_rec.scope_state = 0x22; + + status = osmt_send_mcast_request(p_osmt, 0, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02C9: " + "Fail to partially update JoinState during delete: %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Validating Join State removal of Non Member bit (o15.0.1.14)...\n"); + if (p_mc_res->scope_state != 0x25) { /* scope is MSB - now only the full member & send only member have left */ + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02CA: " + "Validating JoinState update failed. 
Expected 0x25 got: 0x%02X\n", + p_mc_res->scope_state); + status = IB_ERROR; + goto Exit; + } + + /* Now use the same scope_state and delete all JoinState - leave multicast group since state is 0x0 */ + mc_req_rec.scope_state = 0x25; + status = osmt_send_mcast_request(p_osmt, 0, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02CB: " + "Failed to update JoinState during delete: %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Validating Join State update remove (o15.0.1.14)...\n"); + + if (p_mc_res->scope_state != 0x20) { /* scope is MSB - now only 0x0 so port is removed from MCG */ + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02BF: " + "Validating JoinState update failed. Expected 0x20 got: 0x%02X\n", + p_mc_res->scope_state); + status = IB_ERROR; + goto Exit; + } + + /* - Try joining (not full mem) again to see the group was deleted. 
(should fail) */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Delete by trying to Join deleted group (o15.0.1.13)...\n"); + + mc_req_rec.scope_state = 0x22; /* use non member - so if no group fail */ + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 1, /* join */ + &mc_req_rec, comp_mask, sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (status != IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02BC: " + "Succeeded Joining Deleted Group: %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + /* - Try deletion of the IPoIB MCG and get: ERR_REQ_INVALID */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking BAD Delete of Mgid membership (no prev join) (o15.0.1.15)...\n"); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + + mc_req_rec.mgid = osm_ipoib_mgid; + mc_req_rec.rate = + IB_LINK_WIDTH_ACTIVE_1X | IB_PATH_SELECTOR_GREATER_THAN << 6; + mc_req_rec.scope_state = 0x21; /* delete full member */ + + status = osmt_send_mcast_request(p_osmt, 0, /* delete flag */ + &mc_req_rec, comp_mask, sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if ((status != IB_REMOTE_ERROR) || + (sa_mad->status != IB_SA_MAD_STATUS_REQ_INVALID)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02BD: " + "Failed to catch BAD delete from IPoIB: %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + /* Prepare another MCG for the following tests : */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Create given MGID=%s\n\t\t(o15.0.1.4)...\n", + inet_ntop(AF_INET6, osm_ipoib_mgid.raw, gid_str, + sizeof gid_str)); + + mc_req_rec.mgid = good_mgid; + mc_req_rec.mgid.raw[12] = 0xAA; + mc_req_rec.pkt_life = 0 | IB_PATH_SELECTOR_GREATER_THAN << 6; + mc_req_rec.scope_state = 0x21; /* Full member */ + 
comp_mask = IB_MCR_COMPMASK_GID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_LIFE | IB_MCR_COMPMASK_LIFE_SEL; + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02BE: " + "Failed to create MCG for %s - got %s/%s\n", + inet_ntop(AF_INET6, good_mgid.raw, gid_str, + sizeof gid_str), ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + + /* - Try delete with valid join state */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Full Delete of a group (o15.0.1.14)...\n"); + mc_req_rec.scope_state = 0x21; /* the FullMember is the current JoinState */ + status = osmt_send_mcast_request(p_osmt, 0, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) + goto Exit; + + /* o15.0.1.15: */ + /* - Try deletion of the IPoIB MCG and get: ERR_REQ_INVALID */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking BAD Delete of IPoIB membership (no prev join) (o15.0.1.15)...\n"); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + + mc_req_rec.mgid = osm_ipoib_mgid; + mc_req_rec.rate = + IB_LINK_WIDTH_ACTIVE_1X | IB_PATH_SELECTOR_GREATER_THAN << 6; + mc_req_rec.scope_state = 0x21; /* delete full member */ + + status = osmt_send_mcast_request(p_osmt, 0, /* delete flag */ + &mc_req_rec, comp_mask, sa_mad); + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if ((status != IB_REMOTE_ERROR) || + (sa_mad->status != IB_SA_MAD_STATUS_REQ_INVALID)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0223: " + "Failed to catch BAD delete from IPoIB: %s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + 
/**************************************************************************/ + /* Checking join with invalid MTU */ + osmt_init_mc_query_rec(p_osmt, &mc_req_rec); + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Join with unrealistic mtu : \n" + "\t\tFirst create new MCG than try to join it \n" + "\t\twith unrealistic MTU greater than 4096 (o15.0.1.8)...\n"); + + /* First create new mgrp */ + mc_req_rec.pkey = IB_DEFAULT_PKEY; + ib_member_set_join_state(&mc_req_rec, IB_MC_REC_STATE_FULL_MEMBER); + mc_req_rec.mtu = IB_MTU_LEN_1024 | IB_PATH_SELECTOR_EXACTLY << 6; + memset(&mc_req_rec.mgid, 0, sizeof(ib_gid_t)); + comp_mask = IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_MTU_SEL | IB_MCR_COMPMASK_MTU; + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02EB: " + "Failed to create new mgrp\n"); + goto Exit; + } + memcpy(&tmp_mgid, &p_mc_res->mgid, sizeof(ib_gid_t)); + osm_dump_mc_record(&p_osmt->log, p_mc_res, OSM_LOG_INFO); + /* tmp_mtu = p_mc_res->mtu & 0x3F; */ + + /* impossible requested mtu always greater than exist in MCG */ + mc_req_rec.mtu = IB_MTU_LEN_4096 | IB_PATH_SELECTOR_GREATER_THAN << 6; + memcpy(&mc_req_rec.mgid, &tmp_mgid, sizeof(ib_gid_t)); + ib_member_set_join_state(&mc_req_rec, IB_MC_REC_STATE_FULL_MEMBER); + comp_mask = + IB_MCR_COMPMASK_GID | IB_MCR_COMPMASK_PORT_GID | + IB_MCR_COMPMASK_JOIN_STATE | + IB_MCR_COMPMASK_MTU_SEL | IB_MCR_COMPMASK_MTU; + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, comp_mask, + sa_mad); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (status == IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02E4: " + 
"Expected REMOTE ERROR got:%s/%s\n", + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + /* - Try GetTable with PortGUID wildcarded and get back some groups. */ + status = osmt_query_mcast(p_osmt); + cnt = cl_qmap_count(&p_osmt->exp_subn.mgrp_mlid_tbl); + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, "(Before checking Max MCG creation): " + "Number of MC Records found in SA DB is %d\n", cnt); + + /**************************************************************************/ + /* Checking join on behalf of remote port gid */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, "Checking Proxy Join...\n"); + osmt_init_mc_query_rec(p_osmt, &mc_req_rec); + memset(&context, 0, sizeof(context)); + + /* + * Do a blocking query for all NodeRecords in the subnet. + */ + status = osmtest_get_all_recs(p_osmt, IB_MAD_ATTR_NODE_RECORD, + sizeof(*p_rec), &context); + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02E5: " + "osmtest_get_all_recs failed on getting all node records(%s)\n", + ib_get_err_str(status)); + goto Exit; + } + + /* + * Populate the database with the received records. 
+ */ + num_recs = context.result.result_cnt; + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Received %u records\n", num_recs); + + for (i = 0; i < num_recs; i++) { + p_rec = + osmv_get_query_node_rec(context.result.p_result_madw, i); + if (p_rec->node_info.port_guid != p_osmt->local_port.port_guid + && p_rec->node_info.node_type == IB_NODE_TYPE_CA) { + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "remote port_guid = 0x%" PRIx64 "\n", + cl_ntoh64(p_rec->node_info.port_guid)); + + remote_port_guid = p_rec->node_info.port_guid; + i = num_recs; + break; + } + } + + if (remote_port_guid != 0x0) { + mc_req_rec.pkey = IB_DEFAULT_PKEY; + ib_member_set_join_state(&mc_req_rec, + IB_MC_REC_STATE_FULL_MEMBER); + memset(&mc_req_rec.mgid, 0, sizeof(ib_gid_t)); + mc_req_rec.port_gid.unicast.interface_id = remote_port_guid; + comp_mask = IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS; /* all above are required */ + + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, + comp_mask, sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02B4: " + "Could not join on behalf of remote port 0x%016" + PRIx64 " remote status: %s\n", + cl_ntoh64(remote_port_guid), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + memcpy(&proxy_mgid, &p_mc_res->mgid, sizeof(ib_gid_t)); + + /* First try a bad deletion then good one */ + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Trying deletion of remote port with local port guid\n"); + + osmt_init_mc_query_rec(p_osmt, &mc_req_rec); + ib_member_set_join_state(&mc_req_rec, + IB_MC_REC_STATE_FULL_MEMBER); + comp_mask = + IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | + IB_MCR_COMPMASK_JOIN_STATE; + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + + status = osmt_send_mcast_request(p_osmt, 0, /* delete flag */ + &mc_req_rec, + 
comp_mask, sa_mad); + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (status == IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02A9: " + "Successful deletion of remote port guid with local one MGID : " + "%s, Got : %s/%s\n", + inet_ntop(AF_INET6, + p_mgrp->mcmember_rec.mgid.raw, + gid_str, sizeof gid_str), + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + status = IB_ERROR; + goto Exit; + } + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Trying deletion of remote port with the right port guid\n"); + + osmt_init_mc_query_rec(p_osmt, &mc_req_rec); + ib_member_set_join_state(&mc_req_rec, + IB_MC_REC_STATE_FULL_MEMBER); + mc_req_rec.mgid = proxy_mgid; + mc_req_rec.port_gid.unicast.interface_id = remote_port_guid; + comp_mask = + IB_MCR_COMPMASK_MGID | + IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_JOIN_STATE; + status = osmt_send_mcast_request(p_osmt, 0, /* delete flag */ + &mc_req_rec, + comp_mask, sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02B0: " + "Failed to delete mgid with remote port guid MGID : " + "%s, Got : %s/%s\n", + inet_ntop(AF_INET6, + p_mgrp->mcmember_rec.mgid.raw, + gid_str, sizeof gid_str), + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) sa_mad)); + goto Exit; + } + } else + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Could not check proxy join since could not found remote port, different from local port\n"); + + /* prepare init for next check */ + osmt_init_mc_query_rec(p_osmt, &mc_req_rec); + + /**************************************************************************/ + if (p_osmt->opt.mmode > 2) { + /* Check invalid Join with max mlid which is more than the + Mellanox switches support 0xC000+0x1000 = 0xd000 */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Checking Creation of Maximum available Groups (MulticastFDBCap)...\n"); + tmp_mlid = cl_ntoh16(max_mlid) - cnt; + + while (tmp_mlid > 0) { + uint16_t cur_mlid = 0; + + /* Request Set */ + 
ib_member_set_join_state(&mc_req_rec, + IB_MC_REC_STATE_FULL_MEMBER); + /* Good Flow - mgid is 0 while giving all required fields for + join : P_Key, Q_Key, SL, FlowLabel, Tclass */ + + mc_req_rec.rate = + IB_LINK_WIDTH_ACTIVE_1X | + IB_PATH_SELECTOR_GREATER_THAN << 6; + mc_req_rec.mlid = max_mlid; + memset(&mc_req_rec.mgid, 0, sizeof(ib_gid_t)); + comp_mask = IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | IB_MCR_COMPMASK_QKEY | IB_MCR_COMPMASK_PKEY | IB_MCR_COMPMASK_SL | IB_MCR_COMPMASK_FLOW | IB_MCR_COMPMASK_JOIN_STATE | IB_MCR_COMPMASK_TCLASS | /* all above are required */ + IB_MCR_COMPMASK_MLID; + status = osmt_send_mcast_request(p_osmt, 1, &mc_req_rec, + comp_mask, sa_mad); + if (status == IB_SUCCESS) { + cur_mlid = cl_ntoh16(p_mc_res->mlid); + /* Save the mlid created in test_created_mlids map */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Created MGID:%s MLID:0x%04X\n", + inet_ntop(AF_INET6, p_mc_res->mgid.raw, + gid_str, sizeof gid_str), + cl_ntoh16(p_mc_res->mlid)); + cl_map_insert(&test_created_mlids, + cl_ntoh16(p_mc_res->mlid), + p_mc_res); + } else if (cur_mlid > cl_ntoh16(max_mlid)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02E1 " + "Successful Join with greater mlid than switches support (MulticastFDBCap) 0x%04X\n", + cur_mlid); + status = IB_ERROR; + osm_dump_mc_record(&p_osmt->log, p_mc_res, + OSM_LOG_VERBOSE); + goto Exit; + } else if ((sa_mad->status & IB_SMP_STATUS_MASK) == + IB_SA_MAD_STATUS_NO_RESOURCES) + /* You can quietly exit the loop since no available mlid in SA DB + i.e. reached the maximum valid avalable mlid */ + break; + tmp_mlid--; + } + } + + /* Prepare the mc_req_rec for the rest of the flow */ + osmt_init_mc_query_rec(p_osmt, &mc_req_rec); + + /**************************************************************************/ + /* o15.0.1.16: */ + /* - Try GetTable with PortGUID wildcarded and get back some groups. 
*/ + + status = osmt_query_mcast(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02B1: " + "Failed to query multicast groups: %s\n", + ib_get_err_str(status)); + goto Exit; + } + + cnt = cl_qmap_count(&p_osmt->exp_subn.mgrp_mlid_tbl); + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, "(Before Deletion of all MCG): " + "Number of MC Records found in SA DB is %d\n", cnt); + + /* Delete all MCG that are not of IPoIB */ + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Cleanup all MCG that are not IPoIB...\n"); + + p_mgrp_mlid_tbl = &p_osmt->exp_subn.mgrp_mlid_tbl; + p_mgrp = (osmtest_mgrp_t *) cl_qmap_head(p_mgrp_mlid_tbl); + /* scan all available multicast groups in the DB and fill in the table */ + while (p_mgrp != (osmtest_mgrp_t *) cl_qmap_end(p_mgrp_mlid_tbl)) { + /* Only if different from IPoIB Mgid try to delete */ + if (!IS_IPOIB_MGID(&p_mgrp->mcmember_rec.mgid)) { + osmt_init_mc_query_rec(p_osmt, &mc_req_rec); + mc_req_rec.mgid = p_mgrp->mcmember_rec.mgid; + + /* o15-0.1.4 - need to specify the oppsite state for a valid delete */ + if (!memcmp(&special_mgid, &p_mgrp->mcmember_rec.mgid, + sizeof(special_mgid))) + mc_req_rec.scope_state = 0x2F; + else + mc_req_rec.scope_state = 0x21; + comp_mask = + IB_MCR_COMPMASK_MGID | IB_MCR_COMPMASK_PORT_GID | + IB_MCR_COMPMASK_JOIN_STATE; + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Sending request to delete MGID : %s" + ", scope_state : 0x%02X\n", + inet_ntop(AF_INET6, mc_req_rec.mgid.raw, + gid_str, sizeof gid_str), + mc_req_rec.scope_state); + status = osmt_send_mcast_request(p_osmt, 0, /* delete flag */ + &mc_req_rec, comp_mask, + sa_mad); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 02FF: Failed to delete MGID : %s" + " ,\n\t\t it is not our MCG, Status : %s/%s\n", + inet_ntop(AF_INET6, + p_mgrp->mcmember_rec.mgid.raw, + gid_str, sizeof gid_str), + ib_get_err_str(status), + ib_get_mad_status_str((ib_mad_t *) + sa_mad)); + fail_to_delete_mcg++; + } + } else + 
end_ipoib_cnt++; + p_mgrp = (osmtest_mgrp_t *) cl_qmap_next(&p_mgrp->map_item); + } + + status = osmt_query_mcast(p_osmt); + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02B2 " + "GetTable of all records has failed - got %s\n", + ib_get_err_str(status)); + goto Exit; + } + + /* If we are in single mode check flow - need to make sure all the multicast groups + that are left are not ones created during the flow. + */ + if (p_osmt->opt.mmode == 1 || p_osmt->opt.mmode == 3) { + end_cnt = cl_qmap_count(&p_osmt->exp_subn.mgrp_mlid_tbl); + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, "Status of MC Records in SA DB during the test flow:\n" " Beginning of test\n" " Unrelated to the test: %d\n" " IPoIB MC Records : %d\n" " Total : %d\n" " End of test\n" " Failed to delete : %d\n" " IPoIB MC Records : %d\n" " Total : %d\n", mcg_outside_test_cnt, /* Non-IPoIB that existed at the beginning */ + start_ipoib_cnt, /* IPoIB records */ + start_cnt, /* Total: IPoIB and MC Records unrelated to the test */ + fail_to_delete_mcg, /* Failed to delete at the end */ + end_ipoib_cnt, /* IPoIB records */ + end_cnt); /* Total MC Records at the end */ + + /* when we compare num of MCG we should consider an outside source which create other MCGs */ + if ((end_cnt - fail_to_delete_mcg - end_ipoib_cnt) != + (start_cnt - mcg_outside_test_cnt - start_ipoib_cnt)) { + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Got different number of non-IPoIB records stored in SA DB\n\t\t" + "at Start got %d, at End got %d (IPoIB groups only)\n", + (start_cnt - mcg_outside_test_cnt - + start_ipoib_cnt), + (end_cnt - fail_to_delete_mcg - end_ipoib_cnt)); + } + + p_mgrp_mlid_tbl = &p_osmt->exp_subn.mgrp_mlid_tbl; + p_mgrp = (osmtest_mgrp_t *) cl_qmap_head(p_mgrp_mlid_tbl); + while (p_mgrp != + (osmtest_mgrp_t *) cl_qmap_end(p_mgrp_mlid_tbl)) { + uint16_t mlid = + (uint16_t) cl_qmap_key((cl_map_item_t *) p_mgrp); + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Found MLID:0x%04X\n", mlid); + /* Check if the 
mlid is in the test_created_mlids. If TRUE, then we + didn't delete a MCgroup that was created in this flow. */ + if (cl_map_get(&test_created_mlids, mlid) != NULL) { + /* This means that we still have an mgrp that we created!! */ + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 02FE: " + "Wasn't able to erase mgrp with MGID:%s" + " MLID:0x%04X\n", + inet_ntop(AF_INET6, + p_mgrp->mcmember_rec.mgid.raw, + gid_str, sizeof gid_str), + mlid); + got_error = TRUE; + } else { + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Still exists %s MGID:%s\n", + (IS_IPOIB_MGID + (&p_mgrp->mcmember_rec. + mgid)) ? "IPoIB" : "non-IPoIB", + inet_ntop(AF_INET6, + p_mgrp->mcmember_rec.mgid.raw, + gid_str, sizeof gid_str)); + } + p_mgrp = + (osmtest_mgrp_t *) cl_qmap_next(&p_mgrp->map_item); + } + + if (got_error) { + __osmt_print_all_multicast_records(p_osmt); + status = IB_ERROR; + } + } +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return status; +} diff --git a/osmtest/osmt_service.c b/osmtest/osmt_service.c new file mode 100644 index 0000000..eeda1ac --- /dev/null +++ b/osmtest/osmt_service.c @@ -0,0 +1,1640 @@ +/* + * Copyright (c) 2006-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2010 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Implementation of service records testing flow.. + * Top level is osmt_run_service_records_flow: + * osmt_register_service + * osmt_get_service_by_name + * osmt_get_all_services + * osmt_delete_service_by_name + * + */ + +#ifndef __WIN__ +#include +#else +#include +#endif +#include +#include +#include +#include +#include "osmtest.h" + + +ib_api_status_t +osmt_register_service(IN osmtest_t * const p_osmt, + IN ib_net64_t service_id, + IN ib_net16_t service_pkey, + IN ib_net32_t service_lease, + IN uint8_t service_key_lsb, IN char *service_name) +{ + osmv_query_req_t req; + osmv_user_query_t user; + osmtest_req_context_t context; + ib_service_record_t svc_rec; + osm_log_t *p_log = &p_osmt->log; + ib_api_status_t status; + + OSM_LOG_ENTER(p_log); + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Registering service: name: %s id: 0x%" PRIx64 "\n", + service_name, cl_ntoh64(service_id)); + + memset(&req, 0, sizeof(req)); + memset(&context, 0, sizeof(context)); + memset(&user, 0, sizeof(user)); + memset(&svc_rec, 0, sizeof(svc_rec)); + + /* set the new service record fields */ + svc_rec.service_id = service_id; + svc_rec.service_pkey = service_pkey; + svc_rec.service_gid.unicast.prefix = p_osmt->local_port_gid.unicast.prefix; + 
svc_rec.service_gid.unicast.interface_id = p_osmt->local_port.port_guid; + svc_rec.service_lease = service_lease; + memset(&svc_rec.service_key, 0, 16 * sizeof(uint8_t)); + svc_rec.service_key[0] = service_key_lsb; + memset(svc_rec.service_name, 0, sizeof(svc_rec.service_name)); + memcpy(svc_rec.service_name, service_name, + (strlen(service_name) + 1) * sizeof(char)); + + /* prepare the data used for this query */ + /* sa_mad_data.method = IB_MAD_METHOD_SET; */ + /* sa_mad_data.sm_key = 0; */ + + context.p_osmt = p_osmt; + req.query_context = &context; + req.query_type = OSMV_QUERY_USER_DEFINED; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + req.flags = OSM_SA_FLAGS_SYNC; + req.sm_key = 0; + req.timeout_ms = p_osmt->opt.transaction_timeout; + + user.method = IB_MAD_METHOD_SET; + user.attr_id = IB_MAD_ATTR_SERVICE_RECORD; + if (ib_pkey_is_invalid(service_pkey)) { + /* if given an invalid service_pkey - don't turn the PKEY compmask on */ + user.comp_mask = IB_SR_COMPMASK_SID | + IB_SR_COMPMASK_SGID | + IB_SR_COMPMASK_SLEASE | + IB_SR_COMPMASK_SKEY | IB_SR_COMPMASK_SNAME; + } else { + user.comp_mask = IB_SR_COMPMASK_SID | + IB_SR_COMPMASK_SGID | + IB_SR_COMPMASK_SPKEY | + IB_SR_COMPMASK_SLEASE | + IB_SR_COMPMASK_SKEY | IB_SR_COMPMASK_SNAME; + } + user.p_attr = &svc_rec; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A01: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = context.result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A02: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + + if (status == IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(osm_madw_get_mad_ptr + (context.result. 
+ p_result_madw))); + } + goto Exit; + } + +Exit: + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return status; +} + +ib_api_status_t +osmt_register_service_with_full_key(IN osmtest_t * const p_osmt, + IN ib_net64_t service_id, + IN ib_net16_t service_pkey, + IN ib_net32_t service_lease, + IN uint8_t * service_key, + IN char *service_name) +{ + osmv_query_req_t req; + osmv_user_query_t user; + osmtest_req_context_t context; + ib_service_record_t svc_rec, *p_rec; + osm_log_t *p_log = &p_osmt->log; + ib_api_status_t status; + uint8_t i, skey[16]; + + OSM_LOG_ENTER(p_log); + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Registering service: name: %s id: 0x%" PRIx64 "\n", + service_name, cl_ntoh64(service_id)); + + memset(&req, 0, sizeof(req)); + memset(&context, 0, sizeof(context)); + memset(&user, 0, sizeof(user)); + memset(&svc_rec, 0, sizeof(svc_rec)); + + /* set the new service record fields */ + svc_rec.service_id = service_id; + svc_rec.service_pkey = service_pkey; + svc_rec.service_gid.unicast.prefix = p_osmt->local_port_gid.unicast.prefix; + svc_rec.service_gid.unicast.interface_id = p_osmt->local_port.port_guid; + svc_rec.service_lease = service_lease; + memset(&svc_rec.service_key, 0, 16 * sizeof(uint8_t)); + memcpy(svc_rec.service_key, service_key, 16 * sizeof(uint8_t)); + memset(svc_rec.service_name, 0, sizeof(svc_rec.service_name)); + memset(skey, 0, 16 * sizeof(uint8_t)); + memcpy(svc_rec.service_name, service_name, + (strlen(service_name) + 1) * sizeof(char)); + + /* prepare the data used for this query */ + /* sa_mad_data.method = IB_MAD_METHOD_SET; */ + /* sa_mad_data.sm_key = 0; */ + + context.p_osmt = p_osmt; + req.query_context = &context; + req.query_type = OSMV_QUERY_USER_DEFINED; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + req.flags = OSM_SA_FLAGS_SYNC; + req.sm_key = 0; + 
req.timeout_ms = p_osmt->opt.transaction_timeout; + + user.method = IB_MAD_METHOD_SET; + user.attr_id = IB_MAD_ATTR_SERVICE_RECORD; + if (ib_pkey_is_invalid(service_pkey)) { + /* if given an invalid service_pkey - don't turn the PKEY compmask on */ + user.comp_mask = IB_SR_COMPMASK_SID | + IB_SR_COMPMASK_SGID | + IB_SR_COMPMASK_SLEASE | + IB_SR_COMPMASK_SKEY | IB_SR_COMPMASK_SNAME; + } else { + user.comp_mask = IB_SR_COMPMASK_SID | + IB_SR_COMPMASK_SGID | + IB_SR_COMPMASK_SPKEY | + IB_SR_COMPMASK_SLEASE | + IB_SR_COMPMASK_SKEY | IB_SR_COMPMASK_SNAME; + } + user.p_attr = &svc_rec; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A03: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = context.result.status; + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A04: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + + if (status == IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(osm_madw_get_mad_ptr + (context.result. 
+ p_result_madw))); + } + goto Exit; + } + + /* Check service key on context to see if match */ + p_rec = osmv_get_query_svc_rec(context.result.p_result_madw, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Comparing service key...\n" "return key is:\n"); + for (i = 0; i <= 15; i++) { + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "service_key sent[%u] = %u, service_key returned[%u] = %u\n", + i, service_key[i], i, p_rec->service_key[i]); + } + /* since c15-0.1.14 not supported all key association queries should bring in return zero in service key */ + if (memcmp(skey, p_rec->service_key, 16 * sizeof(uint8_t)) != 0) { + status = IB_REMOTE_ERROR; + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A33: " + "Data mismatch in service_key\n"); + goto Exit; + } + +Exit: + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return status; +} + +ib_api_status_t +osmt_register_service_with_data(IN osmtest_t * const p_osmt, + IN ib_net64_t service_id, + IN ib_net16_t service_pkey, + IN ib_net32_t service_lease, + IN uint8_t service_key_lsb, + IN uint8_t * service_data8, + IN ib_net16_t * service_data16, + IN ib_net32_t * service_data32, + IN ib_net64_t * service_data64, + IN char *service_name) +{ + osmv_query_req_t req; + osmv_user_query_t user; + osmtest_req_context_t context; + ib_service_record_t svc_rec, *p_rec; + osm_log_t *p_log = &p_osmt->log; + ib_api_status_t status; + /* ib_service_record_t* p_rec; */ + + OSM_LOG_ENTER(p_log); + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Registering service: name: %s id: 0x%" PRIx64 "\n", + service_name, cl_ntoh64(service_id)); + + memset(&req, 0, sizeof(req)); + memset(&context, 0, sizeof(context)); + memset(&user, 0, sizeof(user)); + memset(&svc_rec, 0, sizeof(svc_rec)); + + /* set the new service record fields */ + svc_rec.service_id = service_id; + svc_rec.service_pkey = service_pkey; + 
svc_rec.service_gid.unicast.prefix = p_osmt->local_port_gid.unicast.prefix; + svc_rec.service_gid.unicast.interface_id = p_osmt->local_port.port_guid; + svc_rec.service_lease = service_lease; + memset(&svc_rec.service_key, 0, 16 * sizeof(uint8_t)); + svc_rec.service_key[0] = service_key_lsb; + + /* Copy data to service_data arrays */ + memcpy(svc_rec.service_data8, service_data8, 16 * sizeof(uint8_t)); + memcpy(svc_rec.service_data16, service_data16, 8 * sizeof(ib_net16_t)); + memcpy(svc_rec.service_data32, service_data32, 4 * sizeof(ib_net32_t)); + memcpy(svc_rec.service_data64, service_data64, 2 * sizeof(ib_net64_t)); + + memset(svc_rec.service_name, 0, sizeof(svc_rec.service_name)); + memcpy(svc_rec.service_name, service_name, + (strlen(service_name) + 1) * sizeof(char)); + + /* prepare the data used for this query */ + /* sa_mad_data.method = IB_MAD_METHOD_SET; */ + /* sa_mad_data.sm_key = 0; */ + + context.p_osmt = p_osmt; + req.query_context = &context; + req.query_type = OSMV_QUERY_USER_DEFINED; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + req.flags = OSM_SA_FLAGS_SYNC; + req.sm_key = 0; + req.timeout_ms = p_osmt->opt.transaction_timeout; + + user.method = IB_MAD_METHOD_SET; + user.attr_id = IB_MAD_ATTR_SERVICE_RECORD; + if (ib_pkey_is_invalid(service_pkey)) { + /* if given an invalid service_pkey - don't turn the PKEY compmask on */ + user.comp_mask = IB_SR_COMPMASK_SID | + IB_SR_COMPMASK_SGID | + IB_SR_COMPMASK_SLEASE | + IB_SR_COMPMASK_SKEY | + IB_SR_COMPMASK_SNAME | + IB_SR_COMPMASK_SDATA8_0 | + IB_SR_COMPMASK_SDATA8_1 | + IB_SR_COMPMASK_SDATA16_0 | + IB_SR_COMPMASK_SDATA16_1 | + IB_SR_COMPMASK_SDATA32_0 | + IB_SR_COMPMASK_SDATA32_1 | + IB_SR_COMPMASK_SDATA64_0 | IB_SR_COMPMASK_SDATA64_1; + } else { + user.comp_mask = IB_SR_COMPMASK_SID | + IB_SR_COMPMASK_SGID | + IB_SR_COMPMASK_SPKEY | + IB_SR_COMPMASK_SLEASE | + IB_SR_COMPMASK_SKEY | + IB_SR_COMPMASK_SNAME | + IB_SR_COMPMASK_SDATA8_0 | + IB_SR_COMPMASK_SDATA8_1 | + 
IB_SR_COMPMASK_SDATA16_0 | + IB_SR_COMPMASK_SDATA16_1 | + IB_SR_COMPMASK_SDATA32_0 | + IB_SR_COMPMASK_SDATA32_1 | + IB_SR_COMPMASK_SDATA64_0 | IB_SR_COMPMASK_SDATA64_1; + } + user.p_attr = &svc_rec; + + /* Dump to Service Data b4 send */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Dumping service data b4 send\n"); + osm_dump_service_record(&p_osmt->log, &svc_rec, OSM_LOG_VERBOSE); + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A05: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = context.result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A06: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + + if (status == IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(osm_madw_get_mad_ptr + (context.result. 
+ p_result_madw))); + } + goto Exit; + } + + /* Check data on context to see if match */ + p_rec = osmv_get_query_svc_rec(context.result.p_result_madw, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Comparing service data...\n"); + if (memcmp(service_data8, p_rec->service_data8, 16 * sizeof(uint8_t)) != + 0 + || memcmp(service_data16, p_rec->service_data16, + 8 * sizeof(uint16_t)) != 0 + || memcmp(service_data32, p_rec->service_data32, + 4 * sizeof(uint32_t)) != 0 + || memcmp(service_data64, p_rec->service_data64, + 2 * sizeof(uint64_t)) != 0) { + status = IB_REMOTE_ERROR; + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Data mismatch in service_data8\n"); + goto Exit; + } + +Exit: + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return status; +} + +ib_api_status_t +osmt_get_service_by_id_and_name(IN osmtest_t * const p_osmt, + IN uint32_t rec_num, + IN ib_net64_t sid, + IN char *sr_name, + OUT ib_service_record_t * p_out_rec) +{ + + ib_api_status_t status = IB_SUCCESS; + osmtest_req_context_t context; + osmv_query_req_t req; + ib_service_record_t svc_rec, *p_rec; + uint32_t num_recs = 0; + osmv_user_query_t user; + const uint16_t ERR_SIZE = 512; + char mad_stat_err[ERR_SIZE]; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Getting service record: id: 0x%016" PRIx64 + " and name: %s\n", cl_ntoh64(sid), sr_name); + + /* + * Do a blocking query for this record in the subnet. + * The result is returned in the result field of the caller's + * context structure. + * + * The query structures are locals. 
+ */ + memset(&req, 0, sizeof(req)); + memset(&context, 0, sizeof(context)); + + context.p_osmt = p_osmt; + + /* prepare the data used for this query */ + req.query_type = OSMV_QUERY_USER_DEFINED; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = &context; + req.pfn_query_cb = osmtest_query_res_cb; + req.sm_key = 0; + + memset(&svc_rec, 0, sizeof(svc_rec)); + memset(&user, 0, sizeof(user)); + /* set the new service record fields */ + memset(svc_rec.service_name, 0, sizeof(svc_rec.service_name)); + memcpy(svc_rec.service_name, sr_name, + (strlen(sr_name) + 1) * sizeof(char)); + svc_rec.service_id = sid; + req.p_query_input = &user; + + user.method = IB_MAD_METHOD_GET; + user.attr_id = IB_MAD_ATTR_SERVICE_RECORD; + user.comp_mask = IB_SR_COMPMASK_SID | IB_SR_COMPMASK_SNAME; + user.p_attr = &svc_rec; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A07: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = context.result.status; + num_recs = context.result.result_cnt; + + if (status != IB_SUCCESS) { + + /* If the failure is due to IB_SA_MAD_STATUS_NO_RECORDS and rec_num is 0, + then this is fine */ + if (status == IB_REMOTE_ERROR) + strncpy(mad_stat_err, + ib_get_mad_status_str(osm_madw_get_mad_ptr + (context.result. 
+ p_result_madw)), + ERR_SIZE -1); + else + strncpy(mad_stat_err, ib_get_err_str(status), + ERR_SIZE -1); + + mad_stat_err[ERR_SIZE -1] = '\0'; + + if (status == IB_REMOTE_ERROR && + !strcmp(mad_stat_err, "IB_SA_MAD_STATUS_NO_RECORDS") && + rec_num == 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "IS EXPECTED ERROR ^^^^\n"); + status = IB_SUCCESS; + } else { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A08: " + "Query failed: %s (%s)\n", + ib_get_err_str(status), mad_stat_err); + goto Exit; + } + } + + if (rec_num && num_recs != rec_num) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Unmatched number of records: expected: %d, received: %d\n", + rec_num, num_recs); + status = IB_REMOTE_ERROR; + goto Exit; + } + + p_rec = osmv_get_query_svc_rec(context.result.p_result_madw, 0); + *p_out_rec = *p_rec; + + if (num_recs) { + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Found service record: name: %s id: 0x%016" PRIx64 "\n", + p_rec->service_name, cl_ntoh64(p_rec->service_id)); + + osm_dump_service_record(&p_osmt->log, p_rec, OSM_LOG_DEBUG); + } + +Exit: + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Expected and found %d records\n", rec_num); + + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return status; +} + +ib_api_status_t +osmt_get_service_by_id(IN osmtest_t * const p_osmt, + IN uint32_t rec_num, + IN ib_net64_t sid, OUT ib_service_record_t * p_out_rec) +{ + + ib_api_status_t status = IB_SUCCESS; + osmtest_req_context_t context; + osmv_query_req_t req; + ib_service_record_t svc_rec, *p_rec; + uint32_t num_recs = 0; + osmv_user_query_t user; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Getting service record: id: 0x%016" PRIx64 "\n", + cl_ntoh64(sid)); + + /* + * Do a blocking query for this record in the subnet. + * The result is returned in the result field of the caller's + * context structure. 
+ * + * The query structures are locals. + */ + memset(&req, 0, sizeof(req)); + memset(&context, 0, sizeof(context)); + + context.p_osmt = p_osmt; + + /* prepare the data used for this query */ + req.query_type = OSMV_QUERY_USER_DEFINED; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = &context; + req.pfn_query_cb = osmtest_query_res_cb; + req.sm_key = 0; + + memset(&svc_rec, 0, sizeof(svc_rec)); + memset(&user, 0, sizeof(user)); + /* set the new service record fields */ + svc_rec.service_id = sid; + req.p_query_input = &user; + + user.method = IB_MAD_METHOD_GET; + user.attr_id = IB_MAD_ATTR_SERVICE_RECORD; + user.comp_mask = IB_SR_COMPMASK_SID; + user.p_attr = &svc_rec; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A09: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = context.result.status; + num_recs = context.result.result_cnt; + + if (status != IB_SUCCESS) { + char mad_stat_err[256]; + + /* If the failure is due to IB_SA_MAD_STATUS_NO_RECORDS and rec_num is 0, + then this is fine */ + if (status == IB_REMOTE_ERROR) + strcpy(mad_stat_err, + ib_get_mad_status_str(osm_madw_get_mad_ptr + (context.result. 
+ p_result_madw))); + else + strcpy(mad_stat_err, ib_get_err_str(status)); + + if (status == IB_REMOTE_ERROR && + !strcmp(mad_stat_err, "IB_SA_MAD_STATUS_NO_RECORDS") && + rec_num == 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "IS EXPECTED ERROR ^^^^\n"); + status = IB_SUCCESS; + } else { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A0A: " + "Query failed: %s (%s)\n", + ib_get_err_str(status), mad_stat_err); + goto Exit; + } + } + + if (rec_num && num_recs != rec_num) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A0B: " + "Unmatched number of records: expected: %d received: %d\n", + rec_num, num_recs); + status = IB_REMOTE_ERROR; + goto Exit; + } + + p_rec = osmv_get_query_svc_rec(context.result.p_result_madw, 0); + *p_out_rec = *p_rec; + + if (num_recs) { + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Found service record: name: %s id: 0x%016" PRIx64 "\n", + p_rec->service_name, cl_ntoh64(p_rec->service_id)); + + osm_dump_service_record(&p_osmt->log, p_rec, OSM_LOG_DEBUG); + } + +Exit: + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Expected and found %d records\n", rec_num); + + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return status; +} + +ib_api_status_t +osmt_get_service_by_name_and_key(IN osmtest_t * const p_osmt, + IN char *sr_name, + IN uint32_t rec_num, + IN uint8_t * skey, + OUT ib_service_record_t * p_out_rec) +{ + + ib_api_status_t status = IB_SUCCESS; + osmtest_req_context_t context; + osmv_query_req_t req; + ib_service_record_t svc_rec, *p_rec; + uint32_t num_recs = 0, i; + osmv_user_query_t user; + const uint16_t ERR_SIZE = 512; + char mad_stat_err[ERR_SIZE]; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Getting service record: name: %s and key: " + "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", + sr_name, skey[0], skey[1], skey[2], skey[3], skey[4], 
skey[5], + skey[6], skey[7], skey[8], skey[9], skey[10], skey[11], + skey[12], skey[13], skey[14], skey[15]); + + /* + * Do a blocking query for this record in the subnet. + * The result is returned in the result field of the caller's + * context structure. + * + * The query structures are locals. + */ + memset(&req, 0, sizeof(req)); + memset(&context, 0, sizeof(context)); + + context.p_osmt = p_osmt; + + /* prepare the data used for this query */ + req.query_type = OSMV_QUERY_USER_DEFINED; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = &context; + req.pfn_query_cb = osmtest_query_res_cb; + req.sm_key = 0; + + memset(&svc_rec, 0, sizeof(svc_rec)); + memset(&user, 0, sizeof(user)); + /* set the new service record fields */ + memset(svc_rec.service_name, 0, sizeof(svc_rec.service_name)); + memcpy(svc_rec.service_name, sr_name, + (strlen(sr_name) + 1) * sizeof(char)); + for (i = 0; i <= 15; i++) + svc_rec.service_key[i] = skey[i]; + + req.p_query_input = &user; + + user.method = IB_MAD_METHOD_GET; + user.attr_id = IB_MAD_ATTR_SERVICE_RECORD; + user.comp_mask = IB_SR_COMPMASK_SNAME | IB_SR_COMPMASK_SKEY; + user.p_attr = &svc_rec; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A0C: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = context.result.status; + num_recs = context.result.result_cnt; + + if (status != IB_SUCCESS) { + + /* If the failure is due to IB_SA_MAD_STATUS_NO_RECORDS and rec_num is 0, + then this is fine */ + if (status == IB_REMOTE_ERROR) + strncpy(mad_stat_err, + ib_get_mad_status_str(osm_madw_get_mad_ptr + (context.result. 
+ p_result_madw)), + ERR_SIZE -1); + else + strncpy(mad_stat_err, ib_get_err_str(status), + ERR_SIZE -1); + + mad_stat_err[ERR_SIZE -1] = '\0'; + + if (status == IB_REMOTE_ERROR && + !strcmp(mad_stat_err, "IB_SA_MAD_STATUS_NO_RECORDS") && + rec_num == 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "IS EXPECTED ERROR ^^^^\n"); + status = IB_SUCCESS; + } else { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A0D: " + "Query failed:%s (%s)\n", + ib_get_err_str(status), mad_stat_err); + goto Exit; + } + } + + if (rec_num && num_recs != rec_num) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Unmatched number of records: expected: %d, received: %d\n", + rec_num, num_recs); + status = IB_REMOTE_ERROR; + goto Exit; + } + + p_rec = osmv_get_query_svc_rec(context.result.p_result_madw, 0); + *p_out_rec = *p_rec; + + if (num_recs) { + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Found service record: name: %s id: 0x%016" PRIx64 "\n", + sr_name, cl_ntoh64(p_rec->service_id)); + + osm_dump_service_record(&p_osmt->log, p_rec, OSM_LOG_DEBUG); + } + +Exit: + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Expected and found %d records\n", rec_num); + + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return status; +} + +ib_api_status_t +osmt_get_service_by_name(IN osmtest_t * const p_osmt, + IN char *sr_name, + IN uint32_t rec_num, + OUT ib_service_record_t * p_out_rec) +{ + + ib_api_status_t status = IB_SUCCESS; + osmtest_req_context_t context; + osmv_query_req_t req; + ib_service_record_t *p_rec; + ib_svc_name_t service_name; + uint32_t num_recs = 0; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Getting service record: name: %s\n", sr_name); + + /* + * Do a blocking query for this record in the subnet. + * The result is returned in the result field of the caller's + * context structure. 
+ * + * The query structures are locals. + */ + memset(&req, 0, sizeof(req)); + memset(&context, 0, sizeof(context)); + + context.p_osmt = p_osmt; + + /* prepare the data used for this query */ + req.query_type = OSMV_QUERY_SVC_REC_BY_NAME; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = &context; + req.pfn_query_cb = osmtest_query_res_cb; + req.sm_key = 0; + + memset(service_name, 0, sizeof(service_name)); + memcpy(service_name, sr_name, (strlen(sr_name) + 1) * sizeof(char)); + req.p_query_input = service_name; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A0E: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = context.result.status; + num_recs = context.result.result_cnt; + + if (status != IB_SUCCESS) { + char mad_stat_err[256]; + + /* If the failure is due to IB_SA_MAD_STATUS_NO_RECORDS and rec_num is 0, + then this is fine */ + if (status == IB_REMOTE_ERROR) + strcpy(mad_stat_err, + ib_get_mad_status_str(osm_madw_get_mad_ptr + (context.result. 
+ p_result_madw))); + else + strcpy(mad_stat_err, ib_get_err_str(status)); + + if (status == IB_REMOTE_ERROR && + !strcmp(mad_stat_err, "IB_SA_MAD_STATUS_NO_RECORDS") && + rec_num == 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "IS EXPECTED ERROR ^^^^\n"); + status = IB_SUCCESS; + } else { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A0F: " + "Query failed: %s (%s)\n", + ib_get_err_str(status), mad_stat_err); + goto Exit; + } + } + + if (rec_num && num_recs != rec_num) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A10: " + "Unmatched number of records: expected: %d, received: %d\n", + rec_num, num_recs); + status = IB_REMOTE_ERROR; + goto Exit; + } + + p_rec = osmv_get_query_svc_rec(context.result.p_result_madw, 0); + *p_out_rec = *p_rec; + + if (num_recs) { + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Found service record: name: %s id: 0x%016" PRIx64 "\n", + sr_name, cl_ntoh64(p_rec->service_id)); + + osm_dump_service_record(&p_osmt->log, p_rec, OSM_LOG_DEBUG); + } + +Exit: + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Expected and found %d records\n", rec_num); + + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return status; +} + +#ifdef VENDOR_RMPP_SUPPORT +ib_api_status_t +osmt_get_all_services_and_check_names(IN osmtest_t * const p_osmt, + IN ib_svc_name_t * + const p_valid_service_names_arr, + IN uint8_t num_of_valid_names, + OUT uint32_t * num_services) +{ + ib_api_status_t status = IB_SUCCESS; + osmtest_req_context_t context; + osmv_query_req_t req; + ib_service_record_t *p_rec; + uint32_t num_recs = 0, i, j; + uint8_t *p_checked_names; + + OSM_LOG_ENTER(&p_osmt->log); + + /* Prepare tracker for the checked names */ + p_checked_names = + (uint8_t *) malloc(sizeof(uint8_t) * num_of_valid_names); + for (j = 0; j < num_of_valid_names; j++) { + p_checked_names[j] = 0; + } + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, 
"Getting all service records\n"); + + /* + * Do a blocking query for this record in the subnet. + * The result is returned in the result field of the caller's + * context structure. + * + * The query structures are locals. + */ + memset(&req, 0, sizeof(req)); + memset(&context, 0, sizeof(context)); + + context.p_osmt = p_osmt; + + req.query_type = OSMV_QUERY_ALL_SVC_RECS; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = &context; + req.pfn_query_cb = osmtest_query_res_cb; + req.sm_key = 0; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A12: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = context.result.status; + + if (status != IB_SUCCESS) { + if (status != IB_INVALID_PARAMETER) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A13: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + } + if (status == IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(osm_madw_get_mad_ptr + (context.result. 
+ p_result_madw))); + } + goto Exit; + } + + num_recs = context.result.result_cnt; + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Received %u records\n", num_recs); + + for (i = 0; i < num_recs; i++) { + p_rec = osmv_get_query_svc_rec(context.result.p_result_madw, i); + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Found service record: name: %s id: 0x%016" PRIx64 "\n", + p_rec->service_name, cl_ntoh64(p_rec->service_id)); + osm_dump_service_record(&p_osmt->log, p_rec, OSM_LOG_VERBOSE); + for (j = 0; j < num_of_valid_names; j++) { + /* If the service names exist in the record, mark it as checked (1) */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "-I- Comparing source name : >%s<, with record name : >%s<, idx : %d\n", + p_valid_service_names_arr[j], + p_rec->service_name, p_checked_names[j]); + if (strcmp + ((char *)p_valid_service_names_arr[j], + (char *)p_rec->service_name) == 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "-I- The service %s is valid\n", + p_valid_service_names_arr[j]); + p_checked_names[j] = 1; + break; + } + } + } + /* Check that all service names have been identified */ + for (j = 0; j < num_of_valid_names; j++) + if (p_checked_names[j] == 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A14: " + "Missing valid service: name: %s\n", + p_valid_service_names_arr[j]); + status = IB_ERROR; + goto Exit; + } + *num_services = num_recs; + +Exit: + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + free(p_checked_names); + OSM_LOG_EXIT(&p_osmt->log); + return status; +} +#endif + +ib_api_status_t +osmt_delete_service_by_name(IN osmtest_t * const p_osmt, + IN uint8_t IsServiceExist, + IN char *sr_name, IN uint32_t rec_num) +{ + osmv_query_req_t req; + osmv_user_query_t user; + osmtest_req_context_t context; + ib_service_record_t svc_rec; + ib_api_status_t status; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Trying to 
Delete service name: %s\n", sr_name); + + memset(&svc_rec, 0, sizeof(svc_rec)); + + status = osmt_get_service_by_name(p_osmt, sr_name, rec_num, &svc_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A15: " + "Failed to get service: name: %s\n", sr_name); + goto ExitNoDel; + } + + memset(&req, 0, sizeof(req)); + memset(&context, 0, sizeof(context)); + memset(&user, 0, sizeof(user)); + + /* set the new service record fields */ + memset(svc_rec.service_name, 0, sizeof(svc_rec.service_name)); + memcpy(svc_rec.service_name, sr_name, + (strlen(sr_name) + 1) * sizeof(char)); + + /* prepare the data used for this query */ + context.p_osmt = p_osmt; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.query_context = &context; + req.query_type = OSMV_QUERY_USER_DEFINED; /* basically a don't care here */ + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + req.flags = OSM_SA_FLAGS_SYNC; + req.sm_key = 0; + + user.method = IB_MAD_METHOD_DELETE; + user.attr_id = IB_MAD_ATTR_SERVICE_RECORD; + user.comp_mask = IB_SR_COMPMASK_SNAME; + user.p_attr = &svc_rec; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A16: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = context.result.status; + if (IsServiceExist) { + /* If IsServiceExist = 1 then we should succeed here */ + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A17: " + "ib_query failed (%s)\n", + ib_get_err_str(status)); + + if (status == IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 4A18: Remote error = %s\n", + ib_get_mad_status_str + (osm_madw_get_mad_ptr + (context.result.p_result_madw))); + } + } + } else { + /* If IsServiceExist = 0 then we should fail here */ + if (status == IB_SUCCESS) { + OSM_LOG(&p_osmt->log, 
OSM_LOG_ERROR, "ERR 4A19: " + "Succeeded to delete service: %s which " + "shouldn't exist", sr_name); + status = IB_ERROR; + } else { + /* The deletion should have failed, since the service_name + shouldn't exist. */ + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "IS EXPECTED ERROR ^^^^\n"); + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Failed to delete service_name: %s\n", sr_name); + status = IB_SUCCESS; + } + } + +Exit: + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + +ExitNoDel: + OSM_LOG_EXIT(&p_osmt->log); + return status; +} + +/* + * Run a complete service records flow: + * - register a service + * - register a service (with a lease period) + * - get a service by name + * - get all services / must be 2 + * - delete a service + * - get all services / must be 1 + * - wait for the lease to expire + * - get all services / must be 0 + * - get / set service by data + */ +ib_api_status_t osmt_run_service_records_flow(IN osmtest_t * const p_osmt) +{ + ib_service_record_t srv_rec; + ib_api_status_t status; + uint8_t instance, i; + uint8_t service_data8[16], service_key[16]; + ib_net16_t service_data16[8]; + ib_net32_t service_data32[4]; + ib_net64_t service_data64[2]; + uint64_t pid = getpid(); + uint64_t id[7]; + /* We use up to seven service names - we use the extra for bad flow */ + ib_svc_name_t service_name[7]; +#ifdef VENDOR_RMPP_SUPPORT + /* This array contain only the valid names after registering vs SM */ + ib_svc_name_t service_valid_names[3]; + uint32_t num_recs = 0; +#endif + + OSM_LOG_ENTER(&p_osmt->log); + + /* Init Service names */ + for (i = 0; i < 7; i++) { +#ifdef __WIN__ + uint64_t rand_val = rand() - (uint64_t) i; +#else + uint64_t rand_val = random() - (uint64_t) i; +#endif + id[i] = abs((int)(pid - rand_val)); + /* Just to be unique any place on any host */ + sprintf((char *)(service_name[i]), + "osmt.srvc.%" PRIu64 ".%" PRIu64, rand_val, pid); 
+ /*printf("-I- Service Name is : %s, ID is : 0x%" PRIx64 "\n",service_name[i],id[i]); */ + } + + status = osmt_register_service(p_osmt, cl_ntoh64(id[0]), /* IN ib_net64_t service_id, */ + IB_DEFAULT_PKEY, /* IN ib_net16_t service_pkey, */ + 0xFFFFFFFF, /* IN ib_net32_t service_lease, */ + 11, /* IN uint8_t service_key_lsb, */ + (char *)service_name[0] /* IN char *service_name */ + ); + if (status != IB_SUCCESS) { + goto Exit; + } + + status = osmt_register_service(p_osmt, cl_ntoh64(id[1]), /* IN ib_net64_t service_id, */ + IB_DEFAULT_PKEY, /* IN ib_net16_t service_pkey, */ + cl_hton32(0x00000004), /* IN ib_net32_t service_lease, */ + 11, /* IN uint8_t service_key_lsb, */ + (char *)service_name[1] /* IN char *service_name */ + ); + if (status != IB_SUCCESS) { + goto Exit; + } + + status = osmt_register_service(p_osmt, cl_ntoh64(id[2]), /* IN ib_net64_t service_id, */ + 0, /* IN ib_net16_t service_pkey, */ + 0xFFFFFFFF, /* IN ib_net32_t service_lease, */ + 11, /* Remove Service Record IN uint8_t service_key_lsb, */ + (char *)service_name[2] /* IN char *service_name */ + ); + + if (status != IB_SUCCESS) { + goto Exit; + } + + /* Generate 2 instances of service record with consecutive data */ + for (instance = 0; instance < 2; instance++) { + /* First, clear all arrays */ + memset(service_data8, 0, 16 * sizeof(uint8_t)); + memset(service_data16, 0, 8 * sizeof(uint16_t)); + memset(service_data32, 0, 4 * sizeof(uint32_t)); + memset(service_data64, 0, 2 * sizeof(uint64_t)); + service_data8[instance] = instance + 1; + service_data16[instance] = cl_hton16(instance + 2); + service_data32[instance] = cl_hton32(instance + 3); + service_data64[instance] = cl_hton64(instance + 4); + status = osmt_register_service_with_data(p_osmt, cl_ntoh64(id[3]), /* IN ib_net64_t service_id, */ + IB_DEFAULT_PKEY, /* IN ib_net16_t service_pkey, */ + cl_ntoh32(10), /* IN ib_net32_t service_lease, */ + 12, /* IN uint8_t service_key_lsb, */ + service_data8, service_data16, service_data32, 
service_data64, /* service data structures */ + (char *)service_name[3] /* IN char *service_name */ + ); + + if (status != IB_SUCCESS) { + goto Exit; + } + + } + + /* Trying to create service with zero key */ + memset(service_key, 0, 16 * sizeof(uint8_t)); + status = osmt_register_service_with_full_key(p_osmt, cl_ntoh64(id[5]), /* IN ib_net64_t service_id, */ + 0, /* IN ib_net16_t service_pkey, */ + 0xFFFFFFFF, /* IN ib_net32_t service_lease, */ + service_key, /* full service_key, */ + (char *)service_name[5] /* IN char *service_name */ + ); + + if (status != IB_SUCCESS) { + goto Exit; + } + + /* Now update it with Unique key and different service name */ + for (i = 0; i <= 15; i++) { + service_key[i] = i + 1; + } + status = osmt_register_service_with_full_key(p_osmt, cl_ntoh64(id[5]), /* IN ib_net64_t service_id, */ + 0, /* IN ib_net16_t service_pkey, */ + 0xFFFFFFFF, /* IN ib_net32_t service_lease, */ + service_key, /* full service_key, */ + (char *)service_name[6] /* IN char *service_name */ + ); + if (status != IB_SUCCESS) { + goto Exit; + } + + /* Let OpenSM handle it */ + usleep(100); + + /* Make sure service_name[0] exists */ + status = osmt_get_service_by_name(p_osmt, + (char *)service_name[0], 1, &srv_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A1A: " + "Fail to find service: name: %s\n", + (char *)service_name[0]); + status = IB_ERROR; + goto Exit; + } + + /* Make sure service_name[1] exists */ + status = osmt_get_service_by_name(p_osmt, + (char *)service_name[1], 1, &srv_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A1B: " + "Fail to find service: name: %s\n", + (char *)service_name[1]); + status = IB_ERROR; + goto Exit; + } + + /* Make sure service_name[2] exists */ + status = osmt_get_service_by_name(p_osmt, + (char *)service_name[2], 1, &srv_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A1C: " + "Fail to find service: name: %s\n", + (char 
*)service_name[2]); + status = IB_ERROR; + goto Exit; + } + + /* Make sure service_name[3] exists. */ + /* After 10 seconds the service should not exist: service_lease = 10 */ + status = osmt_get_service_by_name(p_osmt, + (char *)service_name[3], 1, &srv_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A1D: " + "Fail to find service: name: %s\n", + (char *)service_name[3]); + status = IB_ERROR; + goto Exit; + } + + sleep(10); + + status = osmt_get_service_by_name(p_osmt, + (char *)service_name[3], 0, &srv_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A1E: " + "Found service: name: %s that should have been " + "deleted due to service lease expiring\n", + (char *)service_name[3]); + status = IB_ERROR; + goto Exit; + } + + /* Check that for service: id[5] only one record exists */ + status = osmt_get_service_by_id(p_osmt, 1, cl_ntoh64(id[5]), &srv_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A1F: " + "Found number of records != 1 for " + "service: id: 0x%016" PRIx64 "\n", id[5]); + status = IB_ERROR; + goto Exit; + } + + /* Bad Flow of Get with invalid Service ID: id[6] */ + status = osmt_get_service_by_id(p_osmt, 0, cl_ntoh64(id[6]), &srv_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A20: " + "Found service: id: 0x%016" PRIx64 " " + "that is invalid\n", id[6]); + status = IB_ERROR; + goto Exit; + } + + /* Check by both id and service name: id[0], service_name[0] */ + status = osmt_get_service_by_id_and_name(p_osmt, 1, cl_ntoh64(id[0]), + (char *)service_name[0], + &srv_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A21: " + "Fail to find service: id: 0x%016" PRIx64 " " + "name: %s\n", id[0], (char *)service_name[0]); + status = IB_ERROR; + goto Exit; + } + + /* Check by both id and service name: id[5], service_name[6] */ + status = osmt_get_service_by_id_and_name(p_osmt, 1, cl_ntoh64(id[5]), + 
(char *)service_name[6], + &srv_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A22: " + "Fail to find service: id: 0x%016" PRIx64 " " + "name: %s\n", id[5], (char *)service_name[6]); + status = IB_ERROR; + goto Exit; + } + + /* Bad Flow of Get with invalid name(service_name[3]) and valid ID(id[0]) */ + status = osmt_get_service_by_id_and_name(p_osmt, 0, cl_ntoh64(id[0]), + (char *)service_name[3], + &srv_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A23: " + "Found service: id: 0x%016" PRIx64 + " name: %s which is an invalid service\n", + id[0], (char *)service_name[3]); + status = IB_ERROR; + goto Exit; + } + + /* Bad Flow of Get with unmatched name(service_name[5]) and id(id[3]) (both valid) */ + status = osmt_get_service_by_id_and_name(p_osmt, 0, cl_ntoh64(id[3]), + (char *)service_name[5], + &srv_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A24: " + "Found service: id: 0x%016" PRIx64 + " name: %s which is an invalid service\n", + id[3], (char *)service_name[5]); + status = IB_ERROR; + goto Exit; + } + + /* Bad Flow of Get with service name that doesn't exist (service_name[4]) */ + status = osmt_get_service_by_name(p_osmt, + (char *)service_name[4], 0, &srv_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A25: " + "Found service: name: %s that shouldn't exist\n", + (char *)service_name[4]); + status = IB_ERROR; + goto Exit; + } + + /* Bad Flow : Check that getting service_name[5] brings no records since another service + has been updated with the same ID (service_name[6]) */ + status = osmt_get_service_by_name(p_osmt, + (char *)service_name[5], 0, &srv_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A26: " + "Found service: name: %s which is an " + "invalid service\n", (char *)service_name[5]); + status = IB_ERROR; + goto Exit; + } + + /* Check that getting service_name[6] by name ONLY is valid, + 
since we do not support key&name association, also trusted queries */ + status = osmt_get_service_by_name(p_osmt, + (char *)service_name[6], 1, &srv_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A27: " + "Fail to find service: name: %s\n", + (char *)service_name[6]); + status = IB_ERROR; + goto Exit; + } + + /* Test Service Key */ + memset(service_key, 0, 16 * sizeof(uint8_t)); + + /* Check for service_name[5] with service_key=0 - the service shouldn't + exist with this name. */ + status = osmt_get_service_by_name_and_key(p_osmt, + (char *)service_name[5], + 0, service_key, &srv_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A28: " + "Found service: name: %s key:0 which is an " + "invalid service (wrong name)\n", + (char *)service_name[5]); + status = IB_ERROR; + goto Exit; + } + + /* Check for service_name[6] with service_key=0 - the service should + exist with different key. */ + status = osmt_get_service_by_name_and_key(p_osmt, + (char *)service_name[6], + 0, service_key, &srv_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A29: " + "Found service: name: %s key: 0 which is an " + "invalid service (wrong service_key)\n", + (char *)service_name[6]); + status = IB_ERROR; + goto Exit; + } + + /* check for service_name[6] with the correct service_key */ + for (i = 0; i <= 15; i++) + service_key[i] = i + 1; + status = osmt_get_service_by_name_and_key(p_osmt, + (char *)service_name[6], + 1, service_key, &srv_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A2A: " + "Fail to find service: name: %s with " + "correct service key\n", (char *)service_name[6]); + status = IB_ERROR; + goto Exit; + } +#ifdef VENDOR_RMPP_SUPPORT + /* These are the only service_names which are valid */ + memcpy(&service_valid_names[0], &service_name[0], sizeof(uint8_t) * 64); + memcpy(&service_valid_names[1], &service_name[2], sizeof(uint8_t) * 64); + 
memcpy(&service_valid_names[2], &service_name[6], sizeof(uint8_t) * 64); + + status = + osmt_get_all_services_and_check_names(p_osmt, service_valid_names, + 3, &num_recs); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A2B: " + "Fail to find all services that should exist\n"); + status = IB_ERROR; + goto Exit; + } +#endif + + /* Delete service_name[0] */ + status = osmt_delete_service_by_name(p_osmt, 1, + (char *)service_name[0], 1); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A2C: " + "Fail to delete service: name: %s\n", + (char *)service_name[0]); + status = IB_ERROR; + goto Exit; + } + + /* Make sure deletion of service_name[0] succeeded */ + status = osmt_get_service_by_name(p_osmt, + (char *)service_name[0], 0, &srv_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A2D: " + "Found service: name: %s that was deleted\n", + (char *)service_name[0]); + status = IB_ERROR; + goto Exit; + } + + /* Make sure service_name[1] doesn't exist (expired service lease) */ + status = osmt_get_service_by_name(p_osmt, + (char *)service_name[1], 0, &srv_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A2E: " + "Found service: name: %s that should have expired\n", + (char *)service_name[1]); + status = IB_ERROR; + goto Exit; + } + + /* Make sure service_name[2] exists */ + status = osmt_get_service_by_name(p_osmt, + (char *)service_name[2], 1, &srv_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A2F: " + "Fail to find service: name: %s\n", + (char *)service_name[2]); + status = IB_ERROR; + goto Exit; + } + + /* Bad Flow - try to delete non-existent service_name[5] */ + status = osmt_delete_service_by_name(p_osmt, 0, + (char *)service_name[5], 0); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A30: " + "Succeed to delete non-existent service: name: %s\n", + (char *)service_name[5]); + status = 
IB_ERROR; + goto Exit; + } + + /* Delete service_name[2] */ + status = osmt_delete_service_by_name(p_osmt, 1, + (char *)service_name[2], 1); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A31: " + "Fail to delete service: name: %s\n", + (char *)service_name[2]); + status = IB_ERROR; + goto Exit; + } + + /* Delete service_name[6] */ + status = osmt_delete_service_by_name(p_osmt, 1, + (char *)service_name[6], 1); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 4A32: " + "Failed to delete service name: %s\n", + (char *)service_name[6]); + status = IB_ERROR; + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return status; +} diff --git a/osmtest/osmt_slvl_vl_arb.c b/osmtest/osmt_slvl_vl_arb.c new file mode 100644 index 0000000..9e36c04 --- /dev/null +++ b/osmtest/osmt_slvl_vl_arb.c @@ -0,0 +1,543 @@ +/* + * Copyright (c) 2006-2008 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * Abstract:
+ * Implementation of SLtoVL and VL Arbitration testing flow.
+ * Top level is osmt_run_slvl_and_vlarb_records_flow:
+ * osmt_query_all_ports_vl_arb
+ * osmt_query_all_ports_slvl_map
+ *
+ */
+
+#ifndef __WIN__
+#include
+#endif
+#include
+#include
+#include
+#include
+#include "osmtest.h"
+
+/**********************************************************************
+ * Write one VL arbitration table record to fh as a text block:
+ * a header with lid / port_num / block, a row with the 32 entry
+ * indices, a row with the VL of each entry, a row with the weight of
+ * each entry, and a closing "END" line.
+ *
+ * The fprintf results are not checked; the function always returns
+ * IB_SUCCESS.
+ **********************************************************************/
+static ib_api_status_t
+osmtest_write_vl_arb_table(IN osmtest_t * const p_osmt,
+			   IN FILE * fh,
+			   IN const ib_vl_arb_table_record_t * const p_rec)
+{
+	/* unsigned so that it matches the %u conversion used below */
+	unsigned int i;
+
+	OSM_LOG_ENTER(&p_osmt->log);
+
+	fprintf(fh,
+		"VL_ARBITRATION_TABLE\n"
+		"lid 0x%X\n"
+		"port_num 0x%X\n"
+		"block 0x%X\n",
+		cl_ntoh16(p_rec->lid),
+		p_rec->port_num, p_rec->block_num);
+
+	/* column header: entry indices */
+	fprintf(fh, " ");
+	for (i = 0; i < 32; i++)
+		fprintf(fh, "| %-2u ", i);
+	fprintf(fh, "|\nVL: ");
+
+	/* VL of every entry */
+	for (i = 0; i < 32; i++)
+		fprintf(fh, "|0x%02X", p_rec->vl_arb_tbl.vl_entry[i].vl);
+	fprintf(fh, "|\nWEIGHT:");
+
+	/* weight of every entry */
+	for (i = 0; i < 32; i++)
+		fprintf(fh, "|0x%02X", p_rec->vl_arb_tbl.vl_entry[i].weight);
+	fprintf(fh, "|\nEND\n\n");
+
+	OSM_LOG_EXIT(&p_osmt->log);
+	return IB_SUCCESS;
+}
+
+/**********************************************************************
+ * GET A SINGLE VL ARBITRATION TABLE RECORD BY LID, PORT NUMBER AND BLOCK
+ *
+ * Issues an OSMV_QUERY_VLARB_BY_LID_PORT_BLOCK query through
+ * osmv_query_sa() with OSM_SA_FLAGS_SYNC set.  When the query succeeds
+ * and fh is not NULL, the first returned record is written to fh.
+ *
+ * Returns the status of osmv_query_sa() if that fails, otherwise the
+ * status stored in the query context by the completion callback.
+ * Any result MAD is handed back to the MAD pool before returning.
+ **********************************************************************/
+ib_api_status_t
+osmt_query_vl_arb(IN osmtest_t * const p_osmt,
+		  IN ib_net16_t const lid,
+		  IN uint8_t const port_num,
+		  IN uint8_t const block_num, IN FILE * fh)
+{
+	osmtest_req_context_t context;
+	ib_api_status_t status = IB_SUCCESS;
+	osmv_user_query_t user;
+	osmv_query_req_t req;
+	ib_vl_arb_table_record_t record, *p_rec;
+
+	OSM_LOG_ENTER(&p_osmt->log);
+
+	OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG,
+		"Getting VL_Arbitration Table for port with LID 0x%X Num:0x%X\n",
+		cl_ntoh16(lid), port_num);
+
+	/*
+	 * Do a blocking query for this record in the subnet.
+	 * The result is returned in the result field of the caller's
+	 * context structure.
+	 *
+	 * The query structures are locals.
+	 */
+	memset(&req, 0, sizeof(req));
+	memset(&user, 0, sizeof(user));
+	memset(&context, 0, sizeof(context));
+	/*
+	 * Only three fields of the record are filled in below; clear the
+	 * rest so no uninitialized stack bytes are handed to the query.
+	 */
+	memset(&record, 0, sizeof(record));
+
+	context.p_osmt = p_osmt;
+
+	record.lid = lid;
+	record.port_num = port_num;
+	record.block_num = block_num;
+	user.p_attr = &record;
+
+	req.query_type = OSMV_QUERY_VLARB_BY_LID_PORT_BLOCK;
+	req.timeout_ms = p_osmt->opt.transaction_timeout;
+	req.retry_cnt = p_osmt->opt.retry_count;
+	req.flags = OSM_SA_FLAGS_SYNC;
+	req.query_context = &context;
+	req.pfn_query_cb = osmtest_query_res_cb;
+	req.p_query_input = &user;
+	req.sm_key = 0;
+
+	if (p_osmt->opt.with_grh) {
+		req.with_grh = 1;
+		memcpy(&req.gid, &p_osmt->sm_port_gid, 16);
+	}
+
+	status = osmv_query_sa(p_osmt->h_bind, &req);
+
+	if (status != IB_SUCCESS) {
+		OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0405: "
+			"ib_query failed (%s)\n", ib_get_err_str(status));
+		goto Exit;
+	}
+
+	/* the callback stored the query outcome in the context */
+	status = context.result.status;
+
+	if (status != IB_SUCCESS) {
+		OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0466: "
+			"ib_query failed (%s)\n", ib_get_err_str(status));
+
+		if (status == IB_REMOTE_ERROR) {
+			OSM_LOG(&p_osmt->log, OSM_LOG_ERROR,
+				"Remote error = %s\n",
+				ib_get_mad_status_str(osm_madw_get_mad_ptr
+						      (context.result.
+						       p_result_madw)));
+		}
+		goto Exit;
+	}
+
+	/* ok it worked */
+	p_rec = osmv_get_query_result(context.result.p_result_madw, 0);
+	if (fh) {
+		osmtest_write_vl_arb_table(p_osmt, fh, p_rec);
+	}
+
+Exit:
+	/*
+	 * Return the IB query MAD to the pool as necessary.
+	 */
+	if (context.result.p_result_madw != NULL) {
+		osm_mad_pool_put(&p_osmt->mad_pool,
+				 context.result.p_result_madw);
+		context.result.p_result_madw = NULL;
+	}
+
+	OSM_LOG_EXIT(&p_osmt->log);
+	return (status);
+}
+
+/**********************************************************************
+ * Query VL arbitration blocks 1..4 for every port found in
+ * p_osmt->exp_subn.port_key_tbl, writing each result to fh.
+ *
+ * A failing query is logged and remembered, but the walk continues.
+ * Returns IB_ERROR if any query failed, IB_SUCCESS otherwise.
+ **********************************************************************/
+static ib_api_status_t
+osmt_query_all_ports_vl_arb(IN osmtest_t * const p_osmt, IN FILE * fh)
+{
+	ib_api_status_t status = IB_SUCCESS;
+	cl_qmap_t *p_tbl;
+	port_t *p_src_port;
+	uint8_t block, anyErr = 0;
+
+	OSM_LOG_ENTER(&p_osmt->log);
+
+	OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE,
+		"Obtaining ALL Ports VL Arbitration Tables\n");
+
+	/*
+	 * Go over all ports that exist in the subnet
+	 * get the relevant VLarbs
+	 */
+	p_tbl = &p_osmt->exp_subn.port_key_tbl;
+
+	for (p_src_port = (port_t *) cl_qmap_head(p_tbl);
+	     p_src_port != (port_t *) cl_qmap_end(p_tbl);
+	     p_src_port = (port_t *) cl_qmap_next(&p_src_port->map_item)) {
+
+		/* HACK we use capability_mask to know diff a CA port from switch port */
+		if (p_src_port->rec.port_info.capability_mask) {
+			/* this is an hca port */
+			for (block = 1; block <= 4; block++) {
+				/* NOTE to comply we must set port number to 0 and the SA should figure it out */
+				/* since it is a CA port */
+				status =
+				    osmt_query_vl_arb(p_osmt,
+						      p_src_port->rec.lid, 0,
+						      block, fh);
+				if (status != IB_SUCCESS) {
+					OSM_LOG(&p_osmt->log, OSM_LOG_ERROR,
+						"ERR 0467: "
+						"Failed to get Lid:0x%X Port:0x%X (%s)\n",
+						cl_ntoh16(p_src_port->rec.lid),
+						0, ib_get_err_str(status));
+					anyErr = 1;
+				}
+			}
+		} else {
+			/* this is a switch port */
+			for (block = 1; block <= 4; block++) {
+				status =
+				    osmt_query_vl_arb(p_osmt,
+						      p_src_port->rec.lid,
+						      p_src_port->rec.port_num,
+						      block, fh);
+				if (status != IB_SUCCESS) {
+					OSM_LOG(&p_osmt->log, OSM_LOG_ERROR,
+						"ERR 0468: "
+						"Failed to get Lid:0x%X Port:0x%X (%s)\n",
+						cl_ntoh16(p_src_port->rec.lid),
+						p_src_port->rec.port_num,
+						ib_get_err_str(status));
+					anyErr = 1;
+				}
+			}
+		}
+	}
+
+	OSM_LOG_EXIT(&p_osmt->log);
+	/* every failure set anyErr, so it alone decides the result */
+	return anyErr ? IB_ERROR : IB_SUCCESS;
+}
+
+/******************************************************************************* + SLtoVL +*******************************************************************************/ +static ib_api_status_t +osmtest_write_slvl_map_table(IN osmtest_t * const p_osmt, + IN FILE * fh, + IN const ib_slvl_table_record_t * const p_rec) +{ + int i; + cl_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(&p_osmt->log); + + fprintf(fh, + "SLtoVL_MAP_TABLE\n" + "lid 0x%X\n" + "in_port_num 0x%X\n" + "out_port_num 0x%X\n", + cl_ntoh16(p_rec->lid), + p_rec->in_port_num, p_rec->out_port_num); + + fprintf(fh, "SL:"); + for (i = 0; i < 16; i++) + fprintf(fh, "| %-2u ", i); + fprintf(fh, "|\nVL:"); + + for (i = 0; i < 16; i++) + fprintf(fh, "| 0x%01X ", + ib_slvl_table_get(&p_rec->slvl_tbl, (uint8_t) i)); + fprintf(fh, "|\nEND\n\n"); + + /* Exit: */ + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +/********************************************************************** + * GET A SINGLE PORT INFO BY NODE LID AND PORT NUMBER + **********************************************************************/ +ib_api_status_t +osmt_query_slvl_map(IN osmtest_t * const p_osmt, + IN ib_net16_t const lid, + IN uint8_t const out_port_num, + IN uint8_t const in_port_num, IN FILE * fh) +{ + osmtest_req_context_t context; + ib_api_status_t status = IB_SUCCESS; + osmv_user_query_t user; + osmv_query_req_t req; + ib_slvl_table_record_t record, *p_rec; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "Getting SLtoVL Map Table for out-port with LID 0x%X Num:0x%X from In-Port:0x%X\n", + cl_ntoh16(lid), out_port_num, in_port_num); + + /* + * Do a blocking query for this record in the subnet. + * The result is returned in the result field of the caller's + * context structure. + * + * The query structures are locals. 
+ */ + memset(&req, 0, sizeof(req)); + memset(&user, 0, sizeof(user)); + memset(&context, 0, sizeof(context)); + + context.p_osmt = p_osmt; + + record.lid = lid; + record.in_port_num = in_port_num; + record.out_port_num = out_port_num; + user.p_attr = &record; + + req.query_type = OSMV_QUERY_SLVL_BY_LID_AND_PORTS; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = &context; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + req.sm_key = 0; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0469: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = context.result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0470: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + + if (status == IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(osm_madw_get_mad_ptr + (context.result. + p_result_madw))); + } + goto Exit; + } + + /* ok it worked */ + p_rec = osmv_get_query_result(context.result.p_result_madw, 0); + if (fh) { + osmtest_write_slvl_map_table(p_osmt, fh, p_rec); + } + +Exit: + /* + * Return the IB query MAD to the pool as necessary. 
+ */ + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmt_query_all_ports_slvl_map(IN osmtest_t * const p_osmt, IN FILE * fh) +{ + cl_status_t status = CL_SUCCESS; + cl_qmap_t *p_tbl; + port_t *p_src_port; + uint8_t in_port, anyErr = 0, num_ports; + node_t *p_node; + const cl_qmap_t *p_node_tbl; + + OSM_LOG_ENTER(&p_osmt->log); + + /* + * Go over all ports that exist in the subnet + * get the relevant SLtoVLs + */ + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Obtaining ALL Ports (to other ports) SLtoVL Maps\n"); + + p_tbl = &p_osmt->exp_subn.port_key_tbl; + p_node_tbl = &p_osmt->exp_subn.node_lid_tbl; + + p_src_port = (port_t *) cl_qmap_head(p_tbl); + + while (p_src_port != (port_t *) cl_qmap_end(p_tbl)) { + + /* HACK we use capability_mask to know diff a CA port from switch port */ + if (p_src_port->rec.port_info.capability_mask) { + /* this is an hca port */ + /* NOTE to comply we must set port number to 0 and the SA should figure it out */ + /* since it is a CA port */ + status = + osmt_query_slvl_map(p_osmt, p_src_port->rec.lid, 0, + 0, fh); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0471: " + "Failed to get Lid:0x%X In-Port:0x%X Out-Port:0x%X(%s)\n", + cl_ntoh16(p_src_port->rec.lid), 0, 0, + ib_get_err_str(status)); + anyErr = 1; + } + } else { + /* this is a switch port */ + /* get the node */ + p_node = + (node_t *) cl_qmap_get(p_node_tbl, + p_src_port->rec.lid); + if (p_node == (node_t *) cl_qmap_end(p_node_tbl)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0472: " + "Failed to get Node by Lid:0x%X\n", + p_src_port->rec.lid); + goto Exit; + } + + num_ports = p_node->rec.node_info.num_ports; + + for (in_port = 1; in_port <= num_ports; in_port++) { + status = + osmt_query_slvl_map(p_osmt, + p_src_port->rec.lid, + p_src_port->rec. 
+ port_num, in_port, fh); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 0473: " + "Failed to get Lid:0x%X In-Port:0x%X Out-Port:0x%X (%s)\n", + cl_ntoh16(p_src_port->rec.lid), + p_src_port->rec.port_num, + in_port, + ib_get_err_str(status)); + anyErr = 1; + } + } + } + + p_src_port = (port_t *) cl_qmap_next(&p_src_port->map_item); + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + if (anyErr) { + status = IB_ERROR; + } + return (status); +} + +/* + * Run a vl arbitration queries and sl2vl maps queries flow: + * Good flow: + * - for each physical port on the network - obtain the VL Arb + * - for each CA physical port obtain its SLtoVL Map + * - for each SW physical port (out) obtain the SLtoVL Map to each other port + * BAD flow: + * - Try get with multiple results + * - Try gettable + * - Try providing non existing port + */ +ib_api_status_t +osmt_run_slvl_and_vlarb_records_flow(IN osmtest_t * const p_osmt) +{ + ib_api_status_t status; + FILE *fh; + ib_net16_t test_lid; + uint8_t lmc; + + OSM_LOG_ENTER(&p_osmt->log); + + fh = fopen("qos.txt", "w"); + if (!fh) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0474: " + "Failed to open file qos.txt for writing\n"); + status = IB_ERROR; + goto Exit; + } + + /* go over all ports in the subnet */ + status = osmt_query_all_ports_vl_arb(p_osmt, fh); + if (status != IB_SUCCESS) { + goto Exit; + } + + status = osmt_query_all_ports_slvl_map(p_osmt, fh); + if (status != IB_SUCCESS) { + goto Exit; + } + + /* If LMC > 0, test non base LID SA QoS Record requests */ + status = + osmtest_get_local_port_lmc(p_osmt, p_osmt->local_port.lid, &lmc); + if (status != IB_SUCCESS) + goto Exit; + + if (lmc != 0) { + test_lid = cl_ntoh16(p_osmt->local_port.lid + 1); + + status = osmt_query_vl_arb(p_osmt, test_lid, 0, 1, NULL); + if (status != IB_SUCCESS) + goto Exit; + + status = osmt_query_slvl_map(p_osmt, test_lid, 0, 0, NULL); + if (status != IB_SUCCESS) + goto Exit; + } + +Exit: + if (fh) + fclose(fh); + 
OSM_LOG_EXIT(&p_osmt->log); + return status; +} diff --git a/osmtest/osmtest.c b/osmtest/osmtest.c new file mode 100644 index 0000000..6e39d9c --- /dev/null +++ b/osmtest/osmtest.c @@ -0,0 +1,7639 @@ +/* + * Copyright (c) 2006-2009 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2007 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * Copyright (c) 2009,2010 HNR Consulting. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* TODO : Check why we dont free the cl_qmap_items we store when reading DB */ + +/* + * Abstract: + * Implementation of osmtest_t. 
+ * This object represents the OSMTest Test object.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "osmtest.h"
+
+#define POOL_MIN_ITEMS 64
+#define MAX_LOCAL_IBPORTS 64
+
+typedef struct _osmtest_sm_info_rec {
+	ib_net64_t sm_guid;
+	ib_net16_t lid;
+	uint8_t priority;
+	uint8_t sm_state;
+} osmtest_sm_info_rec_t;
+
+typedef struct _osmtest_inform_info {
+	boolean_t subscribe;
+	ib_net32_t qpn;
+	ib_net16_t trap;
+} osmtest_inform_info_t;
+
+typedef struct _osmtest_inform_info_rec {
+	ib_gid_t subscriber_gid;
+	ib_net16_t subscriber_enum;
+} osmtest_inform_info_rec_t;
+
+/* Identifiers of the keywords listed in token_array below. */
+typedef enum _osmtest_token_val {
+	OSMTEST_TOKEN_COMMENT = 0,
+	OSMTEST_TOKEN_END,
+	OSMTEST_TOKEN_DEFINE_NODE,
+	OSMTEST_TOKEN_DEFINE_PORT,
+	OSMTEST_TOKEN_DEFINE_PATH,
+	OSMTEST_TOKEN_DEFINE_LINK,
+	OSMTEST_TOKEN_LID,
+	OSMTEST_TOKEN_BASE_VERSION,
+	OSMTEST_TOKEN_CLASS_VERSION,
+	OSMTEST_TOKEN_NODE_TYPE,
+	OSMTEST_TOKEN_NUM_PORTS,
+	OSMTEST_TOKEN_SYS_GUID,
+	OSMTEST_TOKEN_NODE_GUID,
+	OSMTEST_TOKEN_PORT_GUID,
+	OSMTEST_TOKEN_PARTITION_CAP,
+	OSMTEST_TOKEN_DEVICE_ID,
+	OSMTEST_TOKEN_REVISION,
+	OSMTEST_TOKEN_PORT_NUM,
+	OSMTEST_TOKEN_VENDOR_ID,
+	OSMTEST_TOKEN_DGID,
+	OSMTEST_TOKEN_SGID,
+	OSMTEST_TOKEN_DLID,
+	OSMTEST_TOKEN_SLID,
+	OSMTEST_TOKEN_HOP_FLOW_RAW,
+	OSMTEST_TOKEN_TCLASS,
+	OSMTEST_TOKEN_NUM_PATH,
+	OSMTEST_TOKEN_PKEY,
+	OSMTEST_TOKEN_SL,
+	OSMTEST_TOKEN_RATE,
+	OSMTEST_TOKEN_PKT_LIFE,
+	OSMTEST_TOKEN_PREFERENCE,
+	OSMTEST_TOKEN_MKEY,
+	OSMTEST_TOKEN_SUBN_PREF,
+	OSMTEST_TOKEN_BASE_LID,
+	OSMTEST_TOKEN_SM_BASE_LID,
+	OSMTEST_TOKEN_CAP_MASK,
+	OSMTEST_TOKEN_DIAG_CODE,
+	OSMTEST_TOKEN_MKEY_LEASE_PER,
+	OSMTEST_TOKEN_LOC_PORT_NUM,
+	OSMTEST_TOKEN_LINK_WID_EN,
+	OSMTEST_TOKEN_LINK_WID_SUP,
+	OSMTEST_TOKEN_LINK_WID_ACT,
+	OSMTEST_TOKEN_LINK_SPEED_SUP,
+	OSMTEST_TOKEN_PORT_STATE,
+	OSMTEST_TOKEN_STATE_INFO2,
+	OSMTEST_TOKEN_MKEY_PROT_BITS,
+	OSMTEST_TOKEN_LMC,
+	OSMTEST_TOKEN_LINK_SPEED,
+	OSMTEST_TOKEN_MTU_SMSL,
+	OSMTEST_TOKEN_VL_CAP,
+	OSMTEST_TOKEN_VL_HIGH_LIMIT,
+	OSMTEST_TOKEN_VL_ARB_HIGH_CAP,
+	OSMTEST_TOKEN_VL_ARB_LOW_CAP,
+	OSMTEST_TOKEN_MTU_CAP,
+	OSMTEST_TOKEN_VL_STALL_LIFE,
+	OSMTEST_TOKEN_VL_ENFORCE,
+	OSMTEST_TOKEN_MKEY_VIOL,
+	OSMTEST_TOKEN_PKEY_VIOL,
+	OSMTEST_TOKEN_QKEY_VIOL,
+	OSMTEST_TOKEN_GUID_CAP,
+	OSMTEST_TOKEN_SUBN_TIMEOUT,
+	OSMTEST_TOKEN_RESP_TIME_VAL,
+	OSMTEST_TOKEN_ERR_THRESHOLD,
+	OSMTEST_TOKEN_MTU,
+	OSMTEST_TOKEN_FROMLID,
+	OSMTEST_TOKEN_FROMPORTNUM,
+	OSMTEST_TOKEN_TOPORTNUM,
+	OSMTEST_TOKEN_TOLID,
+	OSMTEST_TOKEN_UNKNOWN
+} osmtest_token_val_t;
+
+/* One keyword: its identifier, the stored length and the text itself. */
+typedef struct _osmtest_token {
+	osmtest_token_val_t val;
+	size_t str_size;
+	const char *str;
+} osmtest_token_t;
+
+/*
+ * Keyword table, terminated by the OSMTEST_TOKEN_UNKNOWN entry.
+ *
+ * NOTE(review): str_size is shorter than the string for "state_info2"
+ * (10 vs 11 characters) and "M_KEY" (4 vs 5 characters).  The code that
+ * consumes str_size is outside this section, so the values are left
+ * untouched - confirm whether the short lengths are intentional.
+ */
+static const osmtest_token_t token_array[] = {
+	{OSMTEST_TOKEN_COMMENT, 1, "#"},
+	{OSMTEST_TOKEN_END, 3, "END"},
+	{OSMTEST_TOKEN_DEFINE_NODE, 11, "DEFINE_NODE"},
+	{OSMTEST_TOKEN_DEFINE_PORT, 11, "DEFINE_PORT"},
+	{OSMTEST_TOKEN_DEFINE_PATH, 11, "DEFINE_PATH"},
+	{OSMTEST_TOKEN_DEFINE_LINK, 11, "DEFINE_LINK"},
+	{OSMTEST_TOKEN_LID, 3, "LID"},
+	{OSMTEST_TOKEN_BASE_VERSION, 12, "BASE_VERSION"},
+	{OSMTEST_TOKEN_CLASS_VERSION, 13, "CLASS_VERSION"},
+	{OSMTEST_TOKEN_NODE_TYPE, 9, "NODE_TYPE"},
+	{OSMTEST_TOKEN_NUM_PORTS, 9, "NUM_PORTS"},
+	{OSMTEST_TOKEN_SYS_GUID, 8, "SYS_GUID"},
+	{OSMTEST_TOKEN_NODE_GUID, 9, "NODE_GUID"},
+	{OSMTEST_TOKEN_PORT_GUID, 9, "PORT_GUID"},
+	{OSMTEST_TOKEN_PARTITION_CAP, 13, "PARTITION_CAP"},
+	{OSMTEST_TOKEN_DEVICE_ID, 9, "DEVICE_ID"},
+	{OSMTEST_TOKEN_REVISION, 8, "REVISION"},
+	{OSMTEST_TOKEN_PORT_NUM, 8, "PORT_NUM"},
+	{OSMTEST_TOKEN_VENDOR_ID, 9, "VENDOR_ID"},
+	{OSMTEST_TOKEN_DGID, 4, "DGID"},
+	{OSMTEST_TOKEN_SGID, 4, "SGID"},
+	{OSMTEST_TOKEN_DLID, 4, "DLID"},
+	{OSMTEST_TOKEN_SLID, 4, "SLID"},
+	{OSMTEST_TOKEN_HOP_FLOW_RAW, 12, "HOP_FLOW_RAW"},
+	{OSMTEST_TOKEN_TCLASS, 6, "TCLASS"},
+	{OSMTEST_TOKEN_NUM_PATH, 8, "NUM_PATH"},
+	{OSMTEST_TOKEN_PKEY, 4, "PKEY"},
+	{OSMTEST_TOKEN_SL, 2, "SL"},
+	{OSMTEST_TOKEN_RATE, 4, "RATE"},
+	{OSMTEST_TOKEN_PKT_LIFE, 8, "PKT_LIFE"},
+	{OSMTEST_TOKEN_PREFERENCE, 10, "PREFERENCE"},
+	{OSMTEST_TOKEN_SUBN_PREF, 13, "SUBNET_PREFIX"},
+	{OSMTEST_TOKEN_BASE_LID, 8, "BASE_LID"},
+	{OSMTEST_TOKEN_SM_BASE_LID, 18, "MASTER_SM_BASE_LID"},
+	{OSMTEST_TOKEN_CAP_MASK, 15, "CAPABILITY_MASK"},
+	{OSMTEST_TOKEN_DIAG_CODE, 9, "DIAG_CODE"},
+	{OSMTEST_TOKEN_MKEY_LEASE_PER, 18, "m_key_lease_period"},
+	{OSMTEST_TOKEN_LOC_PORT_NUM, 14, "local_port_num"},
+	{OSMTEST_TOKEN_LINK_WID_EN, 18, "link_width_enabled"},
+	{OSMTEST_TOKEN_LINK_WID_SUP, 20, "link_width_supported"},
+	{OSMTEST_TOKEN_LINK_WID_ACT, 17, "link_width_active"},
+	{OSMTEST_TOKEN_LINK_SPEED_SUP, 20, "link_speed_supported"},
+	{OSMTEST_TOKEN_PORT_STATE, 10, "port_state"},
+	{OSMTEST_TOKEN_STATE_INFO2, 10, "state_info2"},
+	{OSMTEST_TOKEN_MKEY_PROT_BITS, 3, "mpb"},
+	{OSMTEST_TOKEN_LMC, 3, "lmc"},
+	{OSMTEST_TOKEN_LINK_SPEED, 10, "link_speed"},
+	{OSMTEST_TOKEN_MTU_SMSL, 8, "mtu_smsl"},
+	{OSMTEST_TOKEN_VL_CAP, 6, "vl_cap"},
+	{OSMTEST_TOKEN_VL_HIGH_LIMIT, 13, "vl_high_limit"},
+	{OSMTEST_TOKEN_VL_ARB_HIGH_CAP, 15, "vl_arb_high_cap"},
+	{OSMTEST_TOKEN_VL_ARB_LOW_CAP, 14, "vl_arb_low_cap"},
+	{OSMTEST_TOKEN_MTU_CAP, 7, "mtu_cap"},
+	{OSMTEST_TOKEN_VL_STALL_LIFE, 13, "vl_stall_life"},
+	{OSMTEST_TOKEN_VL_ENFORCE, 10, "vl_enforce"},
+	{OSMTEST_TOKEN_MKEY_VIOL, 16, "m_key_violations"},
+	{OSMTEST_TOKEN_PKEY_VIOL, 16, "p_key_violations"},
+	{OSMTEST_TOKEN_QKEY_VIOL, 16, "q_key_violations"},
+	{OSMTEST_TOKEN_GUID_CAP, 8, "guid_cap"},
+	{OSMTEST_TOKEN_SUBN_TIMEOUT, 14, "subnet_timeout"},
+	{OSMTEST_TOKEN_RESP_TIME_VAL, 15, "resp_time_value"},
+	{OSMTEST_TOKEN_ERR_THRESHOLD, 15, "error_threshold"},
+	{OSMTEST_TOKEN_MKEY, 4, "M_KEY"},	/* must be after the other mkey... tokens. */
+	{OSMTEST_TOKEN_MTU, 3, "MTU"},	/* must be after the other mtu... tokens. */
+	{OSMTEST_TOKEN_FROMLID, 8, "from_lid"},
+	{OSMTEST_TOKEN_FROMPORTNUM, 13, "from_port_num"},
+	{OSMTEST_TOKEN_TOPORTNUM, 11, "to_port_num"},
+	{OSMTEST_TOKEN_TOLID, 6, "to_lid"},
+	{OSMTEST_TOKEN_UNKNOWN, 0, ""}	/* must be last entry */
+};
+
+static const char ib_mad_status_str_busy[] = "IB_MAD_STATUS_BUSY";
+static const char ib_mad_status_str_redirect[] = "IB_MAD_STATUS_REDIRECT";
+static const char ib_mad_status_str_unsup_class_ver[] =
+    "IB_MAD_STATUS_UNSUP_CLASS_VER";
+static const char ib_mad_status_str_unsup_method[] =
+    "IB_MAD_STATUS_UNSUP_METHOD";
+static const char ib_mad_status_str_unsup_method_attr[] =
+    "IB_MAD_STATUS_UNSUP_METHOD_ATTR";
+static const char ib_mad_status_str_invalid_field[] =
+    "IB_MAD_STATUS_INVALID_FIELD";
+static const char ib_mad_status_str_no_resources[] =
+    "IB_SA_MAD_STATUS_NO_RESOURCES";
+static const char ib_mad_status_str_req_invalid[] =
+    "IB_SA_MAD_STATUS_REQ_INVALID";
+static const char ib_mad_status_str_no_records[] =
+    "IB_SA_MAD_STATUS_NO_RECORDS";
+static const char ib_mad_status_str_too_many_records[] =
+    "IB_SA_MAD_STATUS_TOO_MANY_RECORDS";
+static const char ib_mad_status_str_invalid_gid[] =
+    "IB_SA_MAD_STATUS_INVALID_GID";
+static const char ib_mad_status_str_insuf_comps[] =
+    "IB_SA_MAD_STATUS_INSUF_COMPS";
+static const char generic_or_str[] = " | ";
+
+static ib_api_status_t osmtest_create_db(IN osmtest_t * const p_osmt);
+
+/*
+ * Append one status name to the NUL terminated string in p_line,
+ * preceded by " | " unless it is the first name, and clear *p_first.
+ */
+static void
+osmtest_mad_status_append(IN OUT char *const p_line,
+			  IN OUT boolean_t * const p_first,
+			  IN const char *const p_name)
+{
+	if (!*p_first)
+		strcat(p_line, generic_or_str);
+	*p_first = FALSE;
+	strcat(p_line, p_name);
+}
+
+/*
+ * Translate the status field of a MAD into a readable string.
+ *
+ * Returns "IB_SUCCESS" when the masked status is zero, otherwise the
+ * names of all matching status values joined by " | ".
+ *
+ * The result lives in a static buffer: it is overwritten by the next
+ * call and must not be freed by the caller.
+ *
+ * The BUSY name used to advance the write offset past its terminating
+ * NUL and did not count as the "first" name, so anything reported
+ * together with BUSY was lost or appended after stale buffer content.
+ * All names now go through the same append helper.
+ */
+const char *ib_get_mad_status_str(IN const ib_mad_t * const p_mad)
+{
+	static char line[512];
+	ib_net16_t status;
+	boolean_t first = TRUE;
+
+	line[0] = '\0';
+
+	status = (ib_net16_t) (p_mad->status & IB_SMP_STATUS_MASK);
+
+	if (status == 0) {
+		strcat(line, "IB_SUCCESS");
+		return (line);
+	}
+
+	/* individual flag bits */
+	if (status & IB_MAD_STATUS_BUSY)
+		osmtest_mad_status_append(line, &first,
+					  ib_mad_status_str_busy);
+	if (status & IB_MAD_STATUS_REDIRECT)
+		osmtest_mad_status_append(line, &first,
+					  ib_mad_status_str_redirect);
+
+	/* values compared under the IB_MAD_STATUS_INVALID_FIELD mask */
+	if ((status & IB_MAD_STATUS_INVALID_FIELD) ==
+	    IB_MAD_STATUS_UNSUP_CLASS_VER)
+		osmtest_mad_status_append(line, &first,
+					  ib_mad_status_str_unsup_class_ver);
+	if ((status & IB_MAD_STATUS_INVALID_FIELD) ==
+	    IB_MAD_STATUS_UNSUP_METHOD)
+		osmtest_mad_status_append(line, &first,
+					  ib_mad_status_str_unsup_method);
+	if ((status & IB_MAD_STATUS_INVALID_FIELD) ==
+	    IB_MAD_STATUS_UNSUP_METHOD_ATTR)
+		osmtest_mad_status_append(line, &first,
+					  ib_mad_status_str_unsup_method_attr);
+	if ((status & IB_MAD_STATUS_INVALID_FIELD) ==
+	    IB_MAD_STATUS_INVALID_FIELD)
+		osmtest_mad_status_append(line, &first,
+					  ib_mad_status_str_invalid_field);
+
+	/* values compared under the IB_MAD_STATUS_CLASS_MASK mask */
+	if ((status & IB_MAD_STATUS_CLASS_MASK) ==
+	    IB_SA_MAD_STATUS_NO_RESOURCES)
+		osmtest_mad_status_append(line, &first,
+					  ib_mad_status_str_no_resources);
+	if ((status & IB_MAD_STATUS_CLASS_MASK) == IB_SA_MAD_STATUS_REQ_INVALID)
+		osmtest_mad_status_append(line, &first,
+					  ib_mad_status_str_req_invalid);
+	if ((status & IB_MAD_STATUS_CLASS_MASK) == IB_SA_MAD_STATUS_NO_RECORDS)
+		osmtest_mad_status_append(line, &first,
+					  ib_mad_status_str_no_records);
+	if ((status & IB_MAD_STATUS_CLASS_MASK) ==
+	    IB_SA_MAD_STATUS_TOO_MANY_RECORDS)
+		osmtest_mad_status_append(line, &first,
+					  ib_mad_status_str_too_many_records);
+	if ((status & IB_MAD_STATUS_CLASS_MASK) == IB_SA_MAD_STATUS_INVALID_GID)
+		osmtest_mad_status_append(line, &first,
+					  ib_mad_status_str_invalid_gid);
+	if ((status & IB_MAD_STATUS_CLASS_MASK) == IB_SA_MAD_STATUS_INSUF_COMPS)
+		osmtest_mad_status_append(line, &first,
+					  ib_mad_status_str_insuf_comps);
+
+	return (line);
+}
+
+/*
+ * Put every lookup table of the subnet object into its initial state
+ * by calling cl_qmap_init() on it.
+ */
+void subnet_construct(IN subnet_t * const p_subn)
+{
+	cl_qmap_init(&p_subn->link_tbl);
+	cl_qmap_init(&p_subn->node_lid_tbl);
+	cl_qmap_init(&p_subn->node_guid_tbl);
+	cl_qmap_init(&p_subn->mgrp_mlid_tbl);
+
+	/* NO WAY TO HAVE UNIQUE PORT BY LID OR GUID */
+	/* cl_qmap_init( &p_subn->port_lid_tbl ); */
+	/* cl_qmap_init( &p_subn->port_guid_tbl ); */
+
+	/* port key is a lid and num pair */
+	cl_qmap_init(&p_subn->port_key_tbl);
+	cl_qmap_init(&p_subn->path_tbl);
+}
+
+/*
+ * Initialize a subnet object.  This only runs subnet_construct() and
+ * always reports success.
+ */
+cl_status_t subnet_init(IN subnet_t * const p_subn)
+{
+	subnet_construct(p_subn);
+
+	return (IB_SUCCESS);
+}
+
+void
osmtest_construct(IN osmtest_t * const p_osmt)
+{
+	/*
+	 * Zero the whole object, then construct the log and the
+	 * expected-subnet tables so that the object can be initialized
+	 * by osmtest_init().
+	 */
+	memset(p_osmt, 0, sizeof(*p_osmt));
+	osm_log_construct(&p_osmt->log);
+	subnet_construct(&p_osmt->exp_subn);
+}
+
+/*
+ * Walk a qmap from head to end and free() every item.  The successor
+ * is fetched before the current item is released.  The map itself is
+ * not modified.
+ */
+static void osmtest_free_qmap_items(IN cl_qmap_t * const p_map)
+{
+	cl_map_item_t *p_item, *p_next_item;
+
+	p_next_item = cl_qmap_head(p_map);
+	while (p_next_item != cl_qmap_end(p_map)) {
+		p_item = p_next_item;
+		p_next_item = cl_qmap_next(p_item);
+		free(p_item);
+	}
+}
+
+/*
+ * Release what the test object owns: the vendor object (not on
+ * Windows), the port and node pools, the items stored in the
+ * expected-subnet tables and finally the log.
+ */
+void osmtest_destroy(IN osmtest_t * const p_osmt)
+{
+	/* Currently there is a problem with IBAL exit flow - memory overrun,
+	   so bypass vendor deletion - it will be cleaned by the Windows OS */
+#ifndef __WIN__
+	if (p_osmt->p_vendor)
+		osm_vendor_delete(&p_osmt->p_vendor);
+#endif
+
+	cl_qpool_destroy(&p_osmt->port_pool);
+	cl_qpool_destroy(&p_osmt->node_pool);
+
+	/* destroy the qmap tables */
+	osmtest_free_qmap_items(&p_osmt->exp_subn.link_tbl);
+	osmtest_free_qmap_items(&p_osmt->exp_subn.mgrp_mlid_tbl);
+	osmtest_free_qmap_items(&p_osmt->exp_subn.node_guid_tbl);
+	osmtest_free_qmap_items(&p_osmt->exp_subn.node_lid_tbl);
+	osmtest_free_qmap_items(&p_osmt->exp_subn.path_tbl);
+	osmtest_free_qmap_items(&p_osmt->exp_subn.port_key_tbl);
+
+	osm_log_destroy(&p_osmt->log);
+}
+
+/*
+ * Initialize the test object: construct it, open the log, copy the
+ * options, set up the node and port pools, create the vendor object
+ * and initialize the MAD pool.
+ *
+ * Returns the osm_log_init_v2() status if the log cannot be opened,
+ * IB_ERROR if the vendor object cannot be created, otherwise the
+ * status of osm_mad_pool_init().
+ */
+ib_api_status_t
+osmtest_init(IN osmtest_t * const p_osmt,
+	     IN const osmtest_opt_t * const p_opt,
+	     IN const osm_log_level_t log_flags)
+{
+	ib_api_status_t status;
+
+	/* Can't use log macros here, since we're initializing the log. */
+	osmtest_construct(p_osmt);
+
+	status = osm_log_init_v2(&p_osmt->log, p_opt->force_log_flush,
+				 0x0001, p_opt->log_file, 0, TRUE);
+	if (status != IB_SUCCESS)
+		return (status);
+
+	/* but we do not want any extra stuff here */
+	osm_log_set_level(&p_osmt->log, log_flags);
+
+	OSM_LOG(&p_osmt->log, OSM_LOG_FUNCS, "[\n");
+
+	p_osmt->opt = *p_opt;
+
+	/*
+	 * NOTE(review): pool initialization failures are only caught by
+	 * CL_ASSERT - confirm whether they should be reported to the
+	 * caller as well.
+	 */
+	status = cl_qpool_init(&p_osmt->node_pool, POOL_MIN_ITEMS, 0,
+			       POOL_MIN_ITEMS, sizeof(node_t), NULL, NULL,
+			       NULL);
+	CL_ASSERT(status == CL_SUCCESS);
+
+	status = cl_qpool_init(&p_osmt->port_pool, POOL_MIN_ITEMS, 0,
+			       POOL_MIN_ITEMS, sizeof(port_t), NULL, NULL,
+			       NULL);
+	CL_ASSERT(status == CL_SUCCESS);
+
+	p_osmt->p_vendor = osm_vendor_new(&p_osmt->log,
+					  p_opt->transaction_timeout);
+
+	if (p_osmt->p_vendor == NULL) {
+		/* message now ends with a newline like all other log lines */
+		OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0001: "
+			"Unable to allocate vendor object\n");
+		status = IB_ERROR;
+		goto Exit;
+	}
+
+	osm_mad_pool_construct(&p_osmt->mad_pool);
+	status = osm_mad_pool_init(&p_osmt->mad_pool);
+
+Exit:
+	OSM_LOG(&p_osmt->log, OSM_LOG_FUNCS, "]\n");
+	return (status);
+}
+
+/*
+ * Query completion callback.  Copies the complete query result into
+ * the request context passed as query_context and logs an error when
+ * the query status is not IB_SUCCESS.
+ */
+void osmtest_query_res_cb(IN osmv_query_res_t * p_rec)
+{
+	osmtest_req_context_t *const p_ctxt =
+	    (osmtest_req_context_t *) p_rec->query_context;
+	osmtest_t *const p_osmt = p_ctxt->p_osmt;
+
+	OSM_LOG_ENTER(&p_osmt->log);
+
+	p_ctxt->result = *p_rec;
+
+	if (p_rec->status != IB_SUCCESS) {
+		OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0003: "
+			"Error on query (%s)\n", ib_get_err_str(p_rec->status));
+	}
+
+	OSM_LOG_EXIT(&p_osmt->log);
+}
+
+ib_api_status_t
+osmtest_get_all_recs(IN osmtest_t * const p_osmt,
+		     IN ib_net16_t const attr_id,
+		     IN size_t const attr_size,
+		     IN OUT osmtest_req_context_t * const p_context)
+{
+	ib_api_status_t status =
IB_SUCCESS; + osmv_user_query_t user; + osmv_query_req_t req; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, "Getting all %s records\n", + ib_get_sa_attr_str(attr_id)); + + /* + * Do a blocking query for all records in the subnet. + * The result is returned in the result field of the caller's + * context structure. + * + * The query structures are locals. + */ + memset(&req, 0, sizeof(req)); + memset(&user, 0, sizeof(user)); + + p_context->p_osmt = p_osmt; + user.attr_id = attr_id; + + req.query_type = OSMV_QUERY_USER_DEFINED; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = p_context; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + req.sm_key = 0; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0004: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = p_context->result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0064: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + + if (status == IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(osm_madw_get_mad_ptr + (p_context->result. 
+ p_result_madw))); + } + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t osmtest_validate_sa_class_port_info(IN osmtest_t * const p_osmt) +{ + ib_api_status_t status = IB_SUCCESS; + osmv_query_req_t req; + ib_class_port_info_t *p_cpi; + osmtest_req_context_t context; + osmtest_req_context_t *p_context = &context; + ib_sa_mad_t *p_resp_sa_madp; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Getting ClassPortInfo\n"); + + /* + * Do a blocking query for this record in the subnet. + * The result is returned in the result field of the caller's + * context structure. + * + * The query structures are locals. + */ + memset(&req, 0, sizeof(req)); + + p_context->p_osmt = p_osmt; + req.query_type = OSMV_QUERY_CLASS_PORT_INFO; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = p_context; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = NULL; + req.sm_key = 0; + + if (p_osmt->opt.flow != OSMT_FLOW_CREATE_INVENTORY && + p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0065: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = p_context->result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0070: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + if (status == IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(osm_madw_get_mad_ptr + (p_context->result. 
+ p_result_madw))); + } + goto Exit; + } + + /* ok we got it so please print it out */ + p_resp_sa_madp = + (ib_sa_mad_t *) osm_madw_get_mad_ptr(context.result.p_result_madw); + p_cpi = + (ib_class_port_info_t *) ib_sa_mad_get_payload_ptr(p_resp_sa_madp); + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "\n-----------------------------\n" + "SA Class Port Info:\n" + " base_ver:%u\n" + " class_ver:%u\n" + " cap_mask:0x%X\n" + " cap_mask2:0x%X\n" + " resp_time_val:0x%X\n" + "-----------------------------\n", + p_cpi->base_ver, p_cpi->class_ver, cl_ntoh16(p_cpi->cap_mask), + ib_class_cap_mask2(p_cpi), ib_class_resp_time_val(p_cpi)); + +Exit: +#if 0 + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } +#endif + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +/********************************************************************** + * Get a node record by node LID + **********************************************************************/ +static ib_api_status_t +osmtest_get_node_rec_by_lid(IN osmtest_t * const p_osmt, + IN ib_net16_t const lid, + IN OUT osmtest_req_context_t * const p_context) +{ + ib_api_status_t status = IB_SUCCESS; + osmv_user_query_t user; + osmv_query_req_t req; + ib_node_record_t record; + ib_mad_t *p_mad; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Getting node record for LID 0x%02X\n", cl_ntoh16(lid)); + + /* + * Do a blocking query for this record in the subnet. + * The result is returned in the result field of the caller's + * context structure. + * + * The query structures are locals. 
+ */ + memset(&req, 0, sizeof(req)); + memset(&user, 0, sizeof(user)); + memset(&record, 0, sizeof(record)); + + record.lid = lid; + + p_context->p_osmt = p_osmt; + user.comp_mask = IB_NR_COMPMASK_LID; + user.attr_id = IB_MAD_ATTR_NODE_RECORD; + user.p_attr = &record; + + req.query_type = OSMV_QUERY_USER_DEFINED; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = p_context; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + req.sm_key = 0; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0073: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = p_context->result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0074: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + if (status == IB_REMOTE_ERROR) { + p_mad = + osm_madw_get_mad_ptr(p_context->result. 
+ p_result_madw); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(p_mad)); + + status = + (ib_net16_t) (p_mad->status & IB_SMP_STATUS_MASK); + } + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_get_path_rec_by_half_world_query(IN osmtest_t * const p_osmt, + IN ib_net64_t sguid, + IN osmtest_req_context_t * p_context) +{ + cl_status_t status = IB_SUCCESS; + osmv_query_req_t req; + osmv_user_query_t user; + ib_path_rec_t record; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&req, 0, sizeof(req)); + memset(p_context, 0, sizeof(*p_context)); + memset(&record, 0, sizeof(record)); + memset(&user, 0, sizeof(user)); + + ib_gid_set_default(&(record.sgid), sguid); + record.num_path = 0x01; + + p_context->p_osmt = p_osmt; + user.comp_mask = (IB_PR_COMPMASK_SGID | IB_PR_COMPMASK_NUMBPATH); + user.attr_id = IB_MAD_ATTR_PATH_RECORD; + user.p_attr = &record; + + req.query_type = OSMV_QUERY_USER_DEFINED; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = p_context; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + req.sm_key = 0; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0063: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = (*p_context).result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0066: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + + if (status == IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(osm_madw_get_mad_ptr + ((*p_context).result. 
+ p_result_madw))); + } + goto Exit; + } + +Exit: + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_get_path_rec_by_guid_pair(IN osmtest_t * const p_osmt, + IN ib_net64_t sguid, + IN ib_net64_t dguid, + IN osmtest_req_context_t * p_context) +{ + cl_status_t status = IB_SUCCESS; + osmv_query_req_t req; + osmv_guid_pair_t guid_pair; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&req, 0, sizeof(req)); + memset(p_context, 0, sizeof(*p_context)); + + p_context->p_osmt = p_osmt; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = p_context; + req.pfn_query_cb = osmtest_query_res_cb; + + req.query_type = OSMV_QUERY_PATH_REC_BY_PORT_GUIDS; + + guid_pair.dest_guid = dguid; + guid_pair.src_guid = sguid; + + req.p_query_input = &guid_pair; + req.sm_key = 0; + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Query for path from 0x%" PRIx64 " to 0x%" PRIx64 "\n", + cl_ntoh64(sguid), cl_ntoh64(dguid)); + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0063: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = (*p_context).result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0066: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + + if (status == IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(osm_madw_get_mad_ptr + ((*p_context).result. 
+ p_result_madw))); + } + goto Exit; + } + +Exit: + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_get_path_rec_by_gid_pair(IN osmtest_t * const p_osmt, + IN ib_gid_t sgid, + IN ib_gid_t dgid, + IN osmtest_req_context_t * p_context) +{ + cl_status_t status = IB_SUCCESS; + osmv_query_req_t req; + osmv_gid_pair_t gid_pair; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&req, 0, sizeof(req)); + memset(p_context, 0, sizeof(*p_context)); + + p_context->p_osmt = p_osmt; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = p_context; + req.pfn_query_cb = osmtest_query_res_cb; + + req.query_type = OSMV_QUERY_PATH_REC_BY_GIDS; + + gid_pair.dest_gid = dgid; + gid_pair.src_gid = sgid; + + req.p_query_input = &gid_pair; + req.sm_key = 0; + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Query for path from 0x%016" PRIx64 " 0x%016" PRIx64 + " to 0x%016" PRIx64 " 0x%016" PRIx64 "\n", + cl_ntoh64(sgid.unicast.prefix), + cl_ntoh64(sgid.unicast.interface_id), + cl_ntoh64(dgid.unicast.prefix), + cl_ntoh64(dgid.unicast.interface_id)); + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 006A: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = (*p_context).result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 006B: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + + if (status == IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(osm_madw_get_mad_ptr + ((*p_context).result. 
+ p_result_madw))); + } + goto Exit; + } + +Exit: + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +#if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) +static ib_api_status_t +osmtest_get_multipath_rec(IN osmtest_t * const p_osmt, + IN osmv_multipath_req_t * p_request, + IN osmtest_req_context_t * p_context) +{ + cl_status_t status = IB_SUCCESS; + osmv_query_req_t req; + + OSM_LOG_ENTER(&p_osmt->log); + + /* + * Do a blocking query for this record in the subnet. + * The result is returned in the result field of the caller's + * context structure. + * + * The query structures are locals. + */ + memset(&req, 0, sizeof(req)); + + p_context->p_osmt = p_osmt; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = p_context; + req.pfn_query_cb = osmtest_query_res_cb; + + req.query_type = OSMV_QUERY_MULTIPATH_REC; + + req.p_query_input = p_request; + req.sm_key = 0; + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0068: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = p_context->result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0069: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + + if (status == IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(osm_madw_get_mad_ptr + (p_context->result. 
+ p_result_madw))); + } + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} +#endif + +static ib_api_status_t +osmtest_get_port_rec(IN osmtest_t * const p_osmt, + IN ib_net16_t const lid, + IN OUT osmtest_req_context_t * const p_context) +{ + ib_api_status_t status = IB_SUCCESS; + osmv_user_query_t user; + osmv_query_req_t req; + ib_portinfo_record_t record; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "Getting PortInfoRecord for port with LID 0x%X\n", + cl_ntoh16(lid)); + + /* + * Do a blocking query for this record in the subnet. + * The result is returned in the result field of the caller's + * context structure. + * + * The query structures are locals. + */ + memset(&req, 0, sizeof(req)); + memset(&user, 0, sizeof(user)); + memset(&record, 0, sizeof(record)); + + record.lid = lid; + + p_context->p_osmt = p_osmt; + user.comp_mask = IB_PIR_COMPMASK_LID; + user.attr_id = IB_MAD_ATTR_PORTINFO_RECORD; + user.p_attr = &record; + + req.query_type = OSMV_QUERY_USER_DEFINED; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = p_context; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + req.sm_key = 0; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0075: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = p_context->result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0076: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + + if (status == IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(osm_madw_get_mad_ptr + (p_context->result. 
+ p_result_madw))); + } + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_get_port_rec_by_num(IN osmtest_t * const p_osmt, + IN ib_net16_t const lid, + IN uint8_t const port_num, + IN OUT osmtest_req_context_t * const p_context) +{ + ib_api_status_t status = IB_SUCCESS; + osmv_user_query_t user; + osmv_query_req_t req; + ib_portinfo_record_t record; + ib_mad_t *p_mad; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "Getting PortInfoRecord for port with LID 0x%X Num:0x%X\n", + cl_ntoh16(lid), port_num); + + /* + * Do a blocking query for this record in the subnet. + * The result is returned in the result field of the caller's + * context structure. + * + * The query structures are locals. + */ + memset(&req, 0, sizeof(req)); + memset(&user, 0, sizeof(user)); + memset(&record, 0, sizeof(record)); + + record.lid = lid; + record.port_num = port_num; + user.p_attr = &record; + + p_context->p_osmt = p_osmt; + + req.query_type = OSMV_QUERY_PORT_REC_BY_LID_AND_NUM; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = p_context; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + req.sm_key = 0; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0077: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = p_context->result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0078: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + + if (status == IB_REMOTE_ERROR) { + p_mad = + osm_madw_get_mad_ptr(p_context->result. 
+ p_result_madw); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(p_mad)); + status = + (ib_net16_t) (p_mad->status & IB_SMP_STATUS_MASK); + } + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_stress_port_recs_large(IN osmtest_t * const p_osmt, + OUT uint32_t * const p_num_recs, + OUT uint32_t * const p_num_queries) +{ + osmtest_req_context_t context; + ib_portinfo_record_t *p_rec; + uint32_t i; + cl_status_t status; + uint32_t num_recs = 0; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&context, 0, sizeof(context)); + /* + * Do a blocking query for all PortInfoRecords in the subnet. + */ + status = osmtest_get_all_recs(p_osmt, IB_MAD_ATTR_PORTINFO_RECORD, + sizeof(*p_rec), &context); + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0006: " + "osmtest_get_all_recs failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + + /* + * Populate the database with the received records. + */ + num_recs = context.result.result_cnt; + *p_num_recs += num_recs; + ++*p_num_queries; + + if (osm_log_is_active(&p_osmt->log, OSM_LOG_VERBOSE)) { + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Received %u records\n", num_recs); + + for (i = 0; i < num_recs; i++) { + p_rec = + osmv_get_query_portinfo_rec(context.result. + p_result_madw, i); + osm_dump_portinfo_record(&p_osmt->log, p_rec, + OSM_LOG_VERBOSE); + } + } + +Exit: + /* + * Return the IB query MAD to the pool as necessary. 
+ */ + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_stress_node_recs_large(IN osmtest_t * const p_osmt, + OUT uint32_t * const p_num_recs, + OUT uint32_t * const p_num_queries) +{ + osmtest_req_context_t context; + ib_node_record_t *p_rec; + uint32_t i; + cl_status_t status; + uint32_t num_recs = 0; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&context, 0, sizeof(context)); + + /* + * Do a blocking query for all NodeRecords in the subnet. + */ + status = osmtest_get_all_recs(p_osmt, IB_MAD_ATTR_NODE_RECORD, + sizeof(*p_rec), &context); + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0007: " + "osmtest_get_all_recs failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + + /* + * Populate the database with the received records. + */ + num_recs = context.result.result_cnt; + *p_num_recs += num_recs; + ++*p_num_queries; + + if (osm_log_is_active(&p_osmt->log, OSM_LOG_VERBOSE)) { + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Received %u records\n", num_recs); + + for (i = 0; i < num_recs; i++) { + p_rec = + osmv_get_query_node_rec(context.result. + p_result_madw, i); + osm_dump_node_record(&p_osmt->log, p_rec, + OSM_LOG_VERBOSE); + } + } + +Exit: + /* + * Return the IB query MAD to the pool as necessary. 
+ */ + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_stress_path_recs_large(IN osmtest_t * const p_osmt, + OUT uint32_t * const p_num_recs, + OUT uint32_t * const p_num_queries) +{ + osmtest_req_context_t context; + ib_path_rec_t *p_rec; + uint32_t i; + cl_status_t status; + uint32_t num_recs = 0; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&context, 0, sizeof(context)); + + /* + * Do a blocking query for all PathRecords in the subnet. + */ + status = osmtest_get_all_recs(p_osmt, IB_MAD_ATTR_PATH_RECORD, + sizeof(*p_rec), &context); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0008: " + "osmtest_get_all_recs failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + + /* + * Populate the database with the received records. + */ + num_recs = context.result.result_cnt; + *p_num_recs += num_recs; + ++*p_num_queries; + + if (osm_log_is_active(&p_osmt->log, OSM_LOG_VERBOSE)) { + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Received %u records\n", num_recs); + + for (i = 0; i < num_recs; i++) { + p_rec = + osmv_get_query_path_rec(context.result. + p_result_madw, i); + osm_dump_path_record(&p_osmt->log, p_rec, + OSM_LOG_VERBOSE); + } + } + +Exit: + /* + * Return the IB query MAD to the pool as necessary. 
+ */ + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +/* Stress test: walks every ordered (source, destination) pair of nodes in the expected subnet's node_guid table and, when both nodes are of type IB_NODE_TYPE_CA, issues a path record query by port GUID pair. For each successful query the number of returned records is added to *p_num_recs and *p_num_queries is incremented; a failure other than IB_TIMEOUT aborts the walk. Returns the status of the last query performed (IB_SUCCESS if none was performed). */ static ib_api_status_t +osmtest_stress_path_recs_by_guid(IN osmtest_t * const p_osmt, + OUT uint32_t * const p_num_recs, + OUT uint32_t * const p_num_queries) +{ + osmtest_req_context_t context; + ib_path_rec_t *p_rec; + uint32_t i; + cl_status_t status = IB_SUCCESS; + uint32_t num_recs = 0; + node_t *p_src_node, *p_dst_node; + cl_qmap_t *p_tbl; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&context, 0, sizeof(context)); + + context.p_osmt = p_osmt; + + p_tbl = &p_osmt->exp_subn.node_guid_tbl; + + p_src_node = (node_t *) cl_qmap_head(p_tbl); + + /* + * Go over all nodes that exist in the subnet + * for each pair that are not switch nodes get the path record + */ + while (p_src_node != (node_t *) cl_qmap_end(p_tbl)) { + p_dst_node = (node_t *) cl_qmap_head(p_tbl); + + while (p_dst_node != (node_t *) cl_qmap_end(p_tbl)) { + /* + * Do a blocking query for CA to CA Path Record + */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Source : guid = 0x%" PRIx64 " type = %d, " + "Target : guid = 0x%" PRIx64 " type = %d\n", + cl_ntoh64(p_src_node->rec.node_info.port_guid), + p_src_node->rec.node_info.node_type, + cl_ntoh64(p_dst_node->rec.node_info.port_guid), + p_dst_node->rec.node_info.node_type); + + if (p_src_node->rec.node_info.node_type == + IB_NODE_TYPE_CA + && p_dst_node->rec.node_info.node_type == + IB_NODE_TYPE_CA) { + status = + osmtest_get_path_rec_by_guid_pair(p_osmt, + p_src_node-> + rec. + node_info. 
+ port_guid, + &context); + + /* In a case of TIMEOUT you still can try sending but cant count, maybe its a temporary issue */ + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 0009: " + "osmtest_get_path_rec_by_guid_pair failed (%s)\n", + ib_get_err_str(status)); + if (status != IB_TIMEOUT) + goto Exit; + } else { + /* we might have received several records */ + num_recs = context.result.result_cnt; + /* + * Populate the database with the received records. + */ + *p_num_recs += num_recs; + ++*p_num_queries; + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Received %u records\n", num_recs); + /* Dont waste time if not VERBOSE and above */ + if (p_osmt->log.level & OSM_LOG_VERBOSE) { + for (i = 0; i < num_recs; i++) { + p_rec = + osmv_get_query_path_rec + (context.result. + p_result_madw, i); + osm_dump_path_record + (&p_osmt->log, + p_rec, + OSM_LOG_VERBOSE); + } + } + } + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result. + p_result_madw); + context.result.p_result_madw = NULL; + } + } + /* next one please */ + p_dst_node = + (node_t *) cl_qmap_next(&p_dst_node->map_item); + } + + p_src_node = (node_t *) cl_qmap_next(&p_src_node->map_item); + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_stress_port_recs_small(IN osmtest_t * const p_osmt, + OUT uint32_t * const p_num_recs, + OUT uint32_t * const p_num_queries) +{ + osmtest_req_context_t context; + ib_portinfo_record_t *p_rec; + uint32_t i; + cl_status_t status; + uint32_t num_recs = 0; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&context, 0, sizeof(context)); + + /* + * Do a blocking query for our own PortInfoRecord in the subnet. 
+ */ + status = osmtest_get_port_rec(p_osmt, + cl_ntoh16(p_osmt->local_port.lid), + &context); + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0010: " + "osmtest_get_port_rec failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + + /* + * Populate the database with the received records. + */ + num_recs = context.result.result_cnt; + *p_num_recs += num_recs; + ++*p_num_queries; + + if (osm_log_is_active(&p_osmt->log, OSM_LOG_VERBOSE)) { + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Received %u records\n", num_recs); + + for (i = 0; i < num_recs; i++) { + p_rec = + osmv_get_query_portinfo_rec(context.result. + p_result_madw, i); + osm_dump_portinfo_record(&p_osmt->log, p_rec, + OSM_LOG_VERBOSE); + } + } + +Exit: + /* + * Return the IB query MAD to the pool as necessary. + */ + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +ib_api_status_t +osmtest_get_local_port_lmc(IN osmtest_t * const p_osmt, + IN ib_net16_t lid, OUT uint8_t * const p_lmc) +{ + osmtest_req_context_t context; + ib_portinfo_record_t *p_rec; + uint32_t i; + cl_status_t status; + uint32_t num_recs = 0; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&context, 0, sizeof(context)); + + /* + * Do a blocking query for our own PortInfoRecord in the subnet. 
+ */ + status = osmtest_get_port_rec(p_osmt, cl_ntoh16(lid), &context); + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 001A: " + "osmtest_get_port_rec failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + + num_recs = context.result.result_cnt; + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Received %u records\n", num_recs); + + for (i = 0; i < num_recs; i++) { + p_rec = + osmv_get_query_portinfo_rec(context.result.p_result_madw, + i); + osm_dump_portinfo_record(&p_osmt->log, p_rec, OSM_LOG_VERBOSE); + if (p_lmc) { + *p_lmc = ib_port_info_get_lmc(&p_rec->port_info); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, "LMC %d\n", *p_lmc); + } + } + +Exit: + /* + * Return the IB query MAD to the pool as necessary. + */ + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +/********************************************************************** + * Use a wrong SM_Key in a simple port query and report success if + * failed. + **********************************************************************/ +static ib_api_status_t osmtest_wrong_sm_key_ignored(IN osmtest_t * const p_osmt) +{ + ib_api_status_t status = IB_SUCCESS; + osmv_user_query_t user; + osmv_query_req_t req; + ib_portinfo_record_t record; + osmtest_req_context_t context; + osmtest_req_context_t *p_context = &context; + uint8_t port_num = 1; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Trying PortInfoRecord for port with LID 0x%X Num:0x%X\n", + p_osmt->local_port.sm_lid, port_num); + + /* + * Do a blocking query for this record in the subnet. + * The result is returned in the result field of the caller's + * context structure. + * + * The query structures are locals. 
+ */ + memset(&req, 0, sizeof(req)); + memset(&user, 0, sizeof(user)); + memset(&record, 0, sizeof(record)); + + record.lid = p_osmt->local_port.sm_lid; + record.port_num = port_num; + user.p_attr = &record; + + p_context->p_osmt = p_osmt; + + req.query_type = OSMV_QUERY_PORT_REC_BY_LID_AND_NUM; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = p_context; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + req.sm_key = 9999; + context.result.p_result_madw = NULL; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmv_query_sa(p_osmt->h_bind, &req); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + /* since we use a wrong sm_key we should get a timeout */ + if (status != IB_TIMEOUT) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0011: " + "Did not get a timeout but got (%s)\n", + ib_get_err_str(status)); + if (status == IB_SUCCESS) { + /* assign some error value to status, since IB_SUCCESS is a bad rc */ + status = IB_ERROR; + } + goto Exit; + } else { + status = IB_SUCCESS; + } + +Exit: + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_write_port_info(IN osmtest_t * const p_osmt, + IN FILE * fh, + IN const ib_portinfo_record_t * const p_rec) +{ + int result; + cl_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(&p_osmt->log); + + result = fprintf(fh, + "DEFINE_PORT\n" + "lid 0x%X\n" + "port_num 0x%X\n" + "m_key 0x%016" PRIx64 "\n" + "subnet_prefix 0x%016" PRIx64 "\n" + "base_lid 0x%X\n" + "master_sm_base_lid 0x%X\n" + "capability_mask 0x%X\n" + "diag_code 0x%X\n" + "m_key_lease_period 0x%X\n" + 
"local_port_num 0x%X\n" + "link_width_enabled 0x%X\n" + "link_width_supported 0x%X\n" + "link_width_active 0x%X\n" + "link_speed_supported 0x%X\n" + "port_state %s\n" + "state_info2 0x%X\n" + "mpb 0x%X\n" + "lmc 0x%X\n" + "link_speed 0x%X\n" + "mtu_smsl 0x%X\n" + "vl_cap 0x%X\n" + "vl_high_limit 0x%X\n" + "vl_arb_high_cap 0x%X\n" + "vl_arb_low_cap 0x%X\n" + "mtu_cap 0x%X\n" + "vl_stall_life 0x%X\n" + "vl_enforce 0x%X\n" + "m_key_violations 0x%X\n" + "p_key_violations 0x%X\n" + "q_key_violations 0x%X\n" + "guid_cap 0x%X\n" + "subnet_timeout 0x%X\n" + "resp_time_value 0x%X\n" + "error_threshold 0x%X\n" + "END\n\n", + cl_ntoh16(p_rec->lid), + p_rec->port_num, + cl_ntoh64(p_rec->port_info.m_key), + cl_ntoh64(p_rec->port_info.subnet_prefix), + cl_ntoh16(p_rec->port_info.base_lid), + cl_ntoh16(p_rec->port_info.master_sm_base_lid), + cl_ntoh32(p_rec->port_info.capability_mask), + cl_ntoh16(p_rec->port_info.diag_code), + cl_ntoh16(p_rec->port_info.m_key_lease_period), + p_rec->port_info.local_port_num, + p_rec->port_info.link_width_enabled, + p_rec->port_info.link_width_supported, + p_rec->port_info.link_width_active, + ib_port_info_get_link_speed_sup(&p_rec->port_info), + ib_get_port_state_str(ib_port_info_get_port_state + (&p_rec->port_info)), + p_rec->port_info.state_info2, + ib_port_info_get_mpb(&p_rec->port_info), + ib_port_info_get_lmc(&p_rec->port_info), + p_rec->port_info.link_speed, p_rec->port_info.mtu_smsl, + p_rec->port_info.vl_cap, + p_rec->port_info.vl_high_limit, + p_rec->port_info.vl_arb_high_cap, + p_rec->port_info.vl_arb_low_cap, + p_rec->port_info.mtu_cap, + p_rec->port_info.vl_stall_life, + p_rec->port_info.vl_enforce, + cl_ntoh16(p_rec->port_info.m_key_violations), + cl_ntoh16(p_rec->port_info.p_key_violations), + cl_ntoh16(p_rec->port_info.q_key_violations), + p_rec->port_info.guid_cap, + ib_port_info_get_timeout(&p_rec->port_info), + p_rec->port_info.resp_time_value, + p_rec->port_info.error_threshold); + + if (result < 0) { + OSM_LOG(&p_osmt->log, 
OSM_LOG_ERROR, "ERR 0161: " + "Write failed\n"); + status = IB_ERROR; + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_write_path_info(IN osmtest_t * const p_osmt, + IN FILE * fh, IN const ib_path_rec_t * const p_rec) +{ + int result; + cl_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(&p_osmt->log); + + result = fprintf(fh, + "DEFINE_PATH\n" + "dgid 0x%016" PRIx64 " 0x%016" + PRIx64 "\nsgid 0x%016" PRIx64 + " 0x%016" PRIx64 "\ndlid 0x%X\n" + "slid 0x%X\n" + "# hop_flow_raw 0x%X\n" + "# tclass 0x%X\n" + "# num_path 0x%X\n" + "pkey 0x%X\n" + "# sl 0x%X\n" + "# qos_class 0x%X\n" + "# mtu 0x%X\n" + "# rate 0x%X\n" + "# pkt_life 0x%X\n" + "# preference 0x%X\n" "END\n\n", + cl_ntoh64(p_rec->dgid.unicast.prefix), + cl_ntoh64(p_rec->dgid.unicast.interface_id), + cl_ntoh64(p_rec->sgid.unicast.prefix), + cl_ntoh64(p_rec->sgid.unicast.interface_id), + cl_ntoh16(p_rec->dlid), cl_ntoh16(p_rec->slid), + cl_ntoh32(p_rec->hop_flow_raw), p_rec->tclass, + p_rec->num_path, cl_ntoh16(p_rec->pkey), + ib_path_rec_sl(p_rec), ib_path_rec_qos_class(p_rec), + p_rec->mtu, p_rec->rate, p_rec->pkt_life, + p_rec->preference); + + if (result < 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0162: " + "Write failed\n"); + status = IB_ERROR; + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_write_node_info(IN osmtest_t * const p_osmt, + IN FILE * fh, IN const ib_node_record_t * const p_rec) +{ + int result; + cl_status_t status = IB_SUCCESS; + char desc[IB_NODE_DESCRIPTION_SIZE + 1]; + + OSM_LOG_ENTER(&p_osmt->log); + + memcpy(desc, p_rec->node_desc.description, IB_NODE_DESCRIPTION_SIZE); + desc[IB_NODE_DESCRIPTION_SIZE] = '\0'; + + result = fprintf(fh, + "DEFINE_NODE\n" + "lid 0x%X\n" + "base_version 0x%X\n" + "class_version 0x%X\n" + "node_type 0x%X # (%s)\n" + "num_ports 0x%X\n" + "sys_guid 0x%016" PRIx64 "\n" + "node_guid 0x%016" PRIx64 "\n" + "port_guid 0x%016" 
PRIx64 "\n" + "partition_cap 0x%X\n" + "device_id 0x%X\n" + "revision 0x%X\n" + "# port_num 0x%X\n" + "# vendor_id 0x%X\n" + "# node_desc %s\n" + "END\n\n", + cl_ntoh16(p_rec->lid), + p_rec->node_info.base_version, + p_rec->node_info.class_version, + p_rec->node_info.node_type, + ib_get_node_type_str(p_rec->node_info.node_type), + p_rec->node_info.num_ports, + cl_ntoh64(p_rec->node_info.sys_guid), + cl_ntoh64(p_rec->node_info.node_guid), + cl_ntoh64(p_rec->node_info.port_guid), + cl_ntoh16(p_rec->node_info.partition_cap), + cl_ntoh16(p_rec->node_info.device_id), + cl_ntoh32(p_rec->node_info.revision), + ib_node_info_get_local_port_num(&p_rec->node_info), + cl_ntoh32(ib_node_info_get_vendor_id + (&p_rec->node_info)), desc); + + if (result < 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0163: " + "Write failed\n"); + status = IB_ERROR; + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_write_link(IN osmtest_t * const p_osmt, + IN FILE * fh, IN const ib_link_record_t * const p_rec) +{ + int result; + cl_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(&p_osmt->log); + + result = fprintf(fh, + "DEFINE_LINK\n" + "from_lid 0x%X\n" + "from_port_num 0x%X\n" + "to_port_num 0x%X\n" + "to_lid 0x%X\n" + "END\n\n", + cl_ntoh16(p_rec->from_lid), + p_rec->from_port_num, + p_rec->to_port_num, cl_ntoh16(p_rec->to_lid)); + + if (result < 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0164: " + "Write failed\n"); + status = IB_ERROR; + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_write_all_link_recs(IN osmtest_t * const p_osmt, IN FILE * fh) +{ + osmtest_req_context_t context; + const ib_link_record_t *p_rec; + uint32_t i; + cl_status_t status; + size_t num_recs; + int result; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&context, 0, sizeof(context)); + + /* + * Do a blocking query for all NodeRecords in the subnet. 
+ */ + status = osmtest_get_all_recs(p_osmt, IB_MAD_ATTR_LINK_RECORD, + sizeof(*p_rec), &context); + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0165: " + "osmtest_get_all_recs failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + + /* + * Write the received records out to the file. + */ + num_recs = context.result.result_cnt; + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Received %zu records\n", num_recs); + + result = fprintf(fh, "#\n" "# Link Records\n" "#\n"); + if (result < 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0166: " + "Write failed\n"); + status = IB_ERROR; + goto Exit; + } + + for (i = 0; i < num_recs; i++) { + p_rec = + (ib_link_record_t *) osmv_get_query_result(context.result. + p_result_madw, + i); + + osmtest_write_link(p_osmt, fh, p_rec); + } + +Exit: + /* + * Return the IB query MAD to the pool as necessary. + */ + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_get_path_rec_by_lid_pair(IN osmtest_t * const p_osmt, + IN ib_net16_t slid, + IN ib_net16_t dlid, + IN osmtest_req_context_t * p_context) +{ + cl_status_t status = IB_SUCCESS; + osmv_query_req_t req; + osmv_lid_pair_t lid_pair; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&req, 0, sizeof(req)); + memset(p_context, 0, sizeof(*p_context)); + + p_context->p_osmt = p_osmt; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = p_context; + req.pfn_query_cb = osmtest_query_res_cb; + + req.query_type = OSMV_QUERY_PATH_REC_BY_LIDS; + + lid_pair.dest_lid = dlid; + lid_pair.src_lid = slid; + + req.p_query_input = &lid_pair; + req.sm_key = 0; + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + OSM_LOG(&p_osmt->log, 
OSM_LOG_VERBOSE, + "Query for path from 0x%X to 0x%X\n", + cl_ntoh16(slid), cl_ntoh16(dlid)); + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0053: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = (*p_context).result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0067: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + + if (status == IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(osm_madw_get_mad_ptr + ((*p_context).result. + p_result_madw))); + } + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +#ifdef VENDOR_RMPP_SUPPORT +/********************************************************************** + * ASSUMES RMPP + **********************************************************************/ +static ib_api_status_t +osmtest_write_all_node_recs(IN osmtest_t * const p_osmt, IN FILE * fh) +{ + osmtest_req_context_t context; + const ib_node_record_t *p_rec; + uint32_t i; + cl_status_t status; + size_t num_recs; + int result; + node_t *p_guid_node; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&context, 0, sizeof(context)); + + /* + * Do a blocking query for all NodeRecords in the subnet. + */ + status = osmtest_get_all_recs(p_osmt, IB_MAD_ATTR_NODE_RECORD, + sizeof(*p_rec), &context); + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0022: " + "osmtest_get_all_recs failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + + /* + * Write the received records out to the file. 
+ */ + num_recs = context.result.result_cnt; + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Received %zu records\n", num_recs); + + result = fprintf(fh, "#\n" "# Node Records\n" "#\n"); + if (result < 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0023: " + "Write failed\n"); + status = IB_ERROR; + goto Exit; + } + + for (i = 0; i < num_recs; i++) { + p_rec = + osmv_get_query_node_rec(context.result.p_result_madw, i); + osmtest_write_node_info(p_osmt, fh, p_rec); + + /* create a subnet object */ + p_guid_node = node_new(); + CL_ASSERT(p_guid_node != NULL); + + /* copy the info to the subnet node object */ + p_guid_node->rec = *p_rec; + cl_qmap_insert(&p_osmt->exp_subn.node_guid_tbl, + p_guid_node->rec.node_info.port_guid, + &p_guid_node->map_item); + } + +Exit: + /* + * Return the IB query MAD to the pool as necessary. + */ + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +/********************************************************************** + * ASSUMES RMPP + **********************************************************************/ +static ib_api_status_t +osmtest_write_all_port_recs(IN osmtest_t * const p_osmt, IN FILE * fh) +{ + osmtest_req_context_t context; + const ib_portinfo_record_t *p_rec; + uint32_t i; + cl_status_t status; + size_t num_recs; + int result; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&context, 0, sizeof(context)); + + /* + * Do a blocking query for all NodeRecords in the subnet. + */ + status = osmtest_get_all_recs(p_osmt, IB_MAD_ATTR_PORTINFO_RECORD, + sizeof(*p_rec), &context); + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0167: " + "osmtest_get_all_recs failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + + /* + * Write the received records out to the file. 
+ */ + num_recs = context.result.result_cnt; + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Received %zu records\n", num_recs); + + result = fprintf(fh, "#\n" "# PortInfo Records\n" "#\n"); + if (result < 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0024: " + "Write failed\n"); + status = IB_ERROR; + goto Exit; + } + + for (i = 0; i < num_recs; i++) { + p_rec = + osmv_get_query_portinfo_rec(context.result.p_result_madw, + i); + osmtest_write_port_info(p_osmt, fh, p_rec); + } + +Exit: + /* + * Return the IB query MAD to the pool as necessary. + */ + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +/********************************************************************** + * ASSUMES RMPP + **********************************************************************/ +static ib_api_status_t +osmtest_write_all_path_recs(IN osmtest_t * const p_osmt, IN FILE * fh) +{ + osmtest_req_context_t context; + const ib_path_rec_t *p_rec; + uint32_t i; + cl_status_t status = CL_SUCCESS; + size_t num_recs; + int result; + node_t *p_dst_node; + cl_qmap_t *p_tbl; + + OSM_LOG_ENTER(&p_osmt->log); + + if (p_osmt->opt.full_world_path_recs) + goto full_world; + + result = fprintf(fh, "#\n" "# Path Records\n" "#\n"); + if (result < 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0026: " + "Write failed\n"); + status = IB_ERROR; + goto Exit; + } + + p_tbl = &p_osmt->exp_subn.node_guid_tbl; + p_dst_node = (node_t *) cl_qmap_head(p_tbl); + + while (p_dst_node != (node_t *) cl_qmap_end(p_tbl)) { + + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG,"Source : lid = 0x%d type = %d\n", + cl_ntoh16(p_dst_node->rec.lid), + p_dst_node->rec.node_info.node_type); + + status = osmtest_get_path_rec_by_half_world_query(p_osmt, p_dst_node->rec.node_info.port_guid, &context); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0025: " + 
"osmtest_get_all_path_recs failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + /* + * Write the received records out to the file. + */ + num_recs = context.result.result_cnt; + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Received %zu records\n", num_recs); + + for (i = 0; i < num_recs; i++) { + p_rec = + osmv_get_query_path_rec(context.result.p_result_madw, i); + osmtest_write_path_info(p_osmt, fh, p_rec); + } + p_dst_node = (node_t *) cl_qmap_next(&p_dst_node->map_item); + } + goto Exit; + +full_world: + memset(&context, 0, sizeof(context)); + + /* + * Do a blocking query for all PathRecords in the subnet. + */ + status = osmtest_get_all_recs(p_osmt, IB_MAD_ATTR_PATH_RECORD, + sizeof(*p_rec), &context); + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0002: " + "osmtest_get_all_recs failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + /* + * Write the received records out to the file. + */ + num_recs = context.result.result_cnt; + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Received %zu records\n", num_recs); + + result = fprintf(fh, "#\n" "# Path Records\n" "#\n"); + if (result < 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0005: " + "Write failed\n"); + status = IB_ERROR; + goto Exit; + } + + for (i = 0; i < num_recs; i++) { + p_rec = + osmv_get_query_path_rec(context.result.p_result_madw, i); + osmtest_write_path_info(p_osmt, fh, p_rec); + } + +Exit: + /* + * Return the IB query MAD to the pool as necessary. 
+ */ + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +#else +/* + * NON RMPP BASED QUERY FOR ALL NODES: BASED ON THE MAX LID GIVEN BY THE USER + */ +static ib_api_status_t +osmtest_write_all_node_recs(IN osmtest_t * const p_osmt, IN FILE * fh) +{ + osmtest_req_context_t context; + node_t *p_node; + node_t *p_guid_node; + const ib_node_record_t *p_rec; + cl_status_t status = CL_SUCCESS; + int result; + uint16_t lid; + + OSM_LOG_ENTER(&p_osmt->log); + + result = fprintf(fh, "#\n" "# Node Records\n" "#\n"); + if (result < 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0027: " + "Write failed\n"); + status = IB_ERROR; + goto Exit; + } + + /* + * Go over all LIDs in the range 1 to max_lid and do a + * NodeRecord query by that lid. + */ + for (lid = 1; lid <= p_osmt->max_lid; lid++) { + /* prepare the query context */ + memset(&context, 0, sizeof(context)); + + status = + osmtest_get_node_rec_by_lid(p_osmt, cl_ntoh16(lid), + &context); + if (status != IB_SUCCESS) { + if (status != IB_SA_MAD_STATUS_NO_RECORDS) { + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, "ERR 0028: " + "failed to get node info for LID:0x%02X (%s)\n", + cl_ntoh16(lid), ib_get_err_str(status)); + goto Exit; + } else { + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, "WRN 0121: " + "failed to get node info for LID:0x%02X (%s)\n", + cl_ntoh16(lid), ib_get_err_str(status)); + status = IB_SUCCESS; + } + } else { + /* OK we got something */ + p_rec = + osmv_get_query_node_rec(context.result. 
+ p_result_madw, 0); + osmtest_write_node_info(p_osmt, fh, p_rec); + + /* create a subnet object */ + p_node = node_new(); + CL_ASSERT(p_node != NULL); + + /* copy the info to the subnet node object */ + p_node->rec = *p_rec; + + cl_qmap_insert(&p_osmt->exp_subn.node_lid_tbl, + p_node->rec.lid, &p_node->map_item); + + p_guid_node = node_new(); + CL_ASSERT(p_guid_node != NULL); + + *p_guid_node = *p_node; + + cl_qmap_insert(&p_osmt->exp_subn.node_guid_tbl, + p_guid_node->rec.node_info.node_guid, + &p_guid_node->map_item); + + } + + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + } + +Exit: + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +/* + * GET ALL PORT RECORDS IN THE FABRIC - + * one by one by using the node info received + */ +static ib_api_status_t +osmtest_write_all_port_recs(IN osmtest_t * const p_osmt, IN FILE * fh) +{ + osmtest_req_context_t context; + const ib_node_record_t *p_node_rec; + const ib_portinfo_record_t *p_rec; + uint8_t port_num; + cl_status_t status = CL_SUCCESS; + cl_qmap_t *p_tbl; + node_t *p_node; + port_t *p_port; + int result; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&context, 0, sizeof(context)); + + /* print header */ + result = fprintf(fh, "#\n" "# PortInfo Records\n" "#\n"); + if (result < 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0029: " + "Write failed\n"); + status = IB_ERROR; + goto Exit; + } + + /* use the pre-explored set of nodes */ + p_tbl = &p_osmt->exp_subn.node_lid_tbl; + p_node = (node_t *) cl_qmap_head(p_tbl); + + /* + * Go over all LIDs in the range 1 to max_lid and do a + * NodeRecord query by that lid. 
+ */ + while (p_node != (node_t *) cl_qmap_end(p_tbl)) { + + p_node_rec = &(p_node->rec); + + /* go through all ports of the node: */ + for (port_num = 0; port_num <= p_node_rec->node_info.num_ports; + port_num++) { + /* prepare the query context */ + memset(&context, 0, sizeof(context)); + + status = osmtest_get_port_rec_by_num(p_osmt, + p_node_rec->lid, + port_num, + &context); + if (status != IB_SUCCESS) { + if (status != IB_SA_MAD_STATUS_NO_RECORDS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "WRN 0122: " + "Error encountered getting port info for LID:0x%04X Num:0x%02X (%s)\n", + p_node_rec->lid, port_num, + ib_get_err_str(status)); + goto Exit; + } else { + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "WRN 0123: " + "failed to get port info for LID:0x%04X Num:0x%02X (%s)\n", + p_node_rec->lid, port_num, + ib_get_err_str(status)); + status = IB_SUCCESS; + } + } else { + /* OK we got something */ + p_rec = + osmv_get_query_portinfo_rec(context.result. + p_result_madw, + 0); + osmtest_write_port_info(p_osmt, fh, p_rec); + + /* create a subnet object */ + p_port = port_new(); + CL_ASSERT(p_port != NULL); + + /* copy the info to the subnet node object */ + p_port->rec = *p_rec; + + cl_qmap_insert(&p_osmt->exp_subn.port_key_tbl, + port_gen_id(p_node_rec->lid, + port_num), + &p_port->map_item); + } + + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + } + p_node = (node_t *) cl_qmap_next(&p_node->map_item); + } + + /* we must set the exist status to avoid abort of the over all algorith */ + +Exit: + /* + * Return the IB query MAD to the pool as necessary. 
+ */ + + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +/********************************************************************** + * ASSUMES NO RMPP + **********************************************************************/ +static ib_api_status_t +osmtest_write_all_path_recs(IN osmtest_t * const p_osmt, IN FILE * fh) +{ + osmtest_req_context_t context; + const ib_path_rec_t *p_rec; + cl_status_t status = CL_SUCCESS; + int num_recs, i; + cl_qmap_t *p_tbl; + node_t *p_src_node, *p_dst_node; + ib_api_status_t got_status = IB_SUCCESS; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&context, 0, sizeof(context)); + + /* + * Go over all nodes that exist in the subnet + * for each pair that are not switch nodes get the path record + */ + + context.p_osmt = p_osmt; + + p_tbl = &p_osmt->exp_subn.node_lid_tbl; + + p_src_node = (node_t *) cl_qmap_head(p_tbl); + + while (p_src_node != (node_t *) cl_qmap_end(p_tbl)) { + /* HACK we use capability_mask to know diff a CA node from switch node */ + /* if(p_src_node->rec.node_info.capability_mask ) { */ + p_dst_node = (node_t *) cl_qmap_head(p_tbl); + + while (p_dst_node != (node_t *) cl_qmap_end(p_tbl)) { + /* HACK we use capability_mask to know diff a CA node from switch node */ + /* if (p_dst_node->rec.node_info.capability_mask) { */ + + /* query for it: */ + status = osmtest_get_path_rec_by_lid_pair(p_osmt, + p_src_node-> + rec.lid, + p_dst_node-> + rec.lid, + &context); + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 012D: " + "failed to get path info from LID:0x%X To LID:0x%X (%s)\n", + p_src_node->rec.lid, + p_dst_node->rec.lid, + ib_get_err_str(status)); + /* remember the first error status */ + got_status = + (got_status == + IB_SUCCESS) ? 
status : got_status; + } else { + /* we might have received several records */ + num_recs = context.result.result_cnt; + for (i = 0; i < num_recs; i++) { + p_rec = + osmv_get_query_path_rec(context. + result. + p_result_madw, + i); + osmtest_write_path_info(p_osmt, fh, + p_rec); + } + } +/* } */ + + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + /* next one please */ + p_dst_node = + (node_t *) cl_qmap_next(&p_dst_node->map_item); + } +/* } */ + + p_src_node = (node_t *) cl_qmap_next(&p_src_node->map_item); + } + + if (got_status != IB_SUCCESS) + status = got_status; + + /* + * Return the IB query MAD to the pool as necessary. + */ + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +#endif + +static ib_api_status_t +osmtest_create_inventory_file(IN osmtest_t * const p_osmt) +{ + FILE *fh; + ib_api_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(&p_osmt->log); + + fh = fopen(p_osmt->opt.file_name, "w"); + if (fh == NULL) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0079: " + "Unable to open inventory file (%s)\n", + p_osmt->opt.file_name); + status = IB_ERROR; + goto Exit; + } + + /* HACK: the order is important: nodes ports paths */ + status = osmtest_write_all_node_recs(p_osmt, fh); + if (status != IB_SUCCESS) + goto CloseFile; + + status = osmtest_write_all_port_recs(p_osmt, fh); + if (status != IB_SUCCESS) + goto CloseFile; + + if (!p_osmt->opt.ignore_path_records) { + status = osmtest_write_all_path_recs(p_osmt, fh); + if (status != IB_SUCCESS) + goto CloseFile; + } + + status = osmtest_write_all_link_recs(p_osmt, fh); + if (status != IB_SUCCESS) + goto CloseFile; + + +CloseFile: + fclose(fh); + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t 
osmtest_stress_large_rmpp_pr(IN osmtest_t * const p_osmt) +{ + ib_api_status_t status = IB_SUCCESS; + uint64_t num_recs = 0; + uint64_t num_queries = 0; + uint32_t delta_recs; + uint32_t delta_queries; + uint32_t print_freq = 0; + struct timeval start_tv, end_tv; + long sec_diff, usec_diff; + float ratio; + + OSM_LOG_ENTER(&p_osmt->log); + gettimeofday(&start_tv, NULL); + printf("-I- Start time is : %09ld:%06ld [sec:usec]\n", start_tv.tv_sec, + (long)start_tv.tv_usec); + + while (num_queries < STRESS_LARGE_PR_RMPP_THR) { + delta_recs = 0; + delta_queries = 0; + + status = osmtest_stress_path_recs_by_guid(p_osmt, &delta_recs, + &delta_queries); + if (status != IB_SUCCESS) + goto Exit; + + num_recs += delta_recs; + num_queries += delta_queries; + + print_freq += delta_recs; + if (print_freq > 10000) { + gettimeofday(&end_tv, NULL); + if (end_tv.tv_usec > start_tv.tv_usec) { + sec_diff = end_tv.tv_sec - start_tv.tv_sec; + usec_diff = end_tv.tv_usec - start_tv.tv_usec; + } else { + sec_diff = end_tv.tv_sec - start_tv.tv_sec - 1; + usec_diff = + 1000000 - (start_tv.tv_usec - + end_tv.tv_usec); + } + printf("-I- End time is : %09ld:%06ld [sec:usec]\n", + end_tv.tv_sec, (long)end_tv.tv_usec); + printf("-I- Querying %" PRId64 + " Path Record queries CA to CA (rmpp)\n\ttook %04ld:%06ld [sec:usec]\n", + num_queries, sec_diff, usec_diff); + if (num_recs == 0) + ratio = 0; + else + ratio = ((float)num_queries / (float)num_recs); + printf("-I- Queries to Record Ratio is %" PRIu64 + " records, %" PRIu64 " queries : %.2f \n", + num_recs, num_queries, ratio); + print_freq = 0; + } + } + +Exit: + gettimeofday(&end_tv, NULL); + printf("-I- End time is : %09ld:%06ld [sec:usec]\n", + end_tv.tv_sec, (long)end_tv.tv_usec); + if (end_tv.tv_usec > start_tv.tv_usec) { + sec_diff = end_tv.tv_sec - start_tv.tv_sec; + usec_diff = end_tv.tv_usec - start_tv.tv_usec; + } else { + sec_diff = end_tv.tv_sec - start_tv.tv_sec - 1; + usec_diff = 1000000 - (start_tv.tv_usec - end_tv.tv_usec); + } + + 
printf("-I- Querying %" PRId64 + " Path Record queries (rmpp) took %04ld:%06ld [sec:usec]\n", + num_queries, sec_diff, usec_diff); + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t osmtest_stress_large_rmpp(IN osmtest_t * const p_osmt) +{ + ib_api_status_t status = IB_SUCCESS; + uint64_t num_recs = 0; + uint64_t num_queries = 0; + uint32_t delta_recs; + uint32_t delta_queries; + uint32_t print_freq = 0; + struct timeval start_tv, end_tv; + long sec_diff, usec_diff; + + OSM_LOG_ENTER(&p_osmt->log); + gettimeofday(&start_tv, NULL); + printf("-I- Start time is : %09ld:%06ld [sec:usec]\n", start_tv.tv_sec, + (long)start_tv.tv_usec); + + while (num_queries < STRESS_LARGE_RMPP_THR) { + delta_recs = 0; + delta_queries = 0; + + status = osmtest_stress_node_recs_large(p_osmt, &delta_recs, + &delta_queries); + if (status != IB_SUCCESS) + goto Exit; + + status = osmtest_stress_path_recs_large(p_osmt, &delta_recs, + &delta_queries); + if (status != IB_SUCCESS) + goto Exit; + + status = osmtest_stress_port_recs_large(p_osmt, &delta_recs, + &delta_queries); + if (status != IB_SUCCESS) + goto Exit; + + num_recs += delta_recs; + num_queries += delta_queries; + + print_freq += delta_recs; + + if (print_freq > 100000) { + gettimeofday(&end_tv, NULL); + if (end_tv.tv_usec > start_tv.tv_usec) { + sec_diff = end_tv.tv_sec - start_tv.tv_sec; + usec_diff = end_tv.tv_usec - start_tv.tv_usec; + } else { + sec_diff = end_tv.tv_sec - start_tv.tv_sec - 1; + usec_diff = + 1000000 - (start_tv.tv_usec - + end_tv.tv_usec); + } + printf("-I- End time is : %09ld:%06ld [sec:usec]\n", + end_tv.tv_sec, (long)end_tv.tv_usec); + printf("-I- Querying %" PRId64 + " large mixed queries (rmpp) took %04ld:%06ld [sec:usec]\n", + num_queries, sec_diff, usec_diff); + printf("%" PRIu64 " records, %" PRIu64 " queries\n", + num_recs, num_queries); + print_freq = 0; + } + } + +Exit: + gettimeofday(&end_tv, NULL); + printf("-I- End time is : %09ld:%06ld [sec:usec]\n", + end_tv.tv_sec, 
(long)end_tv.tv_usec); + if (end_tv.tv_usec > start_tv.tv_usec) { + sec_diff = end_tv.tv_sec - start_tv.tv_sec; + usec_diff = end_tv.tv_usec - start_tv.tv_usec; + } else { + sec_diff = end_tv.tv_sec - start_tv.tv_sec - 1; + usec_diff = 1000000 - (start_tv.tv_usec - end_tv.tv_usec); + } + + printf("-I- Querying %" PRId64 + " large mixed queries (rmpp) took %04ld:%06ld [sec:usec]\n", + num_queries, sec_diff, usec_diff); + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t osmtest_stress_small_rmpp(IN osmtest_t * const p_osmt) +{ + ib_api_status_t status = IB_SUCCESS; + uint64_t num_recs = 0; + uint64_t num_queries = 0; + uint32_t delta_recs; + uint32_t delta_queries; + uint32_t print_freq = 0; + int num_timeouts = 0; + struct timeval start_tv, end_tv; + long sec_diff, usec_diff; + + OSM_LOG_ENTER(&p_osmt->log); + gettimeofday(&start_tv, NULL); + printf("-I- Start time is : %09ld:%06ld [sec:usec]\n", + start_tv.tv_sec, (long)start_tv.tv_usec); + + while ((num_queries < STRESS_SMALL_RMPP_THR) && (num_timeouts < 100)) { + delta_recs = 0; + delta_queries = 0; + + status = osmtest_stress_port_recs_small(p_osmt, &delta_recs, + &delta_queries); + if (status == IB_TIMEOUT) { + num_timeouts++; + continue; + } + else if (status != IB_SUCCESS) + goto Exit; + + num_recs += delta_recs; + num_queries += delta_queries; + + print_freq += delta_recs; + if (print_freq > 5000) { + gettimeofday(&end_tv, NULL); + printf("%" PRIu64 " records, %" PRIu64 " queries\n", + num_recs, num_queries); + if (end_tv.tv_usec > start_tv.tv_usec) { + sec_diff = end_tv.tv_sec - start_tv.tv_sec; + usec_diff = end_tv.tv_usec - start_tv.tv_usec; + } else { + sec_diff = end_tv.tv_sec - start_tv.tv_sec - 1; + usec_diff = + 1000000 - (start_tv.tv_usec - + end_tv.tv_usec); + } + printf("-I- End time is : %09ld:%06ld [sec:usec]\n", + end_tv.tv_sec, (long)end_tv.tv_usec); + printf("-I- Querying %" PRId64 + " port_info queries (single mad) took %04ld:%06ld [sec:usec]\n", + num_queries, 
sec_diff, usec_diff); + print_freq = 0; + } + } + +Exit: + gettimeofday(&end_tv, NULL); + printf("-I- End time is : %09ld:%06ld [sec:usec]\n", + end_tv.tv_sec, (long)end_tv.tv_usec); + if (end_tv.tv_usec > start_tv.tv_usec) { + sec_diff = end_tv.tv_sec - start_tv.tv_sec; + usec_diff = end_tv.tv_usec - start_tv.tv_usec; + } else { + sec_diff = end_tv.tv_sec - start_tv.tv_sec - 1; + usec_diff = 1000000 - (start_tv.tv_usec - end_tv.tv_usec); + } + + printf("-I- Querying %" PRId64 + " port_info queries (single mad) took %04ld:%06ld [sec:usec]\n", + num_queries, sec_diff, usec_diff); + if (num_timeouts > 50) { + status = IB_TIMEOUT; + } + /* Exit: */ + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_stress_path_recs_by_lid(IN osmtest_t * const p_osmt, + OUT uint32_t * const p_num_recs, + OUT uint32_t * const p_num_queries) +{ + osmtest_req_context_t context; + ib_path_rec_t *p_rec; + cl_status_t status; + ib_net16_t dlid, slid; + int num_recs, i; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&context, 0, sizeof(context)); + + slid = cl_ntoh16(p_osmt->local_port.lid); + dlid = cl_ntoh16(p_osmt->local_port.sm_lid); + + /* + * Do a blocking query for the PathRecord. + */ + status = osmtest_get_path_rec_by_lid_pair(p_osmt, slid, dlid, &context); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 000A: " + "osmtest_get_path_rec_by_lid_pair failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + + /* + * Populate the database with the received records. 
+ */ + num_recs = context.result.result_cnt; + *p_num_recs += num_recs; + ++*p_num_queries; + + if (osm_log_is_active(&p_osmt->log, OSM_LOG_VERBOSE)) { + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Received %u records\n", num_recs); + + for (i = 0; i < num_recs; i++) { + p_rec = osmv_get_query_path_rec(context.result.p_result_madw, 0); + osm_dump_path_record(&p_osmt->log, p_rec, OSM_LOG_VERBOSE); + } + } + +Exit: + /* + * Return the IB query MAD to the pool as necessary. + */ + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t osmtest_stress_get_pr(IN osmtest_t * const p_osmt) +{ + ib_api_status_t status = IB_SUCCESS; + uint64_t num_recs = 0; + uint64_t num_queries = 0; + uint32_t delta_recs; + uint32_t delta_queries; + uint32_t print_freq = 0; + int num_timeouts = 0; + struct timeval start_tv, end_tv; + long sec_diff, usec_diff; + + OSM_LOG_ENTER(&p_osmt->log); + gettimeofday(&start_tv, NULL); + printf("-I- Start time is : %09ld:%06ld [sec:usec]\n", + start_tv.tv_sec, (long)start_tv.tv_usec); + + while ((num_queries < STRESS_GET_PR) && (num_timeouts < 100)) { + delta_recs = 0; + delta_queries = 0; + + status = osmtest_stress_path_recs_by_lid(p_osmt, + &delta_recs, + &delta_queries); + if (status == IB_TIMEOUT) { + num_timeouts++; + continue; + } + else if (status != IB_SUCCESS) + goto Exit; + + num_recs += delta_recs; + num_queries += delta_queries; + + print_freq += delta_recs; + if (print_freq > 5000) { + gettimeofday(&end_tv, NULL); + printf("%" PRIu64 " records, %" PRIu64 " queries\n", + num_recs, num_queries); + if (end_tv.tv_usec > start_tv.tv_usec) { + sec_diff = end_tv.tv_sec - start_tv.tv_sec; + usec_diff = end_tv.tv_usec - start_tv.tv_usec; + } else { + sec_diff = end_tv.tv_sec - start_tv.tv_sec - 1; + usec_diff = + 1000000 - (start_tv.tv_usec - + end_tv.tv_usec); + } + 
printf("-I- End time is : %09ld:%06ld [sec:usec]\n", + end_tv.tv_sec, (long)end_tv.tv_usec); + printf("-I- Querying %" PRId64 + " path_rec queries took %04ld:%06ld [sec:usec]\n", + num_queries, sec_diff, usec_diff); + print_freq = 0; + } + } + +Exit: + gettimeofday(&end_tv, NULL); + printf("-I- End time is : %09ld:%06ld [sec:usec]\n", + end_tv.tv_sec, (long)end_tv.tv_usec); + if (end_tv.tv_usec > start_tv.tv_usec) { + sec_diff = end_tv.tv_sec - start_tv.tv_sec; + usec_diff = end_tv.tv_usec - start_tv.tv_usec; + } else { + sec_diff = end_tv.tv_sec - start_tv.tv_sec - 1; + usec_diff = 1000000 - (start_tv.tv_usec - end_tv.tv_usec); + } + + printf("-I- Querying %" PRId64 + " path_rec queries took %04ld:%06ld [sec:usec]\n", + num_queries, sec_diff, usec_diff); + if (num_timeouts > 50) { + status = IB_TIMEOUT; + } + /* Exit: */ + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static void +osmtest_prepare_db_generic(IN osmtest_t * const p_osmt, + IN cl_qmap_t * const p_tbl) +{ + generic_t *p_generic; + + OSM_LOG_ENTER(&p_osmt->log); + + p_generic = (generic_t *) cl_qmap_head(p_tbl); + + while (p_generic != (generic_t *) cl_qmap_end(p_tbl)) { + p_generic->count = 0; + p_generic = (generic_t *) cl_qmap_next(&p_generic->map_item); + } + + OSM_LOG_EXIT(&p_osmt->log); +} + +static void osmtest_prepare_db(IN osmtest_t * const p_osmt) +{ + OSM_LOG_ENTER(&p_osmt->log); + + osmtest_prepare_db_generic(p_osmt, &p_osmt->exp_subn.node_lid_tbl); + osmtest_prepare_db_generic(p_osmt, &p_osmt->exp_subn.path_tbl); + + OSM_LOG_EXIT(&p_osmt->log); +} + +static ib_api_status_t osmtest_check_missing_nodes(IN osmtest_t * const p_osmt) +{ + const node_t *p_node; + cl_status_t status = IB_SUCCESS; + cl_qmap_t *p_tbl; + + OSM_LOG_ENTER(&p_osmt->log); + + p_tbl = &p_osmt->exp_subn.node_lid_tbl; + + p_node = (node_t *) cl_qmap_head(p_tbl); + + while (p_node != (node_t *) cl_qmap_end(p_tbl)) { + if (p_node->count == 0) { + /* + * This node was not reported by the SA + */ + OSM_LOG(&p_osmt->log, 
OSM_LOG_ERROR, "ERR 0080: " + "Missing node 0x%016" PRIx64 "\n", + cl_ntoh64(p_node->rec.node_info.node_guid)); + status = IB_ERROR; + } + + p_node = (node_t *) cl_qmap_next(&p_node->map_item); + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t osmtest_check_missing_ports(IN osmtest_t * const p_osmt) +{ + const port_t *p_port; + cl_status_t status = IB_SUCCESS; + cl_qmap_t *p_tbl; + + OSM_LOG_ENTER(&p_osmt->log); + + p_tbl = &p_osmt->exp_subn.port_key_tbl; + + p_port = (port_t *) cl_qmap_head(p_tbl); + + while (p_port != (port_t *) cl_qmap_end(p_tbl)) { + if (p_port->count == 0) { + /* + * This port was not reported by the SA + */ + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0081: " + "Missing port LID:0x%X Num:0x%X\n", + cl_ntoh16(p_port->rec.lid), + p_port->rec.port_num); + status = IB_ERROR; + } + + p_port = (port_t *) cl_qmap_next(&p_port->map_item); + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t osmtest_check_missing_paths(IN osmtest_t * const p_osmt) +{ + const path_t *p_path; + cl_status_t status = IB_SUCCESS; + cl_qmap_t *p_tbl; + + OSM_LOG_ENTER(&p_osmt->log); + + p_tbl = &p_osmt->exp_subn.path_tbl; + + p_path = (path_t *) cl_qmap_head(p_tbl); + + while (p_path != (path_t *) cl_qmap_end(p_tbl)) { + if (p_path->count == 0) { + /* + * This path was not reported by the SA + */ + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0051: " + "SA did not return path SLID 0x%X to DLID 0x%X\n", + cl_ntoh16(p_path->rec.slid), + cl_ntoh16(p_path->rec.dlid)); + status = IB_ERROR; + goto Exit; + } + + p_path = (path_t *) cl_qmap_next(&p_path->map_item); + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +/* + * Build the path table key for a path record: the DLID is placed above + * bit 16 and OR-ed with the SLID. Both LIDs are used exactly as stored in + * the record (no byte swap is applied here). + * The DLID is converted to uint32_t before the shift so that a DLID with + * bit 15 set is never shifted into the sign bit of a promoted int + * (the LIDs are handled as 16-bit values elsewhere in this file). + */ +static inline uint32_t +osmtest_path_rec_key_get(IN const ib_path_rec_t * const p_rec) +{ + return ((uint32_t) p_rec->dlid << 16 | p_rec->slid); +} + +static boolean_t +osmtest_path_rec_kay_is_valid(IN osmtest_t * const p_osmt, + IN const path_t * const p_path) +{ + if ((p_path->comp.dlid == 0) ||
(p_path->comp.slid == 0)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0168: " + "SLID and DLID must be specified for defined paths\n"); + return (FALSE); + } + + return (TRUE); +} + +static ib_api_status_t +osmtest_validate_path_data(IN osmtest_t * const p_osmt, + IN path_t * const p_path, + IN const ib_path_rec_t * const p_rec) +{ + cl_status_t status = IB_SUCCESS; + uint8_t lmc = 0; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Checking path SLID 0x%X to DLID 0x%X\n", + cl_ntoh16(p_rec->slid), cl_ntoh16(p_rec->dlid)); + + status = + osmtest_get_local_port_lmc(p_osmt, p_osmt->local_port.lid, &lmc); + if (status != IB_SUCCESS) + goto Exit; + + /* HACK: Assume uniform LMC across endports in the subnet */ + /* This is the only LMC mode which OpenSM currently supports */ + /* In absence of this assumption, validation of this is much more complicated */ + if (lmc == 0) { + /* + * Has this record already been returned? + */ + if (p_path->count != 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0056: " + "Already received path SLID 0x%X to DLID 0x%X\n", + cl_ntoh16(p_rec->slid), cl_ntoh16(p_rec->dlid)); + status = IB_ERROR; + goto Exit; + } + } else { + /* Also, this doesn't detect fewer than the correct number of paths being returned */ + if (p_path->count >= (uint32_t) (1 << (2 * lmc))) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0052: " + "Already received path SLID 0x%X to DLID 0x%X count %d LMC %d\n", + cl_ntoh16(p_rec->slid), cl_ntoh16(p_rec->dlid), + p_path->count, lmc); + status = IB_ERROR; + goto Exit; + } + } + + ++p_path->count; + + /* + * Check the fields the user wants checked. 
+ */ + if ((p_path->comp.dgid.unicast.interface_id & + p_path->rec.dgid.unicast.interface_id) != + (p_path->comp.dgid.unicast.interface_id & + p_rec->dgid.unicast.interface_id)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0169: " + "DGID mismatch on path SLID 0x%X to DLID 0x%X\n" + "\t\t\t\tExpected 0x%016" PRIx64 " 0x%016" PRIx64 "\n" + "\t\t\t\tReceived 0x%016" PRIx64 " 0x%016" PRIx64 "\n", + cl_ntoh16(p_path->rec.slid), + cl_ntoh16(p_path->rec.dlid), + cl_ntoh64(p_path->rec.dgid.unicast.prefix), + cl_ntoh64(p_path->rec.dgid.unicast.interface_id), + cl_ntoh64(p_rec->dgid.unicast.prefix), + cl_ntoh64(p_rec->dgid.unicast.interface_id)); + status = IB_ERROR; + goto Exit; + } + + /* + * Check the fields the user wants checked. + */ + if ((p_path->comp.sgid.unicast.interface_id & + p_path->rec.sgid.unicast.interface_id) != + (p_path->comp.sgid.unicast.interface_id & + p_rec->sgid.unicast.interface_id)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0057: " + "SGID mismatch on path SLID 0x%X to DLID 0x%X\n" + "\t\t\t\tExpected 0x%016" PRIx64 " 0x%016" PRIx64 ",\n" + "\t\t\t\tReceived 0x%016" PRIx64 " 0x%016" PRIx64 ".\n", + cl_ntoh16(p_path->rec.slid), + cl_ntoh16(p_path->rec.dlid), + cl_ntoh64(p_path->rec.sgid.unicast.prefix), + cl_ntoh64(p_path->rec.sgid.unicast.interface_id), + cl_ntoh64(p_rec->sgid.unicast.prefix), + cl_ntoh64(p_rec->sgid.unicast.interface_id)); + status = IB_ERROR; + goto Exit; + } + + /* + * Compare the fields the user wishes to validate. 
+ */ + if ((p_path->comp.pkey & p_path->rec.pkey) != + (p_path->comp.pkey & p_rec->pkey)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0012: " + "PKEY mismatch on path SLID 0x%X to DLID 0x%X\n" + "\t\t\t\tExpected 0x%X, received 0x%X\n", + cl_ntoh16(p_path->rec.slid), + cl_ntoh16(p_path->rec.dlid), + cl_ntoh16(p_path->rec.pkey), cl_ntoh16(p_rec->pkey)); + status = IB_ERROR; + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_validate_node_data(IN osmtest_t * const p_osmt, + IN node_t * const p_node, + IN const ib_node_record_t * const p_rec) +{ + cl_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Checking node 0x%016" PRIx64 ", LID 0x%X\n", + cl_ntoh64(p_rec->node_info.node_guid), cl_ntoh16(p_rec->lid)); + + /* + * Has this record already been returned? + */ + if (p_node->count != 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0013: " + "Already received node 0x%016" PRIx64 "\n", + cl_ntoh64(p_node->rec.node_info.node_guid)); + status = IB_ERROR; + goto Exit; + } + + ++p_node->count; + + /* + * Compare the fields the user wishes to validate. 
+ */ + if ((p_node->comp.lid & p_node->rec.lid) != + (p_node->comp.lid & p_rec->lid)) { + /* Print expected/received LIDs with the same byte order as the LID earlier in the message */ + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0014: " + "Field mismatch node 0x%016" PRIx64 ", LID 0x%X\n" + "\t\t\t\tExpected LID 0x%X, received 0x%X\n", + cl_ntoh64(p_rec->node_info.node_guid), + cl_ntoh16(p_rec->lid), cl_ntoh16(p_node->rec.lid), + cl_ntoh16(p_rec->lid)); + status = IB_ERROR; + goto Exit; + } + + if ((p_node->comp.node_info.base_version & + p_node->rec.node_info.base_version) != + (p_node->comp.node_info.base_version & + p_rec->node_info.base_version)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0015: " + "Field mismatch node 0x%016" PRIx64 ", LID 0x%X\n" + "\t\t\t\tExpected base_version 0x%X, received 0x%X\n", + cl_ntoh64(p_rec->node_info.node_guid), + cl_ntoh16(p_rec->lid), + p_node->rec.node_info.base_version, + p_rec->node_info.base_version); + status = IB_ERROR; + goto Exit; + } + + if ((p_node->comp.node_info.class_version & + p_node->rec.node_info.class_version) != + (p_node->comp.node_info.class_version & + p_rec->node_info.class_version)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0016: " + "Field mismatch node 0x%016" PRIx64 ", LID 0x%X\n" + "\t\t\t\tExpected class_version 0x%X, received 0x%X\n", + cl_ntoh64(p_rec->node_info.node_guid), + cl_ntoh16(p_rec->lid), + p_node->rec.node_info.class_version, + p_rec->node_info.class_version); + status = IB_ERROR; + goto Exit; + } + + if ((p_node->comp.node_info.node_type & + p_node->rec.node_info.node_type) != + (p_node->comp.node_info.node_type & p_rec->node_info.node_type)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0017: " + "Field mismatch node 0x%016" PRIx64 ", LID 0x%X\n" + "\t\t\t\tExpected node_type 0x%X, received 0x%X\n", + cl_ntoh64(p_rec->node_info.node_guid), + cl_ntoh16(p_rec->lid), + p_node->rec.node_info.node_type, + p_rec->node_info.node_type); + status = IB_ERROR; + goto Exit; + } + + if ((p_node->comp.node_info.sys_guid & + p_node->rec.node_info.sys_guid) != + (p_node->comp.node_info.sys_guid &
p_rec->node_info.sys_guid)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0018: " + "Field mismatch node 0x%016" PRIx64 ", LID 0x%X\n" + "\t\t\t\tExpected sys_guid 0x%016" PRIx64 + ", received 0x%016" PRIx64 "\n", + cl_ntoh64(p_rec->node_info.node_guid), + cl_ntoh16(p_rec->lid), + cl_ntoh64(p_node->rec.node_info.sys_guid), + cl_ntoh64(p_rec->node_info.sys_guid)); + status = IB_ERROR; + goto Exit; + } + + if ((p_node->comp.node_info.node_guid & + p_node->rec.node_info.node_guid) != + (p_node->comp.node_info.node_guid & p_rec->node_info.node_guid)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0019: " + "Field mismatch node 0x%016" PRIx64 ", LID 0x%X\n" + "\t\t\t\tExpected node_guid 0x%016" PRIx64 + ", received 0x%016" PRIx64 "\n", + cl_ntoh64(p_rec->node_info.node_guid), + cl_ntoh16(p_rec->lid), + cl_ntoh64(p_node->rec.node_info.node_guid), + cl_ntoh64(p_rec->node_info.node_guid)); + status = IB_ERROR; + goto Exit; + } + + if ((p_node->comp.node_info.port_guid & + p_node->rec.node_info.port_guid) != + (p_node->comp.node_info.port_guid & p_rec->node_info.port_guid)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0031: " + "Field mismatch node 0x%016" PRIx64 ", LID 0x%X\n" + "\t\t\t\tExpected port_guid 0x%016" PRIx64 + ", received 0x%016" PRIx64 "\n", + cl_ntoh64(p_rec->node_info.node_guid), + cl_ntoh16(p_rec->lid), + cl_ntoh64(p_node->rec.node_info.port_guid), + cl_ntoh64(p_rec->node_info.port_guid)); + status = IB_ERROR; + goto Exit; + } + + if ((p_node->comp.node_info.partition_cap & + p_node->rec.node_info.partition_cap) != + (p_node->comp.node_info.partition_cap & + p_rec->node_info.partition_cap)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0032: " + "Field mismatch node 0x%016" PRIx64 ", LID 0x%X\n" + "\t\t\t\tExpected partition_cap 0x%X, received 0x%X\n", + cl_ntoh64(p_rec->node_info.node_guid), + cl_ntoh16(p_rec->lid), + cl_ntoh16(p_node->rec.node_info.partition_cap), + cl_ntoh16(p_rec->node_info.partition_cap)); + status = IB_ERROR; + goto Exit; + } + + if 
((p_node->comp.node_info.device_id & + p_node->rec.node_info.device_id) != + (p_node->comp.node_info.device_id & p_rec->node_info.device_id)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0033: " + "Field mismatch node 0x%016" PRIx64 ", LID 0x%X\n" + "\t\t\t\tExpected device_id 0x%X, received 0x%X\n", + cl_ntoh64(p_rec->node_info.node_guid), + cl_ntoh16(p_rec->lid), + cl_ntoh16(p_node->rec.node_info.device_id), + cl_ntoh16(p_rec->node_info.device_id)); + status = IB_ERROR; + goto Exit; + } + + if ((p_node->comp.node_info.revision & + p_node->rec.node_info.revision) != + (p_node->comp.node_info.revision & p_rec->node_info.revision)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0034: " + "Field mismatch node 0x%016" PRIx64 ", LID 0x%X\n" + "\t\t\t\tExpected revision 0x%X, received 0x%X\n", + cl_ntoh64(p_rec->node_info.node_guid), + cl_ntoh16(p_rec->lid), + cl_ntoh32(p_node->rec.node_info.revision), + cl_ntoh32(p_rec->node_info.revision)); + status = IB_ERROR; + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_validate_node_rec(IN osmtest_t * const p_osmt, + IN const ib_node_record_t * const p_rec) +{ + cl_status_t status = IB_SUCCESS; + node_t *p_node; + const cl_qmap_t *p_tbl; + + OSM_LOG_ENTER(&p_osmt->log); + + /* + * Find proper node record in the database. 
+ */ + p_tbl = &p_osmt->exp_subn.node_lid_tbl; + p_node = (node_t *) cl_qmap_get(p_tbl, p_rec->lid); + if (p_node == (node_t *) cl_qmap_end(p_tbl)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0035: " + "Unexpected node 0x%016" PRIx64 ", LID 0x%X\n", + cl_ntoh64(p_rec->node_info.node_guid), + cl_ntoh16(p_rec->lid)); + status = IB_ERROR; + goto Exit; + } + + status = osmtest_validate_node_data(p_osmt, p_node, p_rec); + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_validate_port_data(IN osmtest_t * const p_osmt, + IN port_t * const p_port, + IN const ib_portinfo_record_t * const p_rec) +{ + cl_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Checking port LID 0x%X, Num 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num); + + /* + * Has this record already been returned? + */ + if (p_port->count != 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0036: " + "Already received port LID 0x%X, Num 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num); + status = IB_ERROR; + goto Exit; + } + + ++p_port->count; + + /* + * Compare the fields the user wishes to validate. 
+ */ + if ((p_port->comp.lid & p_port->rec.lid) != + (p_port->comp.lid & p_rec->lid)) { + /* Print expected/received LIDs with the same byte order as the LID earlier in the message */ + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0037: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected LID 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + cl_ntoh16(p_port->rec.lid), cl_ntoh16(p_rec->lid)); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_num & p_port->rec.port_num) != + (p_port->comp.port_num & p_rec->port_num)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0038: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected port_num 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_num, p_rec->port_num); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.m_key & p_port->rec.port_info.m_key) != + (p_port->comp.port_info.m_key & p_rec->port_info.m_key)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0039: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected m_key 0x%016" PRIx64 + ", received 0x%016" PRIx64 "\n", cl_ntoh16(p_rec->lid), + p_rec->port_num, p_port->rec.port_info.m_key, + p_rec->port_info.m_key); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.subnet_prefix & p_port->rec.port_info. + subnet_prefix) != + (p_port->comp.port_info.subnet_prefix & p_rec->port_info. + subnet_prefix)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0040: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected subnet_prefix 0x%016" PRIx64 + ", received 0x%016" PRIx64 "\n", cl_ntoh16(p_rec->lid), + p_rec->port_num, p_port->rec.port_info.subnet_prefix, + p_rec->port_info.subnet_prefix); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.base_lid & p_port->rec.port_info.
+ base_lid) != + (p_port->comp.port_info.base_lid & p_rec->port_info.base_lid)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0041: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected base_lid 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.base_lid, + p_rec->port_info.base_lid); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.master_sm_base_lid & p_port->rec.port_info. + master_sm_base_lid) != + (p_port->comp.port_info.master_sm_base_lid & p_rec->port_info. + master_sm_base_lid)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0042: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected master_sm_base_lid 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.master_sm_base_lid, + p_rec->port_info.master_sm_base_lid); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.capability_mask & p_port->rec.port_info. + capability_mask) != + (p_port->comp.port_info.capability_mask & p_rec->port_info. + capability_mask)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0043: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected capability_mask 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + cl_ntoh32(p_port->rec.port_info.capability_mask), + cl_ntoh32(p_rec->port_info.capability_mask)); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.diag_code & p_port->rec.port_info. + diag_code) != + (p_port->comp.port_info.diag_code & p_rec->port_info.diag_code)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0044: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected diag_code 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.diag_code, + p_rec->port_info.diag_code); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.m_key_lease_period & p_port->rec.port_info. 
+ m_key_lease_period) != + (p_port->comp.port_info.m_key_lease_period & p_rec->port_info. + m_key_lease_period)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0045: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected m_key_lease_period 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.m_key_lease_period, + p_rec->port_info.m_key_lease_period); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.local_port_num & p_port->rec.port_info. + local_port_num) != + (p_port->comp.port_info.local_port_num & p_rec->port_info. + local_port_num)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0046: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected local_port_num 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.local_port_num, + p_rec->port_info.local_port_num); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.link_width_enabled & p_port->rec.port_info. + link_width_enabled) != + (p_port->comp.port_info.link_width_enabled & p_rec->port_info. + link_width_enabled)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0047: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected link_width_enabled 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.link_width_enabled, + p_rec->port_info.link_width_enabled); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.link_width_supported & p_port->rec. + port_info.link_width_supported) != + (p_port->comp.port_info.link_width_supported & p_rec->port_info. 
+ link_width_supported)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0048: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected link_width_supported 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.link_width_supported, + p_rec->port_info.link_width_supported); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.link_width_active & p_port->rec.port_info. + link_width_active) != + (p_port->comp.port_info.link_width_active & p_rec->port_info. + link_width_active)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0049: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected link_width_active 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.link_width_active, + p_rec->port_info.link_width_active); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.link_speed & p_port->rec.port_info. + link_speed) != + (p_port->comp.port_info.link_speed & p_rec->port_info.link_speed)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0054: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected link_speed 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.link_speed, + p_rec->port_info.link_speed); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.state_info1 & p_port->rec.port_info. + state_info1) != + (p_port->comp.port_info.state_info1 & p_rec->port_info. + state_info1)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0055: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected state_info1 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.state_info1, + p_rec->port_info.state_info1); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.state_info2 & p_port->rec.port_info. + state_info2) != + (p_port->comp.port_info.state_info2 & p_rec->port_info. 
+ state_info2)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0058: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected state_info2 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.state_info2, + p_rec->port_info.state_info2); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.mkey_lmc & p_port->rec.port_info. + mkey_lmc) != + (p_port->comp.port_info.mkey_lmc & p_rec->port_info.mkey_lmc)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0059: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected mkey_lmc 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.mkey_lmc, + p_rec->port_info.mkey_lmc); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.link_speed & p_port->rec.port_info. + link_speed) != + (p_port->comp.port_info.link_speed & p_rec->port_info.link_speed)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0060: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected link_speed 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.link_speed, + p_rec->port_info.link_speed); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.mtu_smsl & p_port->rec.port_info. 
+ mtu_smsl) != + (p_port->comp.port_info.mtu_smsl & p_rec->port_info.mtu_smsl)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0061: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected mtu_smsl 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.mtu_smsl, + p_rec->port_info.mtu_smsl); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.vl_cap & p_port->rec.port_info.vl_cap) != + (p_port->comp.port_info.vl_cap & p_rec->port_info.vl_cap)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0062: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected vl_cap 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.vl_cap, p_rec->port_info.vl_cap); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.vl_high_limit & p_port->rec.port_info. + vl_high_limit) != + (p_port->comp.port_info.vl_high_limit & p_rec->port_info. + vl_high_limit)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0082: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected vl_high_limit 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.vl_high_limit, + p_rec->port_info.vl_high_limit); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.vl_arb_high_cap & p_port->rec.port_info. + vl_arb_high_cap) != + (p_port->comp.port_info.vl_arb_high_cap & p_rec->port_info. + vl_arb_high_cap)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0083: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected vl_arb_high_cap 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.vl_arb_high_cap, + p_rec->port_info.vl_arb_high_cap); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.vl_arb_low_cap & p_port->rec.port_info. + vl_arb_low_cap) != + (p_port->comp.port_info.vl_arb_low_cap & p_rec->port_info. 
+ vl_arb_low_cap)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0084: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected vl_arb_low_cap 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.vl_arb_low_cap, + p_rec->port_info.vl_arb_low_cap); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.mtu_cap & p_port->rec.port_info.mtu_cap) != + (p_port->comp.port_info.mtu_cap & p_rec->port_info.mtu_cap)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0085: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected mtu_cap 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.mtu_cap, + p_rec->port_info.mtu_cap); + status = IB_ERROR; + goto Exit; + } +#if 0 + /* this is a dynamic attribute */ + if ((p_port->comp.port_info.vl_stall_life & p_port->rec.port_info. + vl_stall_life) != + (p_port->comp.port_info.vl_stall_life & p_rec->port_info. + vl_stall_life)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 012F: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected vl_stall_life 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.vl_stall_life, + p_rec->port_info.vl_stall_life); + status = IB_ERROR; + goto Exit; + } +#endif + + if ((p_port->comp.port_info.vl_enforce & p_port->rec.port_info. + vl_enforce) != + (p_port->comp.port_info.vl_enforce & p_rec->port_info.vl_enforce)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0086: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected vl_enforce 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.vl_enforce, + p_rec->port_info.vl_enforce); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.m_key_violations & p_port->rec.port_info. + m_key_violations) != + (p_port->comp.port_info.m_key_violations & p_rec->port_info. 
+ m_key_violations)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0087: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected m_key_violations 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + cl_ntoh16(p_port->rec.port_info.m_key_violations), + cl_ntoh16(p_rec->port_info.m_key_violations)); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.p_key_violations & p_port->rec.port_info. + p_key_violations) != + (p_port->comp.port_info.p_key_violations & p_rec->port_info. + p_key_violations)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0088: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected p_key_violations 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + cl_ntoh16(p_port->rec.port_info.p_key_violations), + cl_ntoh16(p_rec->port_info.p_key_violations)); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.q_key_violations & p_port->rec.port_info. + q_key_violations) != + (p_port->comp.port_info.q_key_violations & p_rec->port_info. + q_key_violations)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0089: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected q_key_violations 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + cl_ntoh16(p_port->rec.port_info.q_key_violations), + cl_ntoh16(p_rec->port_info.q_key_violations)); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.guid_cap & p_port->rec.port_info. + guid_cap) != + (p_port->comp.port_info.guid_cap & p_rec->port_info.guid_cap)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0090: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected guid_cap 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.guid_cap, + p_rec->port_info.guid_cap); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.subnet_timeout & p_port->rec.port_info. 
+ subnet_timeout) != + (p_port->comp.port_info.subnet_timeout & p_rec->port_info. + subnet_timeout)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0091: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected subnet_timeout 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + ib_port_info_get_timeout(&p_port->rec.port_info), + ib_port_info_get_timeout(&p_rec->port_info)); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.resp_time_value & p_port->rec.port_info. + resp_time_value) != + (p_port->comp.port_info.resp_time_value & p_rec->port_info. + resp_time_value)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0092: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected resp_time_value 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.resp_time_value, + p_rec->port_info.resp_time_value); + status = IB_ERROR; + goto Exit; + } + + if ((p_port->comp.port_info.error_threshold & p_port->rec.port_info. + error_threshold) != + (p_port->comp.port_info.error_threshold & p_rec->port_info. + error_threshold)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0093: " + "Field mismatch port LID 0x%X Num:0x%X\n" + "\t\t\t\tExpected error_threshold 0x%X, received 0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num, + p_port->rec.port_info.error_threshold, + p_rec->port_info.error_threshold); + status = IB_ERROR; + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_validate_port_rec(IN osmtest_t * const p_osmt, + IN const ib_portinfo_record_t * const p_rec) +{ + cl_status_t status = IB_SUCCESS; + port_t *p_port; + const cl_qmap_t *p_tbl; + + OSM_LOG_ENTER(&p_osmt->log); + + /* + * Find proper port record in the database. 
+ * (looked up by the port_gen_id(lid, port_num) key - the lid alone is not unique) + */ + p_tbl = &p_osmt->exp_subn.port_key_tbl; + p_port = + (port_t *) cl_qmap_get(p_tbl, + port_gen_id(p_rec->lid, p_rec->port_num)); + if (p_port == (port_t *) cl_qmap_end(p_tbl)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0094: " + "Unexpected port LID 0x%X, Num:0x%X\n", + cl_ntoh16(p_rec->lid), p_rec->port_num); + status = IB_ERROR; + goto Exit; + } + + status = osmtest_validate_port_data(p_osmt, p_port, p_rec); + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +/* + * Look up the expected entry for a received PathRecord in exp_subn.path_tbl + * (key from osmtest_path_rec_key_get) and validate the record against it. + * Returns IB_ERROR (ERR 0095) when no entry exists for the record, + * otherwise the status of osmtest_validate_path_data. + */ +static ib_api_status_t +osmtest_validate_path_rec(IN osmtest_t * const p_osmt, + IN const ib_path_rec_t * const p_rec) +{ + cl_status_t status = IB_SUCCESS; + path_t *p_path; + const cl_qmap_t *p_tbl; + + OSM_LOG_ENTER(&p_osmt->log); + + /* + * Find proper path record in the database. + */ + p_tbl = &p_osmt->exp_subn.path_tbl; + p_path = (path_t *) cl_qmap_get(p_tbl, osmtest_path_rec_key_get(p_rec)); + if (p_path == (path_t *) cl_qmap_end(p_tbl)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0095: " + "Unexpected path SLID 0x%X to DLID 0x%X\n", + cl_ntoh16(p_rec->slid), cl_ntoh16(p_rec->dlid)); + status = IB_ERROR; + goto Exit; + } + + status = osmtest_validate_path_data(p_osmt, p_path, p_rec); + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +#ifdef VENDOR_RMPP_SUPPORT +/* Port GUID taken from the first validated node record while this is still zero */ +static ib_net64_t portguid; + +static ib_api_status_t +osmtest_validate_all_node_recs(IN osmtest_t * const p_osmt) +{ + osmtest_req_context_t context; + const ib_node_record_t *p_rec; + uint32_t i; + cl_status_t status; + size_t num_recs; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&context, 0, sizeof(context)); + + /* + * Do a blocking query for all NodeRecords in the subnet.
+ */ + status = osmtest_get_all_recs(p_osmt, IB_MAD_ATTR_NODE_RECORD, + sizeof(*p_rec), &context); + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0096: " + "osmtest_get_all_recs failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + + num_recs = context.result.result_cnt; + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Received %zu records\n", + num_recs); + + /* + * Compare the received records to the database. + */ + osmtest_prepare_db(p_osmt); + + for (i = 0; i < num_recs; i++) { + p_rec = + osmv_get_query_node_rec(context.result.p_result_madw, i); + + status = osmtest_validate_node_rec(p_osmt, p_rec); + if (status != IB_SUCCESS) { + /* The message names the function that actually failed */ + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0097: " + "osmtest_validate_node_rec failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + /* Remember the port GUID of the first record that validates */ + if (!portguid) + portguid = p_rec->node_info.port_guid; + } + + status = osmtest_check_missing_nodes(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0098: " + "osmtest_check_missing_nodes failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + +Exit: + /* + * Return the IB query MAD to the pool as necessary. + */ + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_validate_all_guidinfo_recs(IN osmtest_t * const p_osmt) +{ + osmtest_req_context_t context; + const ib_guidinfo_record_t *p_rec; + cl_status_t status; + size_t num_recs; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&context, 0, sizeof(context)); + + /* + * Do a blocking query for all GuidInfoRecords in the subnet.
+ */ + status = osmtest_get_all_recs(p_osmt, IB_MAD_ATTR_GUIDINFO_RECORD, + sizeof(*p_rec), &context); + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0099: " + "osmtest_get_all_recs failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + + num_recs = context.result.result_cnt; + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Received %zu records\n", + num_recs); + + /* No validation as yet */ + +Exit: + /* + * Return the IB query MAD to the pool as necessary. + */ + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +}
+
+/*
+ * Query every PathRecord in the subnet, validate each one against the
+ * expected path table, then check for expected paths that were not seen.
+ *
+ * Stops at the first failing step and returns its status.  The result
+ * MAD, if any, is always returned to the pool before leaving.
+ */
+static ib_api_status_t
+osmtest_validate_all_path_recs(IN osmtest_t * const p_osmt)
+{
+	osmtest_req_context_t query_ctx;
+	const ib_path_rec_t *p_path_rec;
+	cl_status_t status;
+	size_t rec_total;
+	uint32_t idx;
+
+	OSM_LOG_ENTER(&p_osmt->log);
+
+	memset(&query_ctx, 0, sizeof(query_ctx));
+
+	/* Blocking query for all PathRecords in the subnet. */
+	status = osmtest_get_all_recs(p_osmt, IB_MAD_ATTR_PATH_RECORD,
+				      sizeof(*p_path_rec), &query_ctx);
+	if (status != IB_SUCCESS) {
+		OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 009A: "
+			"osmtest_get_all_recs failed (%s)\n",
+			ib_get_err_str(status));
+		goto Cleanup;
+	}
+
+	rec_total = query_ctx.result.result_cnt;
+	OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "Received %zu records\n",
+		rec_total);
+
+	/* Compare what came back with the database. */
+	osmtest_prepare_db(p_osmt);
+
+	idx = 0;
+	while (idx < rec_total) {
+		p_path_rec =
+		    osmv_get_query_path_rec(query_ctx.result.p_result_madw,
+					    idx);
+
+		status = osmtest_validate_path_rec(p_osmt, p_path_rec);
+		if (status != IB_SUCCESS) {
+			OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0100: "
+				"osmtest_validate_path_rec failed (%s)\n",
+				ib_get_err_str(status));
+			goto Cleanup;
+		}
+
+		idx++;
+	}
+
+	status = osmtest_check_missing_paths(p_osmt);
+	if (status != IB_SUCCESS) {
+		OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0101: "
+			"osmtest_check_missing_paths failed (%s)\n",
+			ib_get_err_str(status));
+	}
+
+Cleanup:
+	/* Give the query MAD back to the pool if one was handed to us. */
+	if (query_ctx.result.p_result_madw != NULL) {
+		osm_mad_pool_put(&p_osmt->mad_pool,
+				 query_ctx.result.p_result_madw);
+		query_ctx.result.p_result_madw = NULL;
+	}
+
+	OSM_LOG_EXIT(&p_osmt->log);
+	return (status);
+}
+
+/********************************************************************** + * Get link record by LID + **********************************************************************/ +static ib_api_status_t +osmtest_get_link_rec_by_lid(IN osmtest_t * const p_osmt, + IN ib_net16_t const from_lid, + IN ib_net16_t const to_lid, + IN OUT osmtest_req_context_t * const p_context) +{ + ib_api_status_t status = IB_SUCCESS; + osmv_user_query_t user; + osmv_query_req_t req; + ib_link_record_t record; + ib_mad_t *p_mad; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Getting link record from LID 0x%02X to LID 0x%02X\n", + cl_ntoh16(from_lid), cl_ntoh16(to_lid)); + + /* + * Do a blocking query for this record in the subnet. + * The result is returned in the result field of the caller's + * context structure. + * + * The query structures are locals. 
+ */ + memset(&req, 0, sizeof(req)); + memset(&user, 0, sizeof(user)); + memset(&record, 0, sizeof(record)); + + record.from_lid = from_lid; + record.to_lid = to_lid; + p_context->p_osmt = p_osmt; + if (from_lid) + user.comp_mask |= IB_LR_COMPMASK_FROM_LID; + if (to_lid) + user.comp_mask |= IB_LR_COMPMASK_TO_LID; + user.attr_id = IB_MAD_ATTR_LINK_RECORD; + user.p_attr = &record; + + req.query_type = OSMV_QUERY_USER_DEFINED; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = p_context; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + req.sm_key = 0; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 007A: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = p_context->result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 007B: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + if (status == IB_REMOTE_ERROR) { + p_mad = + osm_madw_get_mad_ptr(p_context->result. 
+ p_result_madw); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "osmtest_get_link_rec_by_lid: " + "Remote error = %s\n", + ib_get_mad_status_str(p_mad)); + + status = + (ib_net16_t) (p_mad->status & IB_SMP_STATUS_MASK); + } + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +/********************************************************************** + * Get GUIDInfo record by LID + **********************************************************************/ +static ib_api_status_t +osmtest_get_guidinfo_rec_by_lid(IN osmtest_t * const p_osmt, + IN ib_net16_t const lid, + IN OUT osmtest_req_context_t * const p_context) +{ + ib_api_status_t status = IB_SUCCESS; + osmv_user_query_t user; + osmv_query_req_t req; + ib_guidinfo_record_t record; + ib_mad_t *p_mad; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Getting GUIDInfo record for LID 0x%02X\n", cl_ntoh16(lid)); + + /* + * Do a blocking query for this record in the subnet. + * The result is returned in the result field of the caller's + * context structure. + * + * The query structures are locals. 
+ */ + memset(&req, 0, sizeof(req)); + memset(&user, 0, sizeof(user)); + memset(&record, 0, sizeof(record)); + + record.lid = lid; + p_context->p_osmt = p_osmt; + user.comp_mask = IB_GIR_COMPMASK_LID; + user.attr_id = IB_MAD_ATTR_GUIDINFO_RECORD; + user.p_attr = &record; + + req.query_type = OSMV_QUERY_USER_DEFINED; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = p_context; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + req.sm_key = 0; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 007C: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = p_context->result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 007D: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + if (status == IB_REMOTE_ERROR) { + p_mad = + osm_madw_get_mad_ptr(p_context->result. 
+ p_result_madw); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(p_mad)); + + status = + (ib_net16_t) (p_mad->status & IB_SMP_STATUS_MASK); + } + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +/********************************************************************** + * Get PKeyTable record by LID + **********************************************************************/ +static ib_api_status_t +osmtest_get_pkeytbl_rec_by_lid(IN osmtest_t * const p_osmt, + IN ib_net16_t const lid, + IN ib_net64_t const sm_key, + IN OUT osmtest_req_context_t * const p_context) +{ + ib_api_status_t status = IB_SUCCESS; + osmv_user_query_t user; + osmv_query_req_t req; + ib_pkey_table_record_t record; + ib_mad_t *p_mad; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Getting PKeyTable record for LID 0x%02X\n", cl_ntoh16(lid)); + + /* + * Do a blocking query for this record in the subnet. + * The result is returned in the result field of the caller's + * context structure. + * + * The query structures are locals. 
+ */ + memset(&req, 0, sizeof(req)); + memset(&user, 0, sizeof(user)); + memset(&record, 0, sizeof(record)); + + record.lid = lid; + p_context->p_osmt = p_osmt; + user.comp_mask = IB_PKEY_COMPMASK_LID; + user.attr_id = IB_MAD_ATTR_PKEY_TBL_RECORD; + user.p_attr = &record; + + req.query_type = OSMV_QUERY_USER_DEFINED; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = p_context; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + req.sm_key = sm_key; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 007E: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = p_context->result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 007F: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + if (status == IB_REMOTE_ERROR) { + p_mad = + osm_madw_get_mad_ptr(p_context->result. 
+ p_result_madw); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(p_mad)); + + status = + (ib_net16_t) (p_mad->status & IB_SMP_STATUS_MASK); + } + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +/********************************************************************** + * Get SwitchInfo record by LID + **********************************************************************/ +static ib_api_status_t +osmtest_get_sw_info_rec_by_lid(IN osmtest_t * const p_osmt, + IN ib_net16_t const lid, + IN OUT osmtest_req_context_t * const p_context) +{ + ib_api_status_t status = IB_SUCCESS; + osmv_user_query_t user; + osmv_query_req_t req; + ib_switch_info_record_t record; + ib_mad_t *p_mad; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Getting SwitchInfo record for LID 0x%02X\n", cl_ntoh16(lid)); + + /* + * Do a blocking query for this record in the subnet. + * The result is returned in the result field of the caller's + * context structure. + * + * The query structures are locals. 
+ */ + memset(&req, 0, sizeof(req)); + memset(&user, 0, sizeof(user)); + memset(&record, 0, sizeof(record)); + + record.lid = lid; + p_context->p_osmt = p_osmt; + if (lid) + user.comp_mask = IB_SWIR_COMPMASK_LID; + user.attr_id = IB_MAD_ATTR_SWITCH_INFO_RECORD; + user.p_attr = &record; + + req.query_type = OSMV_QUERY_USER_DEFINED; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = p_context; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + req.sm_key = 0; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 006C: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = p_context->result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 006D: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + if (status == IB_REMOTE_ERROR) { + p_mad = + osm_madw_get_mad_ptr(p_context->result. 
+ p_result_madw); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(p_mad)); + + status = + (ib_net16_t) (p_mad->status & IB_SMP_STATUS_MASK); + } + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +/********************************************************************** + * Get LFT record by LID + **********************************************************************/ +static ib_api_status_t +osmtest_get_lft_rec_by_lid(IN osmtest_t * const p_osmt, + IN ib_net16_t const lid, + IN OUT osmtest_req_context_t * const p_context) +{ + ib_api_status_t status = IB_SUCCESS; + osmv_user_query_t user; + osmv_query_req_t req; + ib_lft_record_t record; + ib_mad_t *p_mad; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Getting LFT record for LID 0x%02X\n", cl_ntoh16(lid)); + + /* + * Do a blocking query for this record in the subnet. + * The result is returned in the result field of the caller's + * context structure. + * + * The query structures are locals. 
+ */ + memset(&req, 0, sizeof(req)); + memset(&user, 0, sizeof(user)); + memset(&record, 0, sizeof(record)); + + record.lid = lid; + p_context->p_osmt = p_osmt; + if (lid) + user.comp_mask = IB_LFTR_COMPMASK_LID; + user.attr_id = IB_MAD_ATTR_LFT_RECORD; + user.p_attr = &record; + + req.query_type = OSMV_QUERY_USER_DEFINED; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = p_context; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + req.sm_key = 0; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 008A: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = p_context->result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 008B: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + if (status == IB_REMOTE_ERROR) { + p_mad = + osm_madw_get_mad_ptr(p_context->result. 
+ p_result_madw); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(p_mad)); + + status = + (ib_net16_t) (p_mad->status & IB_SMP_STATUS_MASK); + } + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +/********************************************************************** + * Get MFT record by LID + **********************************************************************/ +static ib_api_status_t +osmtest_get_mft_rec_by_lid(IN osmtest_t * const p_osmt, + IN ib_net16_t const lid, + IN OUT osmtest_req_context_t * const p_context) +{ + ib_api_status_t status = IB_SUCCESS; + osmv_user_query_t user; + osmv_query_req_t req; + ib_mft_record_t record; + ib_mad_t *p_mad; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Getting MFT record for LID 0x%02X\n", cl_ntoh16(lid)); + + /* + * Do a blocking query for this record in the subnet. + * The result is returned in the result field of the caller's + * context structure. + * + * The query structures are locals. 
+ */ + memset(&req, 0, sizeof(req)); + memset(&user, 0, sizeof(user)); + memset(&record, 0, sizeof(record)); + + record.lid = lid; + p_context->p_osmt = p_osmt; + if (lid) + user.comp_mask = IB_MFTR_COMPMASK_LID; + user.attr_id = IB_MAD_ATTR_MFT_RECORD; + user.p_attr = &record; + + req.query_type = OSMV_QUERY_USER_DEFINED; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = p_context; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + req.sm_key = 0; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 009B: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = p_context->result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 009C: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + if (status == IB_REMOTE_ERROR) { + p_mad = + osm_madw_get_mad_ptr(p_context->result. + p_result_madw); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(p_mad)); + + status = + (ib_net16_t) (p_mad->status & IB_SMP_STATUS_MASK); + } + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_sminfo_record_request(IN osmtest_t * const p_osmt, + IN uint8_t method, + IN void *p_options, + IN OUT osmtest_req_context_t * const p_context) +{ + ib_api_status_t status = IB_SUCCESS; + osmv_user_query_t user; + osmv_query_req_t req; + ib_sminfo_record_t record; + ib_mad_t *p_mad; + osmtest_sm_info_rec_t *p_sm_info_opt; + + OSM_LOG_ENTER(&p_osmt->log); + + /* + * Do a blocking query for these records in the subnet. + * The result is returned in the result field of the caller's + * context structure. + * + * The query structures are locals. 
+ */ + memset(&req, 0, sizeof(req)); + memset(&user, 0, sizeof(user)); + memset(&record, 0, sizeof(record)); + + p_context->p_osmt = p_osmt; + user.attr_id = IB_MAD_ATTR_SMINFO_RECORD; + p_sm_info_opt = p_options; + if (p_sm_info_opt->sm_guid != 0) { + record.sm_info.guid = p_sm_info_opt->sm_guid; + user.comp_mask |= IB_SMIR_COMPMASK_GUID; + } + if (p_sm_info_opt->lid != 0) { + record.lid = p_sm_info_opt->lid; + user.comp_mask |= IB_SMIR_COMPMASK_LID; + } + if (p_sm_info_opt->priority != 0) { + record.sm_info.pri_state = + (p_sm_info_opt->priority & 0x0F) << 4; + user.comp_mask |= IB_SMIR_COMPMASK_PRIORITY; + } + if (p_sm_info_opt->sm_state != 0) { + record.sm_info.pri_state |= p_sm_info_opt->sm_state & 0x0F; + user.comp_mask |= IB_SMIR_COMPMASK_SMSTATE; + } + + user.method = method; + user.p_attr = &record; + + req.query_type = OSMV_QUERY_USER_DEFINED; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = p_context; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + req.sm_key = 0; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 008C: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = p_context->result.status; + + if (status != IB_SUCCESS) { + if (status != IB_INVALID_PARAMETER) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 008D: " + "ib_query failed (%s)\n", + ib_get_err_str(status)); + } + if (status == IB_REMOTE_ERROR) { + p_mad = + osm_madw_get_mad_ptr(p_context->result. 
+ p_result_madw); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(p_mad)); + + status = + (ib_net16_t) (p_mad->status & IB_SMP_STATUS_MASK); + } + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_informinfo_request(IN osmtest_t * const p_osmt, + IN ib_net16_t attr_id, + IN uint8_t method, + IN void *p_options, + IN OUT osmtest_req_context_t * const p_context) +{ + ib_api_status_t status = IB_SUCCESS; + osmv_user_query_t user; + osmv_query_req_t req; + ib_inform_info_t rec; + ib_inform_info_record_t record; + ib_mad_t *p_mad; + osmtest_inform_info_t *p_inform_info_opt; + osmtest_inform_info_rec_t *p_inform_info_rec_opt; + + OSM_LOG_ENTER(&p_osmt->log); + + /* + * Do a blocking query for these records in the subnet. + * The result is returned in the result field of the caller's + * context structure. + * + * The query structures are locals. + */ + memset(&req, 0, sizeof(req)); + memset(&user, 0, sizeof(user)); + memset(&rec, 0, sizeof(rec)); + memset(&record, 0, sizeof(record)); + + p_context->p_osmt = p_osmt; + user.attr_id = attr_id; + if (attr_id == IB_MAD_ATTR_INFORM_INFO_RECORD) { + p_inform_info_rec_opt = p_options; + if (p_inform_info_rec_opt->subscriber_gid.unicast.prefix != 0 && + p_inform_info_rec_opt->subscriber_gid.unicast. + interface_id != 0) { + record.subscriber_gid = + p_inform_info_rec_opt->subscriber_gid; + user.comp_mask = IB_IIR_COMPMASK_SUBSCRIBERGID; + } + record.subscriber_enum = + cl_hton16(p_inform_info_rec_opt->subscriber_enum); + user.comp_mask |= IB_IIR_COMPMASK_ENUM; + user.p_attr = &record; + } else { + /* comp mask bits below are for InformInfoRecord rather than InformInfo */ + /* as currently no comp mask bits defined for InformInfo!!! 
*/ + user.comp_mask = IB_IIR_COMPMASK_SUBSCRIBE; + p_inform_info_opt = p_options; + rec.subscribe = (uint8_t) p_inform_info_opt->subscribe; + if (p_inform_info_opt->qpn) { + rec.g_or_v.generic.qpn_resp_time_val = + cl_hton32(p_inform_info_opt->qpn << 8); + user.comp_mask |= IB_IIR_COMPMASK_QPN; + } + if (p_inform_info_opt->trap) { + rec.g_or_v.generic.trap_num = + cl_hton16(p_inform_info_opt->trap); + user.comp_mask |= IB_IIR_COMPMASK_TRAPNUMB; + } + user.p_attr = &rec; + } + user.method = method; + + req.query_type = OSMV_QUERY_USER_DEFINED; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = p_context; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + req.sm_key = 0; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 008E: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = p_context->result.status; + + if (status != IB_SUCCESS) { + if (status != IB_INVALID_PARAMETER) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 008F: " + "ib_query failed (%s)\n", + ib_get_err_str(status)); + } + if (status == IB_REMOTE_ERROR) { + p_mad = + osm_madw_get_mad_ptr(p_context->result. 
+ p_result_madw); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(p_mad)); + + status = + (ib_net16_t) (p_mad->status & IB_SMP_STATUS_MASK); + } + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} +#endif + +static ib_api_status_t +osmtest_validate_single_path_rec_lid_pair(IN osmtest_t * const p_osmt, + IN path_t * const p_path) +{ + osmtest_req_context_t context; + const ib_path_rec_t *p_rec; + cl_status_t status = IB_SUCCESS; + size_t num_recs; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&context, 0, sizeof(context)); + + status = osmtest_get_path_rec_by_lid_pair(p_osmt, + p_path->rec.slid, + p_path->rec.dlid, &context); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0102: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + num_recs = context.result.result_cnt; + if (num_recs != 1) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0103: " + "Too many records. Expected 1, received %zu\n", + num_recs); + + status = IB_ERROR; + } else { + p_rec = + osmv_get_query_path_rec(context.result.p_result_madw, 0); + + status = osmtest_validate_path_data(p_osmt, p_path, p_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0104: " + "osmtest_validate_path_data failed (%s)\n", + ib_get_err_str(status)); + } + } + +Exit: + /* + * Return the IB query MAD to the pool as necessary. 
+ */ + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_validate_single_node_rec_lid(IN osmtest_t * const p_osmt, + IN ib_net16_t const lid, + IN node_t * const p_node) +{ + cl_status_t status = IB_SUCCESS; + osmv_user_query_t user; + osmv_query_req_t req; + ib_node_record_t record; + + osmtest_req_context_t context; + const ib_node_record_t *p_rec; + int num_recs, i; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "Getting NodeRecord for node with LID 0x%X\n", cl_ntoh16(lid)); + + memset(&context, 0, sizeof(context)); + memset(&req, 0, sizeof(req)); + memset(&user, 0, sizeof(user)); + memset(&record, 0, sizeof(record)); + + record.lid = lid; + + context.p_osmt = p_osmt; + user.comp_mask = IB_NR_COMPMASK_LID; + user.attr_id = IB_MAD_ATTR_NODE_RECORD; + user.p_attr = &record; + + req.query_type = OSMV_QUERY_USER_DEFINED; + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = &context; + req.pfn_query_cb = osmtest_query_res_cb; + req.p_query_input = &user; + req.sm_key = 0; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0105: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = context.result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0106: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + + if (status == IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(osm_madw_get_mad_ptr + (context.result. 
+ p_result_madw))); + } + goto Exit; + } + + num_recs = context.result.result_cnt; + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Received %d nodes\n", num_recs); + + for (i = 0; i < num_recs; i++) { + p_rec = + osmv_get_query_node_rec(context.result.p_result_madw, i); + + status = osmtest_validate_node_rec(p_osmt, p_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0107: " + "osmtest_validate_node_data failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + } + +Exit: + /* + * Return the IB query MAD to the pool as necessary. + */ + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_validate_single_port_rec_lid(IN osmtest_t * const p_osmt, + IN port_t * const p_port) +{ + osmtest_req_context_t context; + + const ib_portinfo_record_t *p_rec; + cl_status_t status = IB_SUCCESS; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&context, 0, sizeof(context)); + + context.p_osmt = p_osmt; + status = osmtest_get_port_rec_by_num(p_osmt, + p_port->rec.lid, + p_port->rec.port_num, &context); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0108: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + + goto Exit; + } + + /* we should have got exactly one port */ + p_rec = osmv_get_query_portinfo_rec(context.result.p_result_madw, 0); + status = osmtest_validate_port_rec(p_osmt, p_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0109: " + "osmtest_validate_port_data failed (%s)\n", + ib_get_err_str(status)); + } + +Exit: + /* + * Return the IB query MAD to the pool as necessary. 
+ */ + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_validate_single_path_rec_guid_pair(IN osmtest_t * const p_osmt, + IN const osmv_guid_pair_t * + const p_pair) +{ + osmtest_req_context_t context; + const ib_path_rec_t *p_rec; + cl_status_t status = IB_SUCCESS; + size_t num_recs; + osmv_query_req_t req; + uint32_t i; + boolean_t got_error = FALSE; + + OSM_LOG_ENTER(&p_osmt->log); + + memset(&req, 0, sizeof(req)); + memset(&context, 0, sizeof(context)); + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "\n\t\t\t\tChecking src 0x%016" PRIx64 + " to dest 0x%016" PRIx64 "\n", + cl_ntoh64(p_pair->src_guid), cl_ntoh64(p_pair->dest_guid)); + + context.p_osmt = p_osmt; + + req.timeout_ms = p_osmt->opt.transaction_timeout; + req.retry_cnt = p_osmt->opt.retry_count; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = &context; + req.pfn_query_cb = osmtest_query_res_cb; + + req.query_type = OSMV_QUERY_PATH_REC_BY_PORT_GUIDS; + req.p_query_input = p_pair; + req.sm_key = 0; + + if (p_osmt->opt.with_grh) { + req.with_grh = 1; + memcpy(&req.gid, &p_osmt->sm_port_gid, 16); + } + + status = osmv_query_sa(p_osmt->h_bind, &req); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0110: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + goto Exit; + } + + status = context.result.status; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0111: " + "ib_query failed (%s)\n", ib_get_err_str(status)); + + if (status == IB_REMOTE_ERROR) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Remote error = %s\n", + ib_get_mad_status_str(osm_madw_get_mad_ptr + (context.result. 
+ p_result_madw))); + } + goto Exit; + } + + num_recs = context.result.result_cnt; + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "%zu records\n", num_recs); + + for (i = 0; i < num_recs; i++) { + p_rec = + osmv_get_query_path_rec(context.result.p_result_madw, i); + + /* + * Make sure the GUID values are correct + */ + if (p_rec->dgid.unicast.interface_id != p_pair->dest_guid) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0112: " + "Destination GUID mismatch\n" + "\t\t\t\texpected 0x%016" PRIx64 + ", received 0x%016" PRIx64 "\n", + cl_ntoh64(p_pair->dest_guid), + cl_ntoh64(p_rec->dgid.unicast.interface_id)); + got_error = TRUE; + } + + if (p_rec->sgid.unicast.interface_id != p_pair->src_guid) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0113: " + "Source GUID mismatch\n" + "\t\t\t\texpected 0x%016" PRIx64 + ", received 0x%016" PRIx64 ".\n", + cl_ntoh64(p_pair->src_guid), + cl_ntoh64(p_rec->sgid.unicast.interface_id)); + got_error = TRUE; + } + + status = osmtest_validate_path_rec(p_osmt, p_rec); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0114: " + "osmtest_validate_path_rec failed (%s)\n", + ib_get_err_str(status)); + got_error = TRUE; + } + if (got_error || (status != IB_SUCCESS)) { + osm_dump_path_record(&p_osmt->log, p_rec, + OSM_LOG_VERBOSE); + if (status == IB_SUCCESS) + status = IB_ERROR; + goto Exit; + } + } + +Exit: + /* + * Return the IB query MAD to the pool as necessary. 
+ */ + if (context.result.p_result_madw != NULL) { + osm_mad_pool_put(&p_osmt->mad_pool, + context.result.p_result_madw); + context.result.p_result_madw = NULL; + } + + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_get_sm_gid(IN osmtest_t * const p_osmt) +{ + cl_status_t status = IB_SUCCESS; + node_t *p_node; + const cl_qmap_t *p_tbl; + ib_port_attr_t *local_port; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Calculate SM GID for use in subsequent SA queries\n"); + + local_port = &p_osmt->local_port; + p_osmt->sm_port_gid.unicast.prefix = + cl_hton64(p_osmt->local_port_gid.unicast.prefix); + + if (local_port->lid != local_port->sm_lid) { + status = osmtest_create_db(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 0155: Database creation failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + + p_tbl = &p_osmt->exp_subn.node_lid_tbl; + p_node = (node_t *) cl_qmap_get(p_tbl, + cl_hton16(local_port->sm_lid)); + if (p_node == (node_t *) cl_qmap_end(p_tbl)) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 0154: SM LID 0x%X doesn't exist\n", + local_port->sm_lid); + status = IB_ERROR; + goto Exit; + } + p_osmt->sm_port_gid.unicast.interface_id = p_node->rec.node_info.port_guid; + } else { + p_osmt->sm_port_gid.unicast.interface_id = + cl_hton64(p_osmt->local_port_gid.unicast.interface_id); + } + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "SM GID 0x%016" PRIx64 " 0x%016" PRIx64 "\n", + cl_ntoh64(p_osmt->sm_port_gid.unicast.prefix), + cl_ntoh64(p_osmt->sm_port_gid.unicast.interface_id)); + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_validate_single_path_recs(IN osmtest_t * const p_osmt) +{ + path_t *p_path; + cl_status_t status = IB_SUCCESS; + const cl_qmap_t *p_path_tbl; +/* We skip node to node path record validation since it might contains + NONEXISTENT PATHS, i.e. 
when using UPDN */ + osmv_guid_pair_t guid_pair; + uint16_t cnt; + + OSM_LOG_ENTER(&p_osmt->log); + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Validating individual path record queries\n"); + p_path_tbl = &p_osmt->exp_subn.path_tbl; + + osmtest_prepare_db(p_osmt); + + /* + * Walk the list of all path records, and ask for each one + * specifically. Make sure we get it. + */ + cnt = 0; + p_path = (path_t *) cl_qmap_head(p_path_tbl); + while (p_path != (path_t *) cl_qmap_end(p_path_tbl)) { + status = + osmtest_validate_single_path_rec_lid_pair(p_osmt, p_path); + if (status != IB_SUCCESS) + goto Exit; + cnt++; + p_path = (path_t *) cl_qmap_next(&p_path->map_item); + } + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Total of %u path records validated using LID based query\n", + cnt); + + status = osmtest_check_missing_paths(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0115: " + "osmtest_check_missing_paths failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + + /* + * Do the whole thing again with port GUID pairs. + * Note that multiple path records may be returned + * for each guid pair if LMC > 0. 
+ */ + osmtest_prepare_db(p_osmt); + cnt = 0; + p_path = (path_t *) cl_qmap_head(p_path_tbl); + while (p_path != (path_t *) cl_qmap_end(p_path_tbl)) { + guid_pair.src_guid = p_path->rec.sgid.unicast.interface_id; + guid_pair.dest_guid = p_path->rec.dgid.unicast.interface_id; + status = osmtest_validate_single_path_rec_guid_pair(p_osmt, + &guid_pair); + if (status != IB_SUCCESS) + goto Exit; + cnt++; + p_path = (path_t *) cl_qmap_next(&p_path->map_item); + } + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Total of %u path records validated using GUID based query\n", + cnt); + + status = osmtest_check_missing_paths(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0116: " + "osmtest_check_missing_paths failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_validate_single_node_recs(IN osmtest_t * const p_osmt) +{ + node_t *p_node; + cl_status_t status = IB_SUCCESS; + const cl_qmap_t *p_node_lid_tbl; + uint16_t cnt = 0; + + OSM_LOG_ENTER(&p_osmt->log); + + p_node_lid_tbl = &p_osmt->exp_subn.node_lid_tbl; + + osmtest_prepare_db(p_osmt); + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Validating individual node record queries\n"); + + /* + * Walk the list of all node records, and ask for each one + * specifically. Make sure we get it. 
+ */ + p_node = (node_t *) cl_qmap_head(p_node_lid_tbl); + while (p_node != (node_t *) cl_qmap_end(p_node_lid_tbl)) { + status = osmtest_validate_single_node_rec_lid(p_osmt, + (ib_net16_t) + cl_qmap_key((cl_map_item_t *) p_node), p_node); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 011A: " + "osmtest_validate_single_node_rec_lid (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + cnt++; + p_node = (node_t *) cl_qmap_next(&p_node->map_item); + } + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Total of %u node records validated\n", cnt); + + status = osmtest_check_missing_nodes(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0117: " + "osmtest_check_missing_nodes (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_validate_single_port_recs(IN osmtest_t * const p_osmt) +{ + port_t *p_port; + cl_status_t status = IB_SUCCESS; + const cl_qmap_t *p_port_key_tbl; + uint16_t cnt = 0; + + OSM_LOG_ENTER(&p_osmt->log); + + p_port_key_tbl = &p_osmt->exp_subn.port_key_tbl; + + osmtest_prepare_db(p_osmt); + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Validating individual port record queries\n"); + + /* + * Walk the list of all port records, and ask for each one + * specifically. Make sure we get it. 
+ */ + p_port = (port_t *) cl_qmap_head(p_port_key_tbl); + while (p_port != (port_t *) cl_qmap_end(p_port_key_tbl)) { + status = osmtest_validate_single_port_rec_lid(p_osmt, p_port); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 011B: " + "osmtest_validate_single_port_rec_lid (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + cnt++; + p_port = (port_t *) cl_qmap_next(&p_port->map_item); + } + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, + "Total of %u port records validated\n", cnt); + + status = osmtest_check_missing_ports(p_osmt); + if (status != IB_SUCCESS) { + /* Name the helper that actually failed: this is the ports check, not the paths check */ + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0118: " + "osmtest_check_missing_ports failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +/* + * Runs the SA query checks one after another and jumps to Exit on the + * first checked step that fails; returns that status, or IB_SUCCESS + * when every checked step passes. Queries whose outcome depends on the + * fabric setup (e.g. IPoIB presence) are issued without checking status. + */ +static ib_api_status_t osmtest_validate_against_db(IN osmtest_t * const p_osmt) +{ + ib_api_status_t status = IB_SUCCESS; + ib_gid_t portgid, mgid; + osmtest_sm_info_rec_t sm_info_rec_opt; + osmtest_inform_info_t inform_info_opt; + osmtest_inform_info_rec_t inform_info_rec_opt; + /* Used by the unconditional PathRecord queries below, so it must not be declared under VENDOR_RMPP_SUPPORT */ + osmtest_req_context_t context; +#ifdef VENDOR_RMPP_SUPPORT + ib_net64_t sm_key; + ib_net16_t test_lid; + uint8_t lmc; +#ifdef DUAL_SIDED_RMPP + osmv_multipath_req_t request; +#endif + uint8_t i; +#endif + + OSM_LOG_ENTER(&p_osmt->log); + +#ifdef VENDOR_RMPP_SUPPORT + status = osmtest_validate_all_node_recs(p_osmt); + if (status != IB_SUCCESS) + goto Exit; +#endif + + status = osmtest_validate_single_node_recs(p_osmt); + if (status != IB_SUCCESS) + goto Exit; + + /* Exercise SA PathRecord multicast destination code */ + memset(&context, 0, sizeof(context)); + ib_gid_set_default(&portgid, portguid); + /* Set IPoIB broadcast MGID */ + mgid.unicast.prefix = CL_HTON64(0xff12401bffff0000ULL); + mgid.unicast.interface_id = CL_HTON64(0x00000000ffffffffULL); + /* Can't check status as don't know whether port is running IPoIB */ + osmtest_get_path_rec_by_gid_pair(p_osmt, portgid, mgid, &context); + + /* Other
link local unicast PathRecord */ + memset(&context, 0, sizeof(context)); + ib_gid_set_default(&portgid, portguid); + ib_gid_set_default(&mgid, portguid); + mgid.raw[7] = 0xff; /* not default GID prefix */ + /* Can't check status as don't know whether ??? */ + osmtest_get_path_rec_by_gid_pair(p_osmt, portgid, mgid, &context); + + /* Off subnet (site local) unicast PathRecord */ + memset(&context, 0, sizeof(context)); + ib_gid_set_default(&portgid, portguid); + ib_gid_set_default(&mgid, portguid); + mgid.raw[1] = 0xc0; /* site local */ + /* Can't check status as don't know whether ??? */ + osmtest_get_path_rec_by_gid_pair(p_osmt, portgid, mgid, &context); + + /* More than link local scope multicast PathRecord */ + memset(&context, 0, sizeof(context)); + ib_gid_set_default(&portgid, portguid); + /* Set IPoIB broadcast MGID */ + mgid.unicast.prefix = CL_HTON64(0xff15401bffff0000ULL); /* site local */ + mgid.unicast.interface_id = CL_HTON64(0x00000000ffffffffULL); + /* Can't check status as don't know whether port is running IPoIB */ + osmtest_get_path_rec_by_gid_pair(p_osmt, portgid, mgid, &context); + +#if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) + memset(&context, 0, sizeof(context)); + memset(&request, 0, sizeof(request)); + request.comp_mask = + IB_MPR_COMPMASK_SGIDCOUNT | IB_MPR_COMPMASK_DGIDCOUNT; + request.sgid_count = 1; + request.dgid_count = 1; + ib_gid_set_default(&request.gids[0], portguid); + ib_gid_set_default(&request.gids[1], portguid); + status = osmtest_get_multipath_rec(p_osmt, &request, &context); + if (status != IB_SUCCESS) + goto Exit; + + memset(&context, 0, sizeof(context)); + memset(&request, 0, sizeof(request)); + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmtest_get_multipath_rec(p_osmt, &request, &context); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Got error %s\n", ib_get_err_str(status)); + } + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END 
"\n"); + + if (status == IB_SUCCESS) { + status = IB_ERROR; + goto Exit; + } + + memset(&context, 0, sizeof(context)); + memset(&request, 0, sizeof(request)); + request.comp_mask = IB_MPR_COMPMASK_SGIDCOUNT; + request.sgid_count = 1; + ib_gid_set_default(&request.gids[0], portguid); + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmtest_get_multipath_rec(p_osmt, &request, &context); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Got error %s\n", ib_get_err_str(status)); + } + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (status == IB_SUCCESS) { + status = IB_ERROR; + goto Exit; + } + + memset(&context, 0, sizeof(context)); + memset(&request, 0, sizeof(request)); + request.comp_mask = + IB_MPR_COMPMASK_SGIDCOUNT | IB_MPR_COMPMASK_DGIDCOUNT; + request.sgid_count = 1; + request.dgid_count = 1; + ib_gid_set_default(&request.gids[0], portguid); + /* Set IPoIB broadcast MGID as DGID */ + request.gids[1].unicast.prefix = CL_HTON64(0xff12401bffff0000ULL); + request.gids[1].unicast.interface_id = CL_HTON64(0x00000000ffffffffULL); + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmtest_get_multipath_rec(p_osmt, &request, &context); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Got error %s\n", ib_get_err_str(status)); + } + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (status == IB_SUCCESS) { + status = IB_ERROR; + goto Exit; + } + + memset(&context, 0, sizeof(context)); + request.comp_mask = + IB_MPR_COMPMASK_SGIDCOUNT | IB_MPR_COMPMASK_DGIDCOUNT; + request.sgid_count = 1; + request.dgid_count = 1; + /* Set IPoIB broadcast MGID as SGID */ + request.gids[0].unicast.prefix = CL_HTON64(0xff12401bffff0000ULL); + request.gids[0].unicast.interface_id = CL_HTON64(0x00000000ffffffffULL); + ib_gid_set_default(&request.gids[1], portguid); + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + 
status = osmtest_get_multipath_rec(p_osmt, &request, &context); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Got error %s\n", ib_get_err_str(status)); + } + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (status == IB_SUCCESS) { + status = IB_ERROR; + goto Exit; + } + + memset(&context, 0, sizeof(context)); + memset(&request, 0, sizeof(request)); + request.comp_mask = + IB_MPR_COMPMASK_SGIDCOUNT | IB_MPR_COMPMASK_DGIDCOUNT | + IB_MPR_COMPMASK_NUMBPATH; + request.sgid_count = 2; + request.dgid_count = 2; + request.num_path = 2; + ib_gid_set_default(&request.gids[0], portguid); + ib_gid_set_default(&request.gids[1], portguid); + ib_gid_set_default(&request.gids[2], portguid); + ib_gid_set_default(&request.gids[3], portguid); + status = osmtest_get_multipath_rec(p_osmt, &request, &context); + if (status != IB_SUCCESS) + goto Exit; +#endif + +#ifdef VENDOR_RMPP_SUPPORT + /* GUIDInfoRecords */ + status = osmtest_validate_all_guidinfo_recs(p_osmt); + if (status != IB_SUCCESS) + goto Exit; + + /* If LMC > 0, test non base LID SA PortInfoRecord request */ + status = + osmtest_get_local_port_lmc(p_osmt, p_osmt->local_port.lid, &lmc); + if (status != IB_SUCCESS) + goto Exit; + + if (lmc != 0) { + status = + osmtest_get_local_port_lmc(p_osmt, + p_osmt->local_port.lid + 1, + NULL); + if (status != IB_SUCCESS) + goto Exit; + } + + status = osmtest_get_local_port_lmc(p_osmt, 0xffff, NULL); + if (status != IB_SUCCESS) + goto Exit; + + test_lid = cl_ntoh16(p_osmt->local_port.lid); + + /* More GUIDInfo Record tests */ + memset(&context, 0, sizeof(context)); + status = osmtest_get_guidinfo_rec_by_lid(p_osmt, test_lid, &context); + if (status != IB_SUCCESS) + goto Exit; + + memset(&context, 0, sizeof(context)); + status = osmtest_get_guidinfo_rec_by_lid(p_osmt, 0xffff, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* Some PKeyTable Record tests */ + sm_key = OSM_DEFAULT_SM_KEY; + memset(&context, 0, sizeof(context)); + 
status = + osmtest_get_pkeytbl_rec_by_lid(p_osmt, test_lid, sm_key, &context); + if (status != IB_SUCCESS) + goto Exit; + + memset(&context, 0, sizeof(context)); + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_START "\n"); + status = osmtest_get_pkeytbl_rec_by_lid(p_osmt, test_lid, 0, &context); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Got error %s\n", ib_get_err_str(status)); + } + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, EXPECTING_ERRORS_END "\n"); + + if (status == IB_SUCCESS) { + status = IB_ERROR; + goto Exit; + } + + memset(&context, 0, sizeof(context)); + status = + osmtest_get_pkeytbl_rec_by_lid(p_osmt, 0xffff, sm_key, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* SwitchInfo Record tests */ + memset(&context, 0, sizeof(context)); + status = osmtest_get_sw_info_rec_by_lid(p_osmt, 0, &context); + if (status != IB_SUCCESS) + goto Exit; + + memset(&context, 0, sizeof(context)); + status = osmtest_get_sw_info_rec_by_lid(p_osmt, test_lid, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* LFT Record tests */ + memset(&context, 0, sizeof(context)); + status = osmtest_get_lft_rec_by_lid(p_osmt, 0, &context); + if (status != IB_SUCCESS) + goto Exit; + + memset(&context, 0, sizeof(context)); + status = osmtest_get_lft_rec_by_lid(p_osmt, test_lid, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* MFT Record tests */ + memset(&context, 0, sizeof(context)); + status = osmtest_get_mft_rec_by_lid(p_osmt, 0, &context); + if (status != IB_SUCCESS) + goto Exit; + + memset(&context, 0, sizeof(context)); + status = osmtest_get_mft_rec_by_lid(p_osmt, test_lid, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* Some LinkRecord tests */ + /* FromLID */ + memset(&context, 0, sizeof(context)); + status = osmtest_get_link_rec_by_lid(p_osmt, test_lid, 0, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* ToLID */ + memset(&context, 0, sizeof(context)); + status = osmtest_get_link_rec_by_lid(p_osmt, 0, 
test_lid, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* FromLID & ToLID */ + memset(&context, 0, sizeof(context)); + status = + osmtest_get_link_rec_by_lid(p_osmt, test_lid, test_lid, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* NodeRecord test */ + memset(&context, 0, sizeof(context)); + status = osmtest_get_node_rec_by_lid(p_osmt, 0xffff, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* SMInfoRecord tests */ + memset(&sm_info_rec_opt, 0, sizeof(sm_info_rec_opt)); + memset(&context, 0, sizeof(context)); + status = osmtest_sminfo_record_request(p_osmt, IB_MAD_METHOD_SET, + &sm_info_rec_opt, &context); + if (status == IB_SUCCESS) { + status = IB_ERROR; + goto Exit; + } else { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "IS EXPECTED ERROR ^^^^\n"); + } + + memset(&sm_info_rec_opt, 0, sizeof(sm_info_rec_opt)); + memset(&context, 0, sizeof(context)); + status = osmtest_sminfo_record_request(p_osmt, IB_MAD_METHOD_GETTABLE, + &sm_info_rec_opt, &context); + if (status != IB_SUCCESS) + goto Exit; + + memset(&sm_info_rec_opt, 0, sizeof(sm_info_rec_opt)); + sm_info_rec_opt.lid = test_lid; /* local LID */ + memset(&context, 0, sizeof(context)); + status = osmtest_sminfo_record_request(p_osmt, IB_MAD_METHOD_GETTABLE, + &sm_info_rec_opt, &context); + if (status != IB_SUCCESS) + goto Exit; + + if (portguid != 0) { + memset(&sm_info_rec_opt, 0, sizeof(sm_info_rec_opt)); + sm_info_rec_opt.sm_guid = portguid; /* local GUID */ + memset(&context, 0, sizeof(context)); + status = + osmtest_sminfo_record_request(p_osmt, + IB_MAD_METHOD_GETTABLE, + &sm_info_rec_opt, &context); + if (status != IB_SUCCESS) + goto Exit; + } + + for (i = 1; i < 16; i++) { + memset(&sm_info_rec_opt, 0, sizeof(sm_info_rec_opt)); + sm_info_rec_opt.priority = i; + memset(&context, 0, sizeof(context)); + status = + osmtest_sminfo_record_request(p_osmt, + IB_MAD_METHOD_GETTABLE, + &sm_info_rec_opt, &context); + if (status != IB_SUCCESS) + goto Exit; + } + + for (i = 1; i < 4; i++) { 
+ memset(&sm_info_rec_opt, 0, sizeof(sm_info_rec_opt)); + sm_info_rec_opt.sm_state = i; + memset(&context, 0, sizeof(context)); + status = + osmtest_sminfo_record_request(p_osmt, + IB_MAD_METHOD_GETTABLE, + &sm_info_rec_opt, &context); + if (status != IB_SUCCESS) + goto Exit; + } + + /* InformInfoRecord tests */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "InformInfoRecord " + "Sending a BAD - Set Unsubscribe request\n"); + memset(&inform_info_opt, 0, sizeof(inform_info_opt)); + memset(&inform_info_rec_opt, 0, sizeof(inform_info_rec_opt)); + memset(&context, 0, sizeof(context)); + status = + osmtest_informinfo_request(p_osmt, IB_MAD_ATTR_INFORM_INFO_RECORD, + IB_MAD_METHOD_SET, &inform_info_rec_opt, + &context); + if (status == IB_SUCCESS) { + status = IB_ERROR; + goto Exit; + } else { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "InformInfoRecord " + "IS EXPECTED ERROR ^^^^\n"); + } + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "InformInfoRecord " + "Sending a Good - Empty GetTable request\n"); + memset(&context, 0, sizeof(context)); + status = + osmtest_informinfo_request(p_osmt, IB_MAD_ATTR_INFORM_INFO_RECORD, + IB_MAD_METHOD_GETTABLE, + &inform_info_rec_opt, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* InformInfo tests */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "InformInfo " + "Sending a BAD - Empty Get request " + "(should fail with NO_RECORDS)\n"); + memset(&context, 0, sizeof(context)); + status = osmtest_informinfo_request(p_osmt, IB_MAD_ATTR_INFORM_INFO, + IB_MAD_METHOD_GET, &inform_info_opt, + &context); + if (status == IB_SUCCESS) { + status = IB_ERROR; + goto Exit; + } else { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "InformInfo " + "IS EXPECTED ERROR ^^^^\n"); + } + + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "InformInfo " + "Sending a BAD - Set Unsubscribe request\n"); + memset(&context, 0, sizeof(context)); + status = osmtest_informinfo_request(p_osmt, IB_MAD_ATTR_INFORM_INFO, + IB_MAD_METHOD_SET, &inform_info_opt, + &context); + if (status == 
IB_SUCCESS) { + status = IB_ERROR; + goto Exit; + } else { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "InformInfo UnSubscribe " + "IS EXPECTED ERROR ^^^^\n"); + } + + /* Now subscribe */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "InformInfo " + "Sending a Good - Set Subscribe request\n"); + inform_info_opt.subscribe = TRUE; + memset(&context, 0, sizeof(context)); + status = osmtest_informinfo_request(p_osmt, IB_MAD_ATTR_INFORM_INFO, + IB_MAD_METHOD_SET, &inform_info_opt, + &context); + if (status != IB_SUCCESS) + goto Exit; + + /* Now unsubscribe (QPN needs to be 1 to work) */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "InformInfo " + "Sending a Good - Set Unsubscribe request\n"); + inform_info_opt.subscribe = FALSE; + inform_info_opt.qpn = 1; + memset(&context, 0, sizeof(context)); + status = osmtest_informinfo_request(p_osmt, IB_MAD_ATTR_INFORM_INFO, + IB_MAD_METHOD_SET, &inform_info_opt, + &context); + if (status != IB_SUCCESS) + goto Exit; + + /* Now subscribe again */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "InformInfo " + "Sending a Good - Set Subscribe request\n"); + inform_info_opt.subscribe = TRUE; + inform_info_opt.qpn = 1; + memset(&context, 0, sizeof(context)); + status = osmtest_informinfo_request(p_osmt, IB_MAD_ATTR_INFORM_INFO, + IB_MAD_METHOD_SET, &inform_info_opt, + &context); + if (status != IB_SUCCESS) + goto Exit; + + /* Subscribe over existing subscription */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "InformInfo " + "Sending a Good - Set Subscribe (again) request\n"); + inform_info_opt.qpn = 0; + memset(&context, 0, sizeof(context)); + status = osmtest_informinfo_request(p_osmt, IB_MAD_ATTR_INFORM_INFO, + IB_MAD_METHOD_SET, &inform_info_opt, + &context); + if (status != IB_SUCCESS) + goto Exit; + + /* More InformInfoRecord tests */ + /* RID lookup (with currently invalid enum) */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "InformInfoRecord " + "Sending a Good - GetTable by GID\n"); + ib_gid_set_default(&inform_info_rec_opt.subscriber_gid, + 
p_osmt->local_port.port_guid); + inform_info_rec_opt.subscriber_enum = 1; + memset(&context, 0, sizeof(context)); + status = + osmtest_informinfo_request(p_osmt, IB_MAD_ATTR_INFORM_INFO_RECORD, + IB_MAD_METHOD_GETTABLE, + &inform_info_rec_opt, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* Enum lookup */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "InformInfoRecord " + "Sending a Good - GetTable (subscriber_enum == 0) request\n"); + inform_info_rec_opt.subscriber_enum = 0; + memset(&context, 0, sizeof(context)); + status = + osmtest_informinfo_request(p_osmt, IB_MAD_ATTR_INFORM_INFO_RECORD, + IB_MAD_METHOD_GETTABLE, + &inform_info_rec_opt, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* Get all InformInfoRecords */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "InformInfoRecord " + "Sending a Good - GetTable (ALL records) request\n"); + memset(&inform_info_rec_opt, 0, sizeof(inform_info_rec_opt)); + memset(&context, 0, sizeof(context)); + status = + osmtest_informinfo_request(p_osmt, IB_MAD_ATTR_INFORM_INFO_RECORD, + IB_MAD_METHOD_GETTABLE, + &inform_info_rec_opt, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* Another subscription */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "InformInfo " + "Sending another Good - Set Subscribe (again) request\n"); + inform_info_opt.qpn = 0; + inform_info_opt.trap = 0x1234; + memset(&context, 0, sizeof(context)); + status = osmtest_informinfo_request(p_osmt, IB_MAD_ATTR_INFORM_INFO, + IB_MAD_METHOD_SET, &inform_info_opt, + &context); + if (status != IB_SUCCESS) + goto Exit; + + /* Get all InformInfoRecords again */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "InformInfoRecord " + "Sending a Good - GetTable (ALL records) request\n"); + memset(&inform_info_rec_opt, 0, sizeof(inform_info_rec_opt)); + memset(&context, 0, sizeof(context)); + status = + osmtest_informinfo_request(p_osmt, IB_MAD_ATTR_INFORM_INFO_RECORD, + IB_MAD_METHOD_GETTABLE, + &inform_info_rec_opt, &context); + if (status != IB_SUCCESS) + goto 
Exit; + + /* Cleanup subscriptions before further testing */ + /* Does order of deletion matter ? Test this !!! */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "InformInfo " + "Sending a Good - Set (cleanup) request\n"); + inform_info_opt.subscribe = FALSE; + inform_info_opt.qpn = 1; + memset(&context, 0, sizeof(context)); + status = osmtest_informinfo_request(p_osmt, IB_MAD_ATTR_INFORM_INFO, + IB_MAD_METHOD_SET, + &inform_info_opt, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* Get all InformInfoRecords again */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "InformInfoRecord " + "Sending a Good - GetTable (ALL records) request\n"); + memset(&inform_info_rec_opt, 0, sizeof(inform_info_rec_opt)); + memset(&context, 0, sizeof(context)); + status = + osmtest_informinfo_request(p_osmt, IB_MAD_ATTR_INFORM_INFO_RECORD, + IB_MAD_METHOD_GETTABLE, + &inform_info_rec_opt, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* Second cleanup request, this time with trap reset to 0 */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "InformInfo " + "Sending a Good - Set (cleanup) request\n"); + inform_info_opt.subscribe = FALSE; + inform_info_opt.qpn = 1; + inform_info_opt.trap = 0; + memset(&context, 0, sizeof(context)); + status = osmtest_informinfo_request(p_osmt, IB_MAD_ATTR_INFORM_INFO, + IB_MAD_METHOD_SET, + &inform_info_opt, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* Get all InformInfoRecords a final time */ + OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE, "InformInfoRecord " + "Sending a Good - GetTable (ALL records) request\n"); + memset(&inform_info_rec_opt, 0, sizeof(inform_info_rec_opt)); + memset(&context, 0, sizeof(context)); + status = + osmtest_informinfo_request(p_osmt, IB_MAD_ATTR_INFORM_INFO_RECORD, + IB_MAD_METHOD_GETTABLE, + &inform_info_rec_opt, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* With LMC > 0, repeat the LID based queries using the local LID + 1 */ + if (lmc != 0) { + test_lid = cl_ntoh16(p_osmt->local_port.lid + 1); + + /* Another GUIDInfo Record test */ + memset(&context, 0, sizeof(context)); + status = + osmtest_get_guidinfo_rec_by_lid(p_osmt, test_lid,
&context); + if (status != IB_SUCCESS) + goto Exit; + + /* Another PKeyTable Record test */ + memset(&context, 0, sizeof(context)); + status = + osmtest_get_pkeytbl_rec_by_lid(p_osmt, test_lid, sm_key, + &context); + if (status != IB_SUCCESS) + goto Exit; + + /* Another SwitchInfo Record test */ + memset(&context, 0, sizeof(context)); + status = + osmtest_get_sw_info_rec_by_lid(p_osmt, test_lid, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* Another LFT Record test */ + memset(&context, 0, sizeof(context)); + status = osmtest_get_lft_rec_by_lid(p_osmt, test_lid, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* Another MFT Record test */ + memset(&context, 0, sizeof(context)); + status = osmtest_get_mft_rec_by_lid(p_osmt, test_lid, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* More LinkRecord tests */ + /* FromLID */ + memset(&context, 0, sizeof(context)); + status = + osmtest_get_link_rec_by_lid(p_osmt, test_lid, 0, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* ToLID */ + memset(&context, 0, sizeof(context)); + status = + osmtest_get_link_rec_by_lid(p_osmt, 0, test_lid, &context); + if (status != IB_SUCCESS) + goto Exit; + + /* Another NodeRecord test */ + memset(&context, 0, sizeof(context)); + status = + osmtest_get_node_rec_by_lid(p_osmt, test_lid, &context); + if (status != IB_SUCCESS) + goto Exit; + } + + /* PathRecords */ + if (!p_osmt->opt.ignore_path_records) { + status = osmtest_validate_all_path_recs(p_osmt); + if (status != IB_SUCCESS) + goto Exit; + + if (lmc != 0) { + memset(&context, 0, sizeof(context)); + status = + osmtest_get_path_rec_by_lid_pair(p_osmt, test_lid, + test_lid, + &context); + if (status != IB_SUCCESS) + goto Exit; + + memset(&context, 0, sizeof(context)); + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + EXPECTING_ERRORS_START "\n"); + status = + osmtest_get_path_rec_by_lid_pair(p_osmt, 0xffff, + 0xffff, &context); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Got error 
%s\n", + ib_get_err_str(status)); + } + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + EXPECTING_ERRORS_END "\n"); + + /* The query was expected to fail, so success is a test failure */ + if (status == IB_SUCCESS) { + status = IB_ERROR; + goto Exit; + } + + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + EXPECTING_ERRORS_START "\n"); + + /* NOTE(review): context is not re-zeroed before this query - confirm that is intentional */ + status = + osmtest_get_path_rec_by_lid_pair(p_osmt, test_lid, + 0xffff, &context); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "Got error %s\n", + ib_get_err_str(status)); + } + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + EXPECTING_ERRORS_END "\n"); + + /* The query was expected to fail, so success is a test failure */ + if (status == IB_SUCCESS) { + status = IB_ERROR; + goto Exit; + } + } + } +#endif + + status = osmtest_validate_single_port_recs(p_osmt); + if (status != IB_SUCCESS) + goto Exit; + + if (!p_osmt->opt.ignore_path_records) { + status = osmtest_validate_single_path_recs(p_osmt); + if (status != IB_SUCCESS) + goto Exit; + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +/********************************************************************** + Finds the token_array entry whose string is a case-insensitive prefix + of p_str. Returns that entry, or NULL if the OSMTEST_TOKEN_UNKNOWN + terminator entry is reached without a match. +**********************************************************************/ +static const osmtest_token_t *str_get_token(IN char *const p_str) +{ + const osmtest_token_t *p_tok; + uint32_t index = 0; + + p_tok = &token_array[index]; + + while (p_tok->val != OSMTEST_TOKEN_UNKNOWN) { + if (strncasecmp(p_str, p_tok->str, p_tok->str_size) == 0) + return (p_tok); + + p_tok = &token_array[++index]; + } + + return (NULL); +} + +/********************************************************************** + Advances *p_offset past any tabs and spaces in line. + Returns TRUE if a non-whitespace character is found before the end of + the line ('\n' or '\0'), FALSE otherwise. +**********************************************************************/ +static boolean_t +str_skip_white(IN char line[], IN OUT uint32_t * const p_offset) +{ + while (((line[*p_offset] == '\t') || + (line[*p_offset] == ' ')) && + (line[*p_offset] != '\n') && (line[*p_offset] != '\0')) { + ++*p_offset; + } + + if ((line[*p_offset] == '\n') || (line[*p_offset] == '\0')) + return (FALSE); + else + return (TRUE); +} + +/********************************************************************** + Advances *p_offset past the current token, stopping at the next tab, + space or end of string ('\0'). Returns nothing.
+**********************************************************************/ +static void str_skip_token(IN char line[], IN OUT uint32_t * const p_offset) +{ + while ((line[*p_offset] != '\t') && + (line[*p_offset] != ' ') && (line[*p_offset] != '\0')) { + ++*p_offset; + } +} + +static ib_api_status_t +osmtest_parse_node(IN osmtest_t * const p_osmt, + IN FILE * const fh, IN OUT uint32_t * const p_line_num) +{ + ib_api_status_t status = IB_SUCCESS; + uint32_t offset; + char line[OSMTEST_MAX_LINE_LEN]; + boolean_t done = FALSE; + node_t *p_node; + node_t *p_guid_node; + const osmtest_token_t *p_tok; + + OSM_LOG_ENTER(&p_osmt->log); + + p_node = node_new(); + CL_ASSERT(p_node != NULL); + + /* + * Parse the inventory file and create the database. + */ + while (!done) { + if (fgets(line, OSMTEST_MAX_LINE_LEN, fh) == NULL) { + /* + * End of file in the middle of a definition. + */ + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0119: " + "Unexpected end of file\n"); + status = IB_ERROR; + node_delete(p_node); + goto Exit; + } + + ++*p_line_num; + + /* + * Skip whitespace + */ + offset = 0; + if (!str_skip_white(line, &offset)) + continue; /* whole line was whitespace */ + + p_tok = str_get_token(&line[offset]); + if (p_tok == NULL) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0120: " + "Ignoring line %u with unknown token: %s\n", + *p_line_num, &line[offset]); + continue; + } + + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "Found '%s' (line %u)\n", p_tok->str, *p_line_num); + + str_skip_token(line, &offset); + + switch (p_tok->val) { + case OSMTEST_TOKEN_COMMENT: + break; + + case OSMTEST_TOKEN_LID: + p_node->comp.lid = 0xFFFF; + p_node->rec.lid = + cl_hton16((uint16_t) + strtoul(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, "lid = 0x%X\n", + cl_ntoh16(p_node->rec.lid)); + break; + + case OSMTEST_TOKEN_BASE_VERSION: + p_node->comp.node_info.base_version = 0xFF; + p_node->rec.node_info.base_version = + (uint8_t) strtoul(&line[offset], NULL, 0); + 
OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "base_version = 0x%X\n", + p_node->rec.node_info.base_version); + break; + + case OSMTEST_TOKEN_CLASS_VERSION: + p_node->comp.node_info.class_version = 0xFF; + p_node->rec.node_info.class_version = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "class_version = 0x%X\n", + p_node->rec.node_info.class_version); + break; + + case OSMTEST_TOKEN_NODE_TYPE: + p_node->comp.node_info.node_type = 0xFF; + p_node->rec.node_info.node_type = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "node_type = 0x%X\n", + p_node->rec.node_info.node_type); + break; + + case OSMTEST_TOKEN_NUM_PORTS: + p_node->comp.node_info.num_ports = 0xFF; + p_node->rec.node_info.num_ports = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "num_ports = 0x%X\n", + p_node->rec.node_info.num_ports); + break; + + case OSMTEST_TOKEN_SYS_GUID: + p_node->comp.node_info.sys_guid = 0xFFFFFFFFFFFFFFFFULL; + p_node->rec.node_info.sys_guid = + cl_hton64(strtoull(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "sys_guid = 0x%016" PRIx64 "\n", + cl_ntoh64(p_node->rec.node_info.sys_guid)); + break; + + case OSMTEST_TOKEN_NODE_GUID: + p_node->comp.node_info.node_guid = + 0xFFFFFFFFFFFFFFFFULL; + p_node->rec.node_info.node_guid = + cl_hton64(strtoull(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "node_guid = 0x%016" PRIx64 "\n", + cl_ntoh64(p_node->rec.node_info.node_guid)); + break; + + case OSMTEST_TOKEN_PORT_GUID: + p_node->comp.node_info.port_guid = + 0xFFFFFFFFFFFFFFFFULL; + p_node->rec.node_info.port_guid = + cl_hton64(strtoull(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "port_guid = 0x%016" PRIx64 "\n", + cl_ntoh64(p_node->rec.node_info.port_guid)); + break; + + case OSMTEST_TOKEN_PARTITION_CAP: + p_node->comp.node_info.partition_cap = 0xFFFF; + p_node->rec.node_info.partition_cap = + 
cl_hton16((uint16_t) + strtoul(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "partition_cap = 0x%X\n", + cl_ntoh16(p_node->rec.node_info.partition_cap)); + break; + + case OSMTEST_TOKEN_DEVICE_ID: + p_node->comp.node_info.device_id = 0xFFFF; + p_node->rec.node_info.device_id = cl_hton16((uint16_t) + strtoul + (&line + [offset], + NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "device_id = 0x%X\n", + cl_ntoh16(p_node->rec.node_info.device_id)); + break; + + case OSMTEST_TOKEN_REVISION: + p_node->comp.node_info.revision = 0xFFFFFFFF; + p_node->rec.node_info.revision = + cl_hton32(strtoul(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "revision = 0x%X\n", + cl_ntoh32(p_node->rec.node_info.revision)); + break; + + case OSMTEST_TOKEN_PORT_NUM: + p_node->comp.node_info.port_num_vendor_id |= + IB_NODE_INFO_PORT_NUM_MASK; + p_node->rec.node_info.port_num_vendor_id |= + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "local_port_num = 0x%X\n", + ib_node_info_get_local_port_num + (&p_node->rec.node_info)); + break; + + case OSMTEST_TOKEN_VENDOR_ID: + p_node->comp.node_info.port_num_vendor_id |= + IB_NODE_INFO_VEND_ID_MASK; + p_node->rec.node_info.port_num_vendor_id |= + cl_hton32(strtoul(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "vendor_id = 0x%X\n", + cl_ntoh32(ib_node_info_get_vendor_id + (&p_node->rec.node_info))); + break; + + case OSMTEST_TOKEN_END: + done = TRUE; + break; + + default: + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0121: " + "Ignoring line %u with unknown token: %s\n", + *p_line_num, &line[offset]); + + break; + } + } + + /* + * Make sure the user specified enough information, then + * add this object to the database. 
+ */ + if (p_node->comp.lid == 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0122: " + "LID must be specified for defined nodes\n"); + node_delete(p_node); + goto Exit; + } + + cl_qmap_insert(&p_osmt->exp_subn.node_lid_tbl, + p_node->rec.lid, &p_node->map_item); + + p_guid_node = node_new(); + CL_ASSERT(p_node != NULL); + + *p_guid_node = *p_node; + + cl_qmap_insert(&p_osmt->exp_subn.node_guid_tbl, + p_guid_node->rec.node_info.node_guid, + &p_guid_node->map_item); + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_parse_port(IN osmtest_t * const p_osmt, + IN FILE * const fh, IN OUT uint32_t * const p_line_num) +{ + ib_api_status_t status = IB_SUCCESS; + uint32_t offset; + char line[OSMTEST_MAX_LINE_LEN]; + boolean_t done = FALSE; + port_t *p_port; + const osmtest_token_t *p_tok; + + OSM_LOG_ENTER(&p_osmt->log); + + p_port = port_new(); + CL_ASSERT(p_port != NULL); + + /* + * Parse the inventory file and create the database. + */ + while (!done) { + if (fgets(line, OSMTEST_MAX_LINE_LEN, fh) == NULL) { + /* + * End of file in the middle of a definition. 
+ */ + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0123: " + "Unexpected end of file\n"); + status = IB_ERROR; + port_delete(p_port); + goto Exit; + } + + ++*p_line_num; + + /* + * Skip whitespace + */ + offset = 0; + if (!str_skip_white(line, &offset)) + continue; /* whole line was whitespace */ + + p_tok = str_get_token(&line[offset]); + if (p_tok == NULL) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0124: " + "Ignoring line %u with unknown token: %s\n", + *p_line_num, &line[offset]); + continue; + } + + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "Found '%s' (line %u)\n", p_tok->str, *p_line_num); + + str_skip_token(line, &offset); + + switch (p_tok->val) { + case OSMTEST_TOKEN_COMMENT: + break; + + case OSMTEST_TOKEN_LID: + p_port->comp.lid = 0xFFFF; + p_port->rec.lid = + cl_hton16((uint16_t) + strtoul(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, "lid = 0x%X\n", + cl_ntoh16(p_port->rec.lid)); + break; + + case OSMTEST_TOKEN_PORT_NUM: + p_port->comp.port_num = 0xFF; + p_port->rec.port_num = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "port_num = 0x%u\n", p_port->rec.port_num); + break; + + case OSMTEST_TOKEN_MKEY: + p_port->comp.port_info.m_key = 0xFFFFFFFFFFFFFFFFULL; + p_port->rec.port_info.m_key = + cl_hton64(strtoull(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "m_key = 0x%016" PRIx64 "\n", + cl_ntoh64(p_port->rec.port_info.m_key)); + break; + + case OSMTEST_TOKEN_SUBN_PREF: + p_port->comp.port_info.subnet_prefix = + 0xFFFFFFFFFFFFFFFFULL; + p_port->rec.port_info.subnet_prefix = + cl_hton64(strtoull(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "subnet_prefix = 0x%016" PRIx64 "\n", + cl_ntoh64(p_port->rec.port_info.subnet_prefix)); + break; + + case OSMTEST_TOKEN_BASE_LID: + p_port->comp.port_info.base_lid = 0xFFFF; + p_port->rec.port_info.base_lid = + cl_hton16((uint16_t) + strtoul(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "base_lid = 
0x%X\n", + cl_ntoh16(p_port->rec.port_info.base_lid)); + break; + + case OSMTEST_TOKEN_SM_BASE_LID: + p_port->comp.port_info.master_sm_base_lid = 0xFFFF; + p_port->rec.port_info.master_sm_base_lid = + cl_hton16((uint16_t) + strtoul(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "master_sm_base_lid = 0x%X\n", + cl_ntoh16(p_port->rec.port_info.master_sm_base_lid)); + break; + + case OSMTEST_TOKEN_CAP_MASK: + p_port->comp.port_info.capability_mask = 0xFFFFFFFF; + p_port->rec.port_info.capability_mask = + cl_hton32((uint32_t) + strtoul(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "capability_mask = 0x%X\n", + cl_ntoh32(p_port->rec.port_info.capability_mask)); + break; + + case OSMTEST_TOKEN_DIAG_CODE: + p_port->comp.port_info.diag_code = 0xFFFF; + p_port->rec.port_info.diag_code = + cl_hton16((uint16_t) + strtoul(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "diag_code = 0x%X\n", + cl_ntoh16(p_port->rec.port_info.diag_code)); + break; + + case OSMTEST_TOKEN_MKEY_LEASE_PER: + p_port->comp.port_info.m_key_lease_period = 0xFFFF; + p_port->rec.port_info.m_key_lease_period = + cl_hton16((uint16_t) + strtoul(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "m_key_lease_period = 0x%X\n", + cl_ntoh16(p_port->rec.port_info.m_key_lease_period)); + break; + + case OSMTEST_TOKEN_LOC_PORT_NUM: + p_port->comp.port_info.local_port_num = 0xFF; + p_port->rec.port_info.local_port_num = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "local_port_num = 0x%u\n", + p_port->rec.port_info.local_port_num); + break; + + case OSMTEST_TOKEN_LINK_WID_EN: + p_port->comp.port_info.link_width_enabled = 0xFF; + p_port->rec.port_info.link_width_enabled = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "link_width_enabled = 0x%u\n", + p_port->rec.port_info.link_width_enabled); + break; + + case OSMTEST_TOKEN_LINK_WID_SUP: + 
p_port->comp.port_info.link_width_supported = 0xFF; + p_port->rec.port_info.link_width_supported = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "link_width_supported = 0x%u\n", + p_port->rec.port_info.link_width_supported); + break; + + case OSMTEST_TOKEN_LINK_WID_ACT: + p_port->comp.port_info.link_width_active = 0xFF; + p_port->rec.port_info.link_width_active = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "link_width_active = 0x%u\n", + p_port->rec.port_info.link_width_active); + break; + + case OSMTEST_TOKEN_LINK_SPEED_SUP: + p_port->comp.port_info.state_info1 = 0xFF; + ib_port_info_set_link_speed_sup((uint8_t) + strtoul(&line[offset], + NULL, 0), + &p_port->rec.port_info); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "link_speed_supported = 0x%u\n", + ib_port_info_get_link_speed_sup(&p_port->rec.port_info)); + break; + + case OSMTEST_TOKEN_PORT_STATE: + str_skip_white(line, &offset); + p_port->comp.port_info.state_info1 = 0xFF; + ib_port_info_set_port_state(&p_port->rec.port_info, + ib_get_port_state_from_str + (&line[offset])); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "port_state = 0x%u\n", + ib_port_info_get_port_state(&p_port->rec.port_info)); + break; + + case OSMTEST_TOKEN_STATE_INFO2: + p_port->comp.port_info.state_info2 = 0xFF; + p_port->rec.port_info.state_info2 = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "state_info2 = 0x%u\n", + p_port->rec.port_info.state_info2); + break; + + case OSMTEST_TOKEN_MKEY_PROT_BITS: + p_port->comp.port_info.mkey_lmc = 0xFF; + ib_port_info_set_mpb(&p_port->rec.port_info, + (uint8_t) strtoul(&line[offset], + NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, "mpb = 0x%u\n", + ib_port_info_get_mpb(&p_port->rec.port_info)); + break; + + case OSMTEST_TOKEN_LMC: + p_port->comp.port_info.mkey_lmc = 0xFF; + ib_port_info_set_lmc(&p_port->rec.port_info, + (uint8_t) strtoul(&line[offset], + NULL, 0)); + 
OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, "lmc = 0x%u\n", + ib_port_info_get_lmc(&p_port->rec.port_info)); + break; + + case OSMTEST_TOKEN_LINK_SPEED: + p_port->comp.port_info.link_speed = 0xFF; + p_port->rec.port_info.link_speed = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "link_speed = 0x%u\n", + p_port->rec.port_info.link_speed); + break; + + case OSMTEST_TOKEN_MTU_SMSL: + p_port->comp.port_info.mtu_smsl = 0xFF; + p_port->rec.port_info.mtu_smsl = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "mtu_smsl = 0x%u\n", + p_port->rec.port_info.mtu_smsl); + break; + + case OSMTEST_TOKEN_VL_CAP: + p_port->comp.port_info.vl_cap = 0xFF; + p_port->rec.port_info.vl_cap = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, "vl_cap = 0x%u\n", + p_port->rec.port_info.vl_cap); + break; + + case OSMTEST_TOKEN_VL_HIGH_LIMIT: + p_port->comp.port_info.vl_high_limit = 0xFF; + p_port->rec.port_info.vl_high_limit = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "vl_high_limit = 0x%u\n", + p_port->rec.port_info.vl_high_limit); + break; + + case OSMTEST_TOKEN_VL_ARB_HIGH_CAP: + p_port->comp.port_info.vl_arb_high_cap = 0xFF; + p_port->rec.port_info.vl_arb_high_cap = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "vl_arb_high_cap = 0x%u\n", + p_port->rec.port_info.vl_arb_high_cap); + break; + + case OSMTEST_TOKEN_VL_ARB_LOW_CAP: + p_port->comp.port_info.vl_arb_low_cap = 0xFF; + p_port->rec.port_info.vl_arb_low_cap = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "vl_arb_low_cap = 0x%u\n", + p_port->rec.port_info.vl_arb_low_cap); + break; + + case OSMTEST_TOKEN_MTU_CAP: + p_port->comp.port_info.mtu_cap = 0xFF; + p_port->rec.port_info.mtu_cap = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, "mtu_cap = 0x%u\n", + 
p_port->rec.port_info.mtu_cap); + break; + + case OSMTEST_TOKEN_VL_STALL_LIFE: + p_port->comp.port_info.vl_stall_life = 0xFF; + p_port->rec.port_info.vl_stall_life = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "vl_stall_life = 0x%u\n", + p_port->rec.port_info.vl_stall_life); + break; + + case OSMTEST_TOKEN_VL_ENFORCE: + p_port->comp.port_info.vl_enforce = 0xFF; + p_port->rec.port_info.vl_enforce = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "vl_enforce = 0x%u\n", + p_port->rec.port_info.vl_enforce); + break; + + case OSMTEST_TOKEN_MKEY_VIOL: + p_port->comp.port_info.m_key_violations = 0xFFFF; + p_port->rec.port_info.m_key_violations = + cl_hton16((uint16_t) + strtoul(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "m_key_violations = 0x%X\n", + cl_ntoh16(p_port->rec.port_info.m_key_violations)); + break; + + case OSMTEST_TOKEN_PKEY_VIOL: + p_port->comp.port_info.p_key_violations = 0xFFFF; + p_port->rec.port_info.p_key_violations = + cl_hton16((uint16_t) + strtoul(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "p_key_violations = 0x%X\n", + cl_ntoh16(p_port->rec.port_info.p_key_violations)); + break; + + case OSMTEST_TOKEN_QKEY_VIOL: + p_port->comp.port_info.q_key_violations = 0xFFFF; + p_port->rec.port_info.q_key_violations = + cl_hton16((uint16_t) + strtoul(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "q_key_violations = 0x%X\n", + cl_ntoh16(p_port->rec.port_info.q_key_violations)); + break; + + case OSMTEST_TOKEN_GUID_CAP: + p_port->comp.port_info.guid_cap = 0xFF; + p_port->rec.port_info.guid_cap = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "guid_cap = 0x%u\n", + p_port->rec.port_info.guid_cap); + break; + + case OSMTEST_TOKEN_SUBN_TIMEOUT: + p_port->comp.port_info.subnet_timeout = 0x1F; + p_port->rec.port_info.subnet_timeout = + (uint8_t) strtoul(&line[offset], NULL, 0); + 
OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "subnet_timeout = 0x%u\n", + ib_port_info_get_timeout(&p_port->rec.port_info)); + break; + + case OSMTEST_TOKEN_RESP_TIME_VAL: + p_port->comp.port_info.resp_time_value = 0xFF; + p_port->rec.port_info.resp_time_value = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "resp_time_value = 0x%u\n", + p_port->rec.port_info.resp_time_value); + break; + + case OSMTEST_TOKEN_ERR_THRESHOLD: + p_port->comp.port_info.error_threshold = 0xFF; + p_port->rec.port_info.error_threshold = + (uint8_t) strtoul(&line[offset], NULL, 0); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "error_threshold = 0x%u\n", + p_port->rec.port_info.error_threshold); + break; + + case OSMTEST_TOKEN_END: + done = TRUE; + break; + + default: + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0125: " + "Ignoring line %u with unknown token: %s\n", + *p_line_num, &line[offset]); + break; + } + } + + /* + * Make sure the user specified enough information, then + * add this object to the database. + */ + if (p_port->comp.lid == 0) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0126: " + "LID must be specified for defined ports\n"); + port_delete(p_port); + status = IB_ERROR; + goto Exit; + } + + cl_qmap_insert(&p_osmt->exp_subn.port_key_tbl, + port_gen_id(p_port->rec.lid, p_port->rec.port_num), + &p_port->map_item); + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_parse_path(IN osmtest_t * const p_osmt, + IN FILE * const fh, IN OUT uint32_t * const p_line_num) +{ + ib_api_status_t status = IB_SUCCESS; + uint32_t offset; + char line[OSMTEST_MAX_LINE_LEN]; + boolean_t done = FALSE; + path_t *p_path; + const osmtest_token_t *p_tok; + boolean_t got_error = FALSE; + + OSM_LOG_ENTER(&p_osmt->log); + + p_path = path_new(); + CL_ASSERT(p_path != NULL); + + /* + * Parse the inventory file and create the database. 
+ */ + while (!done) { + if (fgets(line, OSMTEST_MAX_LINE_LEN, fh) == NULL) { + /* + * End of file in the middle of a definition. + */ + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0127: " + "Unexpected end of file\n"); + status = IB_ERROR; + path_delete(p_path); + goto Exit; + } + + ++*p_line_num; + + /* + * Skip whitespace + */ + offset = 0; + if (!str_skip_white(line, &offset)) + continue; /* whole line was whitespace */ + + p_tok = str_get_token(&line[offset]); + if (p_tok == NULL) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0128: " + "Ignoring line %u with unknown token: %s\n", + *p_line_num, &line[offset]); + got_error = TRUE; + continue; + } + + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "Found '%s' (line %u)\n", p_tok->str, *p_line_num); + + str_skip_token(line, &offset); + + switch (p_tok->val) { + case OSMTEST_TOKEN_COMMENT: + break; + + case OSMTEST_TOKEN_DGID: + p_path->comp.dgid.unicast.prefix = + 0xFFFFFFFFFFFFFFFFULL; + p_path->comp.dgid.unicast.interface_id = + 0xFFFFFFFFFFFFFFFFULL; + + str_skip_white(line, &offset); + p_path->rec.dgid.unicast.prefix = + cl_hton64(strtoull(&line[offset], NULL, 0)); + str_skip_token(line, &offset); + p_path->rec.dgid.unicast.interface_id = + cl_hton64(strtoull(&line[offset], NULL, 0)); + + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "dgid = 0x%016" PRIx64 " 0x%016" PRIx64 "\n", + cl_ntoh64(p_path->rec.dgid.unicast.prefix), + cl_ntoh64(p_path->rec.dgid.unicast.interface_id)); + break; + + case OSMTEST_TOKEN_SGID: + p_path->comp.sgid.unicast.prefix = + 0xFFFFFFFFFFFFFFFFULL; + p_path->comp.sgid.unicast.interface_id = + 0xFFFFFFFFFFFFFFFFULL; + + str_skip_white(line, &offset); + p_path->rec.sgid.unicast.prefix = + cl_hton64(strtoull(&line[offset], NULL, 0)); + str_skip_token(line, &offset); + p_path->rec.sgid.unicast.interface_id = + cl_hton64(strtoull(&line[offset], NULL, 0)); + + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "sgid = 0x%016" PRIx64 " 0x%016" PRIx64 "\n", + cl_ntoh64(p_path->rec.sgid.unicast.prefix), + 
cl_ntoh64(p_path->rec.sgid.unicast.interface_id)); + break; + + case OSMTEST_TOKEN_DLID: + p_path->comp.dlid = 0xFFFF; + p_path->rec.dlid = + cl_hton16((uint16_t) + strtoul(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, "dlid = 0x%X\n", + cl_ntoh16(p_path->rec.dlid)); + break; + + case OSMTEST_TOKEN_SLID: + p_path->comp.slid = 0xFFFF; + p_path->rec.slid = + cl_hton16((uint16_t) + strtoul(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, "slid = 0x%X\n", + cl_ntoh16(p_path->rec.slid)); + break; + + case OSMTEST_TOKEN_PKEY: + p_path->comp.pkey = 0xFFFF; + p_path->rec.pkey = + cl_hton16((uint16_t) + strtoul(&line[offset], NULL, 0)); + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, "pkey = 0x%X\n", + cl_ntoh16(p_path->rec.pkey)); + break; + + case OSMTEST_TOKEN_END: + done = TRUE; + break; + + default: + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0129: " + "Ignoring line %u with unknown token: %s\n", + *p_line_num, &line[offset]); + got_error = TRUE; + break; + } + } + + if (got_error) { + status = IB_ERROR; + path_delete(p_path); + goto Exit; + } + /* + * Make sure the user specified enough information, then + * add this object to the database. + */ + if (osmtest_path_rec_kay_is_valid(p_osmt, p_path) == FALSE) { + path_delete(p_path); + status = IB_ERROR; + goto Exit; + } + + cl_qmap_insert(&p_osmt->exp_subn.path_tbl, + osmtest_path_rec_key_get(&p_path->rec), + &p_path->map_item); + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t +osmtest_parse_link(IN osmtest_t * const p_osmt, + IN FILE * const fh, IN OUT uint32_t * const p_line_num) +{ + ib_api_status_t status = IB_SUCCESS; + uint32_t offset; + char line[OSMTEST_MAX_LINE_LEN]; + boolean_t done = FALSE; + const osmtest_token_t *p_tok; + boolean_t got_error = FALSE; + + OSM_LOG_ENTER(&p_osmt->log); + + /* + * Parse the inventory file and create the database. 
+ */ + while (!done) { + if (fgets(line, OSMTEST_MAX_LINE_LEN, fh) == NULL) { + /* + * End of file in the middle of a definition. + */ + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 012A: " + "Unexpected end of file\n"); + status = IB_ERROR; + goto Exit; + } + + ++*p_line_num; + + /* + * Skip whitespace + */ + offset = 0; + if (!str_skip_white(line, &offset)) + continue; /* whole line was whitespace */ + + p_tok = str_get_token(&line[offset]); + if (p_tok == NULL) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 012B: " + "Ignoring line %u with unknown token: %s\n", + *p_line_num, &line[offset]); + got_error = TRUE; + continue; + } + + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "Found '%s' (line %u)\n", p_tok->str, *p_line_num); + + str_skip_token(line, &offset); + + switch (p_tok->val) { + case OSMTEST_TOKEN_FROMLID: + case OSMTEST_TOKEN_FROMPORTNUM: + case OSMTEST_TOKEN_TOPORTNUM: + case OSMTEST_TOKEN_TOLID: + /* For now */ + break; + + case OSMTEST_TOKEN_END: + done = TRUE; + break; + + default: + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 012C: " + "Ignoring line %u with unknown token: %s\n", + *p_line_num, &line[offset]); + got_error = TRUE; + break; + } + } + + if (got_error) + status = IB_ERROR; + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +static ib_api_status_t osmtest_create_db(IN osmtest_t * const p_osmt) +{ + FILE *fh; + ib_api_status_t status = IB_SUCCESS; + uint32_t offset; + char line[OSMTEST_MAX_LINE_LEN]; + uint32_t line_num = 0; + const osmtest_token_t *p_tok; + boolean_t got_error = FALSE; + + OSM_LOG_ENTER(&p_osmt->log); + + fh = fopen(p_osmt->opt.file_name, "r"); + if (fh == NULL) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0130: " + "Unable to open inventory file (%s)\n", + p_osmt->opt.file_name); + status = IB_ERROR; + goto Exit; + } + + /* + * Parse the inventory file and create the database. 
+ */ + while (fgets(line, OSMTEST_MAX_LINE_LEN, fh) != NULL) { + line_num++; + + /* + * Skip whitespace + */ + offset = 0; + if (!str_skip_white(line, &offset)) + continue; /* whole line was whitespace */ + + p_tok = str_get_token(&line[offset]); + if (p_tok == NULL) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0131: " + "Ignoring line %u: %s\n", line_num, + &line[offset]); + got_error = TRUE; + continue; + } + + OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG, + "Found '%s' (line %u)\n", p_tok->str, line_num); + + switch (p_tok->val) { + case OSMTEST_TOKEN_COMMENT: + break; + + case OSMTEST_TOKEN_DEFINE_NODE: + status = osmtest_parse_node(p_osmt, fh, &line_num); + break; + + case OSMTEST_TOKEN_DEFINE_PORT: + status = osmtest_parse_port(p_osmt, fh, &line_num); + break; + + case OSMTEST_TOKEN_DEFINE_PATH: + status = osmtest_parse_path(p_osmt, fh, &line_num); + break; + + case OSMTEST_TOKEN_DEFINE_LINK: + status = osmtest_parse_link(p_osmt, fh, &line_num); + break; + + default: + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0132: " + "Ignoring line %u: %s\n", line_num, + &line[offset]); + got_error = TRUE; + break; + } + + if (got_error) + status = IB_ERROR; + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0133: " + "Bad status received during parsing (%s)\n", + ib_get_err_str(status)); + fclose(fh); + goto Exit; + } + } + + fclose(fh); + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} + +/********************************************************************** + Returns the index in the local port attribute array for the + user's selection. +**********************************************************************/ +static uint32_t +osmtest_get_user_port(IN osmtest_t * const p_osmt, + IN const ib_port_attr_t p_attr_array[], + IN uint32_t const num_ports) +{ + uint32_t i, choice = 0; + + OSM_LOG_ENTER(&p_osmt->log); + + /* + * User needs prompting for the local port GUID with which + * to bind. 
+	 */
+
+	while (1) {
+		printf("\nChoose a local port number with which to bind:\n\n");
+		for (i = 0; i < num_ports; i++) {
+			/*
+			 * Print the index + 1 since by convention, port numbers
+			 * start with 1 on host channel adapters.
+			 */
+
+			printf("\t%u: GUID = 0x%8" PRIx64
+			       ", lid = 0x%04X, state = %s\n", i + 1,
+			       cl_ntoh64(p_attr_array[i].port_guid),
+			       p_attr_array[i].lid,
+			       ib_get_port_state_str(p_attr_array[i].
+						     link_state));
+		}
+
+		printf("\nEnter choice (1-%u): ", i);
+		fflush(stdout);
+		if (scanf("%u", &choice) <= 0) {
+			char junk[256];
+			if (scanf("%255s", junk) <= 0)
+				printf("\nError: Cannot scan!\n");
+		} else if (choice && choice <= num_ports)
+			break;
+		printf("\nError: Lame choice!\n");
+	}
+	printf("\n");
+	OSM_LOG_EXIT(&p_osmt->log);
+	return (choice - 1);
+}
+
+/**********************************************************************
+ Binds osmtest to the SA through one local port.
+
+ The local port attributes are fetched with
+ osm_vendor_get_all_port_attr().  When guid is 0 the user is prompted
+ for a port; otherwise the port whose port_guid equals guid is used.
+ The selected port's attributes are stored in p_osmt->local_port,
+ max_lid in p_osmt->max_lid and the bind handle in p_osmt->h_bind.
+
+ Returns IB_SUCCESS on success, the status returned by
+ osm_vendor_get_all_port_attr() if that call fails, IB_NOT_FOUND when
+ no local port matches guid, and IB_ERROR when there is no local port
+ to choose from or osmv_bind_sa() returns an invalid handle.
+**********************************************************************/
+ib_api_status_t
+osmtest_bind(IN osmtest_t * p_osmt,
+	     IN uint16_t max_lid, IN ib_net64_t guid OPTIONAL)
+{
+	uint32_t port_index;
+	ib_api_status_t status;
+	uint32_t num_ports = MAX_LOCAL_IBPORTS;
+	ib_port_attr_t attr_array[MAX_LOCAL_IBPORTS] = { {0} };
+	ib_gid_t gid[MAX_LOCAL_IBPORTS];
+	int i;
+
+	OSM_LOG_ENTER(&p_osmt->log);
+
+	/* Give every attribute slot room for exactly one GID. */
+	for (i = 0; i < MAX_LOCAL_IBPORTS; i++) {
+		attr_array[i].num_gids = 1;
+		attr_array[i].p_gid_table = &gid[i];
+	}
+
+	/*
+	 * Call the transport layer for a list of local port
+	 * GUID values.
+	 */
+	status = osm_vendor_get_all_port_attr(p_osmt->p_vendor,
+					      attr_array, &num_ports);
+	if (status != IB_SUCCESS) {
+		OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0134: "
+			"Failure getting local port attributes (%s)\n",
+			ib_get_err_str(status));
+		goto Exit;
+	}
+
+	if (guid == 0) {
+		/*
+		 * Check for an empty port list before prompting: with no
+		 * ports the prompt loop can never accept a choice, and the
+		 * failure has to be visible in the returned status.
+		 */
+		if (num_ports == 0) {
+			OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0135: "
+				"No local ports. Unable to proceed\n");
+			status = IB_ERROR;
+			goto Exit;
+		}
+
+		/*
+		 * User needs prompting for the local port GUID with which
+		 * to bind.
+		 */
+		port_index =
+		    osmtest_get_user_port(p_osmt, attr_array, num_ports);
+		guid = attr_array[port_index].port_guid;
+	} else {
+		for (port_index = 0; port_index < num_ports; port_index++) {
+			if (attr_array[port_index].port_guid == guid)
+				break;
+		}
+
+		if (port_index == num_ports) {
+			OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0136: "
+				"No local port with guid 0x%016" PRIx64 "\n",
+				cl_ntoh64(guid));
+			status = IB_NOT_FOUND;
+			goto Exit;
+		}
+	}
+
+	/*
+	 * Copy the port info for the selected port.
+	 *
+	 * NOTE(review): the copy stops one pointer short of the end of the
+	 * structure, yet p_gid_table is read right below.  This presumably
+	 * relies on p_gid_table not being the last member of
+	 * ib_port_attr_t - confirm against the type definition.
+	 */
+	memcpy(&p_osmt->local_port, &attr_array[port_index],
+	       sizeof(p_osmt->local_port) - sizeof(p_osmt->local_port.p_gid_table));
+	if (p_osmt->local_port.num_gids) {
+		p_osmt->local_port_gid.unicast.prefix = p_osmt->local_port.p_gid_table[0].unicast.prefix;
+		p_osmt->local_port_gid.unicast.interface_id = p_osmt->local_port.p_gid_table[0].unicast.interface_id;
+	} else {
+		p_osmt->local_port_gid.unicast.prefix = IB_DEFAULT_SUBNET_PREFIX_HO;
+		p_osmt->local_port_gid.unicast.interface_id = attr_array[port_index].port_guid;
+		p_osmt->local_port.p_gid_table = NULL;
+	}
+	/* Point the stored GID table at the copy kept inside p_osmt. */
+	p_osmt->local_port.num_gids = 1;
+	p_osmt->local_port.p_gid_table = &p_osmt->local_port_gid;
+
+	/* bind to the SA */
+	OSM_LOG(&p_osmt->log, OSM_LOG_DEBUG,
+		"Using port with SM LID:0x%04X\n", p_osmt->local_port.sm_lid);
+	p_osmt->max_lid = max_lid;
+
+	p_osmt->h_bind =
+	    osmv_bind_sa(p_osmt->p_vendor, &p_osmt->mad_pool, guid);
+
+	if (p_osmt->h_bind == OSM_BIND_INVALID_HANDLE) {
+		OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0137: "
+			"Unable to bind to SA\n");
+		status = IB_ERROR;
+		goto Exit;
+	}
+
+Exit:
+	OSM_LOG_EXIT(&p_osmt->log);
+	return (status);
+}
+
+ib_api_status_t osmtest_run(IN osmtest_t * const p_osmt)
+{
+	ib_api_status_t status = IB_SUCCESS;
+
+	OSM_LOG_ENTER(&p_osmt->log);
+	if (p_osmt->opt.flow != OSMT_FLOW_CREATE_INVENTORY &&
+	    p_osmt->opt.with_grh) {
+		/* Get SM GID for GRH DGID */
+		status = osmtest_get_sm_gid(p_osmt);
+		if (status != IB_SUCCESS) {
+			OSM_LOG(&p_osmt->log, OSM_LOG_ERROR,
+				"ERR 0153: calculating SM
GID failed: (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + } + + status = osmtest_validate_sa_class_port_info(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0138: " + "Could not obtain SA ClassPortInfo (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + + if (p_osmt->opt.flow == OSMT_FLOW_CREATE_INVENTORY) { + /* + * Creating an inventory file with all nodes, ports and paths + */ + status = osmtest_create_inventory_file(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 0139: " + "Inventory file create failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + } else { + if (p_osmt->opt.flow == OSMT_FLOW_STRESS_SA) { + /* + * Stress SA - flood the SA with queries + */ + switch (p_osmt->opt.stress) { + case 0: + case 1: /* small response SA query stress */ + status = osmtest_stress_small_rmpp(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 0140: " + "Small RMPP stress test failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + break; + case 2: /* large response SA query stress */ + status = osmtest_stress_large_rmpp(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 0141: " + "Large RMPP stress test failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + break; + case 3: /* large response Path Record SA query stress */ + status = osmtest_create_db(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 0142: " + "Database creation failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + + status = osmtest_stress_large_rmpp_pr(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 0143: " + "Large RMPP stress test failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + break; + case 4: /* SA Get PR to SA LID */ + status = osmtest_stress_get_pr(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 014B: " 
+ "SA Get PR stress test failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + break; + default: + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 0144: " + "Unknown stress test value %u\n", + p_osmt->opt.stress); + break; + } + } else { + + /* + * Run normal validation tests. + */ + if (p_osmt->opt.flow == OSMT_FLOW_ALL || + p_osmt->opt.flow == OSMT_FLOW_VALIDATE_INVENTORY) { + /* + * Only validate the given inventory file + */ + status = osmtest_create_db(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 0145: " + "Database creation failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + + status = osmtest_validate_against_db(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 0146: " + "SA validation database failure (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + } + + if (p_osmt->opt.flow == OSMT_FLOW_ALL) { + status = osmtest_wrong_sm_key_ignored(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 0147: " + "Try wrong SM_Key failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + } + + if (p_osmt->opt.flow == OSMT_FLOW_ALL || + p_osmt->opt.flow == OSMT_FLOW_SERVICE_REGISTRATION) + { + /* + * run service registration, deregistration, and lease test + */ + status = osmt_run_service_records_flow(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 0148: " + "Service Flow failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + } + + if (p_osmt->opt.flow == OSMT_FLOW_ALL || + p_osmt->opt.flow == OSMT_FLOW_EVENT_FORWARDING) { + /* + * Run event forwarding test + */ +#ifdef OSM_VENDOR_INTF_MTL + status = osmt_run_inform_info_flow(p_osmt); + + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 0149: " + "Inform Info Flow failed: (%s)\n", + ib_get_err_str(status)); + goto Exit; + } +#else + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "The event forwarding flow " + "is not implemented 
yet!\n"); + status = IB_SUCCESS; + goto Exit; +#endif + } + + if (p_osmt->opt.flow == OSMT_FLOW_QOS) { + /* + * QoS info: dump VLArb and SLtoVL tables. + * Since it generates a huge file, we run it only + * if explicitly required to + */ + status = osmtest_create_db(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 014A: " + "Database creation failed (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + + status = + osmt_run_slvl_and_vlarb_records_flow + (p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 0150: " + "Failed to get SLtoVL and VL Arbitration Tables (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + } + + if (p_osmt->opt.flow == OSMT_FLOW_TRAP) { + /* + * Run trap 64/65 flow (this flow requires running of external tool) + */ +#ifdef OSM_VENDOR_INTF_MTL + status = osmt_run_trap64_65_flow(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 0151: " + "Trap 64/65 Flow failed: (%s)\n", + ib_get_err_str(status)); + goto Exit; + } +#else + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "Trap 64/65 flow " + "is not implemented yet!\n"); + status = IB_SUCCESS; + goto Exit; +#endif + } + + if (p_osmt->opt.flow == OSMT_FLOW_ALL || + p_osmt->opt.flow == OSMT_FLOW_MULTICAST) { + /* + * Multicast flow + */ + status = osmt_run_mcast_flow(p_osmt); + if (status != IB_SUCCESS) { + OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, + "ERR 0152: " + "Multicast Flow failed: (%s)\n", + ib_get_err_str(status)); + goto Exit; + } + } + + OSM_LOG(&p_osmt->log, OSM_LOG_INFO, + "\n\n***************** ALL TESTS PASS *****************\n\n"); + + } + } + +Exit: + OSM_LOG_EXIT(&p_osmt->log); + return (status); +} diff --git a/scripts/opensm.init.in b/scripts/opensm.init.in new file mode 100644 index 0000000..f1c6c1a --- /dev/null +++ b/scripts/opensm.init.in @@ -0,0 +1,151 @@ +#!/bin/bash +# +# opensm: Manage OpenSM +# +# chkconfig: - 09 91 +# description: Manage OpenSM +# +### BEGIN INIT INFO +# 
Provides: opensm
+# Required-Start: $syslog @RDMA_SERVICE@
+# Required-Stop: $syslog @RDMA_SERVICE@
+# Default-Start: @DEFAULT_START@
+# Default-Stop: @DEFAULT_STOP@
+# Description: Manage OpenSM
+### END INIT INFO
+#
+# Copyright (c) 2008 Voltaire, Inc. All rights reserved.
+# Copyright 2006 PathScale, Inc. All Rights Reserved.
+#
+# This Software is licensed under one of the following licenses:
+#
+# 1) under the terms of the "Common Public License 1.0" a copy of which is
+# available from the Open Source Initiative, see
+# http://www.opensource.org/licenses/cpl.php.
+#
+# 2) under the terms of the "The BSD License" a copy of which is
+# available from the Open Source Initiative, see
+# http://www.opensource.org/licenses/bsd-license.php.
+#
+# 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+# copy of which is available from the Open Source Initiative, see
+# http://www.opensource.org/licenses/gpl-license.php.
+#
+# Licensee has the right to choose one of the above licenses.
+#
+# Redistributions of source code must retain the above copyright
+# notice and one of the license notices.
+#
+# Redistributions in binary form must reproduce both the above copyright
+# notice, one of the license notices in the documentation
+# and/or other materials provided with the distribution.
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+pidfile=/var/run/opensm.pid
+
+# Source function library.
+if [[ -s /etc/init.d/functions ]]; then
+    # RHEL / CentOS / SL / Fedora.
+    . /etc/init.d/functions
+    rc_status() { :; }
+    rc_exit() { exit $RETVAL; }
+elif [[ -s /lib/lsb/init-functions ]]; then
+    # SLES / openSuSE / Debian.
+    . /lib/lsb/init-functions
+    rc_exit() { exit $RETVAL; }
+    success() { log_success_msg; }
+    failure() { log_failure_msg; }
+elif [[ -s /etc/rc.status ]]; then
+    # Older SuSE systems.
+    . /etc/rc.status
+    failure() { rc_status -v; }
+    success() { rc_status -v; }
+fi
+
+CONFIG=@sysconfdir@/sysconfig/opensm
+if [[ -s $CONFIG ]]; then
+    . $CONFIG
+fi
+
+running () {
+    [ -e $pidfile ] &&
+        [ "$(readlink "/proc/$(<$pidfile)/exe")" = "@sbindir@/opensm" ]
+}
+
+start () {
+    if running; then
+        echo Already started
+        return 1
+    fi
+    echo -n "Starting opensm: "
+    @sbindir@/opensm --daemon --pidfile $pidfile $OPTIONS > /dev/null
+    RETVAL=$?
+    if [[ $RETVAL -eq 0 ]]; then
+        success
+    else
+        failure
+    fi
+    echo
+}
+
+stop () {
+    echo -n "Shutting down opensm: "
+    killproc opensm
+    RETVAL=$?
+    if [[ $RETVAL -eq 0 ]]; then
+        success
+    else
+        failure
+    fi
+    echo
+}
+
+Xstatus () {    # Print whether opensm runs; set RETVAL to 0 (running) or 3 (stopped)
+    pid="`pidof opensm`"
+    RETVAL=$(( $? == 0 ? 0 : 3 ))    # without this "status" always exited 0
+    if [ $RETVAL -eq 0 ] ; then
+        echo "OpenSM is running... pid=$pid"
+    else
+        echo "OpenSM is not running."
+    fi
+}
+
+restart() {
+    stop
+    start
+}
+
+# See how we were called.
+case "$1" in
+    start)
+        start
+        ;;
+    stop)
+        stop
+        ;;
+    status)
+        Xstatus
+        ;;
+    restart | force-reload | reload)
+        restart
+        ;;
+    try-restart | condrestart)
+        [ -e $pidfile ] && restart
+        ;;
+    resweep)
+        killall -HUP opensm
+        RETVAL=$?
+        ;;
+    rotatelog)
+        killall -USR1 opensm
+        RETVAL=$?
+        ;;
+    *)
+        echo $"Usage: $0 {start|stop|status|restart|reload|condrestart|resweep|rotatelog}"
+        RETVAL=1
+        ;;
+esac
+
+_rc_status_all=$RETVAL
+rc_exit
diff --git a/scripts/opensm.logrotate b/scripts/opensm.logrotate
new file mode 100644
index 0000000..e16e227
--- /dev/null
+++ b/scripts/opensm.logrotate
@@ -0,0 +1,7 @@
+/var/log/opensm.log {
+    missingok
+    notifempty
+    copytruncate
+    weekly
+    compress
+}
diff --git a/scripts/opensm.sysconfig b/scripts/opensm.sysconfig
new file mode 100644
index 0000000..2cc02e6
--- /dev/null
+++ b/scripts/opensm.sysconfig
@@ -0,0 +1,2 @@
+# It will be used for sldd.sh
+OSM_HOSTS=""
diff --git a/scripts/redhat-opensm.init.in b/scripts/redhat-opensm.init.in
new file mode 100755
index 0000000..415ab2f
--- /dev/null
+++ b/scripts/redhat-opensm.init.in
@@ -0,0 +1,297 @@
+#!/bin/bash
+#
+# Bring up/down opensm
+#
+# chkconfig: - 15 85
+# description: Activates/Deactivates InfiniBand Subnet Manager
+#
+### BEGIN INIT INFO
+# Provides: opensm
+# Required-Start: $syslog @RDMA_SERVICE@
+# Required-Stop: $syslog @RDMA_SERVICE@
+# Default-Start: @DEFAULT_START@
+# Default-Stop: @DEFAULT_STOP@
+# Description: Manage OpenSM
+### END INIT INFO
+#
+# Copyright (c) 2008 Voltaire, Inc. All rights reserved.
+# Copyright (c) 2006 Mellanox Technologies. All rights reserved.
+#
+# This Software is licensed under one of the following licenses:
+#
+# 1) under the terms of the "Common Public License 1.0" a copy of which is
+# available from the Open Source Initiative, see
+# http://www.opensource.org/licenses/cpl.php.
+#
+# 2) under the terms of the "The BSD License" a copy of which is
+# available from the Open Source Initiative, see
+# http://www.opensource.org/licenses/bsd-license.php.
+#
+# 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+# copy of which is available from the Open Source Initiative, see
+# http://www.opensource.org/licenses/gpl-license.php.
+#
+# Licensee has the right to choose one of the above licenses.
+#
+# Redistributions of source code must retain the above copyright
+# notice and one of the license notices.
+#
+# Redistributions in binary form must reproduce both the above copyright
+# notice, one of the license notices in the documentation
+# and/or other materials provided with the distribution.
+#
+#
+# $Id: openib-1.0-opensm.init,v 1.5 2006/08/02 18:18:23 dledford Exp $
+#
+# processname: @sbindir@/opensm
+# config: @sysconfdir@/sysconfig/opensm
+# pidfile: /var/run/opensm.pid
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+
+. /etc/rc.d/init.d/functions
+
+CONFIG=@sysconfdir@/sysconfig/opensm
+if [ -f $CONFIG ]; then
+    . $CONFIG
+fi
+
+prog=@sbindir@/opensm
+bin=${prog##*/}
+
+# Handover daemon for updating guid2lid cache file
+sldd_prog=@sbindir@/sldd.sh
+sldd_bin=${sldd_prog##*/}
+sldd_pid_file=/var/run/sldd.pid
+
+ACTION=$1
+
+# Setting OpenSM start parameters
+PID_FILE=/var/run/${bin}.pid
+touch $PID_FILE
+
+if [[ -n "${OSM_HOSTS}" && $(echo -n ${OSM_HOSTS} | wc -w | tr -d '[:space:]') -gt 1 ]]; then
+    HONORE_GUID2LID="--honor_guid2lid"
+fi
+
+#########################################################################
+
+start_sldd()
+{
+    if [ -f $sldd_pid_file ]; then
+        local line p
+        read line < $sldd_pid_file
+        for p in $line ; do
+            [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && sldd_pid="$sldd_pid $p"
+        done
+    fi
+
+    if [ -z "$sldd_pid" ]; then
+        sldd_pid=`pidof -x $sldd_bin`
+    fi
+
+    if [ -n "${sldd_pid:-}" ] ; then
+        kill -9 ${sldd_pid} > /dev/null 2>&1
+    fi
+
+    $sldd_prog > /dev/null 2>&1 &
+    sldd_pid=$!
+
+    echo ${sldd_pid} > $sldd_pid_file
+    # Sleep is needed in order to update local guid2lid cache file before running opensm
+    sleep 3
+}
+
+stop_sldd()
+{
+    if [ -f $sldd_pid_file ]; then
+        local line p
+        read line < $sldd_pid_file
+        for p in $line ; do
+            [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && sldd_pid="$sldd_pid $p"
+        done
+    fi
+
+    if [ -z "$sldd_pid" ]; then
+        sldd_pid=`pidof -x $sldd_bin`
+    fi
+
+    if [ -n "${sldd_pid:-}" ] ; then
+        kill -15 ${sldd_pid} > /dev/null 2>&1
+    fi
+
+}
+
+start()
+{
+    local OSM_PID=
+
+    pid=""; RC=0    # RC=0 so the "already running" path returns success, not an unset/stale value
+
+    if [ -f $PID_FILE ]; then
+        local line p
+        read line < $PID_FILE
+        for p in $line ; do
+            [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && pid="$pid $p"
+        done
+    fi
+
+    if [ -z "$pid" ]; then
+        pid=`pidof -o $$ -o $PPID -o %PPID -x $bin`
+    fi
+
+    if [ -n "${pid:-}" ] ; then
+        echo $"${bin} (pid $pid) is already running..."
+    else
+
+        if [ -n "${HONORE_GUID2LID}" ]; then
+            # Run sldd daemon
+            start_sldd
+        fi
+
+        # Start opensm
+        echo -n "Starting IB Subnet Manager"
+        $prog --daemon ${HONORE_GUID2LID} ${OPTIONS} > /dev/null
+        cnt=0; alive=0
+        while [ $cnt -lt 6 -a $alive -ne 1 ]; do
+            echo -n ".";
+            sleep 1
+            alive=0
+            OSM_PID=`pidof $prog`
+            if [ "$OSM_PID" != "" ]; then
+                alive=1
+            fi
+            let cnt++;
+        done
+
+        echo $OSM_PID > $PID_FILE
+        checkpid $OSM_PID
+        RC=$?
+        [ $RC -eq 0 ] && echo_success || echo_failure
+        [ $RC -eq 0 ] && touch /var/lock/subsys/opensm
+        echo
+
+    fi
+return $RC
+}
+
+stop()
+{
+    local pid=
+    local pid1=
+    local pid2=
+
+    # Stop sldd daemon
+    stop_sldd
+
+    if [ -f $PID_FILE ]; then
+        local line p
+        read line < $PID_FILE
+        for p in $line ; do
+            [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && pid1="$pid1 $p"
+        done
+    fi
+
+    pid2=`pidof -o $$ -o $PPID -o %PPID -x $bin`
+
+    pid=`echo "$pid1 $pid2" | sed -e 's/\ /\n/g' | sort -n | uniq | sed -e 's/\n/\ /g'`
+
+    if [ -n "${pid:-}" ] ; then
+        # Kill opensm
+        echo -n "Stopping IB Subnet Manager."
+        kill -15 $pid > /dev/null 2>&1
+        cnt=0; alive=1
+        while [ $cnt -lt 6 -a $alive -ne 0 ]; do
+            echo -n ".";
+            alive=0
+            for p in $pid; do
+                if checkpid $p ; then alive=1; echo -n "-"; fi
+            done
+            let cnt++;
+            sleep $alive
+        done
+
+        for p in $pid
+        do
+            while checkpid $p ; do
+                kill -KILL $p > /dev/null 2>&1
+                echo -n "+"
+                sleep 1
+            done
+        done
+        checkpid $pid
+        RC=$?
+        [ $RC -eq 0 ] && echo_failure || echo_success
+        echo
+        RC=$((! $RC))
+    else
+        echo -n "Stopping IB Subnet Manager."
+        echo_failure
+        echo
+        RC=1
+    fi
+
+    # Remove pid file if any.
+    rm -f $PID_FILE
+    rm -f /var/lock/subsys/opensm
+    return $RC
+}
+
+status()
+{
+    local pid
+
+    # First try "pidof"
+    pid=`pidof -o $$ -o $PPID -o %PPID -x ${bin}`
+    if [ -n "$pid" ]; then
+        echo $"${bin} (pid $pid) is running..."
+        return 0
+    fi
+
+    # Next try "/var/run/opensm.pid" files
+    if [ -f $PID_FILE ] ; then
+        read pid < $PID_FILE
+        if [ -n "$pid" ]; then
+            echo $"${bin} dead but pid file $PID_FILE exists"
+            return 1
+        fi
+    fi
+    echo $"${bin} is stopped"
+    return 3
+}
+
+
+
+case $ACTION in
+    start)
+        start
+        ;;
+    stop)
+        stop
+        ;;
+    restart)
+        stop
+        start
+        ;;
+    status)
+        status
+        ;;
+    condrestart)
+        pid=`pidof -o $$ -o $PPID -o %PPID -x $bin`
+        if [ -n "$pid" ]; then
+            stop
+            sleep 1
+            start
+        fi
+        ;;
+    *)
+        echo
+        echo "Usage: `basename $0` {start|stop|restart|status|condrestart}"
+        echo
+        exit 1
+        ;;
+esac
+
+RC=$?
+exit $RC
diff --git a/scripts/sldd.sh.in b/scripts/sldd.sh.in
new file mode 100755
index 0000000..9b0e282
--- /dev/null
+++ b/scripts/sldd.sh.in
@@ -0,0 +1,259 @@
+#!/bin/bash
+#
+# Copyright (c) 2008 Voltaire, Inc. All rights reserved.
+# Copyright (c) 2006 Mellanox Technologies. All rights reserved.
+#
+# This Software is licensed under one of the following licenses:
+#
+# 1) under the terms of the "Common Public License 1.0" a copy of which is
+# available from the Open Source Initiative, see
+# http://www.opensource.org/licenses/cpl.php.
+#
+# 2) under the terms of the "The BSD License" a copy of which is
+# available from the Open Source Initiative, see
+# http://www.opensource.org/licenses/bsd-license.php.
+#
+# 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+# copy of which is available from the Open Source Initiative, see
+# http://www.opensource.org/licenses/gpl-license.php.
+#
+# Licensee has the right to choose one of the above licenses.
+#
+# Redistributions of source code must retain the above copyright
+# notice and one of the license notices.
+#
+# Redistributions in binary form must reproduce both the above copyright
+# notice, one of the license notices in the documentation
+# and/or other materials provided with the distribution.
+#
+#
+
+# OpenSM found to have the following problem
+# when handover is performed:
+# If some of the cluster nodes are rebooted during the handover they lose their LID assignment.
+# The reason for it is that the standby SM does not obey its own Guid to LID table
+# and simply uses the discovered LIDs. If some nodes are not available for it
+# their previous LID assignment is lost forever.
+
+# The idea is to use an external daemon that will distribute
+# the semi-static LID assignment table from the master SM to all standby SMs.
+# A standby SM, becoming a master, needs to obey the copied semi static LID assignment table.
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+
+CONFIG=@sysconfdir@/sysconfig/opensm
+if [ -f $CONFIG ]; then
+    . $CONFIG
+fi
+
+SLDD_DEBUG=${SLDD_DEBUG:-0}
+
+CACHE_FILE=${CACHE_FILE:-/var/cache/opensm/guid2lid:/var/cache/opensm/guid2mkey:/var/cache/opensm/neighbors}
+declare -a arr_CACHE_FILES
+arr_CACHE_FILES=(`echo $CACHE_FILE| sed 's/:/\n/g' | sort | uniq`)
+
+PING='ping -w 1 -c 1'
+
+RCP=${RCP:-/usr/bin/scp}
+RSH=${RSH:-/usr/bin/ssh}
+IFCONFIG=${IFCONFIG:-'/sbin/ifconfig -a'}
+
+declare -i SLDD_DEBUG
+RESCAN_TIME=${RESCAN_TIME:-60}
+
+if [ -z "${OSM_HOSTS}" ]; then
+    [ $SLDD_DEBUG -eq 1 ] &&
+        echo "No OpenSM servers (OSM_HOSTS) configured for the IB subnet."
+    exit 0
+fi
+
+
+declare -a arr_OSM_HOSTS
+arr_OSM_HOSTS=(${OSM_HOSTS})
+
+num_of_osm_hosts=${#arr_OSM_HOSTS[@]}
+
+if [ ${num_of_osm_hosts} -eq 1 ]; then
+    [ $SLDD_DEBUG -eq 1 ] &&
+        echo "One OpenSM server configured in the IB subnet." &&
+        echo "Nothing to be done for SLDD"
+
+    exit 0
+fi
+
+trap 'trap_handler' 15
+
+trap_handler()
+{
+    logger -i "SLDD: Exiting."
+    exit 0
+}
+
+is_alive()
+{
+    $PING $1 > /dev/null 2>&1
+    return $?
+}
+
+is_local()
+{
+    $IFCONFIG | grep -w "$1" > /dev/null 2>&1
+    return $?
+}
+
+update_remote_cache()
+{
+    /bin/rm -f "$1.upd"
+    /bin/cp -a "$1" "$1.upd"
+
+    [ $SLDD_DEBUG -eq 1 ] &&
+        echo "Updating remote cache file"
+
+    for host in ${OSM_HOSTS}
+    do
+        # Skip local host update
+        if [ "${host}" == "${local_host}" ]; then
+            continue
+        fi
+
+        if is_alive $host; then
+            cache_dir=$(dirname "$1")
+            stat=$($RSH $host "/bin/mkdir -p ${cache_dir} > /dev/null 2>&1; /bin/rm -f "$1.${local_host}" > /dev/null 2>&1; echo \$?" | tr -d '[:space:]')
+            if [ "X${stat}" == "X0" ]; then
+                [ $SLDD_DEBUG -eq 1 ] &&
+                    echo "Updating $host"
+                logger -i "SLDD: updating $host with $1"
+                $RCP "$1.upd" "${host}:$1.${local_host}"
+                /bin/cp "$1.upd" "$1.${host}"
+            else
+                [ $SLDD_DEBUG -eq 1 ] &&
+                    echo "$RSH to $host failed."
+                logger -i "SLDD: Failed to update $host with $1. $RSH without password should be enabled"
+                exit 5
+            fi
+        else
+            [ $SLDD_DEBUG -eq 1 ] &&
+                echo "$host is down."
+            continue
+        fi
+    done
+}
+
+get_latest_remote_cache()
+{
+    # Find most updated remote cache file (the suffix should be like ip address: *.*.*.*)
+    echo -n "$(/bin/ls -1t $1.*.* 2> /dev/null | head -1)"
+}
+
+get_largest_remote_cache()
+{
+    # Find largest (size) remote cache file (the suffix should be like ip address: *.*.*.*)
+    echo -n "$(/bin/ls -1S $1.*.* 2> /dev/null | head -1)"
+}
+
+swap_cache_files()
+{
+    /bin/rm -f "$1.old"
+    /bin/mv "$1" "$1.old"
+    /bin/cp "$2" "$1"
+    touch "$1.tmp"
+}
+
+# Find local host in the osm hosts list
+local_host=""
+for host in ${OSM_HOSTS}
+do
+    if is_local $host; then
+        local_host=${host}
+    fi
+done
+
+# Get cache file info
+declare -i new_size=0
+declare -ai arr_last_size
+for i in ${!arr_CACHE_FILES[@]}
+do
+    arr_last_size[$i]=0
+done
+declare -i largest_remote_cache_size=0
+
+for i in ${!arr_CACHE_FILES[@]}
+do
+    cache_file=${arr_CACHE_FILES[$i]}
+    if [ -e ${cache_file} ]; then
+        arr_last_size[$i]=$(du -b ${cache_file} | awk '{print$1}' | tr -d '[:space:]')
+    else
+        touch ${cache_file} ${cache_file}.tmp
+    fi
+
+# if [ ${arr_last_size[$i]} -gt 0 ]; then
+# # First time update
+# update_remote_cache ${cache_file}
+# fi
+done
+
+while true
+do
+    for i in ${!arr_CACHE_FILES[@]}
+    do
+        cache_file=${arr_CACHE_FILES[$i]}
+        if [ -s "${cache_file}" ]; then
+            new_size=$(du -b ${cache_file} | awk '{print$1}' | tr -d '[:space:]')
+            # Check if local cache file grew from its last version or the time stamp changed
+            if [ ${new_size} -gt ${arr_last_size[$i]} ] ||
+                [ "$(/bin/ls -1t ${cache_file} ${cache_file}.tmp 2> /dev/null | head -1)" != "${cache_file}.tmp" ]; then
+                largest_remote_cache=$(get_largest_remote_cache ${cache_file})
+                if [[ -n "${largest_remote_cache}" && -s "${largest_remote_cache}" ]]; then
+                    largest_remote_cache_size=$(du -b ${largest_remote_cache} 2> /dev/null | awk '{print$1}' | tr -d '[:space:]')
+                else
+                    largest_remote_cache_size=0
+                fi
+
+                # Check if local cache file larger than remote cache file
+                if [ ${new_size} -gt ${largest_remote_cache_size} ]; then
+                    [ $SLDD_DEBUG -eq 1 ] &&
+                        echo "Local cache file larger then remote. Update remote cache files"
+                    arr_last_size[$i]=${new_size}
+                    update_remote_cache ${cache_file}
+                    continue
+                fi
+            fi
+
+            largest_remote_cache=$(get_largest_remote_cache ${cache_file})
+            if [[ -n "${largest_remote_cache}" && -s "${largest_remote_cache}" ]]; then
+                largest_remote_cache_size=$(du -b ${largest_remote_cache} 2> /dev/null | awk '{print$1}' | tr -d '[:space:]')
+            else
+                largest_remote_cache_size=0
+            fi
+
+            # Update local cache file from remote
+            if [ ${largest_remote_cache_size} -gt ${new_size} ]; then
+                [ $SLDD_DEBUG -eq 1 ] &&
+                    echo "Local cache file shorter then remote. Use ${largest_remote_cache}"
+                logger -i "SLDD: updating local cache file with ${largest_remote_cache}"
+                swap_cache_files ${cache_file} ${largest_remote_cache}
+                arr_last_size[$i]=${largest_remote_cache_size}
+            fi
+
+        else # The local cache file is empty
+            [ $SLDD_DEBUG -eq 1 ] &&
+                echo "${cache_file} is empty"
+
+            largest_remote_cache=$(get_largest_remote_cache ${cache_file})
+            if [[ -n "${largest_remote_cache}" && -s "${largest_remote_cache}" ]]; then
+                # Copy it to the current cache
+                [ $SLDD_DEBUG -eq 1 ] &&
+                    echo "Local cache file is empty. Use ${largest_remote_cache}"
+                logger -i "SLDD: updating local cache file with ${largest_remote_cache}"
+                swap_cache_files ${cache_file} ${largest_remote_cache}
+            fi
+
+        fi
+    done
+
+    [ $SLDD_DEBUG -eq 1 ] &&
+        echo "Sleeping ${RESCAN_TIME} seconds."
+    sleep ${RESCAN_TIME}
+
+done
diff --git a/scripts/travis-build b/scripts/travis-build
new file mode 100755
index 0000000..e48bc51
--- /dev/null
+++ b/scripts/travis-build
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+PATH=/home/`whoami`/.local/bin:$PATH
+DIR=$(readlink -f .)
+# Stop on error
+set -e
+# Echo all commands to Travis log
+set -x
+
+./autogen.sh
+
+
+# Clang doesn't like getting pointers from packed struct members, even if aligned.
+CC=clang CFLAGS="-Wno-address-of-packed-member" ./configure
+make
+make clean
+
+# Run sparse on the subdirectories which are sparse clean (pipe stderr only; stdout is discarded)
+CC=cgcc CFLAGS="-Werror" make 2>&1 > /dev/null | grep -v '^/usr/include' | tee out
+make clean
+# sparse does not fail gcc on messages
+if [ -s out ]; then
+    false
+fi
+
+CC=gcc-8 CFLAGS="" ./configure
+make
+make dist
diff --git a/scripts/travis-checkpatch b/scripts/travis-checkpatch
new file mode 100755
index 0000000..441d69a
--- /dev/null
+++ b/scripts/travis-checkpatch
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Copyright 2017 Mellanox Technologies Ltd.
+# Licensed under BSD (MIT variant) or GPLv2. See COPYING.
+
+
+if [ "x$TRAVIS_EVENT_TYPE" != "xpull_request" ]; then
+    # Perform checkpatch checks on pull requests only
+    exit 0
+fi
+
+# The below "set" is commented, because the checkpatch.pl returns 1 (error) for warnings too.
+# And the rdma-core code is not mature enough to be warning safe
+# set -e
+
+if [ "x$TRAVIS_COMMIT_RANGE" != "x" ]; then
+    wget -q https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/scripts/checkpatch.pl \
+        https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/scripts/spelling.txt
+    DIR_FOR_PATCHES_TO_CHECK=$(mktemp -d)
+    git format-patch --no-cover-letter $TRAVIS_COMMIT_RANGE ^$TRAVIS_BRANCH -o $DIR_FOR_PATCHES_TO_CHECK/
+    CHECKPATCH_OPT="--no-tree --ignore PREFER_KERNEL_TYPES,FILE_PATH_CHANGES,EXECUTE_PERMISSIONS,USE_NEGATIVE_ERRNO,CONST_STRUCT $DIR_FOR_PATCHES_TO_CHECK/*"
+    perl checkpatch.pl $CHECKPATCH_OPT
+    if [ $? -ne 0 ]; then
+        # We rerun checkpatch to simplify parsing and to understand if we failed for errors
+        # For example, the output on some arbitrary patchset of the following line without awk is:
+        # total: 1 errors, 3 warnings, 42 lines checked
+        NUMB_ERRRORS=$(perl checkpatch.pl --terse $CHECKPATCH_OPT | awk 'BEGIN {FS = "total:"} ; {sum+=$2} END {print sum}')
+        exit $NUMB_ERRRORS
+    fi
+fi