|
Packit |
13e616 |
.TH OPENSM 8 "Sept 15, 2014" "OpenIB" "OpenIB Management"
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.SH NAME
|
|
Packit |
13e616 |
opensm \- InfiniBand subnet manager and administration (SM/SA)
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.SH SYNOPSIS
|
|
Packit |
13e616 |
.B opensm
|
|
Packit |
13e616 |
[\-\-version]
|
|
Packit |
13e616 |
[\-F | \-\-config <file_name>]
|
|
Packit |
13e616 |
[\-c(reate-config) <file_name>]
|
|
Packit |
13e616 |
[\-g(uid) <GUID in hex>]
|
|
Packit |
13e616 |
[\-l(mc) <LMC>]
|
|
Packit |
13e616 |
[\-p(riority) <PRIORITY>]
|
|
Packit |
13e616 |
[\-\-subnet_prefix <PREFIX in hex>]
|
|
Packit |
13e616 |
[\-\-smkey <SM_Key>]
|
|
Packit |
13e616 |
[\-\-sm_sl <SL number>]
|
|
Packit |
13e616 |
[\-r(eassign_lids)]
|
|
Packit |
13e616 |
[\-R <engine name(s)> | \-\-routing_engine <engine name(s)>]
|
|
Packit |
13e616 |
[\-\-do_mesh_analysis]
|
|
Packit |
13e616 |
[\-\-lash_start_vl <vl number>]
|
|
Packit |
13e616 |
[\-\-nue_max_num_vls <vl number>]
|
|
Packit |
13e616 |
[\-A | \-\-ucast_cache]
|
|
Packit |
13e616 |
[\-z | \-\-connect_roots]
|
|
Packit |
13e616 |
[\-M <file name> | \-\-lid_matrix_file <file name>]
|
|
Packit |
13e616 |
[\-U <file name> | \-\-lfts_file <file name>]
|
|
Packit |
13e616 |
[\-S | \-\-sadb_file <file name>]
|
|
Packit |
13e616 |
[\-a | \-\-root_guid_file <path to file>]
|
|
Packit |
13e616 |
[\-u | \-\-cn_guid_file <path to file>]
|
|
Packit |
13e616 |
[\-G | \-\-io_guid_file <path to file>]
|
|
Packit |
13e616 |
[\-\-port\-shifting]
|
|
Packit |
13e616 |
[\-\-scatter\-ports <random seed>]
|
|
Packit |
13e616 |
[\-H | \-\-max_reverse_hops <max reverse hops allowed>]
|
|
Packit |
13e616 |
[\-X | \-\-guid_routing_order_file <path to file>]
|
|
Packit |
13e616 |
[\-m | \-\-ids_guid_file <path to file>]
|
|
Packit |
13e616 |
[\-o(nce)]
|
|
Packit |
13e616 |
[\-s(weep) <interval>]
|
|
Packit |
13e616 |
[\-t(imeout) <milliseconds>]
|
|
Packit |
13e616 |
[\-\-retries <number>]
|
|
Packit |
13e616 |
[\-\-maxsmps <number>]
|
|
Packit |
13e616 |
[\-\-console [off | local | socket | loopback]]
|
|
Packit |
13e616 |
[\-\-console-port <port>]
|
|
Packit |
13e616 |
[\-i | \-\-ignore_guids <equalize-ignore-guids-file>]
|
|
Packit |
13e616 |
[\-w | \-\-hop_weights_file <path to file>]
|
|
Packit |
13e616 |
[\-O | \-\-port_search_ordering_file <path to file>]
|
|
Packit |
13e616 |
[\-O | \-\-dimn_ports_file <path to file>] (DEPRECATED)
|
|
Packit |
13e616 |
[\-\-dump_files_dir <directory-name>]
|
|
Packit |
13e616 |
[\-f <log file path> | \-\-log_file <log file path> ]
|
|
Packit |
13e616 |
[\-L | \-\-log_limit <size in MB>] [\-e(rase_log_file)]
|
|
Packit |
13e616 |
[\-P(config) <partition config file> ]
|
|
Packit |
13e616 |
[\-N | \-\-no_part_enforce] (DEPRECATED)
|
|
Packit |
13e616 |
[\-Z | \-\-part_enforce [both | in | out | off]]
|
|
Packit |
13e616 |
[\-W | \-\-allow_both_pkeys]
|
|
Packit |
13e616 |
[\-Q | \-\-qos [\-Y | \-\-qos_policy_file <file name>]]
|
|
Packit |
13e616 |
[\-\-congestion_control]
|
|
Packit |
13e616 |
[\-\-cc_key <key>]
|
|
Packit |
13e616 |
[\-y | \-\-stay_on_fatal]
|
|
Packit |
13e616 |
[\-B | \-\-daemon]
|
|
Packit |
13e616 |
[\-J | \-\-pidfile <file_name>]
|
|
Packit |
13e616 |
[\-I | \-\-inactive]
|
|
Packit |
13e616 |
[\-\-perfmgr]
|
|
Packit |
13e616 |
[\-\-perfmgr_sweep_time_s <seconds>]
|
|
Packit |
13e616 |
[\-\-prefix_routes_file <path>]
|
|
Packit |
13e616 |
[\-\-consolidate_ipv6_snm_req]
|
|
Packit |
13e616 |
[\-\-log_prefix <prefix text>]
|
|
Packit |
13e616 |
[\-\-torus_config <path to file>]
|
|
Packit |
13e616 |
[\-v(erbose)] [\-V] [\-D <flags>] [\-d(ebug) <number>]
|
|
Packit |
13e616 |
[\-h(elp)] [\-?]
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.SH DESCRIPTION
|
|
Packit |
13e616 |
.PP
|
|
Packit |
13e616 |
opensm is an InfiniBand compliant Subnet Manager and Administration,
|
|
Packit |
13e616 |
and runs on top of OpenIB.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
opensm provides an implementation of an InfiniBand Subnet Manager and
|
|
Packit |
13e616 |
Administration. Such a software entity is required to run in order
|
|
Packit |
13e616 |
to initialize the InfiniBand hardware (at least one per each
|
|
Packit |
13e616 |
InfiniBand subnet).
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
opensm also now contains an experimental version of a performance
|
|
Packit |
13e616 |
manager as well.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
opensm defaults were designed to meet the common case usage on clusters with up to a few hundred nodes. Thus, in this default mode, opensm will scan the IB
|
|
Packit |
13e616 |
fabric, initialize it, and sweep occasionally for changes.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
opensm attaches to a specific IB port on the local machine and configures only
|
|
Packit |
13e616 |
the fabric connected to it. (If the local machine has other IB ports,
|
|
Packit |
13e616 |
opensm will ignore the fabrics connected to those other ports). If no port is
|
|
Packit |
13e616 |
specified, it will select the first "best" available port.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
opensm can present the available ports and prompt for a port number to
|
|
Packit |
13e616 |
attach to.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
By default, the run is logged to two files: /var/log/messages and /var/log/opensm.log.
|
|
Packit |
13e616 |
The first file will register only general major events, whereas the second
|
|
Packit |
13e616 |
will include details of reported errors. All errors reported in this second
|
|
Packit |
13e616 |
file should be treated as indicators of IB fabric health issues.
|
|
Packit |
13e616 |
(Note that when a fatal and non-recoverable error occurs, opensm will exit.)
|
|
Packit |
13e616 |
Both log files should include the message "SUBNET UP" if opensm was able to
|
|
Packit |
13e616 |
set up the subnet correctly.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.SH OPTIONS
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.PP
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-version\fR
|
|
Packit |
13e616 |
Prints OpenSM version and exits.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-F\fR, \fB\-\-config\fR <config file>
|
|
Packit |
13e616 |
The name of the OpenSM config file. When not specified
|
|
Packit |
13e616 |
\fB\%@OPENSM_CONFIG_DIR@/@OPENSM_CONFIG_FILE@\fP will be used (if it exists).
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-c\fR, \fB\-\-create-config\fR <file name>
|
|
Packit |
13e616 |
OpenSM will dump its configuration to the specified file and exit.
|
|
Packit |
13e616 |
This is a way to generate an OpenSM configuration file template.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-g\fR, \fB\-\-guid\fR <GUID in hex>
|
|
Packit |
13e616 |
This option specifies the local port GUID value
|
|
Packit |
13e616 |
with which OpenSM should bind. OpenSM may be
|
|
Packit |
13e616 |
bound to 1 port at a time.
|
|
Packit |
13e616 |
If GUID given is 0, OpenSM displays a list
|
|
Packit |
13e616 |
of possible port GUIDs and waits for user input.
|
|
Packit |
13e616 |
Without -g, OpenSM tries to use the default port.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-l\fR, \fB\-\-lmc\fR <LMC value>
|
|
Packit |
13e616 |
This option specifies the subnet's LMC value.
|
|
Packit |
13e616 |
The number of LIDs assigned to each port is 2^LMC.
|
|
Packit |
13e616 |
The LMC value must be in the range 0-7.
|
|
Packit |
13e616 |
LMC values > 0 allow multiple paths between ports.
|
|
Packit |
13e616 |
LMC values > 0 should only be used if the subnet
|
|
Packit |
13e616 |
topology actually provides multiple paths between
|
|
Packit |
13e616 |
ports, i.e. multiple interconnects between switches.
|
|
Packit |
13e616 |
Without -l, OpenSM defaults to LMC = 0, which allows
|
|
Packit |
13e616 |
one path between any two ports.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-p\fR, \fB\-\-priority\fR <Priority value>
|
|
Packit |
13e616 |
This option specifies the SM\'s PRIORITY.
|
|
Packit |
13e616 |
This will affect the handover cases, where master
|
|
Packit |
13e616 |
is chosen by priority and GUID. Range goes from 0
|
|
Packit |
13e616 |
(default and lowest priority) to 15 (highest).
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-subnet_prefix\fR <PREFIX in hex>
|
|
Packit |
13e616 |
This option specifies the subnet prefix to use
|
|
Packit |
13e616 |
on the fabric. The default prefix is
|
|
Packit |
13e616 |
0xfe80000000000000.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-smkey\fR <SM_Key value>
|
|
Packit |
13e616 |
This option specifies the SM\'s SM_Key (64 bits).
|
|
Packit |
13e616 |
This will affect SM authentication.
|
|
Packit |
13e616 |
Note that OpenSM version 3.2.1 and below used the default value '1'
|
|
Packit |
13e616 |
in host byte order; this is fixed now, but you may need this option to
|
|
Packit |
13e616 |
interoperate with old OpenSM running on a little endian machine.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-sm_sl\fR <SL number>
|
|
Packit |
13e616 |
This option sets the SL to use for communication with the SM/SA.
|
|
Packit |
13e616 |
Defaults to 0.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-r\fR, \fB\-\-reassign_lids\fR
|
|
Packit |
13e616 |
This option causes OpenSM to reassign LIDs to all
|
|
Packit |
13e616 |
end nodes. Specifying -r on a running subnet
|
|
Packit |
13e616 |
may disrupt subnet traffic.
|
|
Packit |
13e616 |
Without -r, OpenSM attempts to preserve existing
|
|
Packit |
13e616 |
LID assignments resolving multiple use of same LID.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-R\fR, \fB\-\-routing_engine\fR <Routing engine names>
|
|
Packit |
13e616 |
This option chooses routing engine(s) to use instead of Min Hop
|
|
Packit |
13e616 |
algorithm (default). Multiple routing engines can be specified
|
|
Packit |
13e616 |
separated by commas so that specific ordering of routing algorithms
|
|
Packit |
13e616 |
will be tried if earlier routing engines fail. If all configured
|
|
Packit |
13e616 |
routing engines fail, OpenSM will always attempt to route with Min Hop
|
|
Packit |
13e616 |
unless 'no_fallback' is included in the list of routing engines.
|
|
Packit |
13e616 |
Supported engines: minhop, updn, dnup, file, ftree, lash, dor, torus-2QoS,
|
|
Packit |
13e616 |
nue, dfsssp, sssp.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-do_mesh_analysis\fR
|
|
Packit |
13e616 |
This option enables additional analysis for the lash routing engine to
|
|
Packit |
13e616 |
precondition switch port assignments in regular cartesian meshes which
|
|
Packit |
13e616 |
may reduce the number of SLs required to give a deadlock free routing.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-lash_start_vl\fR <vl number>
|
|
Packit |
13e616 |
This option sets the starting VL to use for the lash routing algorithm.
|
|
Packit |
13e616 |
Defaults to 0.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-nue_max_num_vls\fR <vl number>
|
|
Packit |
13e616 |
This option sets the maximum number of VLs to use for the Nue routing engine.
|
|
Packit |
13e616 |
Every number greater than or equal to 0 is allowed, and the default is 1 to enforce
|
|
Packit |
13e616 |
deadlock-freedom even if QoS is not enabled. If set to 0, then Nue routing will
|
|
Packit |
13e616 |
automatically determine and choose maximum supported by the fabric. And if set
|
|
Packit |
13e616 |
to any integer >= 1, then Nue uses min(max_supported,nue_max_num_vls).
|
|
Packit |
13e616 |
Rule of thumb is: higher nue_max_num_vls results in better path balancing.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-A\fR, \fB\-\-ucast_cache\fR
|
|
Packit |
13e616 |
This option enables unicast routing cache and prevents routing
|
|
Packit |
13e616 |
recalculation (which is a heavy task in a large cluster) when
|
|
Packit |
13e616 |
there was no topology change detected during the heavy sweep, or
|
|
Packit |
13e616 |
when the topology change does not require new routing calculation,
|
|
Packit |
13e616 |
e.g. when one or more CAs/RTRs/leaf switches go down, or one or
|
|
Packit |
13e616 |
more of these nodes come back after being down.
|
|
Packit |
13e616 |
A very common case that is handled by the unicast routing cache
|
|
Packit |
13e616 |
is host reboot, which otherwise would cause two full routing
|
|
Packit |
13e616 |
recalculations: one when the host goes down, and the other when
|
|
Packit |
13e616 |
the host comes back online.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-z\fR, \fB\-\-connect_roots\fR
|
|
Packit |
13e616 |
This option enforces routing engines (up/down and
|
|
Packit |
13e616 |
fat-tree) to make connectivity between root switches and in
|
|
Packit |
13e616 |
this way to be fully IBA compliant. In many cases this can
|
|
Packit |
13e616 |
violate "pure" deadlock free algorithm, so use it carefully.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-M\fR, \fB\-\-lid_matrix_file\fR <file name>
|
|
Packit |
13e616 |
This option specifies the name of the lid matrix dump file
|
|
Packit |
13e616 |
from where switch lid matrices (min hops tables) will be
|
|
Packit |
13e616 |
loaded.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-U\fR, \fB\-\-lfts_file\fR <file name>
|
|
Packit |
13e616 |
This option specifies the name of the LFTs file
|
|
Packit |
13e616 |
from where switch forwarding tables will be loaded when using "file" routing
|
|
Packit |
13e616 |
engine.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-S\fR, \fB\-\-sadb_file\fR <file name>
|
|
Packit |
13e616 |
This option specifies the name of the SA DB dump file
|
|
Packit |
13e616 |
from where SA database will be loaded.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-a\fR, \fB\-\-root_guid_file\fR <file name>
|
|
Packit |
13e616 |
Set the root nodes for the Up/Down or Fat-Tree routing
|
|
Packit |
13e616 |
algorithm to the guids provided in the given file (one to a line).
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-u\fR, \fB\-\-cn_guid_file\fR <file name>
|
|
Packit |
13e616 |
Set the compute nodes for the Fat-Tree or DFSSSP/SSSP routing algorithms
|
|
Packit |
13e616 |
to the port GUIDs provided in the given file (one to a line).
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-G\fR, \fB\-\-io_guid_file\fR <file name>
|
|
Packit |
13e616 |
Set the I/O nodes for the Fat-Tree or DFSSSP/SSSP routing algorithms
|
|
Packit |
13e616 |
to the port GUIDs provided in the given file (one to a line).
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
In the case of Fat-Tree routing:
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
I/O nodes are non-CN nodes allowed to use up to max_reverse_hops switches
|
|
Packit |
13e616 |
the wrong way around to improve connectivity.
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
In the case of (DF)SSSP routing:
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
Providing guids of compute and/or I/O nodes will ensure that paths towards
|
|
Packit |
13e616 |
those nodes are as much separated as possible within their node category,
|
|
Packit |
13e616 |
i.e., I/O traffic will not share the same link if multiple links are available.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-port\-shifting\fR
|
|
Packit |
13e616 |
This option enables a feature called \fBport shifting\fR. In some
|
|
Packit |
13e616 |
fabrics, particularly cluster environments, routes commonly align and
|
|
Packit |
13e616 |
congest with other routes due to algorithmically unchanging traffic
|
|
Packit |
13e616 |
patterns. This routing option will "shift" routing around in an
|
|
Packit |
13e616 |
attempt to alleviate this problem.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-scatter\-ports\fR <random seed>
|
|
Packit |
13e616 |
This option is used to randomize port selection in routing rather than
|
|
Packit |
13e616 |
using a round-robin algorithm (which is the default). Value supplied
|
|
Packit |
13e616 |
with option is used as a random seed. If value is 0,
|
|
Packit |
13e616 |
which is the default, the scatter ports option is disabled.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-H\fR, \fB\-\-max_reverse_hops\fR <max reverse hops allowed>
|
|
Packit |
13e616 |
Set the maximum number of reverse hops an I/O node is allowed
|
|
Packit |
13e616 |
to make. A reverse hop is the use of a switch the wrong way around.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-m\fR, \fB\-\-ids_guid_file\fR <file name>
|
|
Packit |
13e616 |
Name of the map file with set of the IDs which will be used
|
|
Packit |
13e616 |
by Up/Down routing algorithm instead of node GUIDs
|
|
Packit |
13e616 |
(format: <guid> <id> per line).
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-X\fR, \fB\-\-guid_routing_order_file\fR <file name>
|
|
Packit |
13e616 |
Set the order port guids will be routed for the MinHop
|
|
Packit |
13e616 |
and Up/Down routing algorithms to the guids provided in the
|
|
Packit |
13e616 |
given file (one to a line).
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-o\fR, \fB\-\-once\fR
|
|
Packit |
13e616 |
This option causes OpenSM to configure the subnet
|
|
Packit |
13e616 |
once, then exit. Ports remain in the ACTIVE state.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-s\fR, \fB\-\-sweep\fR <interval value>
|
|
Packit |
13e616 |
This option specifies the number of seconds between
|
|
Packit |
13e616 |
subnet sweeps. Specifying -s 0 disables sweeping.
|
|
Packit |
13e616 |
Without -s, OpenSM defaults to a sweep interval of
|
|
Packit |
13e616 |
10 seconds.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-t\fR, \fB\-\-timeout\fR <value>
|
|
Packit |
13e616 |
This option specifies the time in milliseconds
|
|
Packit |
13e616 |
used for transaction timeouts.
|
|
Packit |
13e616 |
Timeout values should be > 0.
|
|
Packit |
13e616 |
Without -t, OpenSM defaults to a timeout value of
|
|
Packit |
13e616 |
200 milliseconds.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-retries\fR <number>
|
|
Packit |
13e616 |
This option specifies the number of retries used
|
|
Packit |
13e616 |
for transactions.
|
|
Packit |
13e616 |
Without --retries, OpenSM defaults to 3 retries
|
|
Packit |
13e616 |
for transactions.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-maxsmps\fR <number>
|
|
Packit |
13e616 |
This option specifies the number of VL15 SMP MADs
|
|
Packit |
13e616 |
allowed on the wire at any one time.
|
|
Packit |
13e616 |
Specifying \-\-maxsmps 0 allows unlimited outstanding
|
|
Packit |
13e616 |
SMPs.
|
|
Packit |
13e616 |
Without \-\-maxsmps, OpenSM defaults to a maximum of
|
|
Packit |
13e616 |
4 outstanding SMPs.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-console [off | local | loopback | socket]\fR
|
|
Packit |
13e616 |
This option brings up the OpenSM console (default off). Note, loopback and
|
|
Packit |
13e616 |
socket open a socket which can be connected to WITHOUT CREDENTIALS. Loopback
|
|
Packit |
13e616 |
is safer if access to your SM host is controlled. tcp_wrappers
|
|
Packit |
13e616 |
(hosts.[allow|deny]) is used with loopback and socket. loopback and socket
|
|
Packit |
13e616 |
will only be available if OpenSM was built with --enable-console-loopback
|
|
Packit |
13e616 |
(default yes) and --enable-console-socket (default no) respectively.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-console-port\fR <port>
|
|
Packit |
13e616 |
Specify an alternate telnet port for the socket console (default 10000).
|
|
Packit |
13e616 |
Note that this option only appears if OpenSM was built with
|
|
Packit |
13e616 |
--enable-console-socket.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-i\fR, \fB\-\-ignore_guids\fR <equalize-ignore-guids-file>
|
|
Packit |
13e616 |
This option provides the means to define a set of ports
|
|
Packit |
13e616 |
(by node guid and port number) that will be ignored by the link load
|
|
Packit |
13e616 |
equalization algorithm.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-w\fR, \fB\-\-hop_weights_file\fR <path to file>
|
|
Packit |
13e616 |
This option provides weighting factors per port representing a hop cost in
|
|
Packit |
13e616 |
computing the lid matrix. The file consists of lines containing a switch port
|
|
Packit |
13e616 |
GUID (specified as a 64 bit hex number, with leading 0x), output port number,
|
|
Packit |
13e616 |
and weighting factor. Any port not listed in the file defaults to a weighting
|
|
Packit |
13e616 |
factor of 1. Lines starting with # are comments. Weights affect only the
|
|
Packit |
13e616 |
output route from the port, so many useful configurations will require weights
|
|
Packit |
13e616 |
to be specified in pairs.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-O\fR, \fB\-\-port_search_ordering_file\fR <path to file>
|
|
Packit |
13e616 |
This option tweaks the routing. It is suitable for two cases:
|
|
Packit |
13e616 |
1. While using DOR routing algorithm.
|
|
Packit |
13e616 |
This option provides a mapping between hypercube dimensions and ports
|
|
Packit |
13e616 |
on a per switch basis for the DOR routing engine. The file consists
|
|
Packit |
13e616 |
of lines containing a switch node GUID (specified as a 64 bit hex
|
|
Packit |
13e616 |
number, with leading 0x) followed by a list of non-zero port numbers,
|
|
Packit |
13e616 |
separated by spaces, one switch per line. The order for the port
|
|
Packit |
13e616 |
numbers is in one to one correspondence to the dimensions. Ports not
|
|
Packit |
13e616 |
listed on a line are assigned to the remaining dimensions, in port
|
|
Packit |
13e616 |
order. Anything after a # is a comment.
|
|
Packit |
13e616 |
2. While using general routing algorithm.
|
|
Packit |
13e616 |
This option provides the order of the ports that would be chosen for routing,
|
|
Packit |
13e616 |
from each switch rather than searching for an appropriate port from port 1 to N.
|
|
Packit |
13e616 |
The file consists of lines containing a switch node GUID (specified as a 64 bit
|
|
Packit |
13e616 |
hex number, with leading 0x) followed by a list of non-zero port numbers,
|
|
Packit |
13e616 |
separated by spaces, one switch per line. In case of DOR, the order for the
|
|
Packit |
13e616 |
port numbers is in one to one correspondence to the dimensions. Ports not
|
|
Packit |
13e616 |
listed on a line are assigned to the remaining dimensions, in port
|
|
Packit |
13e616 |
order. Anything after a # is a comment.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-O\fR, \fB\-\-dimn_ports_file\fR <path to file> \fB(DEPRECATED)\fR
|
|
Packit |
13e616 |
This is a deprecated flag. Please use \fB\-\-port_search_ordering_file\fR instead.
|
|
Packit |
13e616 |
This option provides a mapping between hypercube dimensions and ports
|
|
Packit |
13e616 |
on a per switch basis for the DOR routing engine. The file consists
|
|
Packit |
13e616 |
of lines containing a switch node GUID (specified as a 64 bit hex
|
|
Packit |
13e616 |
number, with leading 0x) followed by a list of non-zero port numbers,
|
|
Packit |
13e616 |
separated by spaces, one switch per line. The order for the port
|
|
Packit |
13e616 |
numbers is in one to one correspondence to the dimensions. Ports not
|
|
Packit |
13e616 |
listed on a line are assigned to the remaining dimensions, in port
|
|
Packit |
13e616 |
order. Anything after a # is a comment.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-x\fR, \fB\-\-honor_guid2lid\fR
|
|
Packit |
13e616 |
This option forces OpenSM to honor the guid2lid file,
|
|
Packit |
13e616 |
when it comes out of Standby state, if such file exists
|
|
Packit |
13e616 |
under OSM_CACHE_DIR, and is valid.
|
|
Packit |
13e616 |
By default, this is FALSE.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-dump_files_dir\fR <directory name>
|
|
Packit |
13e616 |
This option will set the directory to hold the file dumps.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-f\fR, \fB\-\-log_file\fR <file name>
|
|
Packit |
13e616 |
This option defines the log to be the given file.
|
|
Packit |
13e616 |
By default, the log goes to /var/log/opensm.log.
|
|
Packit |
13e616 |
For the log to go to standard output use -f stdout.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-L\fR, \fB\-\-log_limit\fR <size in MB>
|
|
Packit |
13e616 |
This option defines maximal log file size in MB. When
|
|
Packit |
13e616 |
specified the log file will be truncated upon reaching
|
|
Packit |
13e616 |
this limit.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-e\fR, \fB\-\-erase_log_file\fR
|
|
Packit |
13e616 |
This option will cause deletion of the log file
|
|
Packit |
13e616 |
(if it previously exists). By default, the log file
|
|
Packit |
13e616 |
is accumulative.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-P\fR, \fB\-\-Pconfig\fR <partition config file>
|
|
Packit |
13e616 |
This option defines the optional partition configuration file.
|
|
Packit |
13e616 |
The default name is \fB\%@OPENSM_CONFIG_DIR@/@PARTITION_CONFIG_FILE@\fP.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-prefix_routes_file\fR <file name>
|
|
Packit |
13e616 |
Prefix routes control how the SA responds to path record queries for
|
|
Packit |
13e616 |
off-subnet DGIDs. By default, the SA fails such queries. The
|
|
Packit |
13e616 |
.B PREFIX ROUTES
|
|
Packit |
13e616 |
section below describes the format of the configuration file.
|
|
Packit |
13e616 |
The default path is \fB\%@OPENSM_CONFIG_DIR@/prefix\-routes.conf\fP.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-Q\fR, \fB\-\-qos\fR
|
|
Packit |
13e616 |
This option enables QoS setup. It is disabled by default.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-Y\fR, \fB\-\-qos_policy_file\fR <file name>
|
|
Packit |
13e616 |
This option defines the optional QoS policy file. The default
|
|
Packit |
13e616 |
name is \fB\%@OPENSM_CONFIG_DIR@/@QOS_POLICY_FILE@\fP. See
|
|
Packit |
13e616 |
QoS_management_in_OpenSM.txt in opensm doc for more information on
|
|
Packit |
13e616 |
configuring QoS policy via this file.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-congestion_control\fR
|
|
Packit |
13e616 |
(EXPERIMENTAL) This option enables congestion control configuration.
|
|
Packit |
13e616 |
It is disabled by default. See config file for congestion control
|
|
Packit |
13e616 |
configuration options.
|
|
Packit |
13e616 |
.TP
\fB\-\-cc_key\fR <key>
|
|
Packit |
13e616 |
(EXPERIMENTAL) This option configures the CCkey to use when configuring
|
|
Packit |
13e616 |
congestion control. Note that this option does not configure a new
|
|
Packit |
13e616 |
CCkey into switches and CAs. Defaults to 0.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-N\fR, \fB\-\-no_part_enforce\fR \fB(DEPRECATED)\fR
|
|
Packit |
13e616 |
This is a deprecated flag. Please use \fB\-\-part_enforce\fR instead.
|
|
Packit |
13e616 |
This option disables partition enforcement on switch external ports.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-Z\fR, \fB\-\-part_enforce\fR [both | in | out | off]
|
|
Packit |
13e616 |
This option indicates the partition enforcement type (for switches).
|
|
Packit |
13e616 |
Enforcement type can be inbound only (in), outbound only (out),
|
|
Packit |
13e616 |
both or disabled (off). Default is both.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-W\fR, \fB\-\-allow_both_pkeys\fR
|
|
Packit |
13e616 |
This option indicates whether both full and limited membership on the
|
|
Packit |
13e616 |
same partition can be configured in the PKeyTable. Default is not
|
|
Packit |
13e616 |
to allow both pkeys.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-y\fR, \fB\-\-stay_on_fatal\fR
|
|
Packit |
13e616 |
This option will cause SM not to exit on fatal initialization
|
|
Packit |
13e616 |
issues: if SM discovers duplicated guids or a 12x link with
|
|
Packit |
13e616 |
lane reversal badly configured.
|
|
Packit |
13e616 |
By default, the SM will exit on these errors.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-B\fR, \fB\-\-daemon\fR
|
|
Packit |
13e616 |
Run in daemon mode - OpenSM will run in the background.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-J\fR, \fB\-\-pidfile <file_name>\fR
|
|
Packit |
13e616 |
Makes the SM write its own PID to the specified file when started in daemon
|
|
Packit |
13e616 |
mode.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-I\fR, \fB\-\-inactive\fR
|
|
Packit |
13e616 |
Start SM in inactive rather than init SM state. This
|
|
Packit |
13e616 |
option can be used in conjunction with the perfmgr so as to
|
|
Packit |
13e616 |
run a standalone performance manager without SM/SA. However,
|
|
Packit |
13e616 |
this is NOT currently implemented in the performance manager.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-perfmgr\fR
|
|
Packit |
13e616 |
Enable the perfmgr. Only takes effect if --enable-perfmgr was specified at
|
|
Packit |
13e616 |
configure time. See performance-manager-HOWTO.txt in opensm doc for
|
|
Packit |
13e616 |
more information on running perfmgr.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-perfmgr_sweep_time_s\fR <seconds>
|
|
Packit |
13e616 |
Specify the sweep time for the performance manager in seconds
|
|
Packit |
13e616 |
(default is 180 seconds). Only takes
|
|
Packit |
13e616 |
effect if --enable-perfmgr was specified at configure time.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-consolidate_ipv6_snm_req\fR
|
|
Packit |
13e616 |
Use shared MLID for IPv6 Solicited Node Multicast groups per MGID scope
|
|
Packit |
13e616 |
and P_Key.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-log_prefix\fR <prefix text>
|
|
Packit |
13e616 |
This option specifies the prefix to the syslog messages from OpenSM.
|
|
Packit |
13e616 |
A suitable prefix can be used to identify the IB subnet in syslog messages
|
|
Packit |
13e616 |
when two or more instances of OpenSM run in a single node to manage multiple
|
|
Packit |
13e616 |
fabrics. For example, in a dual-fabric (or dual-rail) IB cluster, the prefix
|
|
Packit |
13e616 |
for the first fabric could be "mpi" and the other fabric could be "storage".
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-\-torus_config\fR <path to torus\-2QoS config file>
|
|
Packit |
13e616 |
This option defines the file name for the extra configuration
|
|
Packit |
13e616 |
information needed for the torus-2QoS routing engine. The default
|
|
Packit |
13e616 |
name is \fB\%@OPENSM_CONFIG_DIR@/@TORUS2QOS_CONF_FILE@\fP.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-v\fR, \fB\-\-verbose\fR
|
|
Packit |
13e616 |
This option increases the log verbosity level.
|
|
Packit |
13e616 |
The -v option may be specified multiple times
|
|
Packit |
13e616 |
to further increase the verbosity level.
|
|
Packit |
13e616 |
See the -D option for more information about
|
|
Packit |
13e616 |
log verbosity.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-V\fR
|
|
Packit |
13e616 |
This option sets the maximum verbosity level and
|
|
Packit |
13e616 |
forces log flushing.
|
|
Packit |
13e616 |
The -V option is equivalent to \'-D 0xFF -d 2\'.
|
|
Packit |
13e616 |
See the -D option for more information about
|
|
Packit |
13e616 |
log verbosity.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-D\fR <value>
|
|
Packit |
13e616 |
This option sets the log verbosity level.
|
|
Packit |
13e616 |
A flags field must follow the -D option.
|
|
Packit |
13e616 |
A bit set/clear in the flags enables/disables a
|
|
Packit |
13e616 |
specific log level as follows:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
BIT LOG LEVEL ENABLED
|
|
Packit |
13e616 |
---- -----------------
|
|
Packit |
13e616 |
0x01 - ERROR (error messages)
|
|
Packit |
13e616 |
0x02 - INFO (basic messages, low volume)
|
|
Packit |
13e616 |
0x04 - VERBOSE (interesting stuff, moderate volume)
|
|
Packit |
13e616 |
0x08 - DEBUG (diagnostic, high volume)
|
|
Packit |
13e616 |
0x10 - FUNCS (function entry/exit, very high volume)
|
|
Packit |
13e616 |
0x20 - FRAMES (dumps all SMP and GMP frames)
|
|
Packit |
13e616 |
0x40 - ROUTING (dump FDB routing information)
|
|
Packit |
13e616 |
0x80 - SYS (syslog at LOG_INFO level in addition to OpenSM logging)
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Without -D, OpenSM defaults to ERROR + INFO (0x3).
|
|
Packit |
13e616 |
Specifying -D 0 disables all messages.
|
|
Packit |
13e616 |
Specifying -D 0xFF enables all messages (see -V).
|
|
Packit |
13e616 |
High verbosity levels may require increasing
|
|
Packit |
13e616 |
the transaction timeout with the -t option.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-d\fR, \fB\-\-debug\fR <value>
|
|
Packit |
13e616 |
This option specifies a debug option.
|
|
Packit |
13e616 |
These options are not normally needed.
|
|
Packit |
13e616 |
The number following -d selects the debug
|
|
Packit |
13e616 |
option to enable as follows:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OPT Description
|
|
Packit |
13e616 |
--- -----------------
|
|
Packit |
13e616 |
-d0 - Ignore other SM nodes
|
|
Packit |
13e616 |
-d1 - Force single threaded dispatching
|
|
Packit |
13e616 |
-d2 - Force log flushing after each log message
|
|
Packit |
13e616 |
-d3 - Disable multicast support
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-h\fR, \fB\-\-help\fR
|
|
Packit |
13e616 |
Display this usage info then exit.
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
\fB\-?\fR
|
|
Packit |
13e616 |
Display this usage info then exit.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.SH ENVIRONMENT VARIABLES
|
|
Packit |
13e616 |
.PP
|
|
Packit |
13e616 |
The following environment variables control opensm behavior:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_TMP_DIR - controls the directory in which the temporary files generated by
|
|
Packit |
13e616 |
opensm are created. These files are: opensm-subnet.lst, opensm.fdbs, and
|
|
Packit |
13e616 |
opensm.mcfdbs. By default, this directory is /var/log. Note that
|
|
Packit |
13e616 |
\-\-dump_files_dir command line option or dump_files_dir option in
|
|
Packit |
13e616 |
option/config file takes precedence over this environment variable.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_CACHE_DIR - opensm stores certain data to the disk such that subsequent
|
|
Packit |
13e616 |
runs are consistent. The default directory used is /var/cache/opensm.
|
|
Packit |
13e616 |
The following files are included in it:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
guid2lid - stores the LID range assigned to each GUID
|
|
Packit |
13e616 |
guid2mkey - stores the MKey previously assigned to each GUID
|
|
Packit |
13e616 |
neighbors - stores a map of the GUIDs at either end of each link
|
|
Packit |
13e616 |
in the fabric
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.SH NOTES
|
|
Packit |
13e616 |
.PP
|
|
Packit |
13e616 |
When opensm receives a HUP signal, it starts a new heavy sweep as if a trap was received or a topology change was found.
|
|
Packit |
13e616 |
.PP
|
|
Packit |
13e616 |
Also, SIGUSR1 can be used to trigger a reopen of /var/log/opensm.log for
|
|
Packit |
13e616 |
logrotate purposes.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.SH PARTITION CONFIGURATION
|
|
Packit |
13e616 |
.PP
|
|
Packit |
13e616 |
The default name of OpenSM partitions configuration file is
|
|
Packit |
13e616 |
\fB\%@OPENSM_CONFIG_DIR@/@PARTITION_CONFIG_FILE@\fP. The default may be changed
|
|
Packit |
13e616 |
by using the --Pconfig (-P) option with OpenSM.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The default partition will be created by OpenSM unconditionally even
|
|
Packit |
13e616 |
when partition configuration file does not exist or cannot be accessed.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The default partition has P_Key value 0x7fff. OpenSM\'s port will always
|
|
Packit |
13e616 |
have full membership in default partition. All other end ports will have
|
|
Packit |
13e616 |
full membership if the partition configuration file is not found or cannot
|
|
Packit |
13e616 |
be accessed, or limited membership if the file exists and can be accessed
|
|
Packit |
13e616 |
but there is no rule for the Default partition.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Effectively, this amounts to the same as if one of the following rules
|
|
Packit |
13e616 |
below appear in the partition configuration file.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
In the case of no rule for the Default partition:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Default=0x7fff : ALL=limited, SELF=full ;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
In the case of no partition configuration file or file cannot be accessed:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Default=0x7fff : ALL=full ;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
File Format
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Comments:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Line content following the \'#\' character is a comment and is ignored by the
|
|
Packit |
13e616 |
parser.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
General file format:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
<Partition Definition>:[<newline>]<Partition Properties>;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Partition Definition:
|
|
Packit |
13e616 |
.nf
|
|
Packit |
13e616 |
[PartitionName][=PKey][,indx0][,ipoib_bc_flags][,defmember=full|limited|both]
|
|
Packit |
13e616 |
.fi
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
PartitionName - string, will be used with logging. When
|
|
Packit |
13e616 |
omitted, empty string will be used.
|
|
Packit |
13e616 |
PKey - P_Key value for this partition. Only low 15
|
|
Packit |
13e616 |
bits will be used. When omitted will be
|
|
Packit |
13e616 |
autogenerated.
|
|
Packit |
13e616 |
indx0 - indicates that this pkey should be inserted in
|
|
Packit |
13e616 |
block 0 index 0.
|
|
Packit |
13e616 |
ipoib_bc_flags - used to indicate/specify IPoIB capability of
|
|
Packit |
13e616 |
this partition.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
defmember=full|limited|both - specifies default membership for
|
|
Packit |
13e616 |
port guid list. Default is limited.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
ipoib_bc_flags:
|
|
Packit |
13e616 |
ipoib_flag|[mgroup_flag]*
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
ipoib_flag:
|
|
Packit |
13e616 |
ipoib - indicates that this partition may be used for
|
|
Packit |
13e616 |
IPoIB, as a result the IPoIB broadcast group will
|
|
Packit |
13e616 |
be created with the mgroup_flag flags given,
|
|
Packit |
13e616 |
if any.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Partition Properties:
|
|
Packit |
13e616 |
[<Port list>|<MCast Group>]* | <Port list>
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Port list:
|
|
Packit |
13e616 |
<Port Specifier>[,<Port Specifier>]
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Port Specifier:
|
|
Packit |
13e616 |
<PortGUID>[=[full|limited|both]]
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
PortGUID - GUID of partition member EndPort.
|
|
Packit |
13e616 |
Hexadecimal numbers should start from
|
|
Packit |
13e616 |
0x, decimal numbers are accepted too.
|
|
Packit |
13e616 |
full, limited, both - indicates full and/or limited membership for
|
|
Packit |
13e616 |
this port. When omitted (or unrecognized)
|
|
Packit |
13e616 |
limited membership is assumed. Both
|
|
Packit |
13e616 |
indicates both full and limited membership
|
|
Packit |
13e616 |
for this port.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
MCast Group:
|
|
Packit |
13e616 |
mgid=gid[,mgroup_flag]*<newline>
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
- gid specified is verified to be a Multicast
|
|
Packit |
13e616 |
address. IP groups are verified to match
|
|
Packit |
13e616 |
the rate and mtu of the broadcast group.
|
|
Packit |
13e616 |
The P_Key bits of the mgid for IP groups are
|
|
Packit |
13e616 |
verified to either match the P_Key specified
|
|
Packit |
13e616 |
by "Partition Definition" or, if they are
|
|
Packit |
13e616 |
0x0000 the P_Key will be copied into those
|
|
Packit |
13e616 |
bits.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
mgroup_flag:
|
|
Packit |
13e616 |
rate=<val> - specifies rate for this MC group
|
|
Packit |
13e616 |
(default is 3 (10 Gbps))
|
|
Packit |
13e616 |
mtu=<val> - specifies MTU for this MC group
|
|
Packit |
13e616 |
(default is 4 (2048))
|
|
Packit |
13e616 |
sl=<val> - specifies SL for this MC group
|
|
Packit |
13e616 |
(default is 0)
|
|
Packit |
13e616 |
scope=<val> - specifies scope for this MC group
|
|
Packit |
13e616 |
(default is 2 (link local)). Multiple scope
|
|
Packit |
13e616 |
settings are permitted for a partition.
|
|
Packit |
13e616 |
NOTE: This overwrites the scope nibble of the
|
|
Packit |
13e616 |
specified mgid. Furthermore specifying
|
|
Packit |
13e616 |
multiple scope settings will result in
|
|
Packit |
13e616 |
multiple MC groups being created.
|
|
Packit |
13e616 |
Q_Key=<val> - specifies the Q_Key for this MC group
|
|
Packit |
13e616 |
(default: 0x0b1b for IP groups, 0 for other
|
|
Packit |
13e616 |
groups)
|
|
Packit |
13e616 |
WARNING: changing this for the broadcast
|
|
Packit |
13e616 |
group may break IPoIB on client
|
|
Packit |
13e616 |
nodes!!
|
|
Packit |
13e616 |
TClass=<val> - specifies tclass for this MC group
|
|
Packit |
13e616 |
(default is 0)
|
|
Packit |
13e616 |
FlowLabel=<val> - specifies FlowLabel for this MC group
|
|
Packit |
13e616 |
(default is 0)
|
|
Packit |
13e616 |
NOTE: All mgroup_flag flags MUST be separated by comma (,).
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Note that values for rate, mtu, and scope, for both partitions and multicast
|
|
Packit |
13e616 |
groups, should be specified as defined in the IBTA specification (for example,
|
|
Packit |
13e616 |
mtu=4 for 2048).
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
There are several useful keywords for PortGUID definition:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
- 'ALL' means all end ports in this subnet.
|
|
Packit |
13e616 |
- 'ALL_CAS' means all Channel Adapter end ports in this subnet.
|
|
Packit |
13e616 |
- 'ALL_SWITCHES' means all Switch end ports in this subnet.
|
|
Packit |
13e616 |
- 'ALL_ROUTERS' means all Router end ports in this subnet.
|
|
Packit |
13e616 |
- 'SELF' means subnet manager's port.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Empty list means no ports in this partition.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Notes:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
White space is permitted between delimiters ('=', ',',':',';').
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
PartitionName does not need to be unique, PKey does need to be unique.
|
|
Packit |
13e616 |
If PKey is repeated then those partition configurations will be merged
|
|
Packit |
13e616 |
and first PartitionName will be used (see also next note).
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
It is possible to split partition configuration in more than one
|
|
Packit |
13e616 |
definition, but then PKey should be explicitly specified (otherwise
|
|
Packit |
13e616 |
different PKey values will be generated for those definitions).
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Examples:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Default=0x7fff : ALL, SELF=full ;
|
|
Packit |
13e616 |
Default=0x7fff : ALL, ALL_SWITCHES=full, SELF=full ;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
NewPartition , ipoib : 0x123456=full, 0x3456789034=limi, 0x2134af2306 ;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
YetAnotherOne = 0x300 : SELF=full ;
|
|
Packit |
13e616 |
YetAnotherOne = 0x300 : ALL=limited ;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
ShareIO = 0x80 , defmember=full : 0x123451, 0x123452;
|
|
Packit |
13e616 |
# 0x123453, 0x123454 will be limited
|
|
Packit |
13e616 |
ShareIO = 0x80 : 0x123453, 0x123454, 0x123455=full;
|
|
Packit |
13e616 |
# 0x123456, 0x123457 will be limited
|
|
Packit |
13e616 |
ShareIO = 0x80 , defmember=limited : 0x123456, 0x123457, 0x123458=full;
|
|
Packit |
13e616 |
ShareIO = 0x80 , defmember=full : 0x123459, 0x12345a;
|
|
Packit |
13e616 |
ShareIO = 0x80 , defmember=full : 0x12345b, 0x12345c=limited, 0x12345d;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
# multicast groups added to default
|
|
Packit |
13e616 |
Default=0x7fff,ipoib:
|
|
Packit |
13e616 |
mgid=ff12:401b::0707,sl=1 # random IPv4 group
|
|
Packit |
13e616 |
mgid=ff12:601b::16 # MLDv2-capable routers
|
|
Packit |
13e616 |
mgid=ff12:401b::16 # IGMP
|
|
Packit |
13e616 |
mgid=ff12:601b::2 # All routers
|
|
Packit |
13e616 |
mgid=ff12::1,sl=1,Q_Key=0xDEADBEEF,rate=3,mtu=2 # random group
|
|
Packit |
13e616 |
ALL=full;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Note:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The following rule is equivalent to how OpenSM used to run prior to the
|
|
Packit |
13e616 |
partition manager:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Default=0x7fff,ipoib:ALL=full;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.SH QOS CONFIGURATION
|
|
Packit |
13e616 |
.PP
|
|
Packit |
13e616 |
There is a set of QoS-related low-level configuration parameters.
|
|
Packit |
13e616 |
All these parameter names are prefixed by "qos_" string. Here is a full
|
|
Packit |
13e616 |
list of these parameters:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
qos_max_vls - The maximum number of VLs that will be on the subnet
|
|
Packit |
13e616 |
qos_high_limit - The limit of High Priority component of VL
|
|
Packit |
13e616 |
Arbitration table (IBA 7.6.9)
|
|
Packit |
13e616 |
qos_vlarb_low - Low priority VL Arbitration table (IBA 7.6.9)
|
|
Packit |
13e616 |
template
|
|
Packit |
13e616 |
qos_vlarb_high - High priority VL Arbitration table (IBA 7.6.9)
|
|
Packit |
13e616 |
template
|
|
Packit |
13e616 |
Both VL arbitration templates are pairs of
|
|
Packit |
13e616 |
VL and weight
|
|
Packit |
13e616 |
qos_sl2vl - SL2VL Mapping table (IBA 7.6.6) template. It is
|
|
Packit |
13e616 |
a list of VLs corresponding to SLs 0-15 (Note
|
|
Packit |
13e616 |
that VL15 used here means drop this SL)
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Typical default values (hard-coded in OpenSM initialization) are:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
qos_max_vls 15
|
|
Packit |
13e616 |
qos_high_limit 0
|
|
Packit |
13e616 |
.nf
|
|
Packit |
13e616 |
qos_vlarb_low 0:0,1:4,2:4,3:4,4:4,5:4,6:4,7:4,8:4,9:4,10:4,11:4,12:4,13:4,14:4
|
|
Packit |
13e616 |
qos_vlarb_high 0:4,1:0,2:0,3:0,4:0,5:0,6:0,7:0,8:0,9:0,10:0,11:0,12:0,13:0,14:0
|
|
Packit |
13e616 |
.fi
|
|
Packit |
13e616 |
qos_sl2vl 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The syntax is compatible with rest of OpenSM configuration options and
|
|
Packit |
13e616 |
values may be stored in OpenSM config file (cached options file).
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
In addition to the above, we may define separate QoS configuration
|
|
Packit |
13e616 |
parameters sets for various target types. As targets, we currently support
|
|
Packit |
13e616 |
CAs, routers, switch external ports, and switch's enhanced port 0. The
|
|
Packit |
13e616 |
names of such specialized parameters are prefixed by "qos_<type>_"
|
|
Packit |
13e616 |
string. Here is a full list of the currently supported sets:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
qos_ca_ - QoS configuration parameters set for CAs.
|
|
Packit |
13e616 |
qos_rtr_ - parameters set for routers.
|
|
Packit |
13e616 |
qos_sw0_ - parameters set for switches' port 0.
|
|
Packit |
13e616 |
qos_swe_ - parameters set for switches' external ports.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Examples:
|
|
Packit |
13e616 |
qos_sw0_max_vls=2
|
|
Packit |
13e616 |
qos_ca_sl2vl=0,1,2,3,5,5,5,12,12,0,
|
|
Packit |
13e616 |
qos_swe_high_limit=0
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.SH PREFIX ROUTES
|
|
Packit |
13e616 |
.PP
|
|
Packit |
13e616 |
Prefix routes control how the SA responds to path record queries for
|
|
Packit |
13e616 |
off-subnet DGIDs. By default, the SA fails such queries.
|
|
Packit |
13e616 |
Note that IBA does not specify how the SA should obtain off-subnet path
|
|
Packit |
13e616 |
record information.
|
|
Packit |
13e616 |
The prefix routes configuration is meant as a stop-gap until the
|
|
Packit |
13e616 |
specification is completed.
|
|
Packit |
13e616 |
.PP
|
|
Packit |
13e616 |
Each line in the configuration file is a 64-bit prefix followed by a
|
|
Packit |
13e616 |
64-bit GUID, separated by white space.
|
|
Packit |
13e616 |
The GUID specifies the router port on the local subnet that will
|
|
Packit |
13e616 |
handle the prefix.
|
|
Packit |
13e616 |
Blank lines are ignored, as is anything between a \fB#\fP character
|
|
Packit |
13e616 |
and the end of the line.
|
|
Packit |
13e616 |
The prefix and GUID are both in hex, the leading 0x is optional.
|
|
Packit |
13e616 |
Either, or both, can be wild-carded by specifying an
|
|
Packit |
13e616 |
asterisk instead of an explicit prefix or GUID.
|
|
Packit |
13e616 |
.PP
|
|
Packit |
13e616 |
When responding to a path record query for an off-subnet DGID,
|
|
Packit |
13e616 |
opensm searches for the first prefix match in the configuration file.
|
|
Packit |
13e616 |
Therefore, the order of the lines in the configuration file is important:
|
|
Packit |
13e616 |
a wild-carded prefix at the beginning of the configuration file renders
|
|
Packit |
13e616 |
all subsequent lines useless.
|
|
Packit |
13e616 |
If there is no match, then opensm fails the query.
|
|
Packit |
13e616 |
It is legal to repeat prefixes in the configuration file,
|
|
Packit |
13e616 |
opensm will return the path to the first available matching router.
|
|
Packit |
13e616 |
A configuration file with a single line where both prefix and GUID
|
|
Packit |
13e616 |
are wild-carded means that a path record query specifying any
|
|
Packit |
13e616 |
off-subnet DGID should return a path to the first available router.
|
|
Packit |
13e616 |
This configuration yields the same behavior formerly achieved by
|
|
Packit |
13e616 |
compiling opensm with -DROUTER_EXP which has been obsoleted.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.SH MKEY CONFIGURATION
|
|
Packit |
13e616 |
.PP
|
|
Packit |
13e616 |
OpenSM supports configuring a single management key (MKey) for use across
|
|
Packit |
13e616 |
the subnet.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The following configuration options are available:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
m_key - the 64-bit MKey to be used on the subnet
|
|
Packit |
13e616 |
(IBA 14.2.4)
|
|
Packit |
13e616 |
m_key_protection_level - the numeric value of the MKey ProtectBits
|
|
Packit |
13e616 |
(IBA 14.2.4.1)
|
|
Packit |
13e616 |
m_key_lease_period - the number of seconds a CA will wait for a
|
|
Packit |
13e616 |
response from the SM before resetting the
|
|
Packit |
13e616 |
protection level to 0 (IBA 14.2.4.2).
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OpenSM will configure all ports with the MKey specified by m_key, defaulting
|
|
Packit |
13e616 |
to a value of 0. A m_key value of 0 disables MKey protection on the subnet.
|
|
Packit |
13e616 |
Switches and HCAs with a non-zero MKey will not accept requests to change
|
|
Packit |
13e616 |
their configuration unless the request includes the proper MKey.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
MKey Protection Levels
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
MKey protection levels modify how switches and CAs respond to SMPs lacking
|
|
Packit |
13e616 |
a valid MKey.
|
|
Packit |
13e616 |
OpenSM will configure each port's ProtectBits to support the level defined by
|
|
Packit |
13e616 |
the m_key_protection_level parameter. If no parameter is specified, OpenSM
|
|
Packit |
13e616 |
defaults to operating at protection level 0.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
There are currently 4 protection levels defined by the IBA:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
0 - Queries return valid data, including MKey. Configuration changes
|
|
Packit |
13e616 |
are not allowed unless the request contains a valid MKey.
|
|
Packit |
13e616 |
1 - Like level 0, but the MKey is set to 0 (0x00000000) in queries,
|
|
Packit |
13e616 |
unless the request contains a valid MKey.
|
|
Packit |
13e616 |
2 - Neither queries nor configuration changes are allowed, unless the
|
|
Packit |
13e616 |
request contains a valid MKey.
|
|
Packit |
13e616 |
3 - Identical to 2. Maintained for backwards compatibility.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
MKey Lease Period
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
InfiniBand supports a MKey lease timeout, which is intended to allow
|
|
Packit |
13e616 |
administrators or a new SM to recover/reset lost MKeys on a fabric.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
If MKeys are enabled on the subnet and a switch or CA receives a request that
|
|
Packit |
13e616 |
requires a valid MKey but does not contain one, it warns the SM by sending a trap
|
|
Packit |
13e616 |
(Bad M_Key, Trap 256). If the MKey lease period is non-zero, it also starts a
|
|
Packit |
13e616 |
countdown timer for the time specified by the lease period.
|
|
Packit |
13e616 |
If a SM (or other agent) responds with the correct MKey, the timer is stopped
|
|
Packit |
13e616 |
and reset. Should the timer reach zero, the switch or CA will reset its MKey
|
|
Packit |
13e616 |
protection level to 0, exposing the MKey and allowing recovery.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OpenSM will initialize all ports to use a mkey lease period of the number of
|
|
Packit |
13e616 |
seconds specified in the config file. If no m_key_lease_period is specified,
|
|
Packit |
13e616 |
a default of 0 will be used.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OpenSM normally quickly responds to all Bad_M_Key traps, resetting the lease
|
|
Packit |
13e616 |
timers. Additionally, OpenSM's subnet sweeps will also cancel
|
|
Packit |
13e616 |
any running timers. For maximum protection against accidentally-exposed MKeys,
|
|
Packit |
13e616 |
the MKey lease time should be a few multiples of the subnet sweep time.
|
|
Packit |
13e616 |
If OpenSM detects at startup that your sweep interval is greater than your
|
|
Packit |
13e616 |
MKey lease period, it will reset the lease period to be greater than the
|
|
Packit |
13e616 |
sweep interval. Similarly, if sweeping is disabled at startup, it will be
|
|
Packit |
13e616 |
re-enabled with an interval less than the MKey lease period.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
If OpenSM is required to recover a subnet for which it is missing mkeys,
|
|
Packit |
13e616 |
it must do so one switch level at a time. As such, the total time to
|
|
Packit |
13e616 |
recover the subnet may be as long as the mkey lease period multiplied by
|
|
Packit |
13e616 |
the maximum number of hops between the SM and an endpoint, plus one.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
MKey Effects on Diagnostic Utilities
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Setting a MKey may have a detrimental effect on diagnostic software run on
|
|
Packit |
13e616 |
the subnet, unless your diagnostic software is able to retrieve MKeys from the
|
|
Packit |
13e616 |
SA or can be explicitly configured with the proper MKey. This is particularly
|
|
Packit |
13e616 |
true at protection level 2, where CAs will ignore queries for management
|
|
Packit |
13e616 |
information that do not contain the proper MKey.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.SH ROUTING
|
|
Packit |
13e616 |
.PP
|
|
Packit |
13e616 |
OpenSM now offers ten routing engines:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
1. Min Hop Algorithm - based on the minimum hops to each node where the
|
|
Packit |
13e616 |
path length is optimized.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
2. UPDN Unicast routing algorithm - also based on the minimum hops to each
|
|
Packit |
13e616 |
node, but it is constrained to ranking rules. This algorithm should be chosen
|
|
Packit |
13e616 |
if the subnet is not a pure Fat Tree, and deadlock may occur due to a
|
|
Packit |
13e616 |
loop in the subnet.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
3. DNUP Unicast routing algorithm - similar to UPDN but allows routing in
|
|
Packit |
13e616 |
fabrics which have some CA nodes attached closer to the roots than some switch
|
|
Packit |
13e616 |
nodes.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
4. Fat Tree Unicast routing algorithm - this algorithm optimizes routing
|
|
Packit |
13e616 |
for congestion-free "shift" communication pattern.
|
|
Packit |
13e616 |
It should be chosen if a subnet is a symmetrical or almost symmetrical
|
|
Packit |
13e616 |
fat-tree of various types, not just K-ary-N-Trees: non-constant K, not
|
|
Packit |
13e616 |
fully staffed, any Constant Bisectional Bandwidth (CBB) ratio.
|
|
Packit |
13e616 |
Similar to UPDN, Fat Tree routing is constrained to ranking rules.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
5. LASH unicast routing algorithm - uses InfiniBand virtual layers
|
|
Packit |
13e616 |
(SL) to provide deadlock-free shortest-path routing while also
|
|
Packit |
13e616 |
distributing the paths between layers. LASH is an alternative
|
|
Packit |
13e616 |
deadlock-free topology-agnostic routing algorithm to the non-minimal
|
|
Packit |
13e616 |
UPDN algorithm avoiding the use of a potentially congested root node.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
6. DOR Unicast routing algorithm - based on the Min Hop algorithm, but
|
|
Packit |
13e616 |
avoids port equalization except for redundant links between the same
|
|
Packit |
13e616 |
two switches. This provides deadlock free routes for hypercubes when
|
|
Packit |
13e616 |
the fabric is cabled as a hypercube and for meshes when cabled as a
|
|
Packit |
13e616 |
mesh (see details below).
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
7. Torus-2QoS unicast routing algorithm - a DOR-based routing algorithm
|
|
Packit |
13e616 |
specialized for 2D/3D torus topologies. Torus-2QoS provides deadlock-free
|
|
Packit |
13e616 |
routing while supporting two quality of service (QoS) levels. In addition
|
|
Packit |
13e616 |
it is able to route around multiple failed fabric links or a single failed
|
|
Packit |
13e616 |
fabric switch without introducing deadlocks, and without changing path SL
|
|
Packit |
13e616 |
values granted before the failure.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
8. DFSSSP unicast routing algorithm - a deadlock-free
|
|
Packit |
13e616 |
single-source-shortest-path routing, which uses the SSSP algorithm
|
|
Packit |
13e616 |
(see algorithm 9.) as the base to optimize link utilization and uses
|
|
Packit |
13e616 |
InfiniBand virtual lanes (SL) to provide deadlock-freedom.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
9. SSSP unicast routing algorithm - a single-source-shortest-path routing
|
|
Packit |
13e616 |
algorithm, which globally balances the number of routes per link to
|
|
Packit |
13e616 |
optimize link utilization. This routing algorithm has no restrictions
|
|
Packit |
13e616 |
in terms of the underlying topology.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
10. Nue unicast routing algorithm - a 100%-applicable and deadlock-free routing
|
|
Packit |
13e616 |
which can be used for any arbitrary or faulty network topology and any number
|
|
Packit |
13e616 |
of virtual lanes (this includes the absence of VLs as well). Paths are globally
|
|
Packit |
13e616 |
balanced w.r.t. the number of routes per link, and are kept as short as possible
|
|
Packit |
13e616 |
while enforcing deadlock-freedom within the VL constraint.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OpenSM also supports a file method which
|
|
Packit |
13e616 |
can load routes from a table. See \'Modular Routing Engine\' for more
|
|
Packit |
13e616 |
information on this.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The basic routing algorithm is comprised of two stages:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
1. MinHop matrix calculation
|
|
Packit |
13e616 |
How many hops are required to get from each port to each LID?
|
|
Packit |
13e616 |
The algorithm to fill these tables is different if you run standard
|
|
Packit |
13e616 |
(min hop) or Up/Down.
|
|
Packit |
13e616 |
For standard routing, a "relaxation" algorithm is used to propagate
|
|
Packit |
13e616 |
min hop from every destination LID through neighbor switches.
|
|
Packit |
13e616 |
For Up/Down routing, a BFS from every target is used. The BFS tracks link
|
|
Packit |
13e616 |
direction (up or down) and avoids steps that will perform up after a down
|
|
Packit |
13e616 |
step was used.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
2. Once MinHop matrices exist, each switch is visited and for each target LID a
|
|
Packit |
13e616 |
decision is made as to what port should be used to get to that LID.
|
|
Packit |
13e616 |
This step is common to standard and Up/Down routing. Each port has a
|
|
Packit |
13e616 |
counter counting the number of target LIDs going through it.
|
|
Packit |
13e616 |
When there are multiple alternative ports with same MinHop to a LID,
|
|
Packit |
13e616 |
the one with fewer previously assigned LIDs is selected.
|
|
Packit |
13e616 |
If LMC > 0, more checks are added: Within each group of LIDs assigned to
|
|
Packit |
13e616 |
same target port,
|
|
Packit |
13e616 |
a. use only ports which have same MinHop
|
|
Packit |
13e616 |
b. first prefer the ones that go to different systemImageGuid (than
|
|
Packit |
13e616 |
the previous LID of the same LMC group)
|
|
Packit |
13e616 |
c. if none - prefer those which go through another NodeGuid
|
|
Packit |
13e616 |
d. fall back to the number of paths method (if all go to same node).
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Effect of Topology Changes
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OpenSM will preserve existing routing in any case where there is no change in
|
|
Packit |
13e616 |
the fabric switches unless the -r (--reassign_lids) option is specified.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
-r
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
--reassign_lids
|
|
Packit |
13e616 |
This option causes OpenSM to reassign LIDs to all
|
|
Packit |
13e616 |
end nodes. Specifying -r on a running subnet
|
|
Packit |
13e616 |
may disrupt subnet traffic.
|
|
Packit |
13e616 |
Without -r, OpenSM attempts to preserve existing
|
|
Packit |
13e616 |
LID assignments resolving multiple use of same LID.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
If a link is added or removed, OpenSM does not recalculate
|
|
Packit |
13e616 |
the routes that do not have to change. A route has to change
|
|
Packit |
13e616 |
if the port is no longer UP or no longer the MinHop. When routing changes
|
|
Packit |
13e616 |
are performed, the same algorithm for balancing the routes is invoked.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
In the case of using the file based routing, any topology changes are
|
|
Packit |
13e616 |
currently ignored. The 'file' routing engine just loads the LFTs from the file
|
|
Packit |
13e616 |
specified, with no reaction to real topology. Obviously, this will not be able
|
|
Packit |
13e616 |
to recheck LIDs (by GUID) for disconnected nodes, and LFTs for non-existent
|
|
Packit |
13e616 |
switches will be skipped. Multicast is not affected by 'file' routing engine
|
|
Packit |
13e616 |
(this uses min hop tables).
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Min Hop Algorithm
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The Min Hop algorithm is invoked by default if no routing algorithm is
|
|
Packit |
13e616 |
specified. It can also be invoked by specifying '-R minhop'.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The Min Hop algorithm is divided into two stages: computation of
|
|
Packit |
13e616 |
min-hop tables on every switch and LFT output port assignment. Link
|
|
Packit |
13e616 |
subscription is also equalized with the ability to override based on
|
|
Packit |
13e616 |
port GUID. The latter is supplied by:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
-i <equalize-ignore-guids-file>
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
\-\-ignore_guids <equalize-ignore-guids-file>
|
|
Packit |
13e616 |
This option provides the means to define a set of ports
|
|
Packit |
13e616 |
(by guid) that will be ignored by the link load
|
|
Packit |
13e616 |
equalization algorithm. Note that only endports (CA,
|
|
Packit |
13e616 |
switch port 0, and router ports) and not switch external
|
|
Packit |
13e616 |
ports are supported.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
LMC awareness routes based on (remote) system or switch basis.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Purpose of UPDN Algorithm
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The UPDN algorithm is designed to prevent deadlocks from occurring in loops
|
|
Packit |
13e616 |
of the subnet. A loop-deadlock is a situation in which it is no longer
|
|
Packit |
13e616 |
possible to send data between any two hosts connected through the loop. As
|
|
Packit |
13e616 |
such, the UPDN routing algorithm should be used if the subnet is not a pure
|
|
Packit |
13e616 |
Fat Tree, and one of its loops may experience a deadlock (due, for example,
|
|
Packit |
13e616 |
to high pressure).
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The UPDN algorithm is based on the following main stages:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
1. Auto-detect root nodes - based on the CA hop length from any switch in
|
|
Packit |
13e616 |
the subnet, a statistical histogram is built for each switch (hop num vs
|
|
Packit |
13e616 |
number of occurrences). If the histogram reflects a specific column (higher
|
|
Packit |
13e616 |
than others) for a certain node, then it is marked as a root node. Since
|
|
Packit |
13e616 |
the algorithm is statistical, it may not find any root nodes. The list of
|
|
Packit |
13e616 |
the root nodes found by this auto-detect stage is used by the ranking
|
|
Packit |
13e616 |
process stage.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Note 1: The user can override the node list manually.
|
|
Packit |
13e616 |
Note 2: If this stage cannot find any root nodes, and the user did
|
|
Packit |
13e616 |
not specify a guid list file, OpenSM defaults back to the
|
|
Packit |
13e616 |
Min Hop routing algorithm.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
2. Ranking process - All root switch nodes (found in stage 1) are assigned
|
|
Packit |
13e616 |
a rank of 0. Using the BFS algorithm, the rest of the switch nodes in the
|
|
Packit |
13e616 |
subnet are ranked incrementally. This ranking aids in the process of enforcing
|
|
Packit |
13e616 |
rules that ensure loop-free paths.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
3. Min Hop Table setting - after ranking is done, a BFS algorithm is run from
|
|
Packit |
13e616 |
each (CA or switch) node in the subnet. During the BFS process, the FDB table
|
|
Packit |
13e616 |
of each switch node traversed by BFS is updated, in reference to the starting
|
|
Packit |
13e616 |
node, based on the ranking rules and guid values.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
At the end of the process, the updated FDB tables ensure loop-free paths
|
|
Packit |
13e616 |
through the subnet.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Note: Up/Down routing does not allow LID routing communication between
|
|
Packit |
13e616 |
switches that are located inside spine "switch systems".
|
|
Packit |
13e616 |
The reason is that there is no way to allow a LID route between them
|
|
Packit |
13e616 |
that does not break the Up/Down rule.
|
|
Packit |
13e616 |
One ramification of this is that you cannot run SM on switches other
|
|
Packit |
13e616 |
than the leaf switches of the fabric.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
UPDN Algorithm Usage
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Activation through OpenSM
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Use '-R updn' option (instead of old '-u') to activate the UPDN algorithm.
|
|
Packit |
13e616 |
Use '-a <root_guid_file>' for adding an UPDN guid file that contains the
|
|
Packit |
13e616 |
root nodes for ranking.
|
|
Packit |
13e616 |
If the `-a' option is not used, OpenSM uses its auto-detect root nodes
|
|
Packit |
13e616 |
algorithm.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Notes on the guid list file:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
1. A valid guid file specifies one guid in each line. Lines with an invalid
|
|
Packit |
13e616 |
format will be discarded.
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
2. The user should specify the root switch guids. However, it is also
|
|
Packit |
13e616 |
possible to specify CA guids; OpenSM will use the guid of the switch (if
|
|
Packit |
13e616 |
it exists) that connects the CA to the subnet as a root node.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Purpose of DNUP Algorithm
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The DNUP algorithm is designed to serve a similar purpose to UPDN. However
|
|
Packit |
13e616 |
it is intended to work in network topologies which are unsuited to
|
|
Packit |
13e616 |
UPDN due to nodes being connected closer to the roots than some of
|
|
Packit |
13e616 |
the switches. An example would be a fabric which contains nodes and
|
|
Packit |
13e616 |
uplinks connected to the same switch. The operation of DNUP is the
|
|
Packit |
13e616 |
same as UPDN with the exception of the ranking process. In DNUP all
|
|
Packit |
13e616 |
switch nodes are ranked based solely on their distance from CA Nodes,
|
|
Packit |
13e616 |
all switch nodes directly connected to at least one CA are assigned a
|
|
Packit |
13e616 |
value of 1; all other switch nodes are assigned a value of one more than
|
|
Packit |
13e616 |
the minimum rank of all neighbor switch nodes.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Fat-tree Routing Algorithm
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The fat-tree algorithm optimizes routing for "shift" communication pattern.
|
|
Packit |
13e616 |
It should be chosen if a subnet is a symmetrical or almost symmetrical
|
|
Packit |
13e616 |
fat-tree of various types.
|
|
Packit |
13e616 |
It supports not just K-ary-N-Trees, but also handles non-constant K,
|
|
Packit |
13e616 |
cases where not all leafs (CAs) are present, any CBB ratio.
|
|
Packit |
13e616 |
As in UPDN, fat-tree also prevents credit-loop-deadlocks.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
If the root guid file is not provided ('-a' or '--root_guid_file' options),
|
|
Packit |
13e616 |
the topology has to be pure fat-tree that complies with the following rules:
|
|
Packit |
13e616 |
- Tree rank should be between two and eight (inclusively)
|
|
Packit |
13e616 |
- Switches of the same rank should have the same number
|
|
Packit |
13e616 |
of UP-going port groups*, unless they are root switches,
|
|
Packit |
13e616 |
in which case they shouldn't have UP-going ports at all.
|
|
Packit |
13e616 |
- Switches of the same rank should have the same number
|
|
Packit |
13e616 |
of DOWN-going port groups, unless they are leaf switches.
|
|
Packit |
13e616 |
- Switches of the same rank should have the same number
|
|
Packit |
13e616 |
of ports in each UP-going port group.
|
|
Packit |
13e616 |
- Switches of the same rank should have the same number
|
|
Packit |
13e616 |
of ports in each DOWN-going port group.
|
|
Packit |
13e616 |
- All the CAs have to be at the same tree level (rank).
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
If the root guid file is provided, the topology doesn't have to be pure
|
|
Packit |
13e616 |
fat-tree, and it should only comply with the following rules:
|
|
Packit |
13e616 |
- Tree rank should be between two and eight (inclusively)
|
|
Packit |
13e616 |
- All the Compute Nodes** have to be at the same tree level (rank).
|
|
Packit |
13e616 |
Note that non-compute node CAs are allowed here to be at different
|
|
Packit |
13e616 |
tree ranks.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
* ports that are connected to the same remote switch are referenced as
|
|
Packit |
13e616 |
\'port group\'.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
** list of compute nodes (CNs) can be specified by \'-u\' or \'--cn_guid_file\'
|
|
Packit |
13e616 |
OpenSM options.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Topologies that do not comply cause a fallback to min hop routing.
|
|
Packit |
13e616 |
Note that this can also occur on link failures which cause the topology
|
|
Packit |
13e616 |
to no longer be "pure" fat-tree.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Note that although fat-tree algorithm supports trees with non-integer CBB
|
|
Packit |
13e616 |
ratio, the routing will not be as balanced as in case of integer CBB ratio.
|
|
Packit |
13e616 |
In addition to this, although the algorithm allows leaf switches to have any
|
|
Packit |
13e616 |
number of CAs, the closer the tree is to be fully populated, the more
|
|
Packit |
13e616 |
effective the "shift" communication pattern will be.
|
|
Packit |
13e616 |
In general, even if the root list is provided, the closer the topology to a
|
|
Packit |
13e616 |
pure and symmetrical fat-tree, the more optimal the routing will be.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The algorithm also dumps a compute node ordering file (opensm-ftree-ca-order.dump)
|
|
Packit |
13e616 |
in the same directory where the OpenSM log resides. This ordering file provides
|
|
Packit |
13e616 |
the CN order that may be used to create an efficient communication pattern that
|
|
Packit |
13e616 |
will match the routing tables.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Routing between non-CN nodes
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The use of the cn_guid_file option allows non-CN nodes to be located on different levels in the fat tree.
|
|
Packit |
13e616 |
In such case, it is not guaranteed that the Fat Tree algorithm will route between two non-CN nodes.
|
|
Packit |
13e616 |
To solve this problem, a list of non-CN nodes can be specified by \'-G\' or \'--io_guid_file\' option.
|
|
Packit |
13e616 |
These nodes will be allowed to use switches the wrong way round a specific number of times (specified by \'-H\' or \'--max_reverse_hops\').
|
|
Packit |
13e616 |
With the proper max_reverse_hops and io_guid_file values, you can ensure full connectivity in the Fat Tree.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Please note that using max_reverse_hops creates routes that use the switch in a counter-stream way.
|
|
Packit |
13e616 |
This option should never be used to connect nodes with high bandwidth traffic between them ! It should only be used
|
|
Packit |
13e616 |
to allow connectivity for HA purposes or similar.
|
|
Packit |
13e616 |
Also having routes the other way around can in theory cause credit loops.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Use these options with extreme care !
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Activation through OpenSM
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Use '-R ftree' option to activate the fat-tree algorithm.
|
|
Packit |
13e616 |
Use '-a <root_guid_file>' to provide root nodes for ranking. If the `-a' option
|
|
Packit |
13e616 |
is not used, routing algorithm will detect roots automatically.
|
|
Packit |
13e616 |
Use '-u <root_cn_file>' to provide the list of compute nodes. If the `-u' option
|
|
Packit |
13e616 |
is not used, all the CAs are considered as compute nodes.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Note: LMC > 0 is not supported by fat-tree routing. If this is
|
|
Packit |
13e616 |
specified, the default routing algorithm is invoked instead.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
LASH Routing Algorithm
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
LASH is an acronym for LAyered SHortest Path Routing. It is a
|
|
Packit |
13e616 |
deterministic shortest path routing algorithm that enables topology
|
|
Packit |
13e616 |
agnostic deadlock-free routing within communication networks.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
When computing the routing function, LASH analyzes the network
|
|
Packit |
13e616 |
topology for the shortest-path routes between all pairs of sources /
|
|
Packit |
13e616 |
destinations and groups these paths into virtual layers in such a way
|
|
Packit |
13e616 |
as to avoid deadlock.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Note LASH analyzes routes and ensures deadlock freedom between switch
|
|
Packit |
13e616 |
pairs. The link between an HCA and a switch does not need virtual
|
|
Packit |
13e616 |
layers as deadlock will not arise between switch and HCA.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
In more detail, the algorithm works as follows:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
1) LASH determines the shortest-path between all pairs of source /
|
|
Packit |
13e616 |
destination switches. Note, LASH ensures the same SL is used for all
|
|
Packit |
13e616 |
SRC/DST - DST/SRC pairs and there is no guarantee that the return
|
|
Packit |
13e616 |
path for a given DST/SRC will be the reverse of the route SRC/DST.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
2) LASH then begins an SL assignment process where a route is assigned
|
|
Packit |
13e616 |
to a layer (SL) if the addition of that route does not cause deadlock
|
|
Packit |
13e616 |
within that layer. This is achieved by maintaining and analysing a
|
|
Packit |
13e616 |
channel dependency graph for each layer. Once the potential addition
|
|
Packit |
13e616 |
of a path could lead to deadlock, LASH opens a new layer and continues
|
|
Packit |
13e616 |
the process.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
3) Once this stage has been completed, it is highly likely that the
|
|
Packit |
13e616 |
first layers processed will contain more paths than the latter ones.
|
|
Packit |
13e616 |
To better balance the use of layers, LASH moves paths from one layer
|
|
Packit |
13e616 |
to another so that the number of paths in each layer averages out.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Note, the implementation of LASH in opensm attempts to use as few layers
|
|
Packit |
13e616 |
as possible. This number can be less than the number of actual layers
|
|
Packit |
13e616 |
available.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
In general LASH is a very flexible algorithm. It can, for example,
|
|
Packit |
13e616 |
reduce to Dimension Order Routing in certain topologies, it is topology
|
|
Packit |
13e616 |
agnostic and fares well in the face of faults.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
It has been shown that for both regular and irregular topologies, LASH
|
|
Packit |
13e616 |
outperforms Up/Down. The reason for this is that LASH distributes the
|
|
Packit |
13e616 |
traffic more evenly through a network, avoiding the bottleneck issues
|
|
Packit |
13e616 |
related to a root node and always routes shortest-path.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The algorithm was developed by Simula Research Laboratory.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Use '-R lash -Q' option to activate the LASH algorithm.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Note: QoS support has to be turned on in order that SL/VL mappings are
|
|
Packit |
13e616 |
used.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Note: LMC > 0 is not supported by the LASH routing. If this is
|
|
Packit |
13e616 |
specified, the default routing algorithm is invoked instead.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
For open regular cartesian meshes the DOR algorithm is the ideal
|
|
Packit |
13e616 |
routing algorithm. For toroidal meshes on the other hand there
|
|
Packit |
13e616 |
are routing loops that can cause deadlocks. LASH can be used to
|
|
Packit |
13e616 |
route these cases. The performance of LASH can be improved by
|
|
Packit |
13e616 |
preconditioning the mesh in cases where there are multiple links
|
|
Packit |
13e616 |
connecting switches and also in cases where the switches are not
|
|
Packit |
13e616 |
cabled consistently. An option exists for LASH to do this. To
|
|
Packit |
13e616 |
invoke this use '-R lash -Q --do_mesh_analysis'. This will
|
|
Packit |
13e616 |
add an additional phase that analyses the mesh to try to determine
|
|
Packit |
13e616 |
the dimension and size of a mesh. If it determines that the mesh
|
|
Packit |
13e616 |
looks like an open or closed cartesian mesh it reorders the ports
|
|
Packit |
13e616 |
in dimension order before the rest of the LASH algorithm runs.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
DOR Routing Algorithm
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The Dimension Order Routing algorithm is based on the Min Hop
|
|
Packit |
13e616 |
algorithm and so uses shortest paths. Instead of spreading traffic
|
|
Packit |
13e616 |
out across different paths with the same shortest distance, it chooses
|
|
Packit |
13e616 |
among the available shortest paths based on an ordering of dimensions.
|
|
Packit |
13e616 |
Each port must be consistently cabled to represent a hypercube
|
|
Packit |
13e616 |
dimension or a mesh dimension. Alternatively, the -O option can be
|
|
Packit |
13e616 |
used to assign a custom mapping between the ports on a given switch,
|
|
Packit |
13e616 |
and the associated dimension. Paths are grown from a destination back
|
|
Packit |
13e616 |
to a source using the lowest dimension (port) of available paths at
|
|
Packit |
13e616 |
each step. This provides the ordering necessary to avoid deadlock.
|
|
Packit |
13e616 |
When there are multiple links between any two switches, they still
|
|
Packit |
13e616 |
represent only one dimension and traffic is balanced across them
|
|
Packit |
13e616 |
unless port equalization is turned off. In the case of hypercubes,
|
|
Packit |
13e616 |
the same port must be used throughout the fabric to represent the
|
|
Packit |
13e616 |
hypercube dimension and match on both ends of the cable, or the -O
|
|
Packit |
13e616 |
option used to accomplish the alignment. In the case of meshes, the
|
|
Packit |
13e616 |
dimension should consistently use the same pair of ports, one port on
|
|
Packit |
13e616 |
one end of the cable, and the other port on the other end, continuing
|
|
Packit |
13e616 |
along the mesh dimension, or the -O option used as an override.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Use '-R dor' option to activate the DOR algorithm.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
DFSSSP and SSSP Routing Algorithm
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The (Deadlock-Free) Single-Source-Shortest-Path routing algorithm is
|
|
Packit |
13e616 |
designed to optimize link utilization thru global balancing of routes,
|
|
Packit |
13e616 |
while supporting arbitrary topologies. The DFSSSP routing algorithm
|
|
Packit |
13e616 |
uses InfiniBand virtual lanes (SL) to provide deadlock-freedom.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The DFSSSP algorithm consists of five major steps:
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
1) It discovers the subnet and models the subnet as a directed
|
|
Packit |
13e616 |
multigraph in which each node represents a node of the physical
|
|
Packit |
13e616 |
network and each edge represents one direction of the full-duplex
|
|
Packit |
13e616 |
links used to connect the nodes.
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
2) A loop, which iterates over all CA and switches of the subnet, will
|
|
Packit |
13e616 |
perform three steps to generate the linear forwarding tables for
|
|
Packit |
13e616 |
each switch:
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
2.1) use Dijkstra's algorithm to find the shortest path from all
|
|
Packit |
13e616 |
nodes to the current selected destination;
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
2.2) update the edge weights in the graph, i.e. add the number of
|
|
Packit |
13e616 |
routes, which use a link to reach the destination,
|
|
Packit |
13e616 |
to the link/edge;
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
2.3) update the LFT of each switch with the outgoing port which was
|
|
Packit |
13e616 |
used in the current step to route the traffic to the
|
|
Packit |
13e616 |
destination node.
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
3) After the number of available virtual lanes or layers in the subnet
|
|
Packit |
13e616 |
is detected and a channel dependency graph is initialized for each
|
|
Packit |
13e616 |
layer, the algorithm will put each possible route of the subnet into
|
|
Packit |
13e616 |
the first layer.
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
4) A loop iterates over all channel dependency graphs (CDG) and performs
|
|
Packit |
13e616 |
the following substeps:
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
4.1) search for a cycle in the current CDG;
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
4.2) when a cycle is found, i.e. a possible deadlock is present,
|
|
Packit |
13e616 |
one edge is selected and all routes, which induced this edge,
|
|
Packit |
13e616 |
are moved to the "next higher" virtual layer (CDG[i+1]);
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
4.3) the cycle search is continued until all cycles are broken and
|
|
Packit |
13e616 |
routes are moved "up".
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
5) When the number of needed layers does not exceed the number of
|
|
Packit |
13e616 |
available SL/VL to remove all cycles in all CDGs, the routing is
|
|
Packit |
13e616 |
deadlock-free and a relation table is generated, which contains
|
|
Packit |
13e616 |
the assignment of routes from source to destination to an SL.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Note on SSSP:
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
This algorithm does not perform the steps 3)-5) and can not be
|
|
Packit |
13e616 |
considered to be deadlock-free for all topologies. But on the one
|
|
Packit |
13e616 |
hand, you can choose this algorithm for really large networks
|
|
Packit |
13e616 |
(5,000+ CAs and deadlock-free by design) to reduce
|
|
Packit |
13e616 |
the runtime of the algorithm. On the other hand, you might use
|
|
Packit |
13e616 |
the SSSP routing algorithm as an alternative, when all deadlock-free
|
|
Packit |
13e616 |
routing algorithms fail to route the network for whatever reason.
|
|
Packit |
13e616 |
In the last case, SSSP was designed to deliver an equal or higher
|
|
Packit |
13e616 |
bandwidth due to better congestion avoidance than the Min Hop
|
|
Packit |
13e616 |
routing algorithm.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Notes for usage:
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
a) running DFSSSP: '-R dfsssp -Q'
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
a.1) QoS has to be configured to equally spread the load on the
|
|
Packit |
13e616 |
available SL or virtual lanes
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
a.2) applications must perform a path record query to get path SL for
|
|
Packit |
13e616 |
each route, which the application will use to transmit packets
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
b) running SSSP: '-R sssp'
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
c) both algorithms support LMC > 0
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Hints for optimizing I/O traffic:
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
Having more nodes (I/O and compute) connected to a switch than incoming links
|
|
Packit |
13e616 |
can result in a 'bad' routing of the I/O traffic as long as (DF)SSSP routing
|
|
Packit |
13e616 |
is not aware of the dedicated I/O nodes, i.e., in the following network
|
|
Packit |
13e616 |
configuration CN1-CN3 might send all I/O traffic via Link2 to IO1,IO2:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
CN1 Link1 IO1
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
\\ /----\\ /
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
CN2 -- Switch1 Switch2 -- CN4
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
/ \\----/ \\
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
CN3 Link2 IO2
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
To prevent this from happening (DF)SSSP can use both the compute node guid
|
|
Packit |
13e616 |
file and the I/O guid file specified by the \'-u\' or \'--cn_guid_file\' and
|
|
Packit |
13e616 |
\'-G\' or \'--io_guid_file\' options (similar to the Fat-Tree routing).
|
|
Packit |
13e616 |
This ensures that traffic towards compute nodes and I/O nodes is balanced
|
|
Packit |
13e616 |
separately and therefore distributed as much as possible across the available
|
|
Packit |
13e616 |
links. Port GUIDs, as listed by ibstat, must be specified (not Node GUIDs).
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
The priority for the optimization is as follows:
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
compute nodes -> I/O nodes -> other nodes
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
Possible use case scenarios:
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
a) neither \'-u\' nor \'-G\' are specified: all nodes are treated as \'other nodes\'
|
|
Packit |
13e616 |
and therefore balanced equally;
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
b) \'-G\' is specified: traffic towards I/O nodes will be balanced optimally;
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
c) the system has three node types, such as login/admin, compute and I/O,
|
|
Packit |
13e616 |
but the balancing focus should be I/O, then one has to use \'-u\' and \'-G\'
|
|
Packit |
13e616 |
with I/O guids listed in cn_guid_file and compute node guids listed in
|
|
Packit |
13e616 |
io_guid_file;
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
d) ...
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Torus-2QoS Routing Algorithm
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Torus-2QoS is a routing algorithm designed for large-scale 2D/3D torus fabrics;
|
|
Packit |
13e616 |
see torus-2QoS(8) for full documentation.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Use '-R torus-2QoS -Q' or '-R torus-2QoS,no_fallback -Q'
|
|
Packit |
13e616 |
to activate the torus-2QoS algorithm.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Nue Routing Algorithm
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Use either `-R nue' or `-R nue -Q --nue_max_num_vls <int>' to activate Nue.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Note: if `--nue_max_num_vls' is specified and unequal to 1, then QoS support
|
|
Packit |
13e616 |
must be turned on, so that SL2VL mappings are valid and applications comply with
|
|
Packit |
13e616 |
suggested SLs to avoid credit-loops. For more details on QoS and Nue see below.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The implementation of Nue routing for OpenSM is a 100%-applicable, balanced, and
|
|
Packit |
13e616 |
deadlock-free unicast routing engine (which also configures multicast tables,
|
|
Packit |
13e616 |
see 'Note on multicast' below). The key points of this algorithm are the
|
|
Packit |
13e616 |
following:
|
|
Packit |
13e616 |
- 100% fault-tolerant, oblivious routing strategy
|
|
Packit |
13e616 |
- topology-agnostic, i.e., applicable to every topology (no matter if topology
|
|
Packit |
13e616 |
is regular, irregular after faults, or random)
|
|
Packit |
13e616 |
- 100% deadlock-free routing within the resource limits (i.e., it never
|
|
Packit |
13e616 |
exceeds the given number of available virtual lanes, and it does not
|
|
Packit |
13e616 |
necessarily require virtual lanes) for every topology
|
|
Packit |
13e616 |
- very good path balancing and therefore high throughput (even better when
|
|
Packit |
13e616 |
using METIS, see notes below)
|
|
Packit |
13e616 |
- QoS (via SLs/VLs) + deadlock-freedom can be combined (since both rely on
|
|
Packit |
13e616 |
VLs), e.g., using VL0-3 for Nue's deadlock-freedom (and first QoS level) and
|
|
Packit |
13e616 |
VL4-7 as second QoS level
|
|
Packit |
13e616 |
- forwarding tables are fast to calculate: O(n^2 * log n), however slightly
|
|
Packit |
13e616 |
slower compared to topology-aware routings (for obvious reasons), and
|
|
Packit |
13e616 |
- the path-to-VL mapping only depends on the destination, which may be useful
|
|
Packit |
13e616 |
for scalable, efficient path resolution and caching mechanisms.
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
From a very high level perspective, Nue routing is similar to DFSSSP (see above)
|
|
Packit |
13e616 |
in the sense that both use Dijkstra and edge weight updates for path balancing,
|
|
Packit |
13e616 |
and paths are mapped to virtual layers assuming a 1:1 mapping of SL2VL tables.
|
|
Packit |
13e616 |
However, the fundamental difference is that Nue routing doesn't perform the path
|
|
Packit |
13e616 |
calculation on the graph representing the real fabric, and instead routes
|
|
Packit |
13e616 |
directly within the channel dependency graph. This approach allows Nue routing
|
|
Packit |
13e616 |
to place routing restrictions (to avoid any credit-loops) in an on-demand
|
|
Packit |
13e616 |
manner, which overcomes the problem of all other good VL-based algorithms.
|
|
Packit |
13e616 |
Meaning, the competitors cannot control or limit the use of VLs, and might run
|
|
Packit |
13e616 |
out of them and have to give up. On the flip side, Nue may have to use detours
|
|
Packit |
13e616 |
for a few routes, and hence cannot really be considered "shortest-path" routing,
|
|
Packit |
13e616 |
because it is impossible to accomplish deadlock-free, shortest-path routing with
|
|
Packit |
13e616 |
a limited number of available virtual lanes for arbitrary network topologies.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Note on the use of METIS library with Nue:
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
Nue routing may have to separate the LIDs into multiple subsets, one for every
|
|
Packit |
13e616 |
virtual layer, if multiple layers are used. Nue has two options to perform this
|
|
Packit |
13e616 |
partitioning (not to be confused with IB partitions); the first is a fairly
|
|
Packit |
13e616 |
simple semi-random assignment of LIDs to layers/subsets, and the second
|
|
Packit |
13e616 |
partitioning uses the METIS library to partition the network graph into k
|
|
Packit |
13e616 |
approximately equal sized parts. The latter approach has shown better results
|
|
Packit |
13e616 |
in terms of path balancing and avoidance of using fallback paths, and hence
|
|
Packit |
13e616 |
it is HIGHLY advised to install/use the METIS library with OpenSM (enforced
|
|
Packit |
13e616 |
via `--enable-metis' configure flag when building OpenSM). In the rare case
|
|
Packit |
13e616 |
that METIS isn't packaged with the Linux distro, here is a link to the official
|
|
Packit |
13e616 |
website to download and install METIS 5.1.0 manually:
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
http://glaros.dtc.umn.edu/gkhome/metis/metis/overview
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
OpenSM's configure script also provides options in case METIS header and library
|
|
Packit |
13e616 |
aren't found in the default path.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Runtime options for Nue:
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
The behavior of Nue routing can be directly influenced by the osm.conf parameter
|
|
Packit |
13e616 |
(which is also available as command line option):
|
|
Packit |
13e616 |
- nue_max_num_vls: controls/limits the number of virtual lanes/layers which
|
|
Packit |
13e616 |
Nue is allowed to use (detailed explanation in osm.conf file).
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
Furthermore, Nue supports TRUE and FALSE settings of avoid_throttled_links,
|
|
Packit |
13e616 |
use_ucast_cache, and qos (more on this hereafter); and lmc > 0.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Notes on Quality of Service (QoS):
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
The advantage of Nue is that it works with AND without QoS being enabled, i.e.,
|
|
Packit |
13e616 |
the usage of SLs/VLs for deadlock-freedom can be avoided. Here are the three
|
|
Packit |
13e616 |
possible usage scenarios:
|
|
Packit |
13e616 |
- neither setting `--nue_max_num_vls <int>' nor `-Q': Nue assumes that only 1
|
|
Packit |
13e616 |
virtual layer (identical to physical network; or OperVLs equal to VL0) is
|
|
Packit |
13e616 |
usable and all paths are to be calculated within this one layer. Hence,
|
|
Packit |
13e616 |
there is no need for special SL2VL mappings in the network and the use of
|
|
Packit |
13e616 |
specific SLs by applications.
|
|
Packit |
13e616 |
- setting `-Q' but not `--nue_max_num_vls <int>': This combination works like
|
|
Packit |
13e616 |
the previous one, meaning the SL returned for path record requests is not
|
|
Packit |
13e616 |
defined by Nue, since all paths are deadlock-free without using VLs.
|
|
Packit |
13e616 |
However, any separate QoS settings may influence the SL returned to
|
|
Packit |
13e616 |
applications.
|
|
Packit |
13e616 |
- setting `-Q --nue_max_num_vls <int>' with int != 1: In this configuration,
|
|
Packit |
13e616 |
applications have to query and obey the SL for path records as returned
|
|
Packit |
13e616 |
by Nue because otherwise the deadlock-freedom cannot be guaranteed
|
|
Packit |
13e616 |
anymore. Furthermore, errors in the fabric may require applications to
|
|
Packit |
13e616 |
repath to avoid message deadlocks. Since Nue operates on virtual layers,
|
|
Packit |
13e616 |
admins should configure the SL2VL mapping tables in a homogeneous 1:1
|
|
Packit |
13e616 |
manner across the entire subnet to separate the layers.
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
As an additional note, using more VLs for Nue usually improves the overall
|
|
Packit |
13e616 |
network throughput, so there are trade offs admins may have to consider when
|
|
Packit |
13e616 |
configuring the subnet manager with Nue routing.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Note on multicast:
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
The Nue routing engine configures multicast forwarding tables by utilizing a
|
|
Packit |
13e616 |
spanning tree calculation rooted at a subnet switch suggested by OpenSM. This
|
|
Packit |
13e616 |
spanning tree for a mcast group will try to use the least overloaded links
|
|
Packit |
13e616 |
(w.r.t. the ucast paths-per-link metric/weight) in the fabric. However, Nue
|
|
Packit |
13e616 |
routing currently does not guarantee deadlock-freedom for the set of multicast
|
|
Packit |
13e616 |
routes on all topologies, nor for the combination of deadlock-free unicast
|
|
Packit |
13e616 |
routes with additional multicast routes. Assuming, for a given topology the
|
|
Packit |
13e616 |
calculated mcast routes are deadlock-free, then an admin may fix the latter problem
|
|
Packit |
13e616 |
by separating the VLs, e.g., using VL0-6 for unicast routing by specifying
|
|
Packit |
13e616 |
`--nue_max_num_vls 7' and utilizing VL7 for multicast.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Routing References
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
To learn more about deadlock-free routing, see the article
|
|
Packit |
13e616 |
"Deadlock Free Message Routing in Multiprocessor Interconnection Networks"
|
|
Packit |
13e616 |
by William J Dally and Charles L Seitz (1985).
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
To learn more about the up/down algorithm, see the article
|
|
Packit |
13e616 |
"Effective Strategy to Compute Forwarding Tables for InfiniBand Networks"
|
|
Packit |
13e616 |
by Jose Carlos Sancho, Antonio Robles, and Jose Duato at the
|
|
Packit |
13e616 |
Universidad Politecnica de Valencia.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
To learn more about LASH and the flexibility behind it, the requirement
|
|
Packit |
13e616 |
for layers, performance comparisons to other algorithms, see the
|
|
Packit |
13e616 |
following articles:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
"Layered Routing in Irregular Networks", Lysne et al, IEEE
|
|
Packit |
13e616 |
Transactions on Parallel and Distributed Systems, Vol.16, No.12,
|
|
Packit |
13e616 |
December 2005.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
"Routing for the ASI Fabric Manager", Solheim et al. IEEE
|
|
Packit |
13e616 |
Communications Magazine, Vol.44, No.7, July 2006.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
"Layered Shortest Path (LASH) Routing in Irregular System Area
|
|
Packit |
13e616 |
Networks", Skeie et al. IEEE Computer Society Communication
|
|
Packit |
13e616 |
Architecture for Clusters 2002.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
To learn more about the DFSSSP and SSSP routing algorithms,
|
|
Packit |
13e616 |
see the articles:
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
J. Domke, T. Hoefler and W. Nagel: Deadlock-Free Oblivious Routing
|
|
Packit |
13e616 |
for Arbitrary Topologies, In Proceedings of the 25th IEEE International
|
|
Packit |
13e616 |
Parallel & Distributed Processing Symposium (IPDPS 2011)
|
|
Packit |
13e616 |
.br
|
|
Packit |
13e616 |
T. Hoefler, T. Schneider and A. Lumsdaine: Optimized Routing for
|
|
Packit |
13e616 |
Large-Scale InfiniBand Networks, In 17th Annual IEEE Symposium on High
|
|
Packit |
13e616 |
Performance Interconnects (HOTI 2009)
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
To learn more about the Nue routing algorithm, see the article "Routing on the
|
|
Packit |
13e616 |
Dependency Graph: A New Approach to Deadlock-Free High-Performance Routing" by
|
|
Packit |
13e616 |
J. Domke, T. Hoefler and S. Matsuoka (published in HPDC'16).
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Modular Routing Engine
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Modular routing engine structure allows for the ease of
|
|
Packit |
13e616 |
"plugging" new routing modules.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Currently, only unicast callbacks are supported. Multicast
|
|
Packit |
13e616 |
can be added later.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
One existing routing module is up-down "updn", which may be
|
|
Packit |
13e616 |
activated with '-R updn' option (instead of old '-u').
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
General usage is:
|
|
Packit |
13e616 |
$ opensm -R 'module-name'
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
There is also a trivial routing module which is able
|
|
Packit |
13e616 |
to load LFT tables from a file.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Main features:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
- this will load switch LFTs and/or LID matrices (min hops tables)
|
|
Packit |
13e616 |
- this will load switch LFTs according to the path entries introduced
|
|
Packit |
13e616 |
in the file
|
|
Packit |
13e616 |
- no additional checks will be performed (such as "is port connected",
|
|
Packit |
13e616 |
etc.)
|
|
Packit |
13e616 |
- in case when fabric LIDs were changed this will try to reconstruct
|
|
Packit |
13e616 |
LFTs correctly if endport GUIDs are represented in the file
|
|
Packit |
13e616 |
(in order to disable this, GUIDs may be removed from the file
|
|
Packit |
13e616 |
or zeroed)
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The file format is compatible with output of 'ibroute' util and for
|
|
Packit |
13e616 |
whole fabric can be generated with dump_lfts.sh script.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
To activate file based routing module, use:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
opensm -R file -U /path/to/lfts_file
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
If the lfts_file is not found or is in error, the default routing
|
|
Packit |
13e616 |
algorithm is utilized.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The ability to dump switch lid matrices (aka min hops tables) to file and
|
|
Packit |
13e616 |
later to load these is also supported.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The usage is similar to unicast forwarding tables loading from a lfts
|
|
Packit |
13e616 |
file (introduced by 'file' routing engine), but new lid matrix file
|
|
Packit |
13e616 |
name should be specified by -M or --lid_matrix_file option. For example:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
opensm -R file -M ./opensm-lid-matrix.dump
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The dump file is named \'opensm-lid-matrix.dump\' and will be generated
|
|
Packit |
13e616 |
in standard opensm dump directory (/var/log by default) when
|
|
Packit |
13e616 |
OSM_LOG_ROUTING logging flag is set.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
When routing engine 'file' is activated, but the lfts file is not specified
|
|
Packit |
13e616 |
or cannot be opened, the default lid matrix algorithm will be used.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
There is also a switch forwarding tables dumper which generates
|
|
Packit |
13e616 |
a file compatible with dump_lfts.sh output. This file can be used
|
|
Packit |
13e616 |
as input for forwarding tables loading by 'file' routing engine.
|
|
Packit |
13e616 |
Either or both of the options -U and -M can be specified together with \'-R file\'.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.SH PER MODULE LOGGING CONFIGURATION
|
|
Packit |
13e616 |
.PP
|
|
Packit |
13e616 |
To enable per module logging, configure per_module_logging_file to
|
|
Packit |
13e616 |
the per module logging config file name in the opensm options
|
|
Packit |
13e616 |
file. To disable, configure per_module_logging_file to (null)
|
|
Packit |
13e616 |
there.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
The per module logging config file format is a set of lines with module
|
|
Packit |
13e616 |
name and logging level as follows:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
<module name><separator><logging level>
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
<module name> is the file name including .c
|
|
Packit |
13e616 |
<separator> is either = , space, or tab
|
|
Packit |
13e616 |
<logging level> uses the same levels as the coarse/overall
|
|
Packit |
13e616 |
logging as follows:
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
BIT LOG LEVEL ENABLED
|
|
Packit |
13e616 |
---- -----------------
|
|
Packit |
13e616 |
0x01 - ERROR (error messages)
|
|
Packit |
13e616 |
0x02 - INFO (basic messages, low volume)
|
|
Packit |
13e616 |
0x04 - VERBOSE (interesting stuff, moderate volume)
|
|
Packit |
13e616 |
0x08 - DEBUG (diagnostic, high volume)
|
|
Packit |
13e616 |
0x10 - FUNCS (function entry/exit, very high volume)
|
|
Packit |
13e616 |
0x20 - FRAMES (dumps all SMP and GMP frames)
|
|
Packit |
13e616 |
0x40 - ROUTING (dump FDB routing information)
|
|
Packit |
13e616 |
0x80 - SYS (syslog at LOG_INFO level in addition to OpenSM logging)
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.SH FILES
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
.B @OPENSM_CONFIG_DIR@/@OPENSM_CONFIG_FILE@
|
|
Packit |
13e616 |
default OpenSM config file.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
.B @OPENSM_CONFIG_DIR@/@NODENAMEMAPFILE@
|
|
Packit |
13e616 |
default node name map file. See ibnetdiscover for more information on format.
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
.B @OPENSM_CONFIG_DIR@/@PARTITION_CONFIG_FILE@
|
|
Packit |
13e616 |
default partition config file
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
.B @OPENSM_CONFIG_DIR@/@QOS_POLICY_FILE@
|
|
Packit |
13e616 |
default QOS policy config file
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
.B @OPENSM_CONFIG_DIR@/@PREFIX_ROUTES_FILE@
|
|
Packit |
13e616 |
default prefix routes file
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
.B @OPENSM_CONFIG_DIR@/@PER_MOD_LOGGING_FILE@
|
|
Packit |
13e616 |
default per module logging config file
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
.B @OPENSM_CONFIG_DIR@/@TORUS2QOS_CONF_FILE@
|
|
Packit |
13e616 |
default torus-2QoS config file
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.SH AUTHORS
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
Hal Rosenstock
|
|
Packit |
13e616 |
.RI < hal@mellanox.com >
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
Sasha Khapyorsky
|
|
Packit |
13e616 |
.RI < sashak@voltaire.com >
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
Eitan Zahavi
|
|
Packit |
13e616 |
.RI < eitan@mellanox.co.il >
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
Yevgeny Kliteynik
|
|
Packit |
13e616 |
.RI < kliteyn@mellanox.co.il >
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
Thomas Sodring
|
|
Packit |
13e616 |
.RI < tsodring@simula.no >
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
Ira Weiny
|
|
Packit |
13e616 |
.RI < weiny2@llnl.gov >
|
|
Packit |
13e616 |
.TP
|
|
Packit |
13e616 |
Dale Purdy
|
|
Packit |
13e616 |
.RI < purdy@sgi.com >
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
.SH SEE ALSO
|
|
Packit |
13e616 |
torus-2QoS(8), torus-2QoS.conf(5).
|