diff --git a/SPECS/ibdev2netdev b/SPECS/ibdev2netdev
new file mode 100644
index 0000000..440022d
--- /dev/null
+++ b/SPECS/ibdev2netdev
@@ -0,0 +1,227 @@
+#! /bin/bash
+#
+# ibdev2netdev - print which InfiniBand device port belongs to which
+# network interface, based on /sys/class/infiniband and /sys/class/net.
+# Exits with status 255 on a usage error.
+#
+
+usage()
+{
+    echo "$(basename "$0") <options>"
+    echo "-h, --help print help message"
+    echo "-v, --verbose print more info"
+}
+
+case $1 in
+    "-h" | "--help")
+        usage
+        exit 0
+        ;;
+esac
+
+if (( $# > 1 )); then
+    usage
+    exit 255
+fi
+
+# The usage text advertises --verbose, so accept it as an alias of -v.
+verbose=n
+if (( $# == 1 )); then
+    case $1 in
+        "-v" | "--verbose")
+            verbose=y
+            ;;
+        *)
+            usage
+            exit 255
+            ;;
+    esac
+fi
+
+# A missing /sys/class/infiniband simply means there is nothing to map.
+ibdevs=$(ls /sys/class/infiniband/ 2> /dev/null)
+
+# List inside a subshell so that a failed cd can never make the globs
+# match files of the caller's working directory.
+devs=$(cd /sys/class/net 2> /dev/null && ls -d e* i* 2> /dev/null)
+if [ "x$devs" == "x" ]; then
+    # no relevant devices - quit immediately
+    exit
+fi
+
+# Interfaces exposing dev_id are matched through their device/resource file;
+# without dev_id the GID-table lookup at the bottom of the script is used.
+for d in $devs; do
+    if [ -f /sys/class/net/$d/dev_id ]; then
+        oldstyle=n
+        break
+    fi
+done
+
+# print_line <ibdev> <port> <netdev>
+function print_line()
+{
+    echo "$1 port $2 <===> $3"
+}
+
+# find_guid <guid> <netdev>
+# Report every device port that has a GID whose text from byte 21 on,
+# with the colons removed, equals <guid>.
+function find_guid()
+{
+    ibdevs=$(ls /sys/class/infiniband/ 2> /dev/null)
+    for ibdev in $ibdevs; do
+        ports=$(ls /sys/class/infiniband/$ibdev/ports/)
+        for port in $ports; do
+            gids=$(ls /sys/class/infiniband/$ibdev/ports/$port/gids)
+            for gid in $gids; do
+                pguid=$(cat /sys/class/infiniband/$ibdev/ports/$port/gids/$gid | cut -b 21- | sed -e 's/://g')
+                if [ x$pguid == x$1 ]; then
+                    print_line $ibdev $port $2
+                fi
+            done
+        done
+    done
+}
+
+# find_mac <mac> <netdev>
+# Report every device port that has a GID from which <mac> (hex digits
+# without colons) can be derived.
+function find_mac()
+{
+    ibdevs=$(ls /sys/class/infiniband/ 2> /dev/null)
+    for ibdev in $ibdevs; do
+        type=$(cat /sys/class/infiniband/$ibdev/node_type|cut -d ' ' -f 2)
+        ports=$(ls /sys/class/infiniband/$ibdev/ports/)
+        for port in $ports; do
+            gids=$(ls /sys/class/infiniband/$ibdev/ports/$port/gids)
+            for gid in $gids; do
+                gidfile=/sys/class/infiniband/$ibdev/ports/$port/gids/$gid
+                if [[ "$type" = "RNIC" ]]; then
+                    pmac=$(cat $gidfile | cut -b 1-14|sed -e 's/://g')
+                else
+                    # The leading octet is hexadecimal: without the 0x prefix
+                    # bash rejects digits such as "0a" or reads "e4" as a
+                    # variable name, yielding a wrong MAC.
+                    first=$(cat $gidfile | cut -b 21-22)
+                    first=$(( 0x${first:-0} ^ 2 ))
+                    first=$(printf "%02x" $first)
+                    second=$(cat $gidfile | cut -b 21- | sed -e 's/://g' | cut -b 3-6)
+                    third=$(cat $gidfile | cut -b 21- | sed -e 's/://g' | cut -b 11-)
+                    pmac=$first$second$third
+                fi
+                if [ x$pmac == x$1 ]; then
+                    print_line $ibdev $port $2
+                fi
+            done
+        done
+    done
+}
+
+if [ "x$oldstyle" == "xn" ]; then
+    for d in $ibdevs; do
+        ibrsc=$(cat /sys/class/infiniband/$d/device/resource)
+        eths=$(ls /sys/class/net/)
+        for eth in $eths; do
+            filepath_resource=/sys/class/net/$eth/device/resource
+
+            if [ -f $filepath_resource ]; then
+                ethrsc=$(cat $filepath_resource)
+                if [ "x$ethrsc" == "x$ibrsc" ]; then
+                    filepath_devid=/sys/class/net/$eth/dev_id
+                    filepath_devport=/sys/class/net/$eth/dev_port
+                    if [ -f $filepath_devid ]; then
+                        port1=0
+                        if [ -f $filepath_devport ]; then
+                            port1=$(cat $filepath_devport)
+                            port1=$(printf "%d" $port1)
+                        fi
+
+                        port=$(cat $filepath_devid)
+                        port=$(printf "%d" $port)
+                        if [ $port1 -gt $port ]; then
+                            port=$port1
+                        fi
+
+                        port=$(( port + 1 ))
+
+                        filepath_carrier=/sys/class/net/$eth/carrier
+
+                        if [ -f $filepath_carrier ]; then
+                            link_state=$(cat $filepath_carrier 2> /dev/null)
+                            if (( link_state == 1 )); then
+                                link_state="Up"
+                            else
+                                link_state="Down"
+                            fi
+                        else
+                            link_state="NA"
+                        fi
+
+                        if [ "$verbose" == "y" ]; then
+                            filepath_portstate=/sys/class/infiniband/$d/ports/$port/state
+                            filepath_deviceid=/sys/class/infiniband/$d/device/device
+                            filepath_fwver=/sys/class/infiniband/$d/fw_ver
+                            filepath_vpd=/sys/class/infiniband/$d/device/vpd
+
+                            # read port state
+                            if [ -f $filepath_portstate ]; then
+                                ibstate=$(printf "%-6s" "$(cat $filepath_portstate | gawk '{print $2}')")
+                            else
+                                ibstate="na"
+                            fi
+
+                            # read device
+                            if [ -f $filepath_deviceid ]; then
+                                devid=$(printf "mt%d" "$(cat $filepath_deviceid)")
+                            else
+                                devid="na"
+                            fi
+
+                            # read fw version
+                            if [ -f $filepath_fwver ]; then
+                                fwver=$(cat $filepath_fwver)
+                            else
+                                fwver="na"
+                            fi
+
+                            # read device description and part id from the vpd
+                            if [ -f $filepath_vpd ]; then
+                                vpd_content=`cat $filepath_vpd | tr -d '\0'`
+                                devdesc=$(printf "%-15s" "$(echo $vpd_content | strings | head -1)")
+                                partid=$(printf "%-11s" "$(echo $vpd_content | strings | head -4 | tail -1 | gawk '{print $1}')")
+                            else
+                                devdesc=""
+                                partid="na"
+                            fi
+
+                            echo "$d ($devid - $partid) $devdesc fw $fwver port $port ($ibstate) ==> $eth ($link_state)"
+                        else
+                            echo "$d port $port ==> $eth ($link_state)"
+                        fi
+                    fi
+                fi
+            fi
+        done
+    done
+else
+##########################
+### old style
+##########################
+    # Take the interface names from sysfs rather than parsing ifconfig
+    # output: ifconfig may be absent and newer versions append ':' to the
+    # name, which breaks the /sys/class/net lookups below.
+    ifcs=$(ls /sys/class/net/ | grep -E '^(eth|ib)')
+
+    for ifc in $ifcs; do
+        len=$(cat /sys/class/net/$ifc/addr_len)
+        if (( len == 20 )); then
+            guid=$(cat /sys/class/net/$ifc/address | cut -b 37- | sed -e 's/://g')
+            find_guid $guid $ifc
+        elif (( len == 6)); then
+            mac=$(cat /sys/class/net/$ifc/address | sed -e 's/://g')
+            find_mac $mac $ifc
+        fi
+    done
+fi
+
diff --git a/SPECS/rxe_cfg b/SPECS/rxe_cfg
new file mode 100755
index 0000000..d74b253
--- /dev/null
+++ b/SPECS/rxe_cfg
@@ -0,0 +1,677 @@
+#!/usr/bin/perl
+
+# * Copyright (c) 2009-2011 Mellanox Technologies Ltd. All rights reserved.
+# * Copyright (c) 2009-2011 System Fabric Works, Inc. All rights reserved.
+# *
+# * This software is available to you under a choice of one of two
+# * licenses. You may choose to be licensed under the terms of the GNU
+# * General Public License (GPL) Version 2, available from the file
+# * COPYING in the main directory of this source tree, or the
+# * OpenIB.org BSD license below:
+# *
+# * Redistribution and use in source and binary forms, with or
+# * without modification, are permitted provided that the following
+# * conditions are met:
+# *
+# * - Redistributions of source code must retain the above
+# * copyright notice, this list of conditions and the following
+# * disclaimer.
+# * +# * - Redistributions in binary form must reproduce the above +# * copyright notice, this list of conditions and the following +# * disclaimer in the documentation and/or other materials +# * provided with the distribution. +# * +# * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# * SOFTWARE. +# + +use warnings; +use strict; + +use File::Basename; +use File::Path qw(make_path); +use Getopt::Long; + +my $help = 0; +my $no_persist = 0; +my $debug = 0; +my $force = 0; +my $linkonly = 0; +my $parms = "/sys/module/rdma_rxe/parameters"; +my $modprobe_opt = ""; +my $modprobe_checked = "0"; +my $persistence_path = "/var/lib/rxe"; +my $persistence_file = "${persistence_path}/rxe"; +my $num_persistent = 0; +my $sys = "/sys/module/rdma_rxe/parameters"; +my %rxe_names; +my @rxe_array; +my %eth_names; +my @eth_list; +my %eth_driver; +my %link_state; +my %link_speed; +my %eth_mtu; +my %ipv4_addr; +my %rxe_mtu; +my @persistence_array; +my %persistence_hash; +my @mlx4_port; +my @mlx4_ether; +my @roce_list; + +# Read a file and return its contents as a string. 
+# Only the first line of the file is returned (newline included); an
+# unreadable or empty file yields "".
+sub read_file {
+    my $filename = shift;
+    my $result = "";
+
+    # Three-argument open with a lexical handle: the file name can never be
+    # taken for an open mode and the handle does not leak as a global.
+    if (open(my $fh, '<', $filename)) {
+        my $line = <$fh>;
+        $result = $line if (defined($line));
+        close($fh);
+    }
+    return $result;
+}
+
+# Write one value (followed by a newline) to a control file without going
+# through a shell, so the value can never be interpreted as shell syntax.
+# Returns 1 on success, 0 (after printing a warning) on failure.
+sub sysfs_write {
+    my ($path, $value) = @_;
+    my $fh;
+
+    if (!open($fh, '>', $path)) {
+        warn "cannot open $path: $!\n";
+        return 0;
+    }
+    print $fh "$value\n";
+
+    # errors of the buffered write only surface when the handle is closed
+    if (!close($fh)) {
+        warn "cannot write '$value' to $path: $!\n";
+        return 0;
+    }
+    return 1;
+}
+
+#get mapping between rxe and eth devices
+# Fills %rxe_names (eth -> rxe), %eth_names (rxe -> eth) and @rxe_array
+# from the "parent" attribute of every /sys/class/infiniband/rxe* device.
+sub get_names {
+    my $i = 0;
+
+    foreach my $path (glob("/sys/class/infiniband/rxe*")) {
+        my $rxe = basename($path);
+        my $eth = read_file("/sys/class/infiniband/$rxe/parent");
+        chomp($eth);
+
+        if (($eth =~ /[\w]+[\d]/)
+            && ($rxe =~ /rxe[0123456789]/)) {
+
+            # hash ethername to rxename
+            $rxe_names{$eth} = $rxe;
+            $rxe_array[$i++] = $rxe;
+
+            # hash rxename to ethername
+            $eth_names{$rxe} = $eth;
+        }
+    }
+}
+
+# get list of Mellanox RoCE ports
+# Fills @roce_list with "<device>:<port>" for every mlx4 port whose
+# link_layer attribute mentions Ethernet.
+sub get_mlx4_list {
+    my $i = 0;
+
+    foreach my $mlx4 (glob("/sys/class/infiniband/mlx4_*")) {
+        $mlx4 = basename($mlx4);
+        foreach my $port_dir (glob("/sys/class/infiniband/$mlx4/ports/*")) {
+            my $port = basename($port_dir);
+
+            # Read link_layer through the full port directory; the bare
+            # port number would be resolved against the working directory.
+            my $link = read_file("$port_dir/link_layer");
+            chomp($link);
+
+            if ($link =~ "Ethernet") {
+                $roce_list[$i++] = "$mlx4:$port";
+            }
+        }
+    }
+}
+
+#collect per device information
+# Fills @eth_list, %eth_driver, %link_state, %link_speed, %ipv4_addr,
+# %eth_mtu and %rxe_mtu from sysfs and from the output of ethtool, ip and
+# ibv_devinfo.
+sub get_dev_info {
+    my @fields;
+    my @lines;
+    my $i = 0;
+    my $j = 0;
+
+    get_mlx4_list();
+
+    # keep the interfaces whose sysfs "type" attribute is 1
+    my @my_eth_list = ();
+    foreach my $my_eth_dev (glob("/sys/class/net/*")) {
+        $my_eth_dev = basename($my_eth_dev);
+        if ($my_eth_dev ne "bonding_masters"){
+            my $my_dev_type = read_file("/sys/class/net/${my_eth_dev}/type");
+            chomp($my_dev_type);
+
+            # only compare numerically once the value is known to be a
+            # number; an unreadable attribute yields ""
+            if ($my_dev_type =~ /^\s*\d+\s*$/ && $my_dev_type == 1) {
+                push(@my_eth_list, "$my_eth_dev");
+            }
+        }
+    }
+
+    foreach my $eth (@my_eth_list) {
+        $eth_list[$i++] = $eth;
+
+        @lines = `ethtool -i $eth`;
+        foreach my $line (@lines) {
+            chomp($line);
+
+            @fields = split(/\s+/, $line);
+            next if (!defined($fields[0]));
+
+            if ($fields[0] =~ /driver:/) {
+                my $drv = $fields[1];
+                $eth_driver{$eth} = $drv;
+
+                # pair mlx4_en interfaces with the RoCE ports in order,
+                # never reading past the end of @roce_list
+                if (defined($drv) && $drv =~ /mlx4_en/
+                    && $j < scalar(@roce_list)) {
+                    $eth_names{$roce_list[$j++]} = $eth;
+                }
+            }
+        }
+
+        # get link status
+        $link_state{$eth} = "";
+        $link_speed{$eth} = "";
+
+        @lines = `ethtool $eth`;
+        foreach my $line (@lines) {
+            chomp($line);
+
+            @fields = split(/:/, $line);
+            if (defined($fields[1])) {
+                $fields[1] =~ s/^\s+//g;
+                if ($fields[0] =~ "Link detected") {
+                    $link_state{$eth} = $fields[1];
+                }
+            }
+            elsif ($line =~ "10000baseT") {
+                $link_speed{$eth} = "10GigE";
+            }
+        }
+
+        $ipv4_addr{$eth} = " ";
+        $eth_mtu{$eth} = "";
+
+        @lines = `ip addr show $eth`;
+        foreach my $line (@lines) {
+            # get IP address
+            if ($line =~ /inet /) {
+                $line =~ s/^\s+inet ([0-9.]+)\//$1 /g;
+                @fields = split(/\s+/, $line);
+                $ipv4_addr{$eth} = $fields[0];
+            }
+
+            # get ethernet mtu
+            if ($line =~ /mtu /) {
+                $line =~ s/^.*mtu //g;
+                @fields = split(/\s+/, $line);
+                $eth_mtu{$eth} = $fields[0];
+            }
+        }
+    }
+
+    # get rxe mtu
+    foreach my $rxe (@rxe_array) {
+
+        @lines = `ibv_devinfo -d $rxe`;
+        foreach my $line (@lines) {
+            if ($line =~ "active_mtu") {
+                $line =~ s/^\s+active_mtu:\s+//g;
+                chomp($line);
+
+                $rxe_mtu{$rxe} = $line;
+            }
+        }
+        $rxe_mtu{$rxe} = "(?)" if (!$rxe_mtu{$rxe});
+    }
+}
+
+# return string or the string "###" if string is all whitespace
+sub set_field {
+    my $fld = $_[0];
+
+    if (defined($fld) && $fld =~ /\S/) {
+        return $fld;
+    } else {
+        return "###";
+    }
+}
+
+# format status output into fixed width columns
+# Every argument is one line of whitespace separated fields; the "###"
+# placeholder produced by set_field() is printed as blanks.
+sub status_print {
+    my @fields;
+    my @flen = ();
+    my $num_fields = 0;
+    my $i;
+
+    # one pass to size the columns
+    foreach my $line (@_) {
+        @fields = split(/\s+/, $line);
+        $i = 0;
+        foreach my $field (@fields) {
+            if (!defined($flen[$i])) {
+                $flen[$i] = length($field);
+            }
+            else {
+                $flen[$i] = max($flen[$i], length($field));
+            }
+            $i++;
+        }
+
+        if ($i > $num_fields) {
+            $num_fields = $i;
+        }
+    }
+
+    # one pass to print
+    foreach my $line (@_) {
+        print " ";
+        @fields = split(/\s+/, $line);
+        for ($i = 0; $i < $num_fields; $i++) {
+            my $text = "";
+            my $width = 0;
+
+            if (defined($fields[$i])) {
+                $width = length($fields[$i]);
+
+                # blank out the placeholder with its own width so the
+                # following columns stay aligned
+                $text = ($fields[$i] ne "###") ? $fields[$i] : " " x $width;
+            }
+            printf("%s%*s", $text, $flen[$i] - $width + 2, "");
+        }
+        print "\n";
+    }
+}
+
+# check driver load status
+# Returns 0 when the rdma_rxe parameter directory exists, 1 otherwise.
+sub check_module_status {
+    if (-e $sys) {
+        return 0;
+    } else {
+        return 1;
+    }
+}
+
+# print a notice when the rdma_rxe module is not loaded
+sub show_module_status {
+    print "rdma_rxe module not loaded\n" if (!(-e $sys));
+}
+
+# print rxe status
+# With an argument only the interfaces whose rxe device matches it are
+# shown; without one all interfaces are shown (only those with link when
+# -l was given).
+sub do_status {
+    my $instance = $_[0];
+    my @outp;
+    my $rxe;
+    my $rmtu;
+
+    get_names();
+    get_dev_info();
+    show_module_status();
+
+    push(@outp, "Name\tLink\tDriver\t\tSpeed\tNMTU\tIPv4_addr\tRDEV\tRMTU");
+
+    foreach my $eth (@eth_list) {
+
+        # handle case where rxe_drivers are not loaded
+        if (defined($rxe_names{$eth})) {
+            $rxe = $rxe_names{$eth};
+            $rmtu = $rxe_mtu{$rxe};
+        }
+        else {
+            $rxe = "";
+            $rmtu = "";
+        }
+
+        if ((!defined($instance)
+             && (($linkonly == 0) || ($link_state{$eth} =~ "yes")))
+            || (defined($instance) && ($rxe =~ "$instance"))) {
+            my @columns = (
+                $eth,
+                $link_state{$eth},
+                exists($eth_driver{$eth}) ? $eth_driver{$eth} : "",
+                $link_speed{$eth},
+                $eth_mtu{$eth},
+                $ipv4_addr{$eth},
+                $rxe,
+                $rmtu,
+            );
+            push(@outp, join("\t", map { set_field($_) } @columns));
+        }
+    }
+
+    status_print(@outp);
+}
+
+# read file containing list of ethernet devices into a list
+# Fills @persistence_array / %persistence_hash and sets $num_persistent;
+# a missing or unreadable file leaves the list empty.
+sub populate_persistence {
+    my $i = 0;
+
+    if (open(my $fh, '<', $persistence_file)) {
+        while (my $line = <$fh>) {
+            chomp($line);
+            $line =~ s/^\s+//g;
+            if ($line =~ /[\w]+[\d]/) {
+                # in case we add fields later
+                my ($eth, $cruft) = split(/\s+/, $line, 2);
+                if ($eth =~ /^[\w]+[\d]/) {
+                    $persistence_array[$i] = $eth;
+                    $persistence_hash{$eth} = $i++;
+                }
+            }
+        }
+        close($fh);
+    }
+
+    $num_persistent = $i;
+}
+
+# print out list of ethernet devices to file
+# Entries blanked by delete_persistent() are skipped.
+sub commit_persistent {
+    my $fh;
+
+    if (!open($fh, '>', $persistence_file)) {
+        warn "cannot write $persistence_file: $!\n";
+        return;
+    }
+
+    for (my $i = 0; $i < $num_persistent; $i++) {
+        my $eth = $persistence_array[$i];
+        if (defined($eth) && $eth =~ /[\w]+[\d]/) {
+            print $fh "$eth\n";
+        }
+    }
+
+    close($fh) or warn "cannot write $persistence_file: $!\n";
+}
+
+# blank the entry of an ethernet device in the persistence list
+sub delete_persistent {
+    my $eth = $_[0];
+
+    if (defined($persistence_hash{$eth})) {
+        $persistence_array[$persistence_hash{$eth}] = "";
+    }
+}
+
+# append an ethernet device to the persistence list
+sub add_persistent {
+    my $eth = $_[0];
+
+    # Is this one already in the persistence list?
+    if (!defined($persistence_hash{$eth})) {
+        $persistence_array[$num_persistent] = $eth;
+        $persistence_hash{$eth} = $num_persistent;
+        $num_persistent++;
+    }
+}
+
+# add new rxe device to eth if not already up
+sub rxe_add {
+    my $eth = $_[0];
+
+    if (!defined($eth) || !($eth =~ /[\w]+[\d]/)) {
+        my $shown = defined($eth) ? $eth : "";
+        print "eth_name ($shown) looks bogus\n";
+        return;
+    }
+
+    if (!defined($rxe_names{$eth})) {
+        sysfs_write("$parms/add", $eth);
+    }
+    if (!$no_persist) {
+        add_persistent($eth);
+        commit_persistent();
+    }
+}
+
+# remove an rxe device; the argument is an rxe name, the name of the
+# ethernet device it sits on, or "all"
+sub rxe_remove {
+    my $arg2 = $_[0];
+    my $rxe;
+    my $eth;
+
+    if (!defined($arg2)) {
+        print "remove: no device given\n";
+        return;
+    }
+
+    print "remove $arg2\n" if ($debug > 0);
+
+    # Test for an rxe name first: the generic name pattern below matches
+    # "rxe0" as well and would treat it as an ethernet device.
+    if ($arg2 eq "all") {
+        $rxe = "all";
+    }
+    elsif ($arg2 =~ /^rxe[0123456789]/) {
+        $rxe = $arg2;
+        $eth = $eth_names{$rxe};
+    }
+    elsif ($arg2 =~ /[\w]+[\d]/) {
+        $eth = $arg2;
+        $rxe = $rxe_names{$eth};
+    }
+
+    if (defined($rxe)
+        && (($rxe eq "all") || ($rxe =~ /^rxe[0123456789]/))) {
+        sysfs_write("$parms/remove", $rxe);
+        if (!$no_persist) {
+            if ($rxe eq "all") {
+                unlink($persistence_file);
+            }
+            elsif (defined($eth) && $eth =~ /[\w]+[\d]/) {
+                delete_persistent($eth);
+                commit_persistent();
+            }
+            else {
+                print "Warning: Unable to resolve ethname; "
+                    . "instance may persist on restart\n";
+            }
+        }
+    }
+    else {
+        my $shown = defined($rxe) ? $rxe : $arg2;
+        print "rxe instance $shown not found\n";
+    }
+}
+
+# return the output of ibv_devinfo for one rxe device
+sub get_devinfo {
+    my $rxe = $_[0];
+
+    my $cmd = "ibv_devinfo -d $rxe";
+    return `$cmd`;
+}
+
+# allow unsupported modules to load in SLES11 if allowed
+# The modprobe configuration is inspected once; the result is cached in
+# $modprobe_opt.
+sub modprobe {
+    my $module = $_[0];
+    my $opts = $_[1];
+
+    if ($modprobe_checked eq "0") {
+        foreach my $line (`modprobe -c`) {
+            if ($line =~ /^allow_unsupported_modules *0/) {
+                $modprobe_opt = " --allow-unsupported-modules ";
+                last;
+            }
+        }
+        $modprobe_checked = "1";
+    }
+
+    if (!defined($opts)) {
+        $opts = "";
+    }
+
+    system("modprobe $modprobe_opt $module $opts");
+}
+
+# bring up rxe
+# Loads the modules, re-creates the rxe devices recorded in the persistence
+# file and brings up the interface of every port reported as PORT_DOWN.
+sub do_start {
+    system("mkdir -p $persistence_path");
+    system("touch $persistence_file");
+
+    modprobe("ib_core");
+    modprobe("ib_uverbs");
+    modprobe("rdma_ucm");
+    modprobe("rdma_rxe");
+
+    populate_persistence();
+    system("udevadm control --reload");
+
+    foreach my $eth (@persistence_array) {
+        rxe_add($eth);
+    }
+
+    get_names();
+
+    foreach my $rxe (@rxe_array) {
+        my $stat = get_devinfo($rxe);
+        if ($stat =~ "PORT_DOWN") {
+            my $cmd = "ip link set $eth_names{$rxe} up";
+            system($cmd);
+        }
+    }
+
+}
+
+# check if argument is an integer
+sub is_integer {
+    defined $_[0] && $_[0] =~ /^[+-]?\d+$/;
+}
+
+# remove all rxe devices and unload drivers
+sub do_stop {
+    foreach my $rxe (@rxe_array) {
+        sysfs_write("$sys/remove", $rxe);
+    }
+
+    if (-e $sys) {
+        system("rmmod rdma_rxe");
+    }
+
+    if (-e $sys) {
+        print "unable to unload drivers, reboot required\n";
+    }
+}
+
+# set the debug mask ("on", "off" or a number from 0 to 31; nothing is
+# written when no argument is given) and print the resulting state
+sub do_debug {
+    my $arg2 = defined($_[0]) ? $_[0] : "";
+    my $debugfile = "$parms/debug";
+    chomp($arg2);
+
+    if (!(-e "$debugfile")) {
+        print "Error: debug is compiled out of this rxe driver\n";
+        return;
+    }
+
+    if ($arg2 eq "on") { sysfs_write($debugfile, "31"); }
+    elsif ($arg2 eq "off") { sysfs_write($debugfile, "0"); }
+    elsif ($arg2 eq "") { }
+    # compare as numbers: as strings "4" would sort after "31"
+    elsif (is_integer($arg2) && $arg2 >= 0 && $arg2 <= 31) {
+        sysfs_write($debugfile, $arg2 + 0);
+    }
+    else {
+        print "unrecognized debug cmd ($arg2)\n";
+    }
+
+    my $current = read_file($debugfile);
+    chomp($current);
+    if (!is_integer($current) || $current < 0) {
+        print "Unrecognized debug value\n";
+    }
+    elsif ($current > 0) {
+        print "Debug is ON ($current)\n";
+    }
+    else {
+        print "Debug is OFF\n";
+    }
+}
+
+# return the larger of two numbers
+sub max {
+    # not named $a/$b, which are reserved for sort()
+    my ($x, $y) = @_;
+    return $x if ($x > $y);
+    return $y;
+}
+
+# show usage for rxe_cfg
+# -p is not listed: main() does not define such an option.
+sub usage {
+    print " Usage:\n";
+    print " rxe_cfg [options] start|stop|status|persistent\n";
+    print " rxe_cfg debug on|off|<num>\n";
+    print " rxe_cfg [-n] add <ndev>\n";
+    print " rxe_cfg [-n] remove <ndev>|<rdev>\n";
+    print "\n";
+    print " <ndev> = network device e.g. eth3\n";
+    print " <rdev> = rdma device e.g. rxe1\n";
+    print "\n";
+    print " Options:\n";
+    print " -h: print this usage information\n";
+    print " -n: do not make the configuration action persistent\n";
+    print " -v: print additional debug output\n";
+    print " -l: show status for interfaces with link up\n";
+}
+
+# parse the command line and dispatch the requested command
+sub main {
+    my $opts_ok = GetOptions(
+        "-h" => \$help,
+        "--help" => \$help,
+        "-n" => \$no_persist,
+        "-v:+" => \$debug,
+        "-f" => \$force,
+        "-l" => \$linkonly,
+    );
+
+    if (!$opts_ok) {
+        usage();
+        exit 1;
+    }
+
+    my $arg1 = $ARGV[0];
+    my $arg2 = $ARGV[1];
+
+    # help wins over the status default, otherwise "rxe_cfg -h" would
+    # print the status table instead of the usage
+    if ($help) {
+        usage();
+        exit;
+    }
+
+    # status is the default
+    if (!defined($arg1) || ($arg1 =~ /status/)) {
+        do_status($arg2);
+        exit;
+    }
+
+    # stuff that does not require modules to be loaded
+    if ($arg1 eq "help") { usage(); exit; }
+    elsif ($arg1 eq "start") { do_start(); do_status(); exit; }
+    elsif ($arg1 eq "persistent") { system("cat $persistence_file"); exit; }
+
+
+    # can't do much else, bail if modules aren't loaded
+    if (check_module_status()) {
+        show_module_status();
+        exit;
+    }
+
+    # create persistence file if necessary
+    make_path($persistence_path);
+    if (!(-e $persistence_file)) {
+        if (open(my $fh, '>>', $persistence_file)) {
+            close($fh);
+        }
+    }
+
+    # Get full context of the configuration
+    populate_persistence();
+    get_names();
+    get_dev_info();
+
+    # Stuff that requires the rdma_rxe module to be loaded
+    if ($arg1 eq "stop") { do_stop(); exit; }
+    elsif ($arg1 eq "debug") { do_debug($arg2); exit; }
+    elsif ($arg1 eq "add") { rxe_add($arg2); exit; }
+    elsif ($arg1 eq "remove") { rxe_remove($arg2); exit; }
+    else {
+        print "unknown command ($arg1)\n";
+        usage();
+        exit 1;
+    }
+}
+
+main();
+
+exit;
diff --git a/SPECS/rxe_cfg.8.gz b/SPECS/rxe_cfg.8.gz
new file mode 100644
index 0000000..499a25b
Binary files /dev/null and b/SPECS/rxe_cfg.8.gz differ