Blob Blame History Raw
// SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB)
/* Copyright (c) 2019, Mellanox Technologies. All rights reserved. See COPYING file */

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <errno.h>
#include <unistd.h>
#include <getopt.h>
#include <sys/types.h>
#include <dirent.h>
#include <syslog.h>
#include <rdma/rdma_netlink.h>
#include <netlink/netlink.h>
#include <netlink/msg.h>
#include <netlink/attr.h>
#include <linux/pci_regs.h>
#include <util/rdma_nl.h>

/*
 * Rename modes:
 * NAME_FALLBACK - Try to name devices in the following order:
 *                 by-onboard -> by-pci -> kernel
 * NAME_KERNEL - leave name as kernel provided
 * NAME_PCI - based on PCI/slot/function location
 * NAME_GUID - based on node GUID
 * NAME_ONBOARD - based on the on-board device index
 *
 * The stable names are a combination of device type technology and rename mode.
 * Infiniband - ib*
 * RoCE - roce*
 * iWARP - iw*
 * OPA - opa*
 * Default (unknown protocol) - rdma*
 *
 * Example:
 * NAME_PCI
 *  pci = 0000:00:0c.4
 *  Device type = IB
 *  mlx5_0 -> ibp0s12f4
 * NAME_GUID
 *  GUID = 5254:00c0:fe12:3455
 *  Device type = RoCE
 *  mlx5_0 -> rocex525400c0fe123455
 * NAME_ONBOARD
 *  Index = 3
 *  Device type = OPA
 *  hfi1_1 -> opao3
 */

/*
 * State shared between the netlink callback, the naming helpers and main().
 */
struct data {
	/* Current (kernel provided) device name, taken from the command line */
	const char *curr;
	/* Heap-allocated name prefix: the protocol string or "rdma" */
	char *prefix;
	/* Node GUID reported over netlink; 0 means no GUID is available */
	uint64_t node_guid;
	/* Heap-allocated new device name produced by one of the by_*() helpers */
	char *name;
	/* Device index reported over netlink; main() starts it at -1 */
	int idx;
};

/* Set by the "-v" command line option; enables pr_dbg() output */
static bool debug_mode;
/* Report an error through syslog */
#define pr_err(args...) syslog(LOG_ERR, ##args)
/*
 * Report a debug message through syslog, only when debug_mode is set.
 * Note that the message is emitted with the LOG_ERR priority.
 */
#define pr_dbg(args...)                                                        \
	do {                                                                   \
		if (debug_mode)                                                \
			syslog(LOG_ERR, ##args);                               \
	} while (0)

/* Largest on-board index that by_onboard() accepts */
#define ONBOARD_INDEX_MAX (16*1024-1)

/*
 * by_onboard - build a stable name from the on-board device index
 * @d: device description; d->curr and d->prefix are read, d->name is written
 *
 * The index is read from the "acpi_index" sysfs attribute of the device and,
 * if that file cannot be opened, from its "index" attribute.
 *
 * Returns 0 and stores a newly allocated "<prefix>o<index>" string in
 * d->name on success. Returns a negative errno value on failure.
 */
static int by_onboard(struct data *d)
{
	char *index = NULL;
	char *acpi = NULL;
	unsigned int o = 0;
	FILE *fp;
	int ret;

	/*
	 * ACPI_DSM - device specific method for naming
	 * PCI or PCI Express device
	 */
	ret = asprintf(&acpi, "/sys/class/infiniband/%s/device/acpi_index",
		      d->curr);
	if (ret < 0)
		return -ENOMEM;

	/* SMBIOS type 41 - Onboard Devices Extended Information */
	ret = asprintf(&index, "/sys/class/infiniband/%s/device/index", d->curr);
	if (ret < 0) {
		index = NULL;
		ret = -ENOMEM;
		goto out;
	}

	fp = fopen(acpi, "r");
	if (!fp)
		fp = fopen(index, "r");
	if (!fp) {
		pr_dbg("%s: Device is not embedded onboard\n", d->curr);
		ret = -ENOENT;
		goto out;
	}

	ret = fscanf(fp, "%u", &o);
	fclose(fp);
	/*
	 * fscanf() returns EOF (not 0) on an empty file, so anything other
	 * than exactly one converted item means there is no usable index.
	 *
	 * https://github.com/systemd/systemd/blob/master/src/udev/udev-builtin-net_id.c#L263
	 */
	if (ret != 1 || o > ONBOARD_INDEX_MAX) {
		pr_err("%s: Onboard index %u and ret %d\n", d->curr, o, ret);
		ret = -ENOENT;
		goto out;
	}

	ret = asprintf(&d->name, "%so%u", d->prefix, o);
	if (ret < 0) {
		pr_err("%s: Failed to allocate name with prefix %s and onboard index %u\n",
		       d->curr, d->prefix, o);
		ret = -ENOMEM;
		d->name = NULL;
		goto out;
	}
	ret = 0;
out:
	free(index);
	free(acpi);
	return ret;
}

/*
 * find_sun - look up the physical slot number of a PCI device
 * @devname: sysfs path of the PCI device
 * @pci: PCI address of the device in "<domain>:<bus>:<slot>.<function>" form
 *
 * Every numbered entry of "<devname>/subsystem/slots" is inspected and its
 * "address" attribute is compared with @pci stripped of the ".<function>"
 * suffix.
 *
 * Returns the (positive) slot number on a match, 0 when no slot matches or
 * on any error.
 */
static int find_sun(char *devname, char *pci)
{
	struct dirent *dent;
	const char *dot;
	char bof[256];
	char *slots;
	size_t len;
	int sun = 0;
	DIR *dir;

	/*
	 * Split the address at the function separator. A "%s.%s" scanf
	 * format cannot do this because the first %s swallows the dot too.
	 */
	dot = strrchr(pci, '.');
	if (!dot || dot == pci || !dot[1])
		return 0;
	len = dot - pci;
	if (len >= sizeof(bof))
		return 0;
	memcpy(bof, pci, len);
	bof[len] = '\0';

	if (asprintf(&slots, "%s/subsystem/slots", devname) < 0)
		return 0;

	dir = opendir(slots);
	if (!dir)
		goto err_dir;

	while ((dent = readdir(dir))) {
		char *str, address[256];
		FILE *fp;
		int i, ret;

		if (dent->d_name[0] == '.')
			continue;
		/* Only entries named with a positive number are considered */
		i = atoi(dent->d_name);
		if (i <= 0)
			continue;

		if (asprintf(&str, "%s/%s/address", slots, dent->d_name) < 0)
			break;

		fp = fopen(str, "r");
		free(str);
		if (!fp)
			continue;

		ret = fscanf(fp, "%255s", address);
		fclose(fp);
		if (ret != 1)
			continue;

		if (!strcmp(bof, address)) {
			sun = i;
			break;
		}
	}

	closedir(dir);
err_dir:
	free(slots);
	return sun;
}

/*
 * is_pci_multifunction - test the multi-function bit of a PCI device
 * @devname: sysfs path of the PCI device
 *
 * Reads the first 64 bytes of "<devname>/config" and inspects the header
 * type byte.
 *
 * Returns non-zero when bit 7 of the header type is set, 0 otherwise or
 * when the configuration space could not be read in full.
 */
static int is_pci_multifunction(char *devname)
{
	char header[64] = {};
	size_t nread;
	char *path;
	FILE *cfg;

	if (asprintf(&path, "%s/config", devname) < 0)
		return 0;

	cfg = fopen(path, "r");
	free(path);
	if (!cfg)
		return 0;

	nread = fread(header, 1, sizeof(header), cfg);
	fclose(cfg);

	/* A short read does not give us the whole header */
	if (nread != sizeof(header))
		return 0;

	/* bit 0-6 header type, bit 7 multi/single function device */
	return header[PCI_HEADER_TYPE] & 0x80;
}

/*
 * is_pci_ari_enabled - check the "ari_enabled" attribute of a PCI device
 * @devname: sysfs path of the PCI device
 *
 * Returns 1 when "<devname>/ari_enabled" holds the value 1, 0 otherwise
 * (including when the file is missing, empty or not a number).
 */
static int is_pci_ari_enabled(char *devname)
{
	int ret, a = 0;
	char *ari;
	FILE *fp;

	ret = asprintf(&ari, "%s/ari_enabled", devname);
	if (ret < 0)
		return 0;

	fp = fopen(ari, "r");
	free(ari);
	if (!fp)
		return 0;

	ret = fscanf(fp, "%d", &a);
	fclose(fp);
	/* fscanf() yields EOF on an empty file; only 1 means "a" was parsed */
	return ret == 1 && a == 1;
}

/*
 * PCI location of a device as used to compose the by-PCI name.
 */
struct pci_info {
	/* sysfs path of the PCI device the remaining fields describe */
	char *pcidev;

	/* Parsed from the "<domain>:<bus>:<slot>.<func>" address */
	unsigned int domain;
	unsigned int bus;
	unsigned int slot;
	/* For ARI devices fill_pci_info() folds the slot into this value */
	unsigned int func;
	/* Slot number returned by find_sun(); 0 when none was found */
	unsigned int sun;
	/* Number taken from the matching "virtfn<N>" entry of the PF */
	unsigned int vf;
	/* Set when the device was detected to be a virtual function */
	bool valid_vf;
};

/*
 * fill_pci_info - parse the PCI address behind p->pcidev
 * @d: device description, used for log messages only
 * @p: p->pcidev is the sysfs link to resolve; domain, bus, slot, func and
 *     sun are filled in
 *
 * Returns 0 on success, -EINVAL when the link cannot be read or does not
 * fit the local buffer, -ENOENT when its target is not a PCI address.
 */
static int fill_pci_info(struct data *d, struct pci_info *p)
{
	char buf[256] = {};
	ssize_t len;
	char *pci;
	int ret;

	/*
	 * Offer the whole buffer to readlink(): a result equal to the buffer
	 * size then tells us the target may have been truncated.
	 */
	len = readlink(p->pcidev, buf, sizeof(buf));
	if (len < 0 || (size_t)len >= sizeof(buf))
		return -EINVAL;

	buf[len] = 0;

	pci = basename(buf);
	/*
	 * pci = 0000:00:0c.0
	 */
	ret = sscanf(pci, "%x:%x:%x.%u", &p->domain, &p->bus, &p->slot,
		     &p->func);
	if (ret != 4) {
		pr_err("%s: Failed to read PCI BOF\n", d->curr);
		return -ENOENT;
	}

	if (is_pci_ari_enabled(p->pcidev)) {
		/*
		 * ARI devices support up to 256 functions on a single device
		 * ("slot"), and interpret the traditional 5-bit slot and 3-bit
		 * function number as a single 8-bit function number, where the
		 * slot makes up the upper 5 bits.
		 *
		 * https://github.com/systemd/systemd/blob/master/src/udev/udev-builtin-net_id.c#L344
		 */
		p->func += p->slot * 8;
		pr_dbg("%s: This is ARI device, new PCI BOF is %04x:%02x:%02x.%u\n",
		       d->curr, p->domain, p->bus, p->slot, p->func);
	}

	p->sun = find_sun(p->pcidev, pci);

	return 0;
}

/*
 * get_virtfn_info - detect a virtual function and redirect to its PF
 * @d: device description, used for log messages only
 * @p: p->pcidev must be a heap-allocated sysfs path of the device
 *
 * When "<pcidev>/physfn" exists the device is a VF. In that case
 * p->valid_vf is set, p->vf receives the number of the PF "virtfn<N>" entry
 * that points back to this device, and p->pcidev is replaced by the
 * heap-allocated path of the PF (the previous string is freed).
 *
 * Returns 0 on success (including when the device is not a VF) and a
 * negative errno value on failure, in which case p->pcidev is unchanged.
 */
static int get_virtfn_info(struct data *d, struct pci_info *p)
{
	const char *s = "virtfn";
	struct pci_info vf = {};
	char *physfn_pcidev;
	struct dirent *dent;
	DIR *dir;
	int ret;

	/* Check if this is a virtual function. */
	ret = asprintf(&physfn_pcidev, "%s/physfn", p->pcidev);
	if (ret < 0)
		return -ENOMEM;

	/* We are VF, get VF number and replace pcidev to point to PF */
	dir = opendir(physfn_pcidev);
	if (!dir) {
		/*
		 * -ENOENT means that we are already in PF
		 *  and pcidev points to right PCI.
		 */
		ret = (errno == ENOENT) ? 0 : -ENOMEM;
		goto err_free;
	}

	p->valid_vf = true;
	vf.pcidev = p->pcidev;
	ret = fill_pci_info(d, &vf);
	if (ret)
		goto err_dir;

	while ((dent = readdir(dir))) {
		struct pci_info v = {};

		/* Only "virtfn<N>" entries are of interest */
		if (strncmp(dent->d_name, s, strlen(s)) ||
		    strlen(dent->d_name) == strlen(s))
			continue;

		ret = asprintf(&v.pcidev, "%s/%s", physfn_pcidev, dent->d_name);
		if (ret < 0) {
			ret = -ENOMEM;
			goto err_dir;
		}
		ret = fill_pci_info(d, &v);
		free(v.pcidev);
		if (ret) {
			ret = -ENOMEM;
			goto err_dir;
		}
		/*
		 * The link refers to us only if the complete address is the
		 * same; the function number alone may repeat across VFs.
		 */
		if (vf.domain == v.domain && vf.bus == v.bus &&
		    vf.slot == v.slot && vf.func == v.func) {
			p->vf = atoi(&dent->d_name[strlen(s)]);
			break;
		}
	}

	/* The VF path is not needed anymore, continue with the PF */
	free(p->pcidev);
	p->pcidev = physfn_pcidev;
	closedir(dir);

	return 0;

err_dir:
	closedir(dir);
err_free:
	free(physfn_pcidev);
	return ret;
}

/*
 * by_pci - build a stable name from the PCI location of the device
 * @d: device description; d->curr and d->prefix are read, d->name is written
 *
 * The name is "<prefix>[P<domain>]" followed by "s<slot number>" when a
 * physical slot was found or "p<bus>s<slot>" otherwise, then "f<function>"
 * for non-zero functions and multi-function devices, and finally
 * "v<vf number>" for virtual functions.
 *
 * Returns 0 and stores a newly allocated string in d->name on success.
 * On failure a negative errno value is returned and d->name is NULL.
 */
static int by_pci(struct data *d)
{
	char domain[16] = "", func[16] = "", vfn[16] = "";
	struct pci_info p = {};
	char location[32];
	char buf[256] = {};
	char *subsystem;
	ssize_t len;
	char *subs;
	int ret;

	ret = asprintf(&subsystem, "/sys/class/infiniband/%s/device/subsystem",
		      d->curr);
	if (ret < 0)
		return -ENOMEM;

	/*
	 * Offer the whole buffer to readlink(): a result equal to the buffer
	 * size then tells us the target may have been truncated.
	 */
	len = readlink(subsystem, buf, sizeof(buf));
	if (len < 0 || (size_t)len >= sizeof(buf)) {
		ret = -EINVAL;
		goto out;
	}
	buf[len] = 0;

	subs = basename(buf);
	if (strcmp(subs, "pci")) {
		/* Bail out on virtual devices */
		pr_dbg("%s: Non-PCI device (%s) was detected\n", d->curr, subs);
		ret = -EINVAL;
		goto out;
	}

	/* Real devices */
	ret = asprintf(&p.pcidev, "/sys/class/infiniband/%s/device", d->curr);
	if (ret < 0) {
		ret = -ENOMEM;
		p.pcidev = NULL;
		goto out;
	}

	ret = get_virtfn_info(d, &p);
	if (ret)
		goto out;

	ret = fill_pci_info(d, &p);
	if (ret) {
		pr_err("%s: Failed to fill PCI device information\n", d->curr);
		goto out;
	}

	/*
	 * Format every optional component into its own small buffer (each is
	 * large enough for the widest unsigned int) and let asprintf() size
	 * the final string, so no fixed name length can be overrun.
	 */
	if (p.domain > 0)
		snprintf(domain, sizeof(domain), "P%u", p.domain);

	if (p.sun > 0)
		snprintf(location, sizeof(location), "s%u", p.sun);
	else
		snprintf(location, sizeof(location), "p%us%u", p.bus, p.slot);

	if (p.func > 0 || is_pci_multifunction(p.pcidev)) {
		snprintf(func, sizeof(func), "f%u", p.func);
		if (p.valid_vf)
			snprintf(vfn, sizeof(vfn), "v%u", p.vf);
	}

	ret = asprintf(&d->name, "%s%s%s%s%s", d->prefix, domain, location,
		       func, vfn);
	if (ret < 0) {
		d->name = NULL;
		ret = -ENOMEM;
		goto out;
	}
	ret = 0;
out:
	free(p.pcidev);
	free(subsystem);
	if (ret) {
		free(d->name);
		d->name = NULL;
	}

	return ret;
}

/*
 * by_guid - build a stable name from the node GUID
 * @d: device description; d->node_guid and d->prefix are read, d->name is
 *     written
 *
 * The name is "<prefix>x" followed by the 16 hexadecimal digits of the GUID,
 * most significant digit first.
 *
 * Returns 0 and stores a newly allocated string in d->name on success.
 * Returns -ENOENT when the device has no GUID and -ENOMEM when the name
 * cannot be allocated; d->name is NULL in both cases.
 */
static int by_guid(struct data *d)
{
	uint64_t guid = d->node_guid;
	int ret;

	if (!guid) {
		/* virtual devices start without GUID */
		d->name = NULL;
		return -ENOENT;
	}

	/*
	 * Extract the 16-bit groups with shifts so that the result does not
	 * depend on the byte order of the host.
	 */
	ret = asprintf(&d->name, "%sx%04x%04x%04x%04x", d->prefix,
		       (unsigned int)((guid >> 48) & 0xffff),
		       (unsigned int)((guid >> 32) & 0xffff),
		       (unsigned int)((guid >> 16) & 0xffff),
		       (unsigned int)(guid & 0xffff));
	if (ret < 0) {
		d->name = NULL;
		return -ENOMEM;
	}

	return 0;
}

/*
 * device_rename - ask the kernel to rename an RDMA device
 * @nl: netlink socket used to send the request
 * @d: d->idx selects the device, d->name is the requested new name
 *
 * Builds an RDMA_NLDEV_CMD_SET message carrying the device index and the
 * new name and sends it.
 *
 * Returns 0 when the message was sent, a negative value otherwise.
 */
static int device_rename(struct nl_sock *nl, struct data *d)
{
	struct nlmsghdr *hdr;
	struct nl_msg *msg;
	int ret = -1;

	msg = nlmsg_alloc();
	if (!msg)
		return -ENOMEM;

	hdr = nlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_SET),
			0, 0);
	if (!hdr) {
		ret = -ENOMEM;
		goto nla_put_failure;
	}

	/* The NLA_PUT_* macros jump to nla_put_failure when they fail */
	NLA_PUT_U32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, d->idx);
	NLA_PUT_STRING(msg, RDMA_NLDEV_ATTR_DEV_NAME, d->name);
	ret = nl_send_auto(nl, msg);
	/* The message is ours to release whether or not sending worked */
nla_put_failure:
	nlmsg_free(msg);
	return (ret < 0) ? ret : 0;
}

/*
 * get_nldata_cb - netlink callback collecting data of the requested device
 * @msg: one device message of the reply
 * @data: the struct data to fill; its curr field names the wanted device
 *
 * Messages describing other devices are skipped with NL_OK. For the wanted
 * device the prefix (protocol string, or "rdma" when it is not available),
 * the index and the node GUID are stored and parsing is stopped.
 *
 * When the prefix cannot be allocated, d->prefix is NULL and d->idx is left
 * untouched so that the caller can detect the failure.
 */
static int get_nldata_cb(struct nl_msg *msg, void *data)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX] = {};
	struct nlmsghdr *hdr = nlmsg_hdr(msg);
	struct data *d = data;
	int ret;

	ret = nlmsg_parse(hdr, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, rdmanl_policy);
	if (ret < 0)
		return NL_STOP;

	if (!tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
	    !tb[RDMA_NLDEV_ATTR_NODE_GUID])
		return NL_STOP;

	/* Not the device we were asked about, keep going */
	if (strcmp(d->curr, nla_get_string(tb[RDMA_NLDEV_ATTR_DEV_NAME])))
		return NL_OK;

	if (tb[RDMA_NLDEV_ATTR_DEV_PROTOCOL])
		d->prefix = strdup(
			nla_get_string(tb[RDMA_NLDEV_ATTR_DEV_PROTOCOL]));
	/*
	 * strdup() reports failure with NULL, which keeps d->prefix in a
	 * well defined state for the caller to test and free.
	 */
	if (!d->prefix)
		d->prefix = strdup("rdma");
	if (!d->prefix)
		return NL_STOP;

	d->idx = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	d->node_guid = nla_get_u64(tb[RDMA_NLDEV_ATTR_NODE_GUID]);
	return NL_STOP;
}

/*
 * Naming policies. The values are bit flags so that str2policy() can
 * return a combination of them.
 */
enum name_policy {
	/* Keep the kernel provided name */
	NAME_KERNEL = 1 << 0,
	/* Name the device with by_pci() */
	NAME_PCI = 1 << 1,
	/* Name the device with by_guid() */
	NAME_GUID = 1 << 2,
	/* Name the device with by_onboard() */
	NAME_ONBOARD = 1 << 3,
	/* Returned by str2policy() for an unknown policy string */
	NAME_ERROR = 1 << 8
};

/*
 * str2policy - translate a policy name into enum name_policy flags
 * @np: policy name as given on the command line, may be NULL
 *
 * "NAME_FALLBACK" maps to the combination NAME_ONBOARD | NAME_PCI.
 *
 * Returns the matching flag(s), or NAME_ERROR when @np is NULL or unknown.
 */
static int str2policy(const char *np)
{
	static const struct {
		const char *name;
		int policy;
	} policies[] = {
		{ "NAME_KERNEL", NAME_KERNEL },
		{ "NAME_PCI", NAME_PCI },
		{ "NAME_GUID", NAME_GUID },
		{ "NAME_ONBOARD", NAME_ONBOARD },
		{ "NAME_FALLBACK", NAME_ONBOARD | NAME_PCI },
	};
	size_t i;

	if (!np)
		return NAME_ERROR;

	for (i = 0; i < sizeof(policies) / sizeof(policies[0]); i++) {
		if (!strcmp(np, policies[i].name))
			return policies[i].policy;
	}

	return NAME_ERROR;
}

/*
 * Usage: <program> [-v] <device> <policy>
 *
 * Renames the RDMA device <device> according to <policy> and prints the new
 * name on stdout. "-v" enables debug messages.
 *
 * Exits with 0 on success (or when the policy is NAME_KERNEL and nothing has
 * to be done) and with 1 on any failure.
 */
int main(int argc, char **argv)
{
	struct data d = { .idx = -1 };
	struct nl_sock *nl;
	int ret = -1;
	int np, opt;

	while ((opt = getopt(argc, argv, "v")) >= 0) {
		switch (opt) {
		case 'v':
			debug_mode = true;
			break;
		default:
			goto err;
		}
	}

	argc -= optind;
	argv += optind;

	/*
	 * Count the positional arguments only after the options are consumed,
	 * otherwise "-v <device>" would pass the check without a policy.
	 */
	if (argc < 2)
		goto err;

	d.curr = argv[0];

	np = str2policy(argv[1]);
	if (np & NAME_ERROR) {
		pr_err("%s: Unknown policy %s\n", d.curr, argv[1]);
		goto err;
	}

	pr_dbg("%s: Requested policy is %s\n", d.curr, argv[1]);

	if (np & NAME_KERNEL) {
		pr_dbg("%s: Leave kernel names, do nothing\n", d.curr);
		/* Do nothing */
		exit(0);
	}

	nl = rdmanl_socket_alloc();
	if (!nl) {
		pr_err("%s: Failed to allocate netlink socket\n", d.curr);
		goto err;
	}

	if (rdmanl_get_devices(nl, get_nldata_cb, &d)) {
		pr_err("%s: Failed to connect to NETLINK_RDMA\n", d.curr);
		goto out;
	}

	if (d.idx == -1 || !d.prefix) {
		pr_err("%s: Failed to get current device name and index\n",
		       d.curr);
		goto out;
	}

	/* Try the requested naming schemes until one of them succeeds */
	ret = -1;
	if (np & NAME_ONBOARD)
		ret = by_onboard(&d);
	if (ret && (np & NAME_PCI))
		ret = by_pci(&d);
	if (ret && (np & NAME_GUID))
		ret = by_guid(&d);
	if (ret)
		goto out;

	ret = device_rename(nl, &d);
	if (ret) {
		pr_err("%s: Device rename to %s failed with error %d\n", d.curr,
		       d.name, ret);
		goto out;
	}
	pr_dbg("%s: Successfully renamed device to be %s\n", d.curr, d.name);

	printf("%s\n", d.name);

out:
	/* d.name is NULL unless one of the naming helpers succeeded */
	free(d.name);
	free(d.prefix);
	nl_socket_free(nl);
err:
	ret = (ret) ? 1 : 0;
	exit(ret);
}