/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define _GNU_SOURCE
#include <config.h>
#include <stdlib.h>
#include <string.h>
#include <glob.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <fcntl.h>
#include <dirent.h>
#include <errno.h>
#include <assert.h>
#include <fnmatch.h>
#include <sys/sysmacros.h>
#include <rdma/rdma_netlink.h>
#include <util/util.h>
#include "ibverbs.h"
#include <infiniband/cmd_write.h>
int abi_ver;
struct ibv_driver {
struct list_node entry;
const struct verbs_device_ops *ops;
};
static LIST_HEAD(driver_list);
static int try_access_device(const struct verbs_sysfs_dev *sysfs_dev)
{
struct stat cdev_stat;
char *devpath;
int ret;
if (asprintf(&devpath, RDMA_CDEV_DIR"/%s",
sysfs_dev->sysfs_name) < 0)
return ENOMEM;
ret = stat(devpath, &cdev_stat);
free(devpath);
return ret;
}
enum ibv_node_type decode_knode_type(unsigned int knode_type)
{
switch (knode_type) {
case RDMA_NODE_IB_CA:
return IBV_NODE_CA;
case RDMA_NODE_IB_SWITCH:
return IBV_NODE_SWITCH;
case RDMA_NODE_IB_ROUTER:
return IBV_NODE_ROUTER;
case RDMA_NODE_RNIC:
return IBV_NODE_RNIC;
case RDMA_NODE_USNIC:
return IBV_NODE_USNIC;
case RDMA_NODE_USNIC_UDP:
return IBV_NODE_USNIC_UDP;
case RDMA_NODE_UNSPECIFIED:
return IBV_NODE_UNSPECIFIED;
}
return IBV_NODE_UNKNOWN;
}
int setup_sysfs_uverbs(int uv_dirfd, const char *uverbs,
struct verbs_sysfs_dev *sysfs_dev)
{
unsigned int major;
unsigned int minor;
struct stat buf;
char value[32];
if (!check_snprintf(sysfs_dev->sysfs_name,
sizeof(sysfs_dev->sysfs_name), "%s", uverbs))
return -1;
if (stat(sysfs_dev->ibdev_path, &buf))
return -1;
sysfs_dev->time_created = buf.st_mtim;
if (ibv_read_sysfs_file_at(uv_dirfd, "dev", value,
sizeof(value)) < 0)
return -1;
if (sscanf(value, "%u:%u", &major, &minor) != 2)
return -1;
sysfs_dev->sysfs_cdev = makedev(major, minor);
if (ibv_read_sysfs_file_at(uv_dirfd, "abi_version", value,
sizeof(value)) > 0)
sysfs_dev->abi_ver = strtoul(value, NULL, 10);
return 0;
}
static int setup_sysfs_dev(int dirfd, const char *uverbs,
struct list_head *tmp_sysfs_dev_list)
{
struct verbs_sysfs_dev *sysfs_dev = NULL;
char value[32];
int uv_dirfd;
sysfs_dev = calloc(1, sizeof(*sysfs_dev));
if (!sysfs_dev)
return ENOMEM;
sysfs_dev->ibdev_idx = -1;
uv_dirfd = openat(dirfd, uverbs, O_RDONLY | O_DIRECTORY | O_CLOEXEC);
if (uv_dirfd == -1)
goto err_alloc;
if (ibv_read_sysfs_file_at(uv_dirfd, "ibdev", sysfs_dev->ibdev_name,
sizeof(sysfs_dev->ibdev_name)) < 0)
goto err_fd;
if (!check_snprintf(
sysfs_dev->ibdev_path, sizeof(sysfs_dev->ibdev_path),
"%s/class/infiniband/%s", ibv_get_sysfs_path(),
sysfs_dev->ibdev_name))
goto err_fd;
if (setup_sysfs_uverbs(uv_dirfd, uverbs, sysfs_dev))
goto err_fd;
if (ibv_read_ibdev_sysfs_file(value, sizeof(value), sysfs_dev,
"node_type") <= 0)
sysfs_dev->node_type = IBV_NODE_UNKNOWN;
else
sysfs_dev->node_type =
decode_knode_type(strtoul(value, NULL, 10));
if (try_access_device(sysfs_dev))
goto err_fd;
close(uv_dirfd);
list_add(tmp_sysfs_dev_list, &sysfs_dev->entry);
return 0;
err_fd:
close(uv_dirfd);
err_alloc:
free(sysfs_dev);
return 0;
}
static int find_sysfs_devs(struct list_head *tmp_sysfs_dev_list)
{
struct verbs_sysfs_dev *dev, *dev_tmp;
char class_path[IBV_SYSFS_PATH_MAX];
DIR *class_dir;
struct dirent *dent;
int ret = 0;
if (!check_snprintf(class_path, sizeof(class_path),
"%s/class/infiniband_verbs", ibv_get_sysfs_path()))
return ENOMEM;
class_dir = opendir(class_path);
if (!class_dir)
return ENOSYS;
while ((dent = readdir(class_dir))) {
if (dent->d_name[0] == '.')
continue;
ret = setup_sysfs_dev(dirfd(class_dir), dent->d_name,
tmp_sysfs_dev_list);
if (ret)
break;
}
closedir(class_dir);
if (ret) {
list_for_each_safe (tmp_sysfs_dev_list, dev, dev_tmp, entry) {
list_del(&dev->entry);
free(dev);
}
}
return ret;
}
void verbs_register_driver(const struct verbs_device_ops *ops)
{
struct ibv_driver *driver;
driver = malloc(sizeof *driver);
if (!driver) {
fprintf(stderr,
PFX "Warning: couldn't allocate driver for %s\n",
ops->name);
return;
}
driver->ops = ops;
list_add_tail(&driver_list, &driver->entry);
}
/* Match a single modalias value */
static bool match_modalias(const struct verbs_match_ent *ent, const char *value)
{
char pci_ma[100];
switch (ent->kind) {
case VERBS_MATCH_MODALIAS:
return fnmatch(ent->u.modalias, value, 0) == 0;
case VERBS_MATCH_PCI:
snprintf(pci_ma, sizeof(pci_ma), "pci:v%08Xd%08Xsv*",
ent->vendor, ent->device);
return fnmatch(pci_ma, value, 0) == 0;
default:
return false;
}
}
/* Search a null terminated table of verbs_match_ent's and return the one
* that matches the device the verbs sysfs device is bound to or NULL.
*/
static const struct verbs_match_ent *
match_modalias_device(const struct verbs_device_ops *ops,
struct verbs_sysfs_dev *sysfs_dev)
{
const struct verbs_match_ent *i;
if (!(sysfs_dev->flags & VSYSFS_READ_MODALIAS)) {
sysfs_dev->flags |= VSYSFS_READ_MODALIAS;
if (ibv_read_ibdev_sysfs_file(
sysfs_dev->modalias, sizeof(sysfs_dev->modalias),
sysfs_dev, "device/modalias") <= 0) {
sysfs_dev->modalias[0] = 0;
return NULL;
}
}
for (i = ops->match_table; i->kind != VERBS_MATCH_SENTINEL; i++)
if (match_modalias(i, sysfs_dev->modalias))
return i;
return NULL;
}
/* Match the device name itself */
static const struct verbs_match_ent *
match_name(const struct verbs_device_ops *ops,
struct verbs_sysfs_dev *sysfs_dev)
{
char name_ma[100];
const struct verbs_match_ent *i;
if (!check_snprintf(name_ma, sizeof(name_ma),
"rdma_device:N%s", sysfs_dev->ibdev_name))
return NULL;
for (i = ops->match_table; i->kind != VERBS_MATCH_SENTINEL; i++)
if (match_modalias(i, name_ma))
return i;
return NULL;
}
/* Match the driver id we get from netlink */
static const struct verbs_match_ent *
match_driver_id(const struct verbs_device_ops *ops,
struct verbs_sysfs_dev *sysfs_dev)
{
const struct verbs_match_ent *i;
if (sysfs_dev->driver_id == RDMA_DRIVER_UNKNOWN)
return NULL;
for (i = ops->match_table; i->kind != VERBS_MATCH_SENTINEL; i++)
if (i->kind == VERBS_MATCH_DRIVER_ID &&
i->u.driver_id == sysfs_dev->driver_id)
return i;
return NULL;
}
/* True if the provider matches the selected rdma sysfs device */
static bool match_device(const struct verbs_device_ops *ops,
struct verbs_sysfs_dev *sysfs_dev)
{
if (ops->match_table) {
sysfs_dev->match = match_driver_id(ops, sysfs_dev);
if (!sysfs_dev->match)
sysfs_dev->match = match_name(ops, sysfs_dev);
if (!sysfs_dev->match)
sysfs_dev->match =
match_modalias_device(ops, sysfs_dev);
}
if (ops->match_device) {
/* If a matching function is provided then it is called
* unconditionally after the table match above, it is
* responsible for determining if the device matches based on
* the match pointer and any other internal information.
*/
if (!ops->match_device(sysfs_dev))
return false;
} else {
/* With no match function, we must have a table match */
if (!sysfs_dev->match)
return false;
}
if (sysfs_dev->abi_ver < ops->match_min_abi_version ||
sysfs_dev->abi_ver > ops->match_max_abi_version) {
fprintf(stderr, PFX
"Warning: Driver %s does not support the kernel ABI of %u (supports %u to %u) for device %s\n",
ops->name, sysfs_dev->abi_ver,
ops->match_min_abi_version,
ops->match_max_abi_version,
sysfs_dev->ibdev_path);
return false;
}
return true;
}
static struct verbs_device *try_driver(const struct verbs_device_ops *ops,
struct verbs_sysfs_dev *sysfs_dev)
{
struct verbs_device *vdev;
struct ibv_device *dev;
if (!match_device(ops, sysfs_dev))
return NULL;
vdev = ops->alloc_device(sysfs_dev);
if (!vdev) {
fprintf(stderr, PFX "Fatal: couldn't allocate device for %s\n",
sysfs_dev->ibdev_path);
return NULL;
}
vdev->ops = ops;
atomic_init(&vdev->refcount, 1);
dev = &vdev->device;
assert(dev->_ops._dummy1 == NULL);
assert(dev->_ops._dummy2 == NULL);
dev->node_type = sysfs_dev->node_type;
switch (sysfs_dev->node_type) {
case IBV_NODE_CA:
case IBV_NODE_SWITCH:
case IBV_NODE_ROUTER:
dev->transport_type = IBV_TRANSPORT_IB;
break;
case IBV_NODE_RNIC:
dev->transport_type = IBV_TRANSPORT_IWARP;
break;
case IBV_NODE_USNIC:
dev->transport_type = IBV_TRANSPORT_USNIC;
break;
case IBV_NODE_USNIC_UDP:
dev->transport_type = IBV_TRANSPORT_USNIC_UDP;
break;
case IBV_NODE_UNSPECIFIED:
dev->transport_type = IBV_TRANSPORT_UNSPECIFIED;
break;
default:
dev->transport_type = IBV_TRANSPORT_UNKNOWN;
break;
}
strcpy(dev->dev_name, sysfs_dev->sysfs_name);
if (!check_snprintf(dev->dev_path, sizeof(dev->dev_path),
"%s/class/infiniband_verbs/%s",
ibv_get_sysfs_path(), sysfs_dev->sysfs_name))
goto err;
strcpy(dev->name, sysfs_dev->ibdev_name);
strcpy(dev->ibdev_path, sysfs_dev->ibdev_path);
vdev->sysfs = sysfs_dev;
return vdev;
err:
ops->uninit_device(vdev);
return NULL;
}
static struct verbs_device *try_drivers(struct verbs_sysfs_dev *sysfs_dev)
{
struct ibv_driver *driver;
struct verbs_device *dev;
/*
* Matching by driver_id takes priority over other match types, do it
* first.
*/
if (sysfs_dev->driver_id != RDMA_DRIVER_UNKNOWN) {
list_for_each (&driver_list, driver, entry) {
if (match_driver_id(driver->ops, sysfs_dev)) {
dev = try_driver(driver->ops, sysfs_dev);
if (dev)
return dev;
}
}
}
list_for_each(&driver_list, driver, entry) {
dev = try_driver(driver->ops, sysfs_dev);
if (dev)
return dev;
}
return NULL;
}
static int check_abi_version(void)
{
char value[8];
if (abi_ver)
return 0;
if (ibv_read_sysfs_file(ibv_get_sysfs_path(),
"class/infiniband_verbs/abi_version", value,
sizeof(value)) < 0) {
return ENOSYS;
}
abi_ver = strtol(value, NULL, 10);
if (abi_ver < IB_USER_VERBS_MIN_ABI_VERSION ||
abi_ver > IB_USER_VERBS_MAX_ABI_VERSION) {
fprintf(stderr, PFX "Fatal: kernel ABI version %d "
"doesn't match library version %d.\n",
abi_ver, IB_USER_VERBS_MAX_ABI_VERSION);
return ENOSYS;
}
return 0;
}
static void check_memlock_limit(void)
{
struct rlimit rlim;
if (!geteuid())
return;
if (getrlimit(RLIMIT_MEMLOCK, &rlim)) {
fprintf(stderr, PFX "Warning: getrlimit(RLIMIT_MEMLOCK) failed.");
return;
}
if (rlim.rlim_cur <= 32768)
fprintf(stderr, PFX "Warning: RLIMIT_MEMLOCK is %llu bytes.\n"
" This will severely limit memory registrations.\n",
(unsigned long long)rlim.rlim_cur);
}
static int same_sysfs_dev(struct verbs_sysfs_dev *sysfs1,
struct verbs_sysfs_dev *sysfs2)
{
if (strcmp(sysfs1->sysfs_name, sysfs2->sysfs_name) != 0)
return 0;
/* In netlink mode the idx is a globally unique ID */
if (sysfs1->ibdev_idx != sysfs2->ibdev_idx)
return 0;
if (sysfs1->ibdev_idx == -1 &&
ts_cmp(&sysfs1->time_created, &sysfs2->time_created, !=))
return 0;
return 1;
}
/* Match every ibv_sysfs_dev in the sysfs_list to a driver and add a new entry
* to device_list. Once matched to a driver the entry in sysfs_list is
* removed.
*/
static void try_all_drivers(struct list_head *sysfs_list,
struct list_head *device_list,
unsigned int *num_devices)
{
struct verbs_sysfs_dev *sysfs_dev;
struct verbs_sysfs_dev *tmp;
struct verbs_device *vdev;
list_for_each_safe(sysfs_list, sysfs_dev, tmp, entry) {
vdev = try_drivers(sysfs_dev);
if (vdev) {
list_del(&sysfs_dev->entry);
/* Ownership of sysfs_dev moves into vdev->sysfs */
list_add(device_list, &vdev->entry);
(*num_devices)++;
}
}
}
int ibverbs_get_device_list(struct list_head *device_list)
{
LIST_HEAD(sysfs_list);
struct verbs_sysfs_dev *sysfs_dev, *next_dev;
struct verbs_device *vdev, *tmp;
static int drivers_loaded;
unsigned int num_devices = 0;
int ret;
ret = find_sysfs_devs_nl(&sysfs_list);
if (ret) {
ret = find_sysfs_devs(&sysfs_list);
if (ret)
return -ret;
}
if (!list_empty(&sysfs_list)) {
ret = check_abi_version();
if (ret)
return -ret;
}
/* Remove entries from the sysfs_list that are already preset in the
* device_list, and remove entries from the device_list that are not
* present in the sysfs_list.
*/
list_for_each_safe(device_list, vdev, tmp, entry) {
struct verbs_sysfs_dev *old_sysfs = NULL;
list_for_each(&sysfs_list, sysfs_dev, entry) {
if (same_sysfs_dev(vdev->sysfs, sysfs_dev)) {
old_sysfs = sysfs_dev;
break;
}
}
if (old_sysfs) {
list_del(&old_sysfs->entry);
free(old_sysfs);
num_devices++;
} else {
list_del(&vdev->entry);
ibverbs_device_put(&vdev->device);
}
}
try_all_drivers(&sysfs_list, device_list, &num_devices);
if (list_empty(&sysfs_list) || drivers_loaded)
goto out;
load_drivers();
drivers_loaded = 1;
try_all_drivers(&sysfs_list, device_list, &num_devices);
out:
/* Anything left in sysfs_list was not assoicated with a
* driver.
*/
list_for_each_safe(&sysfs_list, sysfs_dev, next_dev, entry) {
if (getenv("IBV_SHOW_WARNINGS")) {
fprintf(stderr, PFX
"Warning: no userspace device-specific driver found for %s\n",
sysfs_dev->ibdev_name);
}
free(sysfs_dev);
}
return num_devices;
}
int ibverbs_init(void)
{
char *env_value;
if (getenv("RDMAV_FORK_SAFE") || getenv("IBV_FORK_SAFE"))
if (ibv_fork_init())
fprintf(stderr, PFX "Warning: fork()-safety requested "
"but init failed\n");
/* Backward compatibility for the mlx4 driver env */
env_value = getenv("MLX4_DEVICE_FATAL_CLEANUP");
if (env_value)
verbs_allow_disassociate_destroy = strcmp(env_value, "0") != 0;
if (getenv("RDMAV_ALLOW_DISASSOC_DESTROY"))
verbs_allow_disassociate_destroy = true;
if (!ibv_get_sysfs_path())
return -errno;
check_memlock_limit();
return 0;
}
void ibverbs_device_hold(struct ibv_device *dev)
{
struct verbs_device *verbs_device = verbs_get_device(dev);
atomic_fetch_add(&verbs_device->refcount, 1);
}
void ibverbs_device_put(struct ibv_device *dev)
{
struct verbs_device *verbs_device = verbs_get_device(dev);
if (atomic_fetch_sub(&verbs_device->refcount, 1) == 1) {
free(verbs_device->sysfs);
if (verbs_device->ops->uninit_device)
verbs_device->ops->uninit_device(verbs_device);
}
}