/* SPDX-License-Identifier: MIT */ /* * io_uring_register.c * * Description: Unit tests for the io_uring_register system call. * * Copyright 2019, Red Hat, Inc. * Author: Jeff Moyer */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "../src/syscall.h" static int pagesize; static rlim_t mlock_limit; static int devnull; int expect_fail(int fd, unsigned int opcode, void *arg, unsigned int nr_args, int error) { int ret; printf("io_uring_register(%d, %u, %p, %u)\n", fd, opcode, arg, nr_args); ret = __sys_io_uring_register(fd, opcode, arg, nr_args); if (ret != -1) { int ret2 = 0; printf("expected %s, but call succeeded\n", strerror(error)); if (opcode == IORING_REGISTER_BUFFERS) { ret2 = __sys_io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0); } else if (opcode == IORING_REGISTER_FILES) { ret2 = __sys_io_uring_register(fd, IORING_UNREGISTER_FILES, 0, 0); } if (ret2) { printf("internal error: failed to unregister\n"); exit(1); } return 1; } if (errno != error) { printf("expected %d, got %d\n", error, errno); return 1; } return 0; } int new_io_uring(int entries, struct io_uring_params *p) { int fd; fd = __sys_io_uring_setup(entries, p); if (fd < 0) { perror("io_uring_setup"); exit(1); } return fd; } #define MAXFDS (UINT_MAX * sizeof(int)) void * map_filebacked(size_t size) { int fd, ret; void *addr; char template[32] = "io_uring_register-test-XXXXXXXX"; fd = mkstemp(template); if (fd < 0) { perror("mkstemp"); return NULL; } unlink(template); ret = ftruncate(fd, size); if (ret < 0) { perror("ftruncate"); close(fd); return NULL; } addr = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (addr == MAP_FAILED) { perror("mmap"); close(fd); return NULL; } close(fd); return addr; } /* * NOTE: this is now limited by SCM_MAX_FD (253). Keep the code for now, * but probably should augment it to test 253 and 254, specifically. */ int test_max_fds(int uring_fd) { int status = 1; int ret; void *fd_as; /* file descriptor address space */ int fdtable_fd; /* fd for the file that will be mapped over and over */ int io_fd; /* the valid fd for I/O -- /dev/null */ int *fds; /* used to map the file into the address space */ char template[32] = "io_uring_register-test-XXXXXXXX"; unsigned long long i, nr_maps, nr_fds; /* * First, mmap anonymous the full size. That will guarantee the * mapping will fit in the memory area selected by mmap. Then, * over-write that mapping using a file-backed mapping, 128MiB at * a time using MAP_FIXED. */ fd_as = mmap(NULL, UINT_MAX * sizeof(int), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (fd_as == MAP_FAILED) { if (errno == ENOMEM) { printf("Not enough memory for this test, skipping\n"); return 0; } perror("mmap fd_as"); exit(1); } printf("allocated %zu bytes of address space\n", UINT_MAX * sizeof(int)); fdtable_fd = mkstemp(template); if (fdtable_fd < 0) { perror("mkstemp"); exit(1); } unlink(template); ret = ftruncate(fdtable_fd, 128*1024*1024); if (ret < 0) { perror("ftruncate"); exit(1); } io_fd = open("/dev/null", O_RDWR); if (io_fd < 0) { perror("open /dev/null"); exit(1); } fds = mmap(fd_as, 128*1024*1024, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED, fdtable_fd, 0); if (fds == MAP_FAILED) { perror("mmap fdtable"); exit(1); } /* fill the fd table */ nr_fds = 128*1024*1024 / sizeof(int); for (i = 0; i < nr_fds; i++) fds[i] = io_fd; /* map the file through the rest of the address space */ nr_maps = (UINT_MAX * sizeof(int)) / (128*1024*1024); for (i = 0; i < nr_maps; i++) { fds = &fds[nr_fds]; /* advance fds by 128MiB */ fds = mmap(fds, 128*1024*1024, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED, fdtable_fd, 0); if (fds == MAP_FAILED) { printf("mmap failed at offset %lu\n", (unsigned long)((char *)fd_as - (char *)fds)); exit(1); } } /* Now fd_as points to the file descriptor array. */ /* * We may not be able to map all of these files. Let's back off * until success. */ nr_fds = UINT_MAX; while (nr_fds) { ret = __sys_io_uring_register(uring_fd, IORING_REGISTER_FILES, fd_as, nr_fds); if (ret != 0) { nr_fds /= 2; continue; } printf("io_uring_register(%d, IORING_REGISTER_FILES, %p, %llu)" "...succeeded\n", uring_fd, fd_as, nr_fds); status = 0; printf("io_uring_register(%d, IORING_UNREGISTER_FILES, 0, 0)...", uring_fd); ret = __sys_io_uring_register(uring_fd, IORING_UNREGISTER_FILES, 0, 0); if (ret < 0) { ret = errno; printf("failed\n"); errno = ret; perror("io_uring_register UNREGISTER_FILES"); exit(1); } printf("succeeded\n"); break; } close(io_fd); close(fdtable_fd); ret = munmap(fd_as, UINT_MAX * sizeof(int)); if (ret != 0) { printf("munmap(%zu) failed\n", UINT_MAX * sizeof(int)); exit(1); } return status; } int test_memlock_exceeded(int fd) { int ret; void *buf; struct iovec iov; /* if limit is larger than 2gb, just skip this test */ if (mlock_limit >= 2 * 1024 * 1024 * 1024ULL) return 0; iov.iov_len = mlock_limit * 2; buf = malloc(iov.iov_len); assert(buf); iov.iov_base = buf; while (iov.iov_len) { ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1); if (ret < 0) { if (errno == ENOMEM) { printf("io_uring_register of %zu bytes failed " "with ENOMEM (expected).\n", iov.iov_len); iov.iov_len /= 2; continue; } printf("expected success or EFAULT, got %d\n", errno); free(buf); return 1; } printf("successfully registered %zu bytes (%d).\n", iov.iov_len, ret); ret = __sys_io_uring_register(fd, IORING_UNREGISTER_BUFFERS, NULL, 0); if (ret != 0) { printf("error: unregister failed with %d\n", errno); free(buf); return 1; } break; } if (!iov.iov_len) printf("Unable to register buffers. Check memlock rlimit.\n"); free(buf); return 0; } int test_iovec_nr(int fd) { int i, ret, status = 0; unsigned int nr = UIO_MAXIOV + 1; struct iovec *iovs; void *buf; buf = malloc(pagesize); assert(buf); iovs = malloc(nr * sizeof(struct iovec)); assert(iovs); for (i = 0; i < nr; i++) { iovs[i].iov_base = buf; iovs[i].iov_len = pagesize; } status |= expect_fail(fd, IORING_REGISTER_BUFFERS, iovs, nr, EINVAL); /* reduce to UIO_MAXIOV */ nr--; printf("io_uring_register(%d, %u, %p, %u)\n", fd, IORING_REGISTER_BUFFERS, iovs, nr); ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, iovs, nr); if (ret != 0) { printf("expected success, got %d\n", errno); status = 1; } else __sys_io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0); free(buf); free(iovs); return status; } /* * io_uring limit is 1G. iov_len limit is ~OUL, I think */ int test_iovec_size(int fd) { unsigned int status = 0; int ret; struct iovec iov; void *buf; /* NULL pointer for base */ iov.iov_base = 0; iov.iov_len = 4096; status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT); /* valid base, 0 length */ iov.iov_base = &buf; iov.iov_len = 0; status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT); /* valid base, length exceeds size */ /* this requires an unampped page directly after buf */ buf = mmap(NULL, 2 * pagesize, PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); assert(buf != MAP_FAILED); ret = munmap(buf + pagesize, pagesize); assert(ret == 0); iov.iov_base = buf; iov.iov_len = 2 * pagesize; status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT); munmap(buf, pagesize); /* huge page */ buf = mmap(NULL, 2*1024*1024, PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS, -1, 0); if (buf == MAP_FAILED) { printf("Unable to map a huge page. Try increasing " "/proc/sys/vm/nr_hugepages by at least 1.\n"); printf("Skipping the hugepage test\n"); } else { /* * This should succeed, so long as RLIMIT_MEMLOCK is * not exceeded */ iov.iov_base = buf; iov.iov_len = 2*1024*1024; ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1); if (ret < 0) { if (errno == ENOMEM) printf("Unable to test registering of a huge " "page. Try increasing the " "RLIMIT_MEMLOCK resource limit by at " "least 2MB."); else { printf("expected success, got %d\n", errno); status = 1; } } else { printf("Success!\n"); ret = __sys_io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0); if (ret < 0) { perror("io_uring_unregister"); status = 1; } } } ret = munmap(iov.iov_base, iov.iov_len); assert(ret == 0); /* file-backed buffers -- not supported */ buf = map_filebacked(2*1024*1024); if (!buf) status = 1; iov.iov_base = buf; iov.iov_len = 2*1024*1024; printf("reserve file-backed buffers\n"); status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EOPNOTSUPP); munmap(buf, 2*1024*1024); /* bump up against the soft limit and make sure we get EFAULT * or whatever we're supposed to get. NOTE: this requires * running the test as non-root. */ if (getuid() != 0) status |= test_memlock_exceeded(fd); return status; } void dump_sqe(struct io_uring_sqe *sqe) { printf("\topcode: %d\n", sqe->opcode); printf("\tflags: 0x%.8x\n", sqe->flags); printf("\tfd: %d\n", sqe->fd); if (sqe->opcode == IORING_OP_POLL_ADD) printf("\tpoll_events: 0x%.8x\n", sqe->poll_events); } int ioring_poll(struct io_uring *ring, int fd, int fixed) { int ret; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; sqe = io_uring_get_sqe(ring); memset(sqe, 0, sizeof(*sqe)); sqe->opcode = IORING_OP_POLL_ADD; if (fixed) sqe->flags = IOSQE_FIXED_FILE; sqe->fd = fd; sqe->poll_events = POLLIN|POLLOUT; printf("io_uring_submit:\n"); dump_sqe(sqe); ret = io_uring_submit(ring); if (ret != 1) { printf("failed to submit poll sqe: %d.\n", errno); return 1; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("io_uring_wait_cqe failed with %d\n", ret); return 1; } ret = 0; if (cqe->res != POLLOUT) { printf("io_uring_wait_cqe: expected 0x%.8x, got 0x%.8x\n", POLLOUT, cqe->res); ret = 1; } io_uring_cqe_seen(ring, cqe); return ret; } int test_poll_ringfd(void) { int status = 0; int ret; int fd; struct io_uring ring; ret = io_uring_queue_init(1, &ring, 0); if (ret) { perror("io_uring_queue_init"); return 1; } fd = ring.ring_fd; /* try polling the ring fd */ status = ioring_poll(&ring, fd, 0); /* * now register the ring fd, and try the poll again. This should * fail, because the kernel does not allow registering of the * ring_fd. */ status |= expect_fail(fd, IORING_REGISTER_FILES, &fd, 1, EBADF); /* tear down queue */ io_uring_queue_exit(&ring); return status; } int main(int argc, char **argv) { int fd, ret; unsigned int status = 0; struct io_uring_params p; struct rlimit rlim; if (argc > 1) return 0; /* setup globals */ pagesize = getpagesize(); ret = getrlimit(RLIMIT_MEMLOCK, &rlim); if (ret < 0) { perror("getrlimit"); return 1; } mlock_limit = rlim.rlim_cur; printf("RELIMIT_MEMLOCK: %lu (%lu)\n", rlim.rlim_cur, rlim.rlim_max); devnull = open("/dev/null", O_RDWR); if (devnull < 0) { perror("open /dev/null"); exit(1); } /* invalid fd */ status |= expect_fail(-1, 0, NULL, 0, EBADF); /* valid fd that is not an io_uring fd */ status |= expect_fail(devnull, 0, NULL, 0, EOPNOTSUPP); /* invalid opcode */ memset(&p, 0, sizeof(p)); fd = new_io_uring(1, &p); ret = expect_fail(fd, ~0U, NULL, 0, EINVAL); if (ret) { /* if this succeeds, tear down the io_uring instance * and start clean for the next test. */ close(fd); fd = new_io_uring(1, &p); } /* IORING_REGISTER_BUFFERS */ status |= test_iovec_size(fd); status |= test_iovec_nr(fd); /* IORING_REGISTER_FILES */ status |= test_max_fds(fd); close(fd); /* uring poll on the uring fd */ status |= test_poll_ringfd(); if (!status) printf("PASS\n"); else printf("FAIL\n"); return status; }