Blame man/io_uring_enter.2

Packit d3489f
.\" Copyright (C) 2019 Jens Axboe <axboe@kernel.dk>
Packit d3489f
.\" Copyright (C) 2019 Red Hat, Inc.
Packit d3489f
.\"
Packit d3489f
.\" SPDX-License-Identifier: LGPL-2.0-or-later
Packit d3489f
.\"
Packit d3489f
.TH IO_URING_ENTER 2 2019-01-22 "Linux" "Linux Programmer's Manual"
Packit d3489f
.SH NAME
Packit d3489f
io_uring_enter \- initiate and/or complete asynchronous I/O
Packit d3489f
.SH SYNOPSIS
Packit d3489f
.nf
Packit d3489f
.BR "#include <linux/io_uring.h>"
Packit d3489f
.PP
Packit d3489f
.BI "int io_uring_enter(unsigned int " fd ", unsigned int " to_submit ,
Packit d3489f
.BI "                   unsigned int " min_complete ", unsigned int " flags ,
Packit d3489f
.BI "                   sigset_t *" sig );
Packit d3489f
.fi
Packit d3489f
.PP
Packit d3489f
.SH DESCRIPTION
Packit d3489f
.PP
Packit d3489f
.BR io_uring_enter ()
Packit d3489f
is used to initiate and complete I/O using the shared submission and
Packit d3489f
completion queues set up by a call to
Packit d3489f
.BR io_uring_setup (2).
Packit d3489f
A single call can both submit new I/O and wait for completions of I/O
Packit d3489f
initiated by this call or previous calls to
Packit d3489f
.BR io_uring_enter ().
Packit d3489f
Packit d3489f
.I fd
Packit d3489f
is the file descriptor returned by
Packit d3489f
.BR io_uring_setup (2).
Packit d3489f
.I to_submit
Packit d3489f
specifies the number of I/Os to submit from the submission queue.  If
Packit d3489f
the
Packit d3489f
.B IORING_ENTER_GETEVENTS
Packit d3489f
bit is set in
Packit d3489f
.IR flags ,
Packit d3489f
then the system call will attempt to wait for
Packit d3489f
.I min_complete
Packit d3489f
event completions before returning.  If the io_uring instance was
Packit d3489f
configured for polling, by specifying
Packit d3489f
.B IORING_SETUP_IOPOLL
Packit d3489f
in the call to
Packit d3489f
.BR io_uring_setup (2),
Packit d3489f
then \fImin_complete\fR has a slightly different meaning.  Passing a value
Packit d3489f
of 0 instructs the kernel to return any events which are already complete,
Packit d3489f
without blocking.  If
Packit d3489f
.I min_complete
Packit d3489f
is a non-zero value, the kernel will still return immediately if any
Packit d3489f
completion events are available.  If no event completions are
Packit d3489f
available, then the call will poll either until one or more
Packit d3489f
completions become available, or until the process has exceeded its
Packit d3489f
scheduler time slice.
Packit d3489f
Packit d3489f
Note that, for interrupt driven I/O (where
Packit d3489f
.B IORING_SETUP_IOPOLL
Packit d3489f
was not specified in the call to
Packit d3489f
.BR io_uring_setup (2)),
Packit d3489f
an application may check the completion queue for event completions
Packit d3489f
without entering the kernel at all.
Packit d3489f
.PP
Packit d3489f
When the system call returns that a certain number of SQEs have been
Packit d3489f
consumed and submitted, it's safe to reuse SQE entries in the ring. This is
Packit d3489f
true even if the actual IO submission had to be punted to async context,
Packit d3489f
which means that the SQE may in fact not have been submitted yet. If the
Packit d3489f
kernel requires later use of a particular SQE entry, it will have made a
Packit d3489f
private copy of it.
Packit d3489f
Packit d3489f
.I sig
Packit d3489f
is a pointer to a signal mask (see
Packit d3489f
.BR sigprocmask (2));
Packit d3489f
if
Packit d3489f
.I sig
Packit d3489f
is not NULL,
Packit d3489f
.BR io_uring_enter ()
Packit d3489f
first replaces the current signal mask by the one pointed to by
Packit d3489f
.IR sig ,
Packit d3489f
then waits for events to become available in the completion queue, and
Packit d3489f
then restores the original signal mask.  The following
Packit d3489f
.BR io_uring_enter ()
Packit d3489f
call:
Packit d3489f
.PP
Packit d3489f
.in +4n
Packit d3489f
.EX
Packit d3489f
ret = io_uring_enter(fd, 0, 1, IORING_ENTER_GETEVENTS, &sig);
Packit d3489f
.EE
Packit d3489f
.in
Packit d3489f
.PP
Packit d3489f
is equivalent to
Packit d3489f
.I atomically
Packit d3489f
executing the following calls:
Packit d3489f
.PP
Packit d3489f
.in +4n
Packit d3489f
.EX
Packit d3489f
pthread_sigmask(SIG_SETMASK, &sig, &orig);
Packit d3489f
ret = io_uring_enter(fd, 0, 1, IORING_ENTER_GETEVENTS, NULL);
Packit d3489f
pthread_sigmask(SIG_SETMASK, &orig, NULL);
Packit d3489f
.EE
Packit d3489f
.in
Packit d3489f
.PP
Packit d3489f
See the description of
Packit d3489f
.BR pselect (2)
Packit d3489f
for an explanation of why the
Packit d3489f
.I sig
Packit d3489f
parameter is necessary.
Packit d3489f
Packit d3489f
Submission queue entries are represented using the following data
Packit d3489f
structure:
Packit d3489f
.PP
Packit d3489f
.in +4n
Packit d3489f
.EX
Packit d3489f
/*
Packit d3489f
 * IO submission data structure (Submission Queue Entry)
Packit d3489f
 */
Packit d3489f
struct io_uring_sqe {
Packit d3489f
    __u8    opcode;         /* type of operation for this sqe */
Packit d3489f
    __u8    flags;          /* IOSQE_ flags */
Packit d3489f
    __u16   ioprio;         /* ioprio for the request */
Packit d3489f
    __s32   fd;             /* file descriptor to do IO on */
Packit d3489f
    union {
Packit d3489f
        __u64   off;            /* offset into file */
Packit d3489f
        __u64   addr2;
Packit d3489f
    };
Packit d3489f
    union {
Packit d3489f
        __u64   addr;       /* pointer to buffer or iovecs */
Packit d3489f
        __u64   splice_off_in;
Packit d3489f
    };
Packit d3489f
    __u32   len;            /* buffer size or number of iovecs */
Packit d3489f
    union {
Packit d3489f
        __kernel_rwf_t  rw_flags;
Packit d3489f
        __u32    fsync_flags;
Packit d3489f
        __u16    poll_events;   /* compatibility */
Packit d3489f
        __u32    poll32_events; /* word-reversed for BE */
Packit d3489f
        __u32    sync_range_flags;
Packit d3489f
        __u32    msg_flags;
Packit d3489f
        __u32    timeout_flags;
Packit d3489f
        __u32    accept_flags;
Packit d3489f
        __u32    cancel_flags;
Packit d3489f
        __u32    open_flags;
Packit d3489f
        __u32    statx_flags;
Packit d3489f
        __u32    fadvise_advice;
Packit d3489f
        __u32    splice_flags;
Packit d3489f
    };
Packit d3489f
    __u64    user_data;     /* data to be passed back at completion time */
Packit d3489f
    union {
Packit d3489f
	struct {
Packit d3489f
	    /* index into fixed buffers, if used */
Packit d3489f
            union {
Packit d3489f
                /* index into fixed buffers, if used */
Packit d3489f
                __u16    buf_index;
Packit d3489f
                /* for grouped buffer selection */
Packit d3489f
                __u16    buf_group;
Packit d3489f
            };
Packit d3489f
	    /* personality to use, if used */
Packit d3489f
	    __u16    personality;
Packit d3489f
            __s32    splice_fd_in;
Packit d3489f
	};
Packit d3489f
        __u64    __pad2[3];
Packit d3489f
    };
Packit d3489f
};
Packit d3489f
.EE
Packit d3489f
.in
Packit d3489f
.PP
Packit d3489f
The
Packit d3489f
.I opcode
Packit d3489f
describes the operation to be performed.  It can be one of:
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_NOP
Packit d3489f
Do not perform any I/O.  This is useful for testing the performance of
Packit d3489f
the io_uring implementation itself.
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_READV
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_WRITEV
Packit d3489f
Vectored read and write operations, similar to
Packit d3489f
.BR preadv2 (2)
Packit d3489f
and
Packit d3489f
.BR pwritev2 (2).
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_READ_FIXED
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_WRITE_FIXED
Packit d3489f
Read from or write to pre-mapped buffers.  See
Packit d3489f
.BR io_uring_register (2)
Packit d3489f
for details on how to set up a context for fixed reads and writes.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_FSYNC
Packit d3489f
File sync.  See also
Packit d3489f
.BR fsync (2).
Packit d3489f
Note that, while I/O is initiated in the order in which it appears in
Packit d3489f
the submission queue, completions are unordered.  For example, an
Packit d3489f
application which places a write I/O followed by an fsync in the
Packit d3489f
submission queue cannot expect the fsync to apply to the write.  The
Packit d3489f
two operations execute in parallel, so the fsync may complete before
Packit d3489f
the write is issued to the storage.  The same is also true for
Packit d3489f
previously issued writes that have not completed prior to the fsync.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_POLL_ADD
Packit d3489f
Poll the
Packit d3489f
.I fd
Packit d3489f
specified in the submission queue entry for the events
Packit d3489f
specified in the
Packit d3489f
.I poll_events
Packit d3489f
field.  Unlike poll or epoll without
Packit d3489f
.BR EPOLLONESHOT ,
Packit d3489f
this interface always works in one shot mode.  That is, once the poll
Packit d3489f
operation is completed, it will have to be resubmitted.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_POLL_REMOVE
Packit d3489f
Remove an existing poll request.  If found, the
Packit d3489f
.I res
Packit d3489f
field of the
Packit d3489f
.I "struct io_uring_cqe"
Packit d3489f
will contain 0.  If not found,
Packit d3489f
.I res
Packit d3489f
will contain
Packit d3489f
.BR -ENOENT .
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_EPOLL_CTL
Packit d3489f
Add, remove or modify entries in the interest list of
Packit d3489f
.BR epoll (7).
Packit d3489f
See
Packit d3489f
.BR epoll_ctl (2)
Packit d3489f
for details of the system call.
Packit d3489f
.I fd
Packit d3489f
holds the file descriptor that represents the epoll instance,
Packit d3489f
.I addr
Packit d3489f
holds the file descriptor to add, remove or modify,
Packit d3489f
.I len
Packit d3489f
holds the operation (EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD) to perform and,
Packit d3489f
.I off
Packit d3489f
holds a pointer to the
Packit d3489f
.I epoll_event
Packit d3489f
structure. Available since 5.6.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_SYNC_FILE_RANGE
Packit d3489f
Issue the equivalent of a \fBsync_file_range\fR(2) on the file descriptor. The
Packit d3489f
.I fd
Packit d3489f
field is the file descriptor to sync, the
Packit d3489f
.I off
Packit d3489f
field holds the offset in bytes, the
Packit d3489f
.I len
Packit d3489f
field holds the length in bytes, and the
Packit d3489f
.I sync_range_flags
Packit d3489f
field holds the flags for the command. See also
Packit d3489f
.BR sync_file_range (2)
Packit d3489f
for the general description of the related system call. Available since 5.2.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_SENDMSG
Packit d3489f
Issue the equivalent of a
Packit d3489f
.BR sendmsg (2)
Packit d3489f
system call.
Packit d3489f
.I fd
Packit d3489f
must be set to the socket file descriptor,
Packit d3489f
.I addr
Packit d3489f
must contain a pointer to the msghdr structure, and
Packit d3489f
.I msg_flags
Packit d3489f
holds the flags associated with the system call. See also
Packit d3489f
.BR sendmsg (2)
Packit d3489f
for the general description of the related system call. Available since 5.3.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_RECVMSG
Packit d3489f
Works just like IORING_OP_SENDMSG, except for
Packit d3489f
.BR recvmsg (2)
Packit d3489f
instead. See the description of IORING_OP_SENDMSG. Available since 5.3.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_SEND
Packit d3489f
Issue the equivalent of a
Packit d3489f
.BR send (2)
Packit d3489f
system call.
Packit d3489f
.I fd
Packit d3489f
must be set to the socket file descriptor,
Packit d3489f
.I addr
Packit d3489f
must contain a pointer to the buffer,
Packit d3489f
.I len
Packit d3489f
denotes the length of the buffer to send, and
Packit d3489f
.I msg_flags
Packit d3489f
holds the flags associated with the system call. See also
Packit d3489f
.BR send (2)
Packit d3489f
for the general description of the related system call. Available since 5.6.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_RECV
Packit d3489f
Works just like IORING_OP_SEND, except for
Packit d3489f
.BR recv (2)
Packit d3489f
instead. See the description of IORING_OP_SEND. Available since 5.6.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_TIMEOUT
Packit d3489f
This command will register a timeout operation. The
Packit d3489f
.I addr
Packit d3489f
field must contain a pointer to a struct timespec64 structure,
Packit d3489f
.I len
Packit d3489f
must contain 1 to signify one timespec64 structure,
Packit d3489f
.I timeout_flags
Packit d3489f
may contain IORING_TIMEOUT_ABS
Packit d3489f
for an absolute timeout value, or 0 for a relative timeout.
Packit d3489f
.I off
Packit d3489f
may contain a completion event count. A timeout
Packit d3489f
will trigger a wakeup event on the completion ring for anyone waiting for
Packit d3489f
events. A timeout condition is met when either the specified timeout expires,
Packit d3489f
or the specified number of events have completed. Either condition will
Packit d3489f
trigger the event. If set to 0, completed events are not counted, which
Packit d3489f
effectively acts like a timer. io_uring timeouts use the
Packit d3489f
.B CLOCK_MONOTONIC
Packit d3489f
clock source. The request will complete with
Packit d3489f
.I -ETIME
Packit d3489f
if the timeout got completed through expiration of the timer, or
Packit d3489f
.I 0
Packit d3489f
if the timeout got completed through requests completing on their own. If
Packit d3489f
the timeout was cancelled before it expired, the request will complete with
Packit d3489f
.IR -ECANCELED .
Packit d3489f
Available since 5.4.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_TIMEOUT_REMOVE
Packit d3489f
Attempt to remove an existing timeout operation.
Packit d3489f
.I addr
Packit d3489f
must contain the
Packit d3489f
.I user_data
Packit d3489f
field of the previously issued timeout operation. If the specified timeout
Packit d3489f
request is found and cancelled successfully, this request will terminate
Packit d3489f
with a result value of
Packit d3489f
.IR 0 .
Packit d3489f
If the timeout request was found but expiration was already in progress,
Packit d3489f
this request will terminate with a result value of
Packit d3489f
.IR -EBUSY .
Packit d3489f
If the timeout request wasn't found, the request will terminate with a result
Packit d3489f
value of
Packit d3489f
.IR -ENOENT .
Packit d3489f
Available since 5.5.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_ACCEPT
Packit d3489f
Issue the equivalent of an
Packit d3489f
.BR accept4 (2)
Packit d3489f
system call.
Packit d3489f
.I fd
Packit d3489f
must be set to the socket file descriptor,
Packit d3489f
.I addr
Packit d3489f
must contain the pointer to the sockaddr structure, and
Packit d3489f
.I addr2
Packit d3489f
must contain a pointer to the socklen_t addrlen field. See also
Packit d3489f
.BR accept4 (2)
Packit d3489f
for the general description of the related system call. Available since 5.5.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_ASYNC_CANCEL
Packit d3489f
Attempt to cancel an already issued request.
Packit d3489f
.I addr
Packit d3489f
must contain the
Packit d3489f
.I user_data
Packit d3489f
field of the request that should be cancelled. The cancellation request will
Packit d3489f
complete with one of the following result codes. If found, the
Packit d3489f
.I res
Packit d3489f
field of the cqe will contain 0. If not found,
Packit d3489f
.I res
Packit d3489f
will contain -ENOENT. If found and cancellation was attempted, the
Packit d3489f
.I res
Packit d3489f
field will contain -EALREADY. In this case, the request may or may not
Packit d3489f
terminate. In general, requests that are interruptible (like socket IO) will
Packit d3489f
get cancelled, while disk IO requests cannot be cancelled if already started.
Packit d3489f
Available since 5.5.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_LINK_TIMEOUT
Packit d3489f
This request must be linked with another request through
Packit d3489f
.I IOSQE_IO_LINK
Packit d3489f
which is described below. Unlike
Packit d3489f
.IR IORING_OP_TIMEOUT ,
Packit d3489f
.I IORING_OP_LINK_TIMEOUT
Packit d3489f
acts on the linked request, not the completion queue. The format of the command
Packit d3489f
is otherwise like
Packit d3489f
.IR IORING_OP_TIMEOUT ,
Packit d3489f
except there's no completion event count as it's tied to a specific request.
Packit d3489f
If used, the timeout specified in the command will cancel the linked command,
Packit d3489f
unless the linked command completes before the timeout. The timeout will
Packit d3489f
complete with
Packit d3489f
.I -ETIME
Packit d3489f
if the timer expired and cancellation of the linked request was attempted, or
Packit d3489f
.I -ECANCELED
Packit d3489f
if the timer got cancelled because of completion of the linked request. Like
Packit d3489f
.B IORING_OP_TIMEOUT
Packit d3489f
the clock source used is
Packit d3489f
.BR CLOCK_MONOTONIC .
Packit d3489f
Available since 5.5.
Packit d3489f
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_CONNECT
Packit d3489f
Issue the equivalent of a
Packit d3489f
.BR connect (2)
Packit d3489f
system call.
Packit d3489f
.I fd
Packit d3489f
must be set to the socket file descriptor,
Packit d3489f
.I addr
Packit d3489f
must contain the const pointer to the sockaddr structure, and
Packit d3489f
.I off
Packit d3489f
must contain the socklen_t addrlen field. See also
Packit d3489f
.BR connect (2)
Packit d3489f
for the general description of the related system call. Available since 5.5.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_FALLOCATE
Packit d3489f
Issue the equivalent of a
Packit d3489f
.BR fallocate (2)
Packit d3489f
system call.
Packit d3489f
.I fd
Packit d3489f
must be set to the file descriptor,
Packit d3489f
.I off
Packit d3489f
must contain the offset on which to operate, and
Packit d3489f
.I len
Packit d3489f
must contain the length. See also
Packit d3489f
.BR fallocate (2)
Packit d3489f
for the general description of the related system call. Available since 5.6.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_FADVISE
Packit d3489f
Issue the equivalent of a
Packit d3489f
.BR posix_fadvise (2)
Packit d3489f
system call.
Packit d3489f
.I fd
Packit d3489f
must be set to the file descriptor,
Packit d3489f
.I off
Packit d3489f
must contain the offset on which to operate,
Packit d3489f
.I len
Packit d3489f
must contain the length, and
Packit d3489f
.I fadvise_advice
Packit d3489f
must contain the advice associated with the operation. See also
Packit d3489f
.BR posix_fadvise (2)
Packit d3489f
for the general description of the related system call. Available since 5.6.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_MADVISE
Packit d3489f
Issue the equivalent of a
Packit d3489f
.BR madvise (2)
Packit d3489f
system call.
Packit d3489f
.I addr
Packit d3489f
must contain the address to operate on,
Packit d3489f
.I len
Packit d3489f
must contain the length on which to operate,
Packit d3489f
and
Packit d3489f
.I fadvise_advice
Packit d3489f
must contain the advice associated with the operation. See also
Packit d3489f
.BR madvise (2)
Packit d3489f
for the general description of the related system call. Available since 5.6.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_OPENAT
Packit d3489f
Issue the equivalent of a
Packit d3489f
.BR openat (2)
Packit d3489f
system call.
Packit d3489f
.I fd
Packit d3489f
is the
Packit d3489f
.I dirfd
Packit d3489f
argument,
Packit d3489f
.I addr
Packit d3489f
must contain a pointer to the
Packit d3489f
.I *pathname
Packit d3489f
argument,
Packit d3489f
.I open_flags
Packit d3489f
should contain any flags passed in, and
Packit d3489f
.I mode
Packit d3489f
is the access mode of the file (passed in the \fIlen\fR field). See also
Packit d3489f
.BR openat (2)
Packit d3489f
for the general description of the related system call. Available since 5.6.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_OPENAT2
Packit d3489f
Issue the equivalent of a
Packit d3489f
.BR openat2 (2)
Packit d3489f
system call.
Packit d3489f
.I fd
Packit d3489f
is the
Packit d3489f
.I dirfd
Packit d3489f
argument,
Packit d3489f
.I addr
Packit d3489f
must contain a pointer to the
Packit d3489f
.I *pathname
Packit d3489f
argument,
Packit d3489f
.I len
Packit d3489f
should contain the size of the open_how structure, and
Packit d3489f
.I off
Packit d3489f
should be set to the address of the open_how structure. See also
Packit d3489f
.BR openat2 (2)
Packit d3489f
for the general description of the related system call. Available since 5.6.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_CLOSE
Packit d3489f
Issue the equivalent of a
Packit d3489f
.BR close (2)
Packit d3489f
system call.
Packit d3489f
.I fd
Packit d3489f
is the file descriptor to be closed. See also
Packit d3489f
.BR close (2)
Packit d3489f
for the general description of the related system call. Available since 5.6.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_STATX
Packit d3489f
Issue the equivalent of a
Packit d3489f
.BR statx (2)
Packit d3489f
system call.
Packit d3489f
.I fd
Packit d3489f
is the
Packit d3489f
.I dirfd
Packit d3489f
argument,
Packit d3489f
.I addr
Packit d3489f
must contain a pointer to the
Packit d3489f
.I *pathname
Packit d3489f
string,
Packit d3489f
.I statx_flags
Packit d3489f
is the
Packit d3489f
.I flags
Packit d3489f
argument,
Packit d3489f
.I len
Packit d3489f
should be the
Packit d3489f
.I mask
Packit d3489f
argument, and
Packit d3489f
.I off
Packit d3489f
must contain a pointer to the
Packit d3489f
.I statxbuf
Packit d3489f
to be filled in. See also
Packit d3489f
.BR statx (2)
Packit d3489f
for the general description of the related system call. Available since 5.6.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_READ
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_WRITE
Packit d3489f
Issue the equivalent of a
Packit d3489f
.BR read (2)
Packit d3489f
or
Packit d3489f
.BR write (2)
Packit d3489f
system call.
Packit d3489f
.I fd
Packit d3489f
is the file descriptor to be operated on,
Packit d3489f
.I addr
Packit d3489f
contains the buffer in question, and
Packit d3489f
.I len
Packit d3489f
contains the length of the IO operation. These are non-vectored versions of the
Packit d3489f
.B IORING_OP_READV
Packit d3489f
and
Packit d3489f
.B IORING_OP_WRITEV
Packit d3489f
opcodes. See also
Packit d3489f
.BR read (2)
Packit d3489f
and
Packit d3489f
.BR write (2)
Packit d3489f
for the general description of the related system call. Available since 5.6.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_SPLICE
Packit d3489f
Issue the equivalent of a
Packit d3489f
.BR splice (2)
Packit d3489f
system call.
Packit d3489f
.I splice_fd_in
Packit d3489f
is the file descriptor to read from,
Packit d3489f
.I splice_off_in
Packit d3489f
is an offset to read from,
Packit d3489f
.I fd
Packit d3489f
is the file descriptor to write to,
Packit d3489f
.I off
Packit d3489f
is an offset from which to start writing to. A sentinel value of -1 is used
Packit d3489f
to pass the equivalent of a NULL for the offsets to
Packit d3489f
.BR splice (2).
Packit d3489f
.I len
Packit d3489f
contains the number of bytes to copy.
Packit d3489f
.I splice_flags
Packit d3489f
contains a bit mask for the flag field associated with the system call.
Packit d3489f
Please note that one of the file descriptors must refer to a pipe.
Packit d3489f
See also
Packit d3489f
.BR splice (2)
Packit d3489f
for the general description of the related system call. Available since 5.7.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_TEE
Packit d3489f
Issue the equivalent of a
Packit d3489f
.BR tee (2)
Packit d3489f
system call.
Packit d3489f
.I splice_fd_in
Packit d3489f
is the file descriptor to read from,
Packit d3489f
.I fd
Packit d3489f
is the file descriptor to write to,
Packit d3489f
.I len
Packit d3489f
contains the number of bytes to copy, and
Packit d3489f
.I splice_flags
Packit d3489f
contains a bit mask for the flag field associated with the system call.
Packit d3489f
Please note that both of the file descriptors must refer to a pipe.
Packit d3489f
See also
Packit d3489f
.BR tee (2)
Packit d3489f
for the general description of the related system call. Available since 5.8.
Packit d3489f
Packit d3489f
.TP
Packit d3489f
.B IORING_OP_FILES_UPDATE
Packit d3489f
This command is an alternative to using
Packit d3489f
.B IORING_REGISTER_FILES_UPDATE
Packit d3489f
which then works in an async fashion, like the rest of the io_uring commands.
Packit d3489f
The arguments passed in are the same.
Packit d3489f
.I addr
Packit d3489f
must contain a pointer to the array of file descriptors,
Packit d3489f
.I len
Packit d3489f
must contain the length of the array, and
Packit d3489f
.I off
Packit d3489f
must contain the offset at which to operate. Note that the array of file
Packit d3489f
descriptors pointed to in
Packit d3489f
.I addr
Packit d3489f
must remain valid until this operation has completed. Available since 5.6.
Packit d3489f
Packit d3489f
.PP
Packit d3489f
The
Packit d3489f
.I flags
Packit d3489f
field is a bit mask. The supported flags are:
Packit d3489f
.TP
Packit d3489f
.B IOSQE_FIXED_FILE
Packit d3489f
When this flag is specified,
Packit d3489f
.I fd
Packit d3489f
is an index into the files array registered with the io_uring instance (see the
Packit d3489f
.B IORING_REGISTER_FILES
Packit d3489f
section of the
Packit d3489f
.BR io_uring_register (2)
Packit d3489f
man page). Available since 5.1.
Packit d3489f
.TP
Packit d3489f
.B IOSQE_IO_DRAIN
Packit d3489f
When this flag is specified, the SQE will not be started before previously
Packit d3489f
submitted SQEs have completed, and new SQEs will not be started before this
Packit d3489f
one completes. Available since 5.2.
Packit d3489f
.TP
Packit d3489f
.B IOSQE_IO_LINK
Packit d3489f
When this flag is specified, it forms a link with the next SQE in the
Packit d3489f
submission ring. That next SQE will not be started before this one completes.
Packit d3489f
This, in effect, forms a chain of SQEs, which can be arbitrarily long. The tail
Packit d3489f
of the chain is denoted by the first SQE that does not have this flag set.
Packit d3489f
This flag has no effect on previous SQE submissions, nor does it impact SQEs
Packit d3489f
that are outside of the chain tail. This means that multiple chains can be
Packit d3489f
executing in parallel, or chains and individual SQEs. Only members inside the
Packit d3489f
chain are serialized. A chain of SQEs will be broken, if any request in that
Packit d3489f
chain ends in error. io_uring considers any unexpected result an error. This
Packit d3489f
means that, e.g., a short read will also terminate the remainder of the chain.
Packit d3489f
If a chain of SQE links is broken, the remaining unstarted part of the chain
Packit d3489f
will be terminated and completed with
Packit d3489f
.B -ECANCELED
Packit d3489f
as the error code. Available since 5.3.
Packit d3489f
.TP
Packit d3489f
.B IOSQE_IO_HARDLINK
Packit d3489f
Like IOSQE_IO_LINK, but it doesn't sever regardless of the completion result.
Packit d3489f
Note that the link will still sever if we fail submitting the parent request,
Packit d3489f
hard links are only resilient in the presence of completion results for
Packit d3489f
requests that did submit correctly. IOSQE_IO_HARDLINK implies IOSQE_IO_LINK.
Packit d3489f
Available since 5.5.
Packit d3489f
.TP
Packit d3489f
.B IOSQE_ASYNC
Packit d3489f
Normal operation for io_uring is to try and issue an sqe as non-blocking first,
Packit d3489f
and if that fails, execute it in an async manner. To support more efficient
Packit d3489f
overlapped operation of requests that the application knows/assumes will
Packit d3489f
always (or most of the time) block, the application can ask for an sqe to be
Packit d3489f
issued async from the start. Available since 5.6.
Packit d3489f
Packit d3489f
Packit d3489f
.PP
Packit d3489f
.I ioprio
Packit d3489f
specifies the I/O priority.  See
Packit d3489f
.BR ioprio_get (2)
Packit d3489f
for a description of Linux I/O priorities.
Packit d3489f
Packit d3489f
.I fd
Packit d3489f
specifies the file descriptor against which the operation will be
Packit d3489f
performed, with the exception noted above.
Packit d3489f
Packit d3489f
If the operation is one of
Packit d3489f
.B IORING_OP_READ_FIXED
Packit d3489f
or
Packit d3489f
.BR IORING_OP_WRITE_FIXED ,
Packit d3489f
.I addr
Packit d3489f
and
Packit d3489f
.I len
Packit d3489f
must fall within the buffer located at
Packit d3489f
.I buf_index
Packit d3489f
in the fixed buffer array.  If the operation is either
Packit d3489f
.B IORING_OP_READV
Packit d3489f
or
Packit d3489f
.BR IORING_OP_WRITEV ,
Packit d3489f
then
Packit d3489f
.I addr
Packit d3489f
points to an iovec array of
Packit d3489f
.I len
Packit d3489f
entries.
Packit d3489f
Packit d3489f
.IR rw_flags ,
Packit d3489f
specified for read and write operations, contains a bitwise OR of
Packit d3489f
per-I/O flags, as described in the
Packit d3489f
.BR preadv2 (2)
Packit d3489f
man page.
Packit d3489f
Packit d3489f
The
Packit d3489f
.I fsync_flags
Packit d3489f
bit mask may contain either 0, for a normal file integrity sync, or
Packit d3489f
.B IORING_FSYNC_DATASYNC
Packit d3489f
to provide data sync only semantics.  See the descriptions of
Packit d3489f
.B O_SYNC
Packit d3489f
and
Packit d3489f
.B O_DSYNC
Packit d3489f
in the
Packit d3489f
.BR open (2)
Packit d3489f
manual page for more information.
Packit d3489f
Packit d3489f
The bits that may be set in
Packit d3489f
.I poll_events
Packit d3489f
are defined in \fI<poll.h>\fP, and documented in
Packit d3489f
.BR poll (2).
Packit d3489f
Packit d3489f
.I user_data
Packit d3489f
is an application-supplied value that will be copied into
Packit d3489f
the completion queue entry (see below).
Packit d3489f
.I buf_index
Packit d3489f
is an index into an array of fixed buffers, and is only valid if fixed
Packit d3489f
buffers were registered.
Packit d3489f
.I personality
Packit d3489f
is the credentials id to use for this operation. See
Packit d3489f
.BR io_uring_register (2)
Packit d3489f
for how to register personalities with io_uring. If set to 0, the current
Packit d3489f
personality of the submitting task is used.
Packit d3489f
.PP
Packit d3489f
Once the submission queue entry is initialized, I/O is submitted by
Packit d3489f
placing the index of the submission queue entry into the tail of the
Packit d3489f
submission queue.  After one or more indexes are added to the queue,
Packit d3489f
and the queue tail is advanced, the
Packit d3489f
.BR io_uring_enter (2)
Packit d3489f
system call can be invoked to initiate the I/O.
Packit d3489f
Packit d3489f
Completions use the following data structure:
Packit d3489f
.PP
Packit d3489f
.in +4n
Packit d3489f
.EX
Packit d3489f
/*
Packit d3489f
 * IO completion data structure (Completion Queue Entry)
Packit d3489f
 */
Packit d3489f
struct io_uring_cqe {
Packit d3489f
    __u64    user_data; /* sqe->user_data submission passed back */
Packit d3489f
    __s32    res;       /* result code for this event */
Packit d3489f
    __u32    flags;
Packit d3489f
};
Packit d3489f
.EE
Packit d3489f
.in
Packit d3489f
.PP
Packit d3489f
.I user_data
Packit d3489f
is copied from the field of the same name in the submission queue
Packit d3489f
entry.  The primary use case is to store data that the application
Packit d3489f
will need to access upon completion of this particular I/O.  The
Packit d3489f
.I flags
Packit d3489f
field is reserved for future use.
Packit d3489f
.I res
Packit d3489f
is the operation-specific result.
Packit d3489f
.PP
Packit d3489f
For read and write opcodes, the
Packit d3489f
return values match those documented in the
Packit d3489f
.BR preadv2 (2)
Packit d3489f
and
Packit d3489f
.BR pwritev2 (2)
Packit d3489f
man pages.
Packit d3489f
Return codes for the io_uring-specific opcodes are documented in the
Packit d3489f
description of the opcodes above.
Packit d3489f
.PP
Packit d3489f
.SH RETURN VALUE
Packit d3489f
.BR io_uring_enter ()
Packit d3489f
returns the number of I/Os successfully consumed.  This can be zero
Packit d3489f
if
Packit d3489f
.I to_submit
Packit d3489f
was zero or if the submission queue was empty. The errors below that refer to
Packit d3489f
an error in a submission queue entry will be returned through a completion queue
Packit d3489f
entry, rather than through the system call itself.
Packit d3489f
Packit d3489f
Errors that do not occur on behalf of a submission queue entry are returned via the
Packit d3489f
system call directly. On such an error, \-1 is returned and
Packit d3489f
.I errno
Packit d3489f
is set appropriately.
Packit d3489f
.PP
Packit d3489f
.SH ERRORS
Packit d3489f
.TP
Packit d3489f
.B EAGAIN
Packit d3489f
The kernel was unable to allocate memory for the request, or otherwise ran out
Packit d3489f
of resources to handle it. The application should wait for some completions and
Packit d3489f
try again.
Packit d3489f
.TP
Packit d3489f
.B EBUSY
Packit d3489f
The application is attempting to overcommit the number of requests it can have
Packit d3489f
pending. The application should wait for some completions and try again. May
Packit d3489f
occur if the application tries to queue more requests than we have room for in
Packit d3489f
the CQ ring.
Packit d3489f
.TP
Packit d3489f
.B EBADF
Packit d3489f
The
Packit d3489f
.I fd
Packit d3489f
field in the submission queue entry is invalid, or the
Packit d3489f
.B IOSQE_FIXED_FILE
Packit d3489f
flag was set in the submission queue entry, but no files were registered
Packit d3489f
with the io_uring instance.
Packit d3489f
.TP
Packit d3489f
.B EFAULT
Packit d3489f
A buffer is outside of the process's accessible address space.
Packit d3489f
.TP
Packit d3489f
.B EFAULT
Packit d3489f
.B IORING_OP_READ_FIXED
Packit d3489f
or
Packit d3489f
.B IORING_OP_WRITE_FIXED
Packit d3489f
was specified in the
Packit d3489f
.I opcode
Packit d3489f
field of the submission queue entry, but either buffers were not
Packit d3489f
registered for this io_uring instance, or the address range described
Packit d3489f
by
Packit d3489f
.I addr
Packit d3489f
and
Packit d3489f
.I len
Packit d3489f
does not fit within the buffer registered at
Packit d3489f
.IR buf_index .
Packit d3489f
.TP
Packit d3489f
.B EINVAL
Packit d3489f
The
Packit d3489f
.I index
Packit d3489f
member of the submission queue entry is invalid.
Packit d3489f
.TP
Packit d3489f
.B EINVAL
Packit d3489f
The
Packit d3489f
.I flags
Packit d3489f
field or
Packit d3489f
.I opcode
Packit d3489f
in a submission queue entry is invalid.
Packit d3489f
.TP
Packit d3489f
.B EINVAL
Packit d3489f
.B IORING_OP_NOP
Packit d3489f
was specified in the submission queue entry, but the io_uring context
Packit d3489f
was setup for polling
Packit d3489f
.RB ( IORING_SETUP_IOPOLL
Packit d3489f
was specified in the call to io_uring_setup).
Packit d3489f
.TP
Packit d3489f
.B EINVAL
Packit d3489f
.B IORING_OP_READV
Packit d3489f
or
Packit d3489f
.B IORING_OP_WRITEV
Packit d3489f
was specified in the submission queue entry, but the io_uring instance
Packit d3489f
has fixed buffers registered.
Packit d3489f
.TP
Packit d3489f
.B EINVAL
Packit d3489f
.B IORING_OP_READ_FIXED
Packit d3489f
or
Packit d3489f
.B IORING_OP_WRITE_FIXED
Packit d3489f
was specified in the submission queue entry, and the
Packit d3489f
.I buf_index
Packit d3489f
is invalid.
Packit d3489f
.TP
Packit d3489f
.B EINVAL
Packit d3489f
.BR IORING_OP_READV ,
Packit d3489f
.BR IORING_OP_WRITEV ,
Packit d3489f
.BR IORING_OP_READ_FIXED ,
Packit d3489f
.B IORING_OP_WRITE_FIXED
Packit d3489f
or
Packit d3489f
.B IORING_OP_FSYNC
Packit d3489f
was specified in the submission queue entry, but the io_uring instance
Packit d3489f
was configured for IOPOLLing, or any of
Packit d3489f
.IR addr ,
Packit d3489f
.IR ioprio ,
Packit d3489f
.IR off ,
Packit d3489f
.IR len ,
Packit d3489f
or
Packit d3489f
.I buf_index
Packit d3489f
was set in the submission queue entry.
Packit d3489f
.TP
Packit d3489f
.B EINVAL
Packit d3489f
.B IORING_OP_POLL_ADD
Packit d3489f
or
Packit d3489f
.B IORING_OP_POLL_REMOVE
Packit d3489f
was specified in the
Packit d3489f
.I opcode
Packit d3489f
field of the submission queue entry, but the io_uring instance was
Packit d3489f
configured for busy-wait polling
Packit d3489f
.RB ( IORING_SETUP_IOPOLL ),
Packit d3489f
or any of
Packit d3489f
.IR ioprio ,
Packit d3489f
.IR off ,
Packit d3489f
.IR len ,
Packit d3489f
or
Packit d3489f
.I buf_index
Packit d3489f
was non-zero in the submission queue entry.
Packit d3489f
.TP
Packit d3489f
.B EINVAL
Packit d3489f
.B IORING_OP_POLL_ADD
Packit d3489f
was specified in the
Packit d3489f
.I opcode
Packit d3489f
field of the submission queue entry, and the
Packit d3489f
.I addr
Packit d3489f
field was non-zero.
Packit d3489f
.TP
Packit d3489f
.B ENXIO
Packit d3489f
The io_uring instance is in the process of being torn down.
Packit d3489f
.TP
Packit d3489f
.B EOPNOTSUPP
Packit d3489f
.I fd
Packit d3489f
does not refer to an io_uring instance.
Packit d3489f
.TP
Packit d3489f
.B EOPNOTSUPP
Packit d3489f
.I opcode
Packit d3489f
is valid, but not supported by this kernel.
Packit d3489f
.TP
Packit d3489f
.B EINTR
Packit d3489f
The operation was interrupted by the delivery of a signal before it could
Packit d3489f
complete; see
Packit d3489f
.BR signal (7).
Packit d3489f
Can happen while waiting for events with
Packit d3489f
.BR IORING_ENTER_GETEVENTS .