diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5faee05 --- /dev/null +++ b/.gitignore @@ -0,0 +1,103 @@ +*.rej +*.orig +*~ +/*.patch + +*.o +*.o[ls] + +/src/liburing.a +/src/liburing.so* +/src/include/liburing/compat.h + +/examples/io_uring-cp +/examples/io_uring-test +/examples/link-cp +/examples/ucontext-cp + +/test/232c93d07b74-test +/test/35fa71a030ca-test +/test/500f9fbadef8-test +/test/7ad0e4b2f83c-test +/test/8a9973408177-test +/test/917257daa0fe-test +/test/a0908ae19763-test +/test/a4c0b3decb33-test +/test/accept +/test/accept-link +/test/accept-reuse +/test/accept-test +/test/across-fork +/test/b19062a56726-test +/test/b5837bd5311d-test +/test/ce593a6c480a-test +/test/connect +/test/close-opath +/test/cq-full +/test/cq-overflow +/test/cq-peek-batch +/test/cq-ready +/test/cq-size +/test/d4ae271dfaae-test +/test/d77a67ed5f27-test +/test/defer +/test/eeed8b54e0df-test +/test/eventfd +/test/eventfd-disable +/test/eventfd-ring +/test/fadvise +/test/fallocate +/test/fc2a85cb02ef-test +/test/file-register +/test/file-update +/test/fixed-link +/test/fsync +/test/io_uring_enter +/test/io_uring_register +/test/io_uring_setup +/test/io-cancel +/test/lfs-openat +/test/lfs-openat-write +/test/link +/test/link-timeout +/test/link_drain +/test/madvise +/test/nop +/test/open-close +/test/openat2 +/test/personality +/test/poll +/test/poll-cancel +/test/poll-cancel-ton +/test/poll-link +/test/poll-many +/test/poll-v-poll +/test/probe +/test/read-write +/test/ring-leak +/test/send_recv +/test/send_recvmsg +/test/shared-wq +/test/short-read +/test/socket-rw +/test/splice +/test/sq-full +/test/sq-full-cpp +/test/sq-poll-kthread +/test/sq-space_left +/test/statx +/test/stdout +/test/submit-reuse +/test/teardowns +/test/timeout +/test/timeout-overflow +/test/iopoll +/test/cq-overflow-peek +/test/config.local +/test/*.dmesg + +config-host.h +config-host.mak +config.log + +liburing.pc diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 
0000000..e02fdd0 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,22 @@ +language: cpp +os: + - linux +compiler: + - clang + - gcc +env: + matrix: + - BUILD_ARCH="x86" + - BUILD_ARCH="x86_64" + global: + - MAKEFLAGS="-j 2" +matrix: + exclude: + - os: linux + compiler: clang + env: BUILD_ARCH="x86" # Only do the gcc x86 build to reduce clutter +before_install: + - EXTRA_CFLAGS="-Werror" +script: + - ./configure && make + - sudo make runtests || true diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..e5ab03e --- /dev/null +++ b/COPYING @@ -0,0 +1,502 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. 
Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. 
Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. 
+ + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. 
(Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. 
+ + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. 
+ +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". 
Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. 
+ + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. 
+ + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. 
You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. 
For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. 
+Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + , 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! diff --git a/COPYING.GPL b/COPYING.GPL new file mode 100644 index 0000000..d159169 --- /dev/null +++ b/COPYING.GPL @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. 
diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..ae941fa --- /dev/null +++ b/LICENSE @@ -0,0 +1,7 @@ +Copyright 2020 Jens Axboe + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..948e004 --- /dev/null +++ b/Makefile @@ -0,0 +1,77 @@ +NAME=liburing +SPECFILE=$(NAME).spec +VERSION=$(shell awk '/Version:/ { print $$2 }' $(SPECFILE)) +TAG = $(NAME)-$(VERSION) +RPMBUILD=$(shell `which rpmbuild >&/dev/null` && echo "rpmbuild" || echo "rpm") + +INSTALL=install + +default: all + +all: + @$(MAKE) -C src + @$(MAKE) -C test + @$(MAKE) -C examples + +partcheck: all + @echo "make partcheck => TODO add tests with out kernel support" + +runtests: all + @$(MAKE) -C test runtests +runtests-loop: + @$(MAKE) -C test runtests-loop + +config-host.mak: configure + @if [ ! 
-e "$@" ]; then \ + echo "Running configure ..."; \ + ./configure; \ + else \ + echo "$@ is out-of-date, running configure"; \ + sed -n "/.*Configured with/s/[^:]*: //p" "$@" | sh; \ + fi + +ifneq ($(MAKECMDGOALS),clean) +include config-host.mak +endif + +%.pc: %.pc.in config-host.mak $(SPECFILE) + sed -e "s%@prefix@%$(prefix)%g" \ + -e "s%@libdir@%$(libdir)%g" \ + -e "s%@includedir@%$(includedir)%g" \ + -e "s%@NAME@%$(NAME)%g" \ + -e "s%@VERSION@%$(VERSION)%g" \ + $< >$@ + +install: $(NAME).pc + @$(MAKE) -C src install prefix=$(DESTDIR)$(prefix) \ + includedir=$(DESTDIR)$(includedir) \ + libdir=$(DESTDIR)$(libdir) \ + libdevdir=$(DESTDIR)$(libdevdir) \ + relativelibdir=$(relativelibdir) + $(INSTALL) -D -m 644 $(NAME).pc $(DESTDIR)$(libdevdir)/pkgconfig/$(NAME).pc + $(INSTALL) -m 755 -d $(DESTDIR)$(mandir)/man2 + $(INSTALL) -m 644 man/*.2 $(DESTDIR)$(mandir)/man2 + +install-tests: + @$(MAKE) -C test install prefix=$(DESTDIR)$(prefix) datadir=$(DESTDIR)$(datadir) + +clean: + @rm -f config-host.mak config-host.h cscope.out $(NAME).pc test/*.dmesg + @$(MAKE) -C src clean + @$(MAKE) -C test clean + @$(MAKE) -C examples clean + +cscope: + @cscope -b -R + +tag-archive: + @git tag $(TAG) + +create-archive: + @git archive --prefix=$(NAME)-$(VERSION)/ -o $(NAME)-$(VERSION).tar.gz $(TAG) + @echo "The final archive is ./$(NAME)-$(VERSION).tar.gz." 
+ +archive: clean tag-archive create-archive + +srpm: create-archive + $(RPMBUILD) --define "_sourcedir `pwd`" --define "_srcrpmdir `pwd`" --nodeps -bs $(SPECFILE) diff --git a/Makefile.quiet b/Makefile.quiet new file mode 100644 index 0000000..8eac349 --- /dev/null +++ b/Makefile.quiet @@ -0,0 +1,10 @@ +ifneq ($(findstring $(MAKEFLAGS),s),s) +ifndef V + QUIET_CC = @echo ' ' CC $@; + QUIET_LINK = @echo ' ' LINK $@; + QUIET_AR = @echo ' ' AR $@; + QUIET_RANLIB = @echo '' RANLIB $@; +endif +endif + + diff --git a/README b/README new file mode 100644 index 0000000..a76021e --- /dev/null +++ b/README @@ -0,0 +1,46 @@ +liburing +-------- + +This is the io_uring library, liburing. liburing provides helpers to setup and +teardown io_uring instances, and also a simplified interface for +applications that don't need (or want) to deal with the full kernel +side implementation. + +For more info on io_uring, please see: + +https://kernel.dk/io_uring.pdf + +Subscribe to io-uring@vger.kernel.org for io_uring related discussions +and development for both kernel and userspace. The list is archived here: + +https://lore.kernel.org/io-uring/ + + +ulimit settings +--------------- + +io_uring accounts memory it needs under the rlimit memlocked option, which +can be quite low on some setups (64K). The default is usually enough for +most use cases, but bigger rings or things like registered buffers deplete +it quickly. root isn't under this restriction, but regular users are. Going +into detail on how to bump the limit on various systems is beyond the scope +of this little blurb, but check /etc/security/limits.conf for user specific +settings, or /etc/systemd/user.conf and /etc/systemd/system.conf for systemd +setups. + +Regressions tests +----------------- + +The bulk of liburing is actually regression/unit tests for both liburing and +the kernel io_uring support. Please note that this suite isn't expected to +pass on older kernels, and may even crash or hang older kernels! 
+ +License +------- + +All software contained within this repo is dual licensed LGPL and MIT, see +COPYING and LICENSE, except for a header coming from the kernel which is +dual licensed GPL with a Linux-syscall-note exception and MIT, see +COPYING.GPL and . + +Jens Axboe 2020-01-20 diff --git a/configure b/configure new file mode 100755 index 0000000..518a5b0 --- /dev/null +++ b/configure @@ -0,0 +1,368 @@ +#!/bin/sh +# +# set temporary file name +if test ! -z "$TMPDIR" ; then + TMPDIR1="${TMPDIR}" +elif test ! -z "$TEMPDIR" ; then + TMPDIR1="${TEMPDIR}" +else + TMPDIR1="/tmp" +fi + +cc=${CC:-gcc} +cxx=${CXX:-g++} + +for opt do + optarg=$(expr "x$opt" : 'x[^=]*=\(.*\)') + case "$opt" in + --help|-h) show_help=yes + ;; + --prefix=*) prefix="$optarg" + ;; + --includedir=*) includedir="$optarg" + ;; + --libdir=*) libdir="$optarg" + ;; + --libdevdir=*) libdevdir="$optarg" + ;; + --mandir=*) mandir="$optarg" + ;; + --datadir=*) datadir="$optarg" + ;; + --cc=*) cc="$optarg" + ;; + --cxx=*) cxx="$optarg" + ;; + *) + echo "ERROR: unknown option $opt" + echo "Try '$0 --help' for more information" + exit 1 + ;; + esac +done + +if test -z "$prefix"; then + prefix=/usr +fi +if test -z "$includedir"; then + includedir="$prefix/include" +fi +if test -z "$libdir"; then + libdir="$prefix/lib" +fi +if test -z "$libdevdir"; then + libdevdir="$prefix/lib" +fi +if test -z "$mandir"; then + mandir="$prefix/man" +fi +if test -z "$datadir"; then + datadir="$prefix/share" +fi + +if test x"$libdir" = x"$libdevdir"; then + relativelibdir="" +else + relativelibdir="$libdir/" +fi + +if test "$show_help" = "yes"; then +cat < +trap "rm -f $TMPC $TMPC2 $TMPO $TMPE" EXIT INT QUIT TERM + +rm -rf config.log + +config_host_mak="config-host.mak" +config_host_h="config-host.h" + +rm -rf $config_host_mak +rm -rf $config_host_h + +fatal() { + echo $@ + echo "Configure failed, check config.log and/or the above output" + rm -rf $config_host_mak + rm -rf $config_host_h + exit 1 +} + +# Print result for 
each configuration test +print_config() { + printf "%-30s%s\n" "$1" "$2" +} + +# Default CFLAGS +CFLAGS="-D_GNU_SOURCE -include config-host.h" +BUILD_CFLAGS="" + +# Print configure header at the top of $config_host_h +echo "/*" > $config_host_h +echo " * Automatically generated by configure - do not modify" >> $config_host_h +printf " * Configured with:" >> $config_host_h +printf " * '%s'" "$0" "$@" >> $config_host_h +echo "" >> $config_host_h +echo " */" >> $config_host_h + +echo "# Automatically generated by configure - do not modify" > $config_host_mak +printf "# Configured with:" >> $config_host_mak +printf " '%s'" "$0" "$@" >> $config_host_mak +echo >> $config_host_mak + +do_cxx() { + # Run the compiler, capturing its output to the log. + echo $cxx "$@" >> config.log + $cxx "$@" >> config.log 2>&1 || return $? + return 0 +} + +do_cc() { + # Run the compiler, capturing its output to the log. + echo $cc "$@" >> config.log + $cc "$@" >> config.log 2>&1 || return $? + # Test passed. If this is an --enable-werror build, rerun + # the test with -Werror and bail out if it fails. This + # makes warning-generating-errors in configure test code + # obvious to developers. + if test "$werror" != "yes"; then + return 0 + fi + # Don't bother rerunning the compile if we were already using -Werror + case "$*" in + *-Werror*) + return 0 + ;; + esac + echo $cc -Werror "$@" >> config.log + $cc -Werror "$@" >> config.log 2>&1 && return $? + echo "ERROR: configure test passed without -Werror but failed with -Werror." + echo "This is probably a bug in the configure script. The failing command" + echo "will be at the bottom of config.log." + fatal "You can run configure with --disable-werror to bypass this check." 
+} + +compile_prog() { + local_cflags="$1" + local_ldflags="$2 $LIBS" + echo "Compiling test case $3" >> config.log + do_cc $CFLAGS $local_cflags -o $TMPE $TMPC $LDFLAGS $local_ldflags +} + +compile_prog_cxx() { + local_cflags="$1" + local_ldflags="$2 $LIBS" + echo "Compiling test case $3" >> config.log + do_cxx $CFLAGS $local_cflags -o $TMPE $TMPC $LDFLAGS $local_ldflags +} + +has() { + type "$1" >/dev/null 2>&1 +} + +output_mak() { + echo "$1=$2" >> $config_host_mak +} + +output_sym() { + output_mak "$1" "y" + echo "#define $1" >> $config_host_h +} + +print_and_output_mak() { + print_config "$1" "$2" + output_mak "$1" "$2" +} +print_and_output_mak "prefix" "$prefix" +print_and_output_mak "includedir" "$includedir" +print_and_output_mak "libdir" "$libdir" +print_and_output_mak "libdevdir" "$libdevdir" +print_and_output_mak "relativelibdir" "$relativelibdir" +print_and_output_mak "mandir" "$mandir" +print_and_output_mak "datadir" "$datadir" + +########################################## +# check for __kernel_rwf_t +__kernel_rwf_t="no" +cat > $TMPC << EOF +#include +int main(int argc, char **argv) +{ + __kernel_rwf_t x; + x = 0; + return x; +} +EOF +if compile_prog "" "" "__kernel_rwf_t"; then + __kernel_rwf_t="yes" +fi +print_config "__kernel_rwf_t" "$__kernel_rwf_t" + +########################################## +# check for __kernel_timespec +__kernel_timespec="no" +cat > $TMPC << EOF +#include +#include +int main(int argc, char **argv) +{ + struct __kernel_timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = 1; + return 0; +} +EOF +if compile_prog "" "" "__kernel_timespec"; then + __kernel_timespec="yes" +fi +print_config "__kernel_timespec" "$__kernel_timespec" + +########################################## +# check for open_how +open_how="no" +cat > $TMPC << EOF +#include +#include +#include +#include +int main(int argc, char **argv) +{ + struct open_how how; + how.flags = 0; + how.mode = 0; + how.resolve = 0; + return 0; +} +EOF +if compile_prog "" "" "open_how"; then + 
open_how="yes" +fi +print_config "open_how" "$open_how" + +########################################## +# check for statx +statx="no" +cat > $TMPC << EOF +#include +#include +#include +#include +#include +#include +int main(int argc, char **argv) +{ + struct statx x; + + return memset(&x, 0, sizeof(x)) != NULL; +} +EOF +if compile_prog "" "" "statx"; then + statx="yes" +fi +print_config "statx" "$statx" + +########################################## +# check for C++ +has_cxx="no" +cat > $TMPC << EOF +#include +int main(int argc, char **argv) +{ + std::cout << "Test"; + return 0; +} +EOF +if compile_prog_cxx "" "" "C++"; then + has_cxx="yes" +fi +print_config "C++" "$has_cxx" + +############################################################################# + +if test "$__kernel_rwf_t" = "yes"; then + output_sym "CONFIG_HAVE_KERNEL_RWF_T" +fi +if test "$__kernel_timespec" = "yes"; then + output_sym "CONFIG_HAVE_KERNEL_TIMESPEC" +fi +if test "$open_how" = "yes"; then + output_sym "CONFIG_HAVE_OPEN_HOW" +fi +if test "$statx" = "yes"; then + output_sym "CONFIG_HAVE_STATX" +fi +if test "$has_cxx" = "yes"; then + output_sym "CONFIG_HAVE_CXX" +fi + +echo "CC=$cc" >> $config_host_mak +print_config "CC" "$cc" +echo "CXX=$cxx" >> $config_host_mak +print_config "CXX" "$cxx" + +# generate compat.h +compat_h="src/include/liburing/compat.h" +cat > $compat_h << EOF +/* SPDX-License-Identifier: MIT */ +#ifndef LIBURING_COMPAT_H +#define LIBURING_COMPAT_H + +EOF + +if test "$__kernel_rwf_t" != "yes"; then +cat >> $compat_h << EOF +typedef int __kernel_rwf_t; + +EOF +fi +if test "$__kernel_timespec" != "yes"; then +cat >> $compat_h << EOF +#include + +struct __kernel_timespec { + int64_t tv_sec; + long long tv_nsec; +}; + +EOF +else +cat >> $compat_h << EOF +#include + +EOF +fi +if test "$open_how" != "yes"; then +cat >> $compat_h << EOF +#include + +struct open_how { + uint64_t flags; + uint64_t mode; + uint64_t resolve; +}; + +EOF +fi + +cat >> $compat_h << EOF +#endif +EOF diff --git 
a/debian/README.Debian b/debian/README.Debian new file mode 100644 index 0000000..15b9fd0 --- /dev/null +++ b/debian/README.Debian @@ -0,0 +1,7 @@ +liburing for Debian + +The newest Linux IO interface i.e. io_uring, need +userspace library to support it. This package +liburing is the library for io_uring. + + -- Liu Changcheng Thu, 14 Nov 2019 21:35:39 +0800 diff --git a/debian/changelog b/debian/changelog new file mode 100644 index 0000000..f01b3a4 --- /dev/null +++ b/debian/changelog @@ -0,0 +1,20 @@ +liburing (0.4-2) stable; urgency=low + + * Fix /usr/lib/*/liburing.so symlink to /lib/*/liburing.so.1.0.4 + + -- Stefan Metzmacher Fri, 07 Feb 2020 15:30:00 +0100 + +liburing (0.4-1) stable; urgency=low + + * Package liburing-0.4 using a packaging layout similar to libaio1 + + -- Stefan Metzmacher Thu, 06 Feb 2020 11:30:00 +0100 + +liburing (0.2-1ubuntu1) stable; urgency=low + + * Initial release. + * commit 4bce856d43ab1f9a64477aa5a8f9f02f53e64b74 + * Author: Jens Axboe + * Date: Mon Nov 11 16:00:58 2019 -0700 + + -- Liu Changcheng Fri, 15 Nov 2019 00:06:46 +0800 diff --git a/debian/compat b/debian/compat new file mode 100644 index 0000000..ec63514 --- /dev/null +++ b/debian/compat @@ -0,0 +1 @@ +9 diff --git a/debian/control b/debian/control new file mode 100644 index 0000000..831a314 --- /dev/null +++ b/debian/control @@ -0,0 +1,48 @@ +Source: liburing +Section: libs +Priority: optional +Maintainer: Liu Changcheng +Build-Depends: debhelper (>=9) +Standards-Version: 4.1.4 +Homepage: https://git.kernel.dk/cgit/liburing/tree/README +Vcs-Git: https://git.kernel.dk/liburing +Vcs-Browser: https://git.kernel.dk/cgit/liburing/ + +Package: liburing1 +Architecture: linux-any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: ${misc:Depends}, ${shlibs:Depends} +Description: userspace library for using io_uring + io_uring is kernel feature to improve development + The newese Linux IO interface, io_uring could improve + system performance a lot. 
liburing is the userpace + library to use io_uring feature. + . + This package contains the shared library. + +Package: liburing1-udeb +Package-Type: udeb +Section: debian-installer +Architecture: linux-any +Depends: ${misc:Depends}, ${shlibs:Depends}, +Description: userspace library for using io_uring + io_uring is kernel feature to improve development + The newese Linux IO interface, io_uring could improve + system performance a lot. liburing is the userpace + library to use io_uring feature. + . + This package contains the udeb shared library. + +Package: liburing-dev +Section: libdevel +Architecture: linux-any +Multi-Arch: same +Depends: ${misc:Depends}, liburing1 (= ${binary:Version}), +Description: userspace library for using io_uring + io_uring is kernel feature to improve development + The newese Linux IO interface, io_uring could improve + system performance a lot. liburing is the userpace + library to use io_uring feature. + . + This package contains the static library and the header files. diff --git a/debian/copyright b/debian/copyright new file mode 100644 index 0000000..0b3f3eb --- /dev/null +++ b/debian/copyright @@ -0,0 +1,49 @@ +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: liburing +Source: https://git.kernel.dk/cgit/liburing/ + +Files: * +Copyright: 2019 Jens Axboe +License: GPL-2+ / MIT + +Files: debian/* +Copyright: 2019 Changcheng Liu +License: GPL-2+ + +License: GPL-2+ + This package is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + . + This package is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + . 
+ You should have received a copy of the GNU General Public License + along with this program. If not, see + . + On Debian systems, the complete text of the GNU General + Public License version 2 can be found in "/usr/share/common-licenses/GPL-2". + +License: MIT + Copyright 2020 Jens Axboe + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
diff --git a/debian/liburing-dev.install b/debian/liburing-dev.install new file mode 100644 index 0000000..a00d956 --- /dev/null +++ b/debian/liburing-dev.install @@ -0,0 +1,4 @@ +usr/include +usr/lib/*/lib*.so +usr/lib/*/lib*.a +usr/lib/*/pkgconfig diff --git a/debian/liburing-dev.manpages b/debian/liburing-dev.manpages new file mode 100644 index 0000000..5683902 --- /dev/null +++ b/debian/liburing-dev.manpages @@ -0,0 +1,3 @@ +man/io_uring_setup.2 +man/io_uring_enter.2 +man/io_uring_register.2 diff --git a/debian/liburing1-udeb.install b/debian/liburing1-udeb.install new file mode 100644 index 0000000..622f9ef --- /dev/null +++ b/debian/liburing1-udeb.install @@ -0,0 +1 @@ +lib/*/lib*.so.* diff --git a/debian/liburing1.install b/debian/liburing1.install new file mode 100644 index 0000000..622f9ef --- /dev/null +++ b/debian/liburing1.install @@ -0,0 +1 @@ +lib/*/lib*.so.* diff --git a/debian/liburing1.symbols b/debian/liburing1.symbols new file mode 100644 index 0000000..cc4d504 --- /dev/null +++ b/debian/liburing1.symbols @@ -0,0 +1,28 @@ +liburing.so.1 liburing1 #MINVER# + (symver)LIBURING_0.1 0.1-1 + io_uring_get_sqe@LIBURING_0.1 0.1-1 + io_uring_queue_exit@LIBURING_0.1 0.1-1 + io_uring_queue_init@LIBURING_0.1 0.1-1 + io_uring_queue_mmap@LIBURING_0.1 0.1-1 + io_uring_register_buffers@LIBURING_0.1 0.1-1 + io_uring_register_eventfd@LIBURING_0.1 0.1-1 + io_uring_register_files@LIBURING_0.1 0.1-1 + io_uring_submit@LIBURING_0.1 0.1-1 + io_uring_submit_and_wait@LIBURING_0.1 0.1-1 + io_uring_unregister_buffers@LIBURING_0.1 0.1-1 + io_uring_unregister_files@LIBURING_0.1 0.1-1 + (symver)LIBURING_0.2 0.2-1 + __io_uring_get_cqe@LIBURING_0.2 0.2-1 + io_uring_queue_init_params@LIBURING_0.2 0.2-1 + io_uring_register_files_update@LIBURING_0.2 0.2-1 + io_uring_peek_batch_cqe@LIBURING_0.2 0.2-1 + io_uring_wait_cqe_timeout@LIBURING_0.2 0.2-1 + io_uring_wait_cqes@LIBURING_0.2 0.2-1 + (symver)LIBURING_0.3 0.3-1 + (symver)LIBURING_0.4 0.4-1 + io_uring_get_probe@LIBURING_0.4 0.4-1 + 
io_uring_get_probe_ring@LIBURING_0.4 0.4-1 + io_uring_register_personality@LIBURING_0.4 0.4-1 + io_uring_register_probe@LIBURING_0.4 0.4-1 + io_uring_ring_dontfork@LIBURING_0.4 0.4-1 + io_uring_unregister_personality@LIBURING_0.4 0.4-1 diff --git a/debian/patches/series b/debian/patches/series new file mode 100644 index 0000000..4a97dfa --- /dev/null +++ b/debian/patches/series @@ -0,0 +1 @@ +# You must remove unused comment lines for the released package. diff --git a/debian/rules b/debian/rules new file mode 100755 index 0000000..283d464 --- /dev/null +++ b/debian/rules @@ -0,0 +1,80 @@ +#!/usr/bin/make -f + +# Uncomment this to turn on verbose mode. +#export DH_VERBOSE=1 + +DEB_BUILD_MAINT_OPTIONS = hardening=+bindnow +DEB_CFLAGS_MAINT_PREPEND = -Wall + +include /usr/share/dpkg/default.mk +include /usr/share/dpkg/buildtools.mk + +export CC + +lib := liburing1 +libdbg := $(lib)-dbg +libudeb := $(lib)-udeb +libdev := liburing-dev + +build-indep: + +build-arch: + dh_testdir + + $(MAKE) CPPFLAGS="$(CPPFLAGS)" CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)" + +build: build-indep build-arch + +clean: + dh_testdir + dh_testroot + + $(MAKE) clean + + dh_clean + +check-arch: build-arch + dh_testdir + +ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS))) + $(MAKE) CPPFLAGS="$(CPPFLAGS)" CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)" \ + partcheck +endif + +install-arch: check-arch + dh_testdir + dh_testroot + dh_clean + dh_installdirs + + $(MAKE) install \ + DESTDIR=$(CURDIR)/debian/tmp \ + libdir=/lib/$(DEB_HOST_MULTIARCH) \ + libdevdir=/usr/lib/$(DEB_HOST_MULTIARCH) + +binary: binary-indep binary-arch + +binary-indep: + # Nothing to do. 
+ +binary-arch: install-arch + dh_testdir + dh_testroot + dh_install -a + dh_installdocs -a + dh_installexamples -a + dh_installman -a + dh_lintian -a + dh_link -a + dh_strip -a --ddeb-migration='$(libdbg) (<< 0.3)' + dh_compress -a + dh_fixperms -a + dh_makeshlibs -a --add-udeb '$(libudeb)' + dh_shlibdeps -a + dh_installdeb -a + dh_gencontrol -a + dh_md5sums -a + dh_builddeb -a + +.PHONY: clean build-indep build-arch build +.PHONY: install-arch binary-indep binary-arch binary diff --git a/debian/source/format b/debian/source/format new file mode 100644 index 0000000..163aaf8 --- /dev/null +++ b/debian/source/format @@ -0,0 +1 @@ +3.0 (quilt) diff --git a/debian/source/local-options b/debian/source/local-options new file mode 100644 index 0000000..00131ee --- /dev/null +++ b/debian/source/local-options @@ -0,0 +1,2 @@ +#abort-on-upstream-changes +#unapply-patches diff --git a/debian/source/options b/debian/source/options new file mode 100644 index 0000000..51da836 --- /dev/null +++ b/debian/source/options @@ -0,0 +1 @@ +extend-diff-ignore = "(^|/)(config\.log|config-host\.h|config-host\.mak|liburing\.pc)$" diff --git a/debian/watch b/debian/watch new file mode 100644 index 0000000..f0e30c4 --- /dev/null +++ b/debian/watch @@ -0,0 +1,3 @@ +# Site Directory Pattern Version Script +version=4 +https://git.kernel.dk/cgit/liburing/ snapshot\/liburing-([\d\.]+)\.tar\.(?:gz|xz) debian uupdate diff --git a/examples/Makefile b/examples/Makefile new file mode 100644 index 0000000..28456a9 --- /dev/null +++ b/examples/Makefile @@ -0,0 +1,23 @@ +CFLAGS ?= -g -O2 +XCFLAGS = +override CFLAGS += -Wall -D_GNU_SOURCE -L../src/ -I../src/include/ + +include ../Makefile.quiet + +ifneq ($(MAKECMDGOALS),clean) +include ../config-host.mak +endif + +all_targets += io_uring-test io_uring-cp link-cp ucontext-cp + +all: $(all_targets) + +test_srcs := io_uring-test.c io_uring-cp.c link-cp.c + +test_objs := $(patsubst %.c,%.ol,$(test_srcs)) + +%: %.c + $(QUIET_CC)$(CC) $(CFLAGS) -o $@ $< 
-luring $(XCFLAGS) + +clean: + @rm -f $(all_targets) $(test_objs) diff --git a/examples/io_uring-cp.c b/examples/io_uring-cp.c new file mode 100644 index 0000000..cc7a227 --- /dev/null +++ b/examples/io_uring-cp.c @@ -0,0 +1,260 @@ +/* SPDX-License-Identifier: MIT */ +/* + * gcc -Wall -O2 -D_GNU_SOURCE -o io_uring-cp io_uring-cp.c -luring + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "liburing.h" + +#define QD 64 +#define BS (32*1024) + +static int infd, outfd; + +struct io_data { + int read; + off_t first_offset, offset; + size_t first_len; + struct iovec iov; +}; + +static int setup_context(unsigned entries, struct io_uring *ring) +{ + int ret; + + ret = io_uring_queue_init(entries, ring, 0); + if (ret < 0) { + fprintf(stderr, "queue_init: %s\n", strerror(-ret)); + return -1; + } + + return 0; +} + +static int get_file_size(int fd, off_t *size) +{ + struct stat st; + + if (fstat(fd, &st) < 0) + return -1; + if (S_ISREG(st.st_mode)) { + *size = st.st_size; + return 0; + } else if (S_ISBLK(st.st_mode)) { + unsigned long long bytes; + + if (ioctl(fd, BLKGETSIZE64, &bytes) != 0) + return -1; + + *size = bytes; + return 0; + } + + return -1; +} + +static void queue_prepped(struct io_uring *ring, struct io_data *data) +{ + struct io_uring_sqe *sqe; + + sqe = io_uring_get_sqe(ring); + assert(sqe); + + if (data->read) + io_uring_prep_readv(sqe, infd, &data->iov, 1, data->offset); + else + io_uring_prep_writev(sqe, outfd, &data->iov, 1, data->offset); + + io_uring_sqe_set_data(sqe, data); +} + +static int queue_read(struct io_uring *ring, off_t size, off_t offset) +{ + struct io_uring_sqe *sqe; + struct io_data *data; + + data = malloc(size + sizeof(*data)); + if (!data) + return 1; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + free(data); + return 1; + } + + data->read = 1; + data->offset = data->first_offset = offset; + + data->iov.iov_base = data + 1; + data->iov.iov_len = size; + 
data->first_len = size; + + io_uring_prep_readv(sqe, infd, &data->iov, 1, offset); + io_uring_sqe_set_data(sqe, data); + return 0; +} + +static void queue_write(struct io_uring *ring, struct io_data *data) +{ + data->read = 0; + data->offset = data->first_offset; + + data->iov.iov_base = data + 1; + data->iov.iov_len = data->first_len; + + queue_prepped(ring, data); + io_uring_submit(ring); +} + +static int copy_file(struct io_uring *ring, off_t insize) +{ + unsigned long reads, writes; + struct io_uring_cqe *cqe; + off_t write_left, offset; + int ret; + + write_left = insize; + writes = reads = offset = 0; + + while (insize || write_left) { + int had_reads, got_comp; + + /* + * Queue up as many reads as we can + */ + had_reads = reads; + while (insize) { + off_t this_size = insize; + + if (reads + writes >= QD) + break; + if (this_size > BS) + this_size = BS; + else if (!this_size) + break; + + if (queue_read(ring, this_size, offset)) + break; + + insize -= this_size; + offset += this_size; + reads++; + } + + if (had_reads != reads) { + ret = io_uring_submit(ring); + if (ret < 0) { + fprintf(stderr, "io_uring_submit: %s\n", strerror(-ret)); + break; + } + } + + /* + * Queue is full at this point. Find at least one completion. 
+ */ + got_comp = 0; + while (write_left) { + struct io_data *data; + + if (!got_comp) { + ret = io_uring_wait_cqe(ring, &cqe); + got_comp = 1; + } else { + ret = io_uring_peek_cqe(ring, &cqe); + if (ret == -EAGAIN) { + cqe = NULL; + ret = 0; + } + } + if (ret < 0) { + fprintf(stderr, "io_uring_peek_cqe: %s\n", + strerror(-ret)); + return 1; + } + if (!cqe) + break; + + data = io_uring_cqe_get_data(cqe); + if (cqe->res < 0) { + if (cqe->res == -EAGAIN) { + queue_prepped(ring, data); + io_uring_cqe_seen(ring, cqe); + continue; + } + fprintf(stderr, "cqe failed: %s\n", + strerror(-cqe->res)); + return 1; + } else if (cqe->res != data->iov.iov_len) { + /* Short read/write, adjust and requeue */ + data->iov.iov_base += cqe->res; + data->iov.iov_len -= cqe->res; + data->offset += cqe->res; + queue_prepped(ring, data); + io_uring_cqe_seen(ring, cqe); + continue; + } + + /* + * All done. if write, nothing else to do. if read, + * queue up corresponding write. + */ + if (data->read) { + queue_write(ring, data); + write_left -= data->first_len; + reads--; + writes++; + } else { + free(data); + writes--; + } + io_uring_cqe_seen(ring, cqe); + } + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + off_t insize; + int ret; + + if (argc < 3) { + printf("%s: infile outfile\n", argv[0]); + return 1; + } + + infd = open(argv[1], O_RDONLY); + if (infd < 0) { + perror("open infile"); + return 1; + } + outfd = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (outfd < 0) { + perror("open outfile"); + return 1; + } + + if (setup_context(QD, &ring)) + return 1; + if (get_file_size(infd, &insize)) + return 1; + + ret = copy_file(&ring, insize); + + close(infd); + close(outfd); + io_uring_queue_exit(&ring); + return ret; +} diff --git a/examples/io_uring-test.c b/examples/io_uring-test.c new file mode 100644 index 0000000..1da8407 --- /dev/null +++ b/examples/io_uring-test.c @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Simple app that 
demonstrates how to setup an io_uring interface, + * submit and complete IO against it, and then tear it down. + * + * gcc -Wall -O2 -D_GNU_SOURCE -o io_uring-test io_uring-test.c -luring + */ +#include +#include +#include +#include +#include +#include "liburing.h" + +#define QD 4 + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int i, fd, ret, pending, done; + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct iovec *iovecs; + off_t offset; + void *buf; + + if (argc < 2) { + printf("%s: file\n", argv[0]); + return 1; + } + + ret = io_uring_queue_init(QD, &ring, 0); + if (ret < 0) { + fprintf(stderr, "queue_init: %s\n", strerror(-ret)); + return 1; + } + + fd = open(argv[1], O_RDONLY | O_DIRECT); + if (fd < 0) { + perror("open"); + return 1; + } + + iovecs = calloc(QD, sizeof(struct iovec)); + for (i = 0; i < QD; i++) { + if (posix_memalign(&buf, 4096, 4096)) + return 1; + iovecs[i].iov_base = buf; + iovecs[i].iov_len = 4096; + } + + offset = 0; + i = 0; + do { + sqe = io_uring_get_sqe(&ring); + if (!sqe) + break; + io_uring_prep_readv(sqe, fd, &iovecs[i], 1, offset); + offset += iovecs[i].iov_len; + i++; + } while (1); + + ret = io_uring_submit(&ring); + if (ret < 0) { + fprintf(stderr, "io_uring_submit: %s\n", strerror(-ret)); + return 1; + } + + done = 0; + pending = ret; + for (i = 0; i < pending; i++) { + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret < 0) { + fprintf(stderr, "io_uring_wait_cqe: %s\n", strerror(-ret)); + return 1; + } + + done++; + ret = 0; + if (cqe->res != 4096) { + fprintf(stderr, "ret=%d, wanted 4096\n", cqe->res); + ret = 1; + } + io_uring_cqe_seen(&ring, cqe); + if (ret) + break; + } + + printf("Submitted=%d, completed=%d\n", pending, done); + close(fd); + io_uring_queue_exit(&ring); + return 0; +} diff --git a/examples/link-cp.c b/examples/link-cp.c new file mode 100644 index 0000000..6fa54ac --- /dev/null +++ b/examples/link-cp.c @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Very basic 
proof-of-concept for doing a copy with linked SQEs. Needs a + * bit of error handling and short read love. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "liburing.h" + +#define QD 64 +#define BS (32*1024) + +struct io_data { + size_t offset; + int index; + struct iovec iov; +}; + +static int infd, outfd; +static unsigned inflight; + +static int setup_context(unsigned entries, struct io_uring *ring) +{ + int ret; + + ret = io_uring_queue_init(entries, ring, 0); + if (ret < 0) { + fprintf(stderr, "queue_init: %s\n", strerror(-ret)); + return -1; + } + + return 0; +} + +static int get_file_size(int fd, off_t *size) +{ + struct stat st; + + if (fstat(fd, &st) < 0) + return -1; + if (S_ISREG(st.st_mode)) { + *size = st.st_size; + return 0; + } else if (S_ISBLK(st.st_mode)) { + unsigned long long bytes; + + if (ioctl(fd, BLKGETSIZE64, &bytes) != 0) + return -1; + + *size = bytes; + return 0; + } + + return -1; +} + +static void queue_rw_pair(struct io_uring *ring, off_t size, off_t offset) +{ + struct io_uring_sqe *sqe; + struct io_data *data; + void *ptr; + + ptr = malloc(size + sizeof(*data)); + data = ptr + size; + data->index = 0; + data->offset = offset; + data->iov.iov_base = ptr; + data->iov.iov_len = size; + + sqe = io_uring_get_sqe(ring); + io_uring_prep_readv(sqe, infd, &data->iov, 1, offset); + sqe->flags |= IOSQE_IO_LINK; + io_uring_sqe_set_data(sqe, data); + + sqe = io_uring_get_sqe(ring); + io_uring_prep_writev(sqe, outfd, &data->iov, 1, offset); + io_uring_sqe_set_data(sqe, data); +} + +static int handle_cqe(struct io_uring *ring, struct io_uring_cqe *cqe) +{ + struct io_data *data = io_uring_cqe_get_data(cqe); + int ret = 0; + + data->index++; + + if (cqe->res < 0) { + if (cqe->res == -ECANCELED) { + queue_rw_pair(ring, BS, data->offset); + inflight += 2; + } else { + printf("cqe error: %s\n", strerror(cqe->res)); + ret = 1; + } + } + + if (data->index == 2) { + void *ptr = 
(void *) data - data->iov.iov_len; + + free(ptr); + } + io_uring_cqe_seen(ring, cqe); + return ret; +} + +static int copy_file(struct io_uring *ring, off_t insize) +{ + struct io_uring_cqe *cqe; + size_t this_size; + off_t offset; + + offset = 0; + while (insize) { + int has_inflight = inflight; + int depth; + + while (insize && inflight < QD) { + this_size = BS; + if (this_size > insize) + this_size = insize; + queue_rw_pair(ring, this_size, offset); + offset += this_size; + insize -= this_size; + inflight += 2; + } + + if (has_inflight != inflight) + io_uring_submit(ring); + + if (insize) + depth = QD; + else + depth = 1; + while (inflight >= depth) { + int ret; + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + printf("wait cqe: %s\n", strerror(ret)); + return 1; + } + if (handle_cqe(ring, cqe)) + return 1; + inflight--; + } + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + off_t insize; + int ret; + + if (argc < 3) { + printf("%s: infile outfile\n", argv[0]); + return 1; + } + + infd = open(argv[1], O_RDONLY); + if (infd < 0) { + perror("open infile"); + return 1; + } + outfd = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (outfd < 0) { + perror("open outfile"); + return 1; + } + + if (setup_context(QD, &ring)) + return 1; + if (get_file_size(infd, &insize)) + return 1; + + ret = copy_file(&ring, insize); + + close(infd); + close(outfd); + io_uring_queue_exit(&ring); + return ret; +} diff --git a/examples/ucontext-cp.c b/examples/ucontext-cp.c new file mode 100644 index 0000000..0b2a6b5 --- /dev/null +++ b/examples/ucontext-cp.c @@ -0,0 +1,267 @@ +/* SPDX-License-Identifier: MIT */ +/* + * gcc -Wall -O2 -D_GNU_SOURCE -o ucontext-cp ucontext-cp.c -luring + */ +#define _POSIX_C_SOURCE 199309L +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "liburing.h" + +#define QD 64 +#define BS 1024 + +#ifndef SIGSTKSZ 
+#define SIGSTKSZ 8192 +#endif + +typedef struct { + struct io_uring *ring; + unsigned char stack_buf[SIGSTKSZ]; + ucontext_t ctx_main, ctx_fnew; +} async_context; + +typedef struct { + async_context *pctx; + int *psuccess; + int *pfailure; + int infd; + int outfd; +} arguments_bundle; + +#define DEFINE_AWAIT_OP(operation) \ +static ssize_t await_##operation( \ + async_context *pctx, \ + int fd, \ + const struct iovec *ioves, \ + unsigned int nr_vecs, \ + off_t offset) \ +{ \ + struct io_uring_sqe *sqe = io_uring_get_sqe(pctx->ring); \ + struct io_uring_cqe *cqe; \ + \ + if (!sqe) \ + return -1; \ + \ + io_uring_prep_##operation(sqe, fd, ioves, nr_vecs, offset); \ + io_uring_sqe_set_data(sqe, pctx); \ + swapcontext(&pctx->ctx_fnew, &pctx->ctx_main); \ + io_uring_peek_cqe(pctx->ring, &cqe); \ + assert(cqe); \ + io_uring_cqe_seen(pctx->ring, cqe); \ + \ + return cqe->res; \ +} + +DEFINE_AWAIT_OP(readv) +DEFINE_AWAIT_OP(writev) +#undef DEFINE_AWAIT_OP + +int await_poll(async_context *pctx, int fd, short poll_mask) { + struct io_uring_sqe *sqe = io_uring_get_sqe(pctx->ring); + struct io_uring_cqe *cqe; + if (!sqe) + return -1; + + io_uring_prep_poll_add(sqe, fd, poll_mask); + io_uring_sqe_set_data(sqe, pctx); + swapcontext(&pctx->ctx_fnew, &pctx->ctx_main); + io_uring_peek_cqe(pctx->ring, &cqe); + assert(cqe); + io_uring_cqe_seen(pctx->ring, cqe); + + return cqe->res; +} + +int await_delay(async_context *pctx, time_t seconds) { + struct io_uring_sqe *sqe = io_uring_get_sqe(pctx->ring); + struct io_uring_cqe *cqe; + struct __kernel_timespec ts = { + .tv_sec = seconds, + .tv_nsec = 0 + }; + + if (!sqe) + return -1; + + io_uring_prep_timeout(sqe, &ts, 0, 0); + io_uring_sqe_set_data(sqe, pctx); + swapcontext(&pctx->ctx_fnew, &pctx->ctx_main); + io_uring_peek_cqe(pctx->ring, &cqe); + assert(cqe); + io_uring_cqe_seen(pctx->ring, cqe); + + return 0; +} + +static int setup_context(async_context *pctx, struct io_uring *ring) +{ + int ret; + + pctx->ring = ring; + ret = 
getcontext(&pctx->ctx_fnew); + if (ret < 0) { + perror("getcontext"); + return -1; + } + pctx->ctx_fnew.uc_stack.ss_sp = &pctx->stack_buf; + pctx->ctx_fnew.uc_stack.ss_size = sizeof(pctx->stack_buf); + pctx->ctx_fnew.uc_link = &pctx->ctx_main; + + return 0; +} + +static int copy_file(async_context *pctx, int infd, int outfd, struct iovec* piov) +{ + off_t offset = 0; + + for (;;) { + ssize_t bytes_read; + + printf("%d->%d: readv %ld bytes from %ld\n", infd, outfd, (long) piov->iov_len, (long) offset); + if ((bytes_read = await_readv(pctx, infd, piov, 1, offset)) < 0) { + perror("await_readv"); + return 1; + } + if (bytes_read == 0) + return 0; + + piov->iov_len = bytes_read; + + printf("%d->%d: writev %ld bytes from %ld\n", infd, outfd, (long) piov->iov_len, (long) offset); + if (await_writev(pctx, outfd, piov, 1, offset) != bytes_read) { + perror("await_writev"); + return 1; + } + if (bytes_read < BS) + return 0; + offset += bytes_read; + + printf("%d->%d: wait %ds\n", infd, outfd, 1); + await_delay(pctx, 1); + } +} + +static void copy_file_wrapper(arguments_bundle *pbundle) +{ + struct iovec iov = { + .iov_base = malloc(BS), + .iov_len = BS, + }; + async_context *pctx = pbundle->pctx; + + int ret = copy_file(pctx, pbundle->infd, pbundle->outfd, &iov); + + printf("%d->%d: done with ret code %d\n", pbundle->infd, pbundle->outfd, ret); + + if (ret == 0) { + ++*pbundle->psuccess; + } else { + ++*pbundle->pfailure; + } + + free(iov.iov_base); + close(pbundle->infd); + close(pbundle->outfd); + free(pbundle->pctx); + free(pbundle); + + swapcontext(&pctx->ctx_fnew, &pctx->ctx_main); +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int i, req_count, ret; + int success = 0, failure = 0; + + if (argc < 3) { + fprintf(stderr, "%s: infile1 outfile1 [infile2 outfile2 [...]]\n", argv[0]); + return 1; + } + + ret = io_uring_queue_init(QD, &ring, 0); + if (ret < 0) { + fprintf(stderr, "queue_init: %s\n", strerror(-ret)); + return -1; + } + + req_count = (argc - 
1) / 2; + printf("copying %d files...\n", req_count); + + for (i = 1; i < argc; i += 2) { + int infd, outfd; + + async_context *pctx = malloc(sizeof(*pctx)); + + if (!pctx || setup_context(pctx, &ring)) + return 1; + + infd = open(argv[i], O_RDONLY); + if (infd < 0) { + perror("open infile"); + return 1; + } + outfd = open(argv[i + 1], O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (outfd < 0) { + perror("open outfile"); + return 1; + } + + arguments_bundle *pbundle = malloc(sizeof(*pbundle)); + pbundle->pctx = pctx; + pbundle->psuccess = &success; + pbundle->pfailure = &failure; + pbundle->infd = infd; + pbundle->outfd = outfd; + + makecontext(&pctx->ctx_fnew, (void (*)(void)) copy_file_wrapper, 1, pbundle); + + if (swapcontext(&pctx->ctx_main, &pctx->ctx_fnew)) { + perror("swapcontext"); + return 1; + } + } + + /* event loop */ + while (success + failure < req_count) { + struct io_uring_cqe *cqe; + + /* usually be timed waiting */ + ret = io_uring_submit_and_wait(&ring, 1); + if (ret < 0) { + fprintf(stderr, "submit_and_wait: %s\n", strerror(-ret)); + return 1; + } + + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait_cqe: %s\n", strerror(-ret)); + return 1; + } + + async_context *pctx = io_uring_cqe_get_data(cqe); + + if (swapcontext(&pctx->ctx_main, &pctx->ctx_fnew)) { + perror("swapcontext"); + return 1; + } + } + + io_uring_queue_exit(&ring); + + printf("finished with %d success(es) and %d failure(s)\n", success, failure); + + return failure > 0; +} diff --git a/liburing.pc.in b/liburing.pc.in new file mode 100644 index 0000000..e621939 --- /dev/null +++ b/liburing.pc.in @@ -0,0 +1,12 @@ +prefix=@prefix@ +exec_prefix=${prefix} +libdir=@libdir@ +includedir=@includedir@ + +Name: @NAME@ +Version: @VERSION@ +Description: io_uring library +URL: http://git.kernel.dk/cgit/liburing/ + +Libs: -L${libdir} -luring +Cflags: -I${includedir} diff --git a/liburing.spec b/liburing.spec new file mode 100644 index 0000000..bfe46bd --- /dev/null +++ 
b/liburing.spec @@ -0,0 +1,63 @@ +Name: liburing +Version: 0.7 +Release: 1%{?dist} +Summary: Linux-native io_uring I/O access library +License: (GPLv2 with exceptions and LGPLv2+) or MIT +Source0: https://brick.kernel.dk/snaps/%{name}-%{version}.tar.gz +Source1: https://brick.kernel.dk/snaps/%{name}-%{version}.tar.gz.asc +URL: https://git.kernel.dk/cgit/liburing/ +BuildRequires: gcc + +%description +Provides native async IO for the Linux kernel, in a fast and efficient +manner, for both buffered and O_DIRECT. + +%package devel +Summary: Development files for Linux-native io_uring I/O access library +Requires: %{name}%{_isa} = %{version}-%{release} +Requires: pkgconfig + +%description devel +This package provides header files to include and libraries to link with +for the Linux-native io_uring. + +%prep +%autosetup + +%build +%set_build_flags +./configure --prefix=%{_prefix} --libdir=/%{_libdir} --libdevdir=/%{_libdir} --mandir=%{_mandir} --includedir=%{_includedir} + +%make_build + +%install +%make_install + +%files +%attr(0755,root,root) %{_libdir}/liburing.so.* +%license COPYING + +%files devel +%{_includedir}/liburing/ +%{_includedir}/liburing.h +%{_libdir}/liburing.so +%exclude %{_libdir}/liburing.a +%{_libdir}/pkgconfig/* +%{_mandir}/man2/* + +%changelog +* Thu Oct 31 2019 Jeff Moyer - 0.2-1 +- Add io_uring_cq_ready() +- Add io_uring_peek_batch_cqe() +- Add io_uring_prep_accept() +- Add io_uring_prep_{recv,send}msg() +- Add io_uring_prep_timeout_remove() +- Add io_uring_queue_init_params() +- Add io_uring_register_files_update() +- Add io_uring_sq_space_left() +- Add io_uring_wait_cqe_timeout() +- Add io_uring_wait_cqes() +- Add io_uring_wait_cqes_timeout() + +* Tue Jan 8 2019 Jens Axboe - 0.1 +- Initial version diff --git a/make-debs.sh b/make-debs.sh new file mode 100755 index 0000000..01d563c --- /dev/null +++ b/make-debs.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# Copyright (C) 2019 Liu Changcheng +# Author: Liu Changcheng +# +# This program is free 
software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +set -xe + +# Create dir for build +base=${1:-/tmp/release} +codename=$(lsb_release -sc) +releasedir=$base/$(lsb_release -si)/liburing +rm -rf $releasedir +mkdir -p $releasedir + +src_dir=$(readlink -e `basename $0`) +liburing_dir=$(dirname $src_dir) +basename=$(basename $liburing_dir) +dirname=$(dirname $liburing_dir) +version=$(git describe --match "lib*" | cut -d '-' -f 2) +outfile="liburing-$version" +orgfile=$(echo $outfile | tr '-' '_') + +# Prepare source code +cp -arf ${dirname}/${basename} ${releasedir}/${outfile} +cd ${releasedir}/${outfile} +git clean -dxf + +# Change changelog if it's needed +cur_ver=`head -l debian/changelog | sed -n -e 's/.* (\(.*\)) .*/\1/p'` +if [ "$cur_ver" != "$version-1" ]; then + dch -D $codename --force-distribution -b -v "$version-1" "new version" +fi + +# Create tar archieve +cd ../ +tar cvzf ${outfile}.tar.gz ${outfile} +ln -s ${outfile}.tar.gz ${orgfile}.orig.tar.gz + +# Build debian package +cd - +debuild diff --git a/man/io_uring_enter.2 b/man/io_uring_enter.2 new file mode 100644 index 0000000..b4f5f5a --- /dev/null +++ b/man/io_uring_enter.2 @@ -0,0 +1,915 @@ +.\" Copyright (C) 2019 Jens Axboe +.\" Copyright (C) 2019 Red Hat, Inc. 
+.\" +.\" SPDX-License-Identifier: LGPL-2.0-or-later +.\" +.TH IO_URING_ENTER 2 2019-01-22 "Linux" "Linux Programmer's Manual" +.SH NAME +io_uring_enter \- initiate and/or complete asynchronous I/O +.SH SYNOPSIS +.nf +.BR "#include " +.PP +.BI "int io_uring_enter(unsigned int " fd ", unsigned int " to_submit , +.BI " unsigned int " min_complete ", unsigned int " flags , +.BI " sigset_t *" sig ); +.fi +.PP +.SH DESCRIPTION +.PP +.BR io_uring_enter () +is used to initiate and complete I/O using the shared submission and +completion queues setup by a call to +.BR io_uring_setup (2). +A single call can both submit new I/O and wait for completions of I/O +initiated by this call or previous calls to +.BR io_uring_enter (). + +.I fd +is the file descriptor returned by +.BR io_uring_setup (2). +.I to_submit +specifies the number of I/Os to submit from the submission queue. If +the +.B IORING_ENTER_GETEVENTS +bit is set in +.IR flags , +then the system call will attempt to wait for +.I min_complete +event completions before returning. If the io_uring instance was +configured for polling, by specifying +.B IORING_SETUP_IOPOLL +in the call to +.BR io_uring_setup (2), +then min_complete has a slightly different meaning. Passing a value +of 0 instructs the kernel to return any events which are already complete, +without blocking. If +.I min_complete +is a non-zero value, the kernel will still return immediately if any +completion events are available. If no event completions are +available, then the call will poll either until one or more +completions become available, or until the process has exceeded its +scheduler time slice. + +Note that, for interrupt driven I/O (where +.B IORING_SETUP_IOPOLL +was not specified in the call to +.BR io_uring_setup (2)), +an application may check the completion queue for event completions +without entering the kernel at all. 
+.PP +When the system call returns that a certain amount of SQEs have been +consumed and submitted, it's safe to reuse SQE entries in the ring. This is +true even if the actual IO submission had to be punted to async context, +which means that the SQE may in fact not have been submitted yet. If the +kernel requires later use of a particular SQE entry, it will have made a +private copy of it. + +.I sig +is a pointer to a signal mask (see +.BR sigprocmask (2)); +if +.I sig +is not NULL, +.BR io_uring_enter () +first replaces the current signal mask by the one pointed to by +.IR sig , +then waits for events to become available in the completion queue, and +then restores the original signal mask. The following +.BR io_uring_enter () +call: +.PP +.in +4n +.EX +ret = io_uring_enter(fd, 0, 1, IORING_ENTER_GETEVENTS, &sig); +.EE +.in +.PP +is equivalent to +.I atomically +executing the following calls: +.PP +.in +4n +.EX +pthread_sigmask(SIG_SETMASK, &sig, &orig); +ret = io_uring_enter(fd, 0, 1, IORING_ENTER_GETEVENTS, NULL); +pthread_sigmask(SIG_SETMASK, &orig, NULL); +.EE +.in +.PP +See the description of +.BR pselect (2) +for an explanation of why the +.I sig +parameter is necessary. 
+ +Submission queue entries are represented using the following data +structure: +.PP +.in +4n +.EX +/* + * IO submission data structure (Submission Queue Entry) + */ +struct io_uring_sqe { + __u8 opcode; /* type of operation for this sqe */ + __u8 flags; /* IOSQE_ flags */ + __u16 ioprio; /* ioprio for the request */ + __s32 fd; /* file descriptor to do IO on */ + union { + __u64 off; /* offset into file */ + __u64 addr2; + }; + union { + __u64 addr; /* pointer to buffer or iovecs */ + __u64 splice_off_in; + } + __u32 len; /* buffer size or number of iovecs */ + union { + __kernel_rwf_t rw_flags; + __u32 fsync_flags; + __u16 poll_events; /* compatibility */ + __u32 poll32_events; /* word-reversed for BE */ + __u32 sync_range_flags; + __u32 msg_flags; + __u32 timeout_flags; + __u32 accept_flags; + __u32 cancel_flags; + __u32 open_flags; + __u32 statx_flags; + __u32 fadvise_advice; + __u32 splice_flags; + }; + __u64 user_data; /* data to be passed back at completion time */ + union { + struct { + /* index into fixed buffers, if used */ + union { + /* index into fixed buffers, if used */ + __u16 buf_index; + /* for grouped buffer selection */ + __u16 buf_group; + } + /* personality to use, if used */ + __u16 personality; + __s32 splice_fd_in; + }; + __u64 __pad2[3]; + }; +}; +.EE +.in +.PP +The +.I opcode +describes the operation to be performed. It can be one of: +.TP +.B IORING_OP_NOP +Do not perform any I/O. This is useful for testing the performance of +the io_uring implementation itself. +.TP +.B IORING_OP_READV +.TP +.B IORING_OP_WRITEV +Vectored read and write operations, similar to +.BR preadv2 (2) +and +.BR pwritev2 (2). + +.TP +.B IORING_OP_READ_FIXED +.TP +.B IORING_OP_WRITE_FIXED +Read from or write to pre-mapped buffers. See +.BR io_uring_register (2) +for details on how to setup a context for fixed reads and writes. + +.TP +.B IORING_OP_FSYNC +File sync. See also +.BR fsync (2). 
+Note that, while I/O is initiated in the order in which it appears in +the submission queue, completions are unordered. For example, an +application which places a write I/O followed by an fsync in the +submission queue cannot expect the fsync to apply to the write. The +two operations execute in parallel, so the fsync may complete before +the write is issued to the storage. The same is also true for +previously issued writes that have not completed prior to the fsync. + +.TP +.B IORING_OP_POLL_ADD +Poll the +.I fd +specified in the submission queue entry for the events +specified in the +.I poll_events +field. Unlike poll or epoll without +.BR EPOLLONESHOT , +this interface always works in one shot mode. That is, once the poll +operation is completed, it will have to be resubmitted. + +.TP +.B IORING_OP_POLL_REMOVE +Remove an existing poll request. If found, the +.I res +field of the +.I "struct io_uring_cqe" +will contain 0. If not found, +.I res +will contain +.B -ENOENT. + +.TP +.B IORING_OP_EPOLL_CTL +Add, remove or modify entries in the interest list of +.BR epoll (7). +See +.BR epoll_ctl (2) +for details of the system call. +.I fd +holds the file descriptor that represents the epoll instance, +.I addr +holds the file descriptor to add, remove or modify, +.I len +holds the operation (EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD) to perform and, +.I off +holds a pointer to the +.I epoll_events +structure. Available since 5.6. + +.TP +.B IORING_OP_SYNC_FILE_RANGE +Issue the equivalent of a \fBsync_file_range\fR (2) on the file descriptor. The +.I fd +field is the file descriptor to sync, the +.I off +field holds the offset in bytes, the +.I len +field holds the length in bytes, and the +.I sync_range_flags +field holds the flags for the command. See also +.BR sync_file_range (2) +for the general description of the related system call. Available since 5.2. + +.TP +.B IORING_OP_SENDMSG +Issue the equivalent of a +.BR sendmsg(2) +system call. 
+.I fd +must be set to the socket file descriptor, +.I addr +must contain a pointer to the msghdr structure, and +.I msg_flags +holds the flags associated with the system call. See also +.BR sendmsg (2) +for the general description of the related system call. Available since 5.3. + +.TP +.B IORING_OP_RECVMSG +Works just like IORING_OP_SENDMSG, except for +.BR recvmsg(2) +instead. See the description of IORING_OP_SENDMSG. Available since 5.3. + +.TP +.B IORING_OP_SEND +Issue the equivalent of a +.BR send(2) +system call. +.I fd +must be set to the socket file descriptor, +.I addr +must contain a pointer to the buffer, +.I len +denotes the length of the buffer to send, and +.I msg_flags +holds the flags associated with the system call. See also +.BR send(2) +for the general description of the related system call. Available since 5.6. + +.TP +.B IORING_OP_RECV +Works just like IORING_OP_SEND, except for +.BR recv(2) +instead. See the description of IORING_OP_SEND. Available since 5.6. + +.TP +.B IORING_OP_TIMEOUT +This command will register a timeout operation. The +.I addr +field must contain a pointer to a struct timespec64 structure, +.I len +must contain 1 to signify one timespec64 structure, +.I timeout_flags +may contain IORING_TIMEOUT_ABS +for an absolute timeout value, or 0 for a relative timeout. +.I off +may contain a completion event count. A timeout +will trigger a wakeup event on the completion ring for anyone waiting for +events. A timeout condition is met when either the specified timeout expires, +or the specified number of events have completed. Either condition will +trigger the event. If set to 0, completed events are not counted, which +effectively acts like a timer. io_uring timeouts use the +.B CLOCK_MONOTONIC +clock source. The request will complete with +.I -ETIME +if the timeout got completed through expiration of the timer, or +.I 0 +if the timeout got completed through requests completing on their own. 
If +the timeout was cancelled before it expired, the request will complete with +.I -ECANCELED. +Available since 5.4. + +.TP +.B IORING_OP_TIMEOUT_REMOVE +Attempt to remove an existing timeout operation. +.I addr +must contain the +.I user_data +field of the previously issued timeout operation. If the specified timeout +request is found and cancelled successfully, this request will terminate +with a result value of +.I 0 +If the timeout request was found but expiration was already in progress, +this request will terminate with a result value of +.I -EBUSY +If the timeout request wasn't found, the request will terminate with a result +value of +.I -ENOENT +Available since 5.5. + +.TP +.B IORING_OP_ACCEPT +Issue the equivalent of an +.BR accept4(2) +system call. +.I fd +must be set to the socket file descriptor, +.I addr +must contain the pointer to the sockaddr structure, and +.I addr2 +must contain a pointer to the socklen_t addrlen field. See also +.BR accept4(2) +for the general description of the related system call. Available since 5.5. + +.TP +.B IORING_OP_ASYNC_CANCEL +Attempt to cancel an already issued request. +.I addr +must contain the +.I user_data +field of the request that should be cancelled. The cancellation request will +complete with one of the following results codes. If found, the +.I res +field of the cqe will contain 0. If not found, +.I res +will contain -ENOENT. If found and attempted cancelled, the +.I res +field will contain -EALREADY. In this case, the request may or may not +terminate. In general, requests that are interruptible (like socket IO) will +get cancelled, while disk IO requests cannot be cancelled if already started. +Available since 5.5. + +.TP +.B IORING_OP_LINK_TIMEOUT +This request must be linked with another request through +.I IOSQE_IO_LINK +which is described below. Unlike +.I IORING_OP_TIMEOUT, +.I IORING_OP_LINK_TIMEOUT +acts on the linked request, not the completion queue. 
The format of the command +is otherwise like +.I IORING_OP_TIMEOUT, +except there's no completion event count as it's tied to a specific request. +If used, the timeout specified in the command will cancel the linked command, +unless the linked command completes before the timeout. The timeout will +complete with +.I -ETIME +if the timer expired and the linked request was attempted cancelled, or +.I -ECANCELED +if the timer got cancelled because of completion of the linked request. Like +.B IORING_OP_TIMEOUT +the clock source used is +.B CLOCK_MONOTONIC +Available since 5.5. + + +.TP +.B IORING_OP_CONNECT +Issue the equivalent of a +.BR connect(2) +system call. +.I fd +must be set to the socket file descriptor, +.I addr +must contain the const pointer to the sockaddr structure, and +.I off +must contain the socklen_t addrlen field. See also +.BR connect(2) +for the general description of the related system call. Available since 5.5. + +.TP +.B IORING_OP_FALLOCATE +Issue the equivalent of a +.BR fallocate(2) +system call. +.I fd +must be set to the file descriptor, +.I off +must contain the offset on which to operate, and +.I len +must contain the length. See also +.BR fallocate(2) +for the general description of the related system call. Available since 5.6. + +.TP +.B IORING_OP_FADVISE +Issue the equivalent of a +.BR posix_fadvise(2) +system call. +.I fd +must be set to the file descriptor, +.I off +must contain the offset on which to operate, +.I len +must contain the length, and +.I fadvise_advice +must contain the advice associated with the operation. See also +.BR posix_fadvise(2) +for the general description of the related system call. Available since 5.6. + +.TP +.B IORING_OP_MADVISE +Issue the equivalent of a +.BR madvise(2) +system call. +.I addr +must contain the address to operate on, +.I len +must contain the length on which to operate, +and +.I fadvise_advice +must contain the advice associated with the operation. 
See also +.BR madvise(2) +for the general description of the related system call. Available since 5.6. + +.TP +.B IORING_OP_OPENAT +Issue the equivalent of a +.BR openat(2) +system call. +.I fd +is the +.I dirfd +argument, +.I addr +must contain a pointer to the +.I *pathname +argument, +.I open_flags +should contain any flags passed in, and +.I mode +is access mode of the file. See also +.BR openat(2) +for the general description of the related system call. Available since 5.6. + +.TP +.B IORING_OP_OPENAT2 +Issue the equivalent of a +.BR openat2(2) +system call. +.I fd +is the +.I dirfd +argument, +.I addr +must contain a pointer to the +.I *pathname +argument, +.I len +should contain the size of the open_how structure, and +.I off +should be set to the address of the open_how structure. See also +.BR openat2(2) +for the general description of the related system call. Available since 5.6. + +.TP +.B IORING_OP_CLOSE +Issue the equivalent of a +.BR close(2) +system call. +.I fd +is the file descriptor to be closed. See also +.BR close(2) +for the general description of the related system call. Available since 5.6. + +.TP +.B IORING_OP_STATX +Issue the equivalent of a +.BR statx(2) +system call. +.I fd +is the +.I dirfd +argument, +.I addr +must contain a pointer to the +.I *pathname +string, +.I statx_flags +is the +.I flags +argument, +.I len +should be the +.I mask +argument, and +.I off +must contain a pointer to the +.I statxbuf +to be filled in. See also +.BR statx(2) +for the general description of the related system call. Available since 5.6. + +.TP +.B IORING_OP_READ +.TP +.B IORING_OP_WRITE +Issue the equivalent of a +.BR read(2) +or +.BR write(2) +system call. +.I fd +is the file descriptor to be operated on, +.I addr +contains the buffer in question, and +.I len +contains the length of the IO operation. These are non-vectored versions of the +.B IORING_OP_READV +and +.B IORING_OP_WRITEV +opcodes. 
See also +.BR read(2) +and +.BR write(2) +for the general description of the related system call. Available since 5.6. + +.TP +.B IORING_OP_SPLICE +Issue the equivalent of a +.BR splice(2) +system call. +.I splice_fd_in +is the file descriptor to read from, +.I splice_off_in +is an offset to read from, +.I fd +is the file descriptor to write to, +.I off +is an offset from which to start writing to. A sentinel value of -1 is used +to pass the equivalent of a NULL for the offsets to +.BR splice(2). +.I len +contains the number of bytes to copy. +.I splice_flags +contains a bit mask for the flag field associated with the system call. +Please note that one of the file descriptors must refer to a pipe. +See also +.BR splice(2) +for the general description of the related system call. Available since 5.7. + +.TP +.B IORING_OP_TEE +Issue the equivalent of a +.BR tee(2) +system call. +.I splice_fd_in +is the file descriptor to read from, +.I fd +is the file descriptor to write to, +.I len +contains the number of bytes to copy, and +.I splice_flags +contains a bit mask for the flag field associated with the system call. +Please note that both of the file descriptors must refer to a pipe. +See also +.BR tee(2) +for the general description of the related system call. Available since 5.8. + +.TP +.B IORING_OP_FILES_UPDATE +This command is an alternative to using +.B IORING_REGISTER_FILES_UPDATE +which then works in an async fashion, like the rest of the io_uring commands. +The arguments passed in are the same. +.I addr +must contain a pointer to the array of file descriptors, +.I len +must contain the length of the array, and +.I off +must contain the offset at which to operate. Note that the array of file +descriptors pointed to in +.I addr +must remain valid until this operation has completed. Available since 5.6. + +.PP +The +.I flags +field is a bit mask. 
The supported flags are: +.TP +.B IOSQE_FIXED_FILE +When this flag is specified, +.I fd +is an index into the files array registered with the io_uring instance (see the +.B IORING_REGISTER_FILES +section of the +.BR io_uring_register (2) +man page). Available since 5.1. +.TP +.B IOSQE_IO_DRAIN +When this flag is specified, the SQE will not be started before previously +submitted SQEs have completed, and new SQEs will not be started before this +one completes. Available since 5.2. +.TP +.B IOSQE_IO_LINK +When this flag is specified, it forms a link with the next SQE in the +submission ring. That next SQE will not be started before this one completes. +This, in effect, forms a chain of SQEs, which can be arbitrarily long. The tail +of the chain is denoted by the first SQE that does not have this flag set. +This flag has no effect on previous SQE submissions, nor does it impact SQEs +that are outside of the chain tail. This means that multiple chains can be +executing in parallel, or chains and individual SQEs. Only members inside the +chain are serialized. A chain of SQEs will be broken, if any request in that +chain ends in error. io_uring considers any unexpected result an error. This +means that, eg, a short read will also terminate the remainder of the chain. +If a chain of SQE links is broken, the remaining unstarted part of the chain +will be terminated and completed with +.B -ECANCELED +as the error code. Available since 5.3. +.TP +.B IOSQE_IO_HARDLINK +Like IOSQE_IO_LINK, but it doesn't sever regardless of the completion result. +Note that the link will still sever if we fail submitting the parent request, +hard links are only resilient in the presence of completion results for +requests that did submit correctly. IOSQE_IO_HARDLINK implies IOSQE_IO_LINK. +Available since 5.5. +.TP +.B IOSQE_ASYNC +Normal operation for io_uring is to try and issue an sqe as non-blocking first, +and if that fails, execute it in an async manner. 
To support more efficient
+overlapped operation of requests that the application knows/assumes will
+always (or most of the time) block, the application can ask for an sqe to be
+issued async from the start. Available since 5.6.
+
+
+.PP
+.I ioprio
+specifies the I/O priority. See
+.BR ioprio_get (2)
+for a description of Linux I/O priorities.
+
+.I fd
+specifies the file descriptor against which the operation will be
+performed, with the exception noted above.
+
+If the operation is one of
+.B IORING_OP_READ_FIXED
+or
+.BR IORING_OP_WRITE_FIXED ,
+.I addr
+and
+.I len
+must fall within the buffer located at
+.I buf_index
+in the fixed buffer array. If the operation is either
+.B IORING_OP_READV
+or
+.BR IORING_OP_WRITEV ,
+then
+.I addr
+points to an iovec array of
+.I len
+entries.
+
+.IR rw_flags ,
+specified for read and write operations, contains a bitwise OR of
+per-I/O flags, as described in the
+.BR preadv2 (2)
+man page.
+
+The
+.I fsync_flags
+bit mask may contain either 0, for a normal file integrity sync, or
+.B IORING_FSYNC_DATASYNC
+to provide data sync only semantics. See the descriptions of
+.B O_SYNC
+and
+.B O_DSYNC
+in the
+.BR open (2)
+manual page for more information.
+
+The bits that may be set in
+.I poll_events
+are defined in \fI<poll.h>\fP, and documented in
+.BR poll (2).
+
+.I user_data
+is an application-supplied value that will be copied into
+the completion queue entry (see below).
+.I buf_index
+is an index into an array of fixed buffers, and is only valid if fixed
+buffers were registered.
+.I personality
+is the credentials id to use for this operation. See
+.BR io_uring_register(2)
+for how to register personalities with io_uring. If set to 0, the current
+personality of the submitting task is used.
+.PP
+Once the submission queue entry is initialized, I/O is submitted by
+placing the index of the submission queue entry into the tail of the
+submission queue.
After one or more indexes are added to the queue,
+and the queue tail is advanced, the
+.BR io_uring_enter (2)
+system call can be invoked to initiate the I/O.
+
+Completions use the following data structure:
+.PP
+.in +4n
+.EX
+/*
+ * IO completion data structure (Completion Queue Entry)
+ */
+struct io_uring_cqe {
+    __u64   user_data; /* sqe->data submission passed back */
+    __s32   res;       /* result code for this event */
+    __u32   flags;
+};
+.EE
+.in
+.PP
+.I user_data
+is copied from the field of the same name in the submission queue
+entry. The primary use case is to store data that the application
+will need to access upon completion of this particular I/O. The
+.I flags
+is reserved for future use.
+.I res
+is the operation-specific result.
+.PP
+For read and write opcodes, the
+return values match those documented in the
+.BR preadv2 (2)
+and
+.BR pwritev2 (2)
+man pages.
+Return codes for the io_uring-specific opcodes are documented in the
+description of the opcodes above.
+.PP
+.SH RETURN VALUE
+.BR io_uring_enter ()
+returns the number of I/Os successfully consumed. This can be zero
+if
+.I to_submit
+was zero or if the submission queue was empty. The errors below that refer to
+an error in a submission queue entry will be returned through a completion queue
+entry, rather than through the system call itself.
+
+Errors that occur not on behalf of a submission queue entry are returned via the
+system call directly. On such an error, -1 is returned and
+.I errno
+is set appropriately.
+.PP
+.SH ERRORS
+.TP
+.B EAGAIN
+The kernel was unable to allocate memory for the request, or otherwise ran out
+of resources to handle it. The application should wait for some completions and
+try again.
+.TP
+.B EBUSY
+The application is attempting to overcommit the number of requests it can have
+pending. The application should wait for some completions and try again. May
+occur if the application tries to queue more requests than we have room for in
+the CQ ring.
+.TP +.B EBADF +The +.I fd +field in the submission queue entry is invalid, or the +.B IOSQE_FIXED_FILE +flag was set in the submission queue entry, but no files were registered +with the io_uring instance. +.TP +.B EFAULT +buffer is outside of the process' accessible address space +.TP +.B EFAULT +.B IORING_OP_READ_FIXED +or +.B IORING_OP_WRITE_FIXED +was specified in the +.I opcode +field of the submission queue entry, but either buffers were not +registered for this io_uring instance, or the address range described +by +.I addr +and +.I len +does not fit within the buffer registered at +.IR buf_index . +.TP +.B EINVAL +The +.I index +member of the submission queue entry is invalid. +.TP +.B EINVAL +The +.I flags +field or +.I opcode +in a submission queue entry is invalid. +.TP +.B EINVAL +.B IORING_OP_NOP +was specified in the submission queue entry, but the io_uring context +was setup for polling +.RB ( IORING_SETUP_IOPOLL +was specified in the call to io_uring_setup). +.TP +.B EINVAL +.B IORING_OP_READV +or +.B IORING_OP_WRITEV +was specified in the submission queue entry, but the io_uring instance +has fixed buffers registered. +.TP +.B EINVAL +.B IORING_OP_READ_FIXED +or +.B IORING_OP_WRITE_FIXED +was specified in the submission queue entry, and the +.I buf_index +is invalid. +.TP +.B EINVAL +.BR IORING_OP_READV , +.BR IORING_OP_WRITEV , +.BR IORING_OP_READ_FIXED , +.B IORING_OP_WRITE_FIXED +or +.B IORING_OP_FSYNC +was specified in the submission queue entry, but the io_uring instance +was configured for IOPOLLing, or any of +.IR addr , +.IR ioprio , +.IR off , +.IR len , +or +.I buf_index +was set in the submission queue entry. 
+.TP +.B EINVAL +.B IORING_OP_POLL_ADD +or +.B IORING_OP_POLL_REMOVE +was specified in the +.I opcode +field of the submission queue entry, but the io_uring instance was +configured for busy-wait polling +.RB ( IORING_SETUP_IOPOLL ), +or any of +.IR ioprio , +.IR off , +.IR len , +or +.I buf_index +was non-zero in the submission queue entry. +.TP +.B EINVAL +.B IORING_OP_POLL_ADD +was specified in the +.I opcode +field of the submission queue entry, and the +.I addr +field was non-zero. +.TP +.B ENXIO +The io_uring instance is in the process of being torn down. +.TP +.B EOPNOTSUPP +.I fd +does not refer to an io_uring instance. +.TP +.B EOPNOTSUPP +.I opcode +is valid, but not supported by this kernel. +.TP +.B EINTR +The operation was interrupted by a delivery of a signal before it could +complete; see +.BR signal(7). +Can happen while waiting for events with +.B IORING_ENTER_GETEVENTS. diff --git a/man/io_uring_register.2 b/man/io_uring_register.2 new file mode 100644 index 0000000..5022c03 --- /dev/null +++ b/man/io_uring_register.2 @@ -0,0 +1,323 @@ +.\" Copyright (C) 2019 Jens Axboe +.\" Copyright (C) 2019 Red Hat, Inc. +.\" +.\" SPDX-License-Identifier: LGPL-2.0-or-later +.\" +.TH IO_URING_REGISTER 2 2019-01-17 "Linux" "Linux Programmer's Manual" +.SH NAME +io_uring_register \- register files or user buffers for asynchronous I/O +.SH SYNOPSIS +.nf +.BR "#include " +.PP +.BI "int io_uring_register(unsigned int " fd ", unsigned int " opcode , +.BI " void *" arg ", unsigned int " nr_args ); +.fi +.PP +.SH DESCRIPTION +.PP + +The +.BR io_uring_register () +system call registers user buffers or files for use in an +.BR io_uring (7) +instance referenced by +.IR fd . +Registering files or user buffers allows the kernel to take long term +references to internal data structures or create long term mappings of +application memory, greatly reducing per-I/O overhead. + +.I fd +is the file descriptor returned by a call to +.BR io_uring_setup (2). 
+.I opcode +can be one of: + +.TP +.B IORING_REGISTER_BUFFERS +.I arg +points to a +.I struct iovec +array of +.I nr_args +entries. The buffers associated with the iovecs will be locked in +memory and charged against the user's +.B RLIMIT_MEMLOCK +resource limit. See +.BR getrlimit (2) +for more information. Additionally, there is a size limit of 1GiB per +buffer. Currently, the buffers must be anonymous, non-file-backed +memory, such as that returned by +.BR malloc (3) +or +.BR mmap (2) +with the +.B MAP_ANONYMOUS +flag set. It is expected that this limitation will be lifted in the +future. Huge pages are supported as well. Note that the entire huge +page will be pinned in the kernel, even if only a portion of it is +used. + +After a successful call, the supplied buffers are mapped into the +kernel and eligible for I/O. To make use of them, the application +must specify the +.B IORING_OP_READ_FIXED +or +.B IORING_OP_WRITE_FIXED +opcodes in the submission queue entry (see the +.I struct io_uring_sqe +definition in +.BR io_uring_enter (2)), +and set the +.I buf_index +field to the desired buffer index. The memory range described by the +submission queue entry's +.I addr +and +.I len +fields must fall within the indexed buffer. + +It is perfectly valid to setup a large buffer and then only use part +of it for an I/O, as long as the range is within the originally mapped +region. + +An application can increase or decrease the size or number of +registered buffers by first unregistering the existing buffers, and +then issuing a new call to +.BR io_uring_register () +with the new buffers. + +Note that registering buffers will wait for the ring to idle. If the application +currently has requests in-flight, the registration will wait for those to +finish before proceeding. + +An application need not unregister buffers explicitly before shutting +down the io_uring instance. Available since 5.1. 
+ +.TP +.B IORING_UNREGISTER_BUFFERS +This operation takes no argument, and +.I arg +must be passed as NULL. All previously registered buffers associated +with the io_uring instance will be released. Available since 5.1. + +.TP +.B IORING_REGISTER_FILES +Register files for I/O. +.I arg +contains a pointer to an array of +.I nr_args +file descriptors (signed 32 bit integers). + +To make use of the registered files, the +.B IOSQE_FIXED_FILE +flag must be set in the +.I flags +member of the +.IR "struct io_uring_sqe" , +and the +.I fd +member is set to the index of the file in the file descriptor array. + +The file set may be sparse, meaning that the +.B fd +field in the array may be set to +.B -1. +See +.B IORING_REGISTER_FILES_UPDATE +for how to update files in place. + +Note that registering files will wait for the ring to idle. If the application +currently has requests in-flight, the registration will wait for those to +finish before proceeding. See +.B IORING_REGISTER_FILES_UPDATE +for how to update an existing set without that limitation. + +Files are automatically unregistered when the io_uring instance is +torn down. An application need only unregister if it wishes to +register a new set of fds. Available since 5.1. + +.TP +.B IORING_REGISTER_FILES_UPDATE +This operation replaces existing files in the registered file set with new +ones, either turning a sparse entry (one where fd is equal to -1) into a +real one, removing an existing entry (new one is set to -1), or replacing +an existing entry with a new existing entry. +.I arg +must contain a pointer to a struct io_uring_files_update, which contains +an offset on which to start the update, and an array of file descriptors to +use for the update. +.I nr_args +must contain the number of descriptors in the passed in array. Available +since 5.5. + +.TP +.B IORING_UNREGISTER_FILES +This operation requires no argument, and +.I arg +must be passed as NULL. 
All previously registered files associated +with the io_uring instance will be unregistered. Available since 5.1. + +.TP +.B IORING_REGISTER_EVENTFD +It's possible to use eventfd(2) to get notified of completion events on an +io_uring instance. If this is desired, an eventfd file descriptor can be +registered through this operation. +.I arg +must contain a pointer to the eventfd file descriptor, and +.I nr_args +must be 1. Available since 5.2. + +An application can temporarily disable notifications, coming through the +registered eventfd, by setting the +.B IORING_CQ_EVENTFD_DISABLED +bit in the +.I flags +field of the CQ ring. +Available since 5.8. + +.TP +.B IORING_REGISTER_EVENTFD_ASYNC +This works just like +.B IORING_REGISTER_EVENTFD +, except notifications are only posted for events that complete in an async +manner. This means that events that complete inline while being submitted +do not trigger a notification event. The arguments supplied are the same as +for +.B IORING_REGISTER_EVENTFD. +Available since 5.6. + +.TP +.B IORING_UNREGISTER_EVENTFD +Unregister an eventfd file descriptor to stop notifications. Since only one +eventfd descriptor is currently supported, this operation takes no argument, +and +.I arg +must be passed as NULL and +.I nr_args +must be zero. Available since 5.2. + +.TP +.B IORING_REGISTER_PROBE +This operation returns a structure, io_uring_probe, which contains information +about the opcodes supported by io_uring on the running kernel. +.I arg +must contain a pointer to a struct io_uring_probe, and +.I nr_args +must contain the size of the ops array in that probe struct. The ops array +is of the type io_uring_probe_op, which holds the value of the opcode and +a flags field. If the flags field has +.B IO_URING_OP_SUPPORTED +set, then this opcode is supported on the running kernel. Available since 5.6. 
+ +.TP +.B IORING_REGISTER_PERSONALITY +This operation registers credentials of the running application with io_uring, +and returns an id associated with these credentials. Applications wishing to +share a ring between separate users/processes can pass in this credential id +in the sqe +.B personality +field. If set, that particular sqe will be issued with these credentials. Must +be invoked with +.I arg +set to NULL and +.I nr_args +set to zero. Available since 5.6. + +.TP +.B IORING_UNREGISTER_PERSONALITY +This operation unregisters a previously registered personality with io_uring. +.I nr_args +must be set to the id in question, and +.I arg +must be set to NULL. Available since 5.6. + +.SH RETURN VALUE + +On success, +.BR io_uring_register () +returns 0. On error, -1 is returned, and +.I errno +is set accordingly. + +.SH ERRORS +.TP +.B EBADF +One or more fds in the +.I fd +array are invalid. +.TP +.B EBUSY +.B IORING_REGISTER_BUFFERS +or +.B IORING_REGISTER_FILES +was specified, but there were already buffers or files registered. +.TP +.B EFAULT +buffer is outside of the process' accessible address space, or +.I iov_len +is greater than 1GiB. +.TP +.B EINVAL +.B IORING_REGISTER_BUFFERS +or +.B IORING_REGISTER_FILES +was specified, but +.I nr_args +is 0. +.TP +.B EINVAL +.B IORING_REGISTER_BUFFERS +was specified, but +.I nr_args +exceeds +.B UIO_MAXIOV +.TP +.B EINVAL +.B IORING_UNREGISTER_BUFFERS +or +.B IORING_UNREGISTER_FILES +was specified, and +.I nr_args +is non-zero or +.I arg +is non-NULL. +.TP +.B EMFILE +.B IORING_REGISTER_FILES +was specified and +.I nr_args +exceeds the maximum allowed number of files in a fixed file set. +.TP +.B EMFILE +.B IORING_REGISTER_FILES +was specified and adding +.I nr_args +file references would exceed the maximum allowed number of files the user +is allowed to have according to the +.B +RLIMIT_NOFILE +resource limit and the caller does not have +.B CAP_SYS_RESOURCE +capability. 
Note that this is a per user limit, not per process. +.TP +.B ENOMEM +Insufficient kernel resources are available, or the caller had a +non-zero +.B RLIMIT_MEMLOCK +soft resource limit, but tried to lock more memory than the limit +permitted. This limit is not enforced if the process is privileged +.RB ( CAP_IPC_LOCK ). +.TP +.B ENXIO +.B IORING_UNREGISTER_BUFFERS +or +.B IORING_UNREGISTER_FILES +was specified, but there were no buffers or files registered. +.TP +.B ENXIO +Attempt to register files or buffers on an io_uring instance that is already +undergoing file or buffer registration, or is being torn down. +.TP +.B EOPNOTSUPP +User buffers point to file-backed memory. diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2 new file mode 100644 index 0000000..c929cb7 --- /dev/null +++ b/man/io_uring_setup.2 @@ -0,0 +1,403 @@ +.\" Copyright (C) 2019 Jens Axboe +.\" Copyright (C) 2019 Jon Corbet +.\" Copyright (C) 2019 Red Hat, Inc. +.\" +.\" SPDX-License-Identifier: LGPL-2.0-or-later +.\" +.TH IO_URING_SETUP 2 2019-01-29 "Linux" "Linux Programmer's Manual" +.SH NAME +io_uring_setup \- setup a context for performing asynchronous I/O +.SH SYNOPSIS +.nf +.BR "#include " +.PP +.BI "int io_uring_setup(u32 " entries ", struct io_uring_params *" p ); +.fi +.PP +.SH DESCRIPTION +.PP +The io_uring_setup() system call sets up a submission queue (SQ) and +completion queue (CQ) with at least +.I entries +entries, and returns a file descriptor which can be used to perform +subsequent operations on the io_uring instance. The submission and +completion queues are shared between userspace and the kernel, which +eliminates the need to copy data when initiating and completing I/O. + +.I params +is used by the application to pass options to the kernel, and by the +kernel to convey information about the ring buffers. 
+.PP +.in +4n +.EX +struct io_uring_params { + __u32 sq_entries; + __u32 cq_entries; + __u32 flags; + __u32 sq_thread_cpu; + __u32 sq_thread_idle; + __u32 features; + __u32 resv[4]; + struct io_sqring_offsets sq_off; + struct io_cqring_offsets cq_off; +}; +.EE +.in +.PP +The +.IR flags , +.IR sq_thread_cpu , +and +.I sq_thread_idle +fields are used to configure the io_uring instance. +.I flags +is a bit mask of 0 or more of the following values ORed +together: +.TP +.B IORING_SETUP_IOPOLL +Perform busy-waiting for an I/O completion, as opposed to getting +notifications via an asynchronous IRQ (Interrupt Request). The file +system (if any) and block device must support polling in order for +this to work. Busy-waiting provides lower latency, but may consume +more CPU resources than interrupt driven I/O. Currently, this feature +is usable only on a file descriptor opened using the +.B O_DIRECT +flag. When a read or write is submitted to a polled context, the +application must poll for completions on the CQ ring by calling +.BR io_uring_enter (2). +It is illegal to mix and match polled and non-polled I/O on an io_uring +instance. + +.TP +.B IORING_SETUP_SQPOLL +When this flag is specified, a kernel thread is created to perform +submission queue polling. An io_uring instance configured in this way +enables an application to issue I/O without ever context switching +into the kernel. By using the submission queue to fill in new +submission queue entries and watching for completions on the +completion queue, the application can submit and reap I/Os without +doing a single system call. + +If the kernel thread is idle for more than +.I sq_thread_idle +milliseconds, it will set the +.B IORING_SQ_NEED_WAKEUP +bit in the +.I flags +field of the +.IR "struct io_sq_ring" . +When this happens, the application must call +.BR io_uring_enter (2) +to wake the kernel thread. If I/O is kept busy, the kernel thread +will never sleep. 
An application making use of this feature will need +to guard the +.BR io_uring_enter (2) +call with the following code sequence: + +.in +4n +.EX +/* + * Ensure that the wakeup flag is read after the tail pointer has been + * written. + */ +smp_mb(); +if (*sq_ring->flags & IORING_SQ_NEED_WAKEUP) + io_uring_enter(fd, 0, 0, IORING_ENTER_SQ_WAKEUP); +.EE +.in + +where +.I sq_ring +is a submission queue ring setup using the +.I struct io_sqring_offsets +described below. +.TP +.BR +To successfully use this feature, the application must register a set of files +to be used for IO through +.BR io_uring_register (2) +using the +.B IORING_REGISTER_FILES +opcode. Failure to do so will result in submitted IO being errored with +.B EBADF. +.TP +.B IORING_SETUP_SQ_AFF +If this flag is specified, then the poll thread will be bound to the +cpu set in the +.I sq_thread_cpu +field of the +.IR "struct io_uring_params" . +This flag is only meaningful when +.B IORING_SETUP_SQPOLL +is specified. +.TP +.B IORING_SETUP_CQSIZE +Create the completion queue with +.IR "struct io_uring_params.cq_entries" +entries. The value must be greater than +.IR entries , +and may be rounded up to the next power-of-two. +.PP +If no flags are specified, the io_uring instance is setup for +interrupt driven I/O. I/O may be submitted using +.BR io_uring_enter (2) +and can be reaped by polling the completion queue. + +The +.I resv +array must be initialized to zero. + +.I features +is filled in by the kernel, which specifies various features supported +by current kernel version. +.TP +.B IORING_FEAT_SINGLE_MMAP +If this flag is set, the two SQ and CQ rings can be mapped with a single +.I mmap(2) +call. The SQEs must still be allocated separately. This brings the necessary +.I mmap(2) +calls down from three to two. +.TP +.B IORING_FEAT_NODROP +If this flag is set, io_uring supports never dropping completion events. 
+If a completion event occurs and the CQ ring is full, the kernel stores +the event internally until such a time that the CQ ring has room for more +entries. If this overflow condition is entered, attempting to submit more +IO will fail with the +.B -EBUSY +error value, if it can't flush the overflown events to the CQ ring. If this +happens, the application must reap events from the CQ ring and attempt the +submit again. +.TP +.B IORING_FEAT_SUBMIT_STABLE +If this flag is set, applications can be certain that any data for +async offload has been consumed when the kernel has consumed the SQE. +.TP +.B IORING_FEAT_RW_CUR_POS +If this flag is set, applications can specify +.I offset +== -1 with +.B IORING_OP_{READV,WRITEV} +, +.B IORING_OP_{READ,WRITE}_FIXED +, and +.B IORING_OP_{READ,WRITE} +to mean current file position, which behaves like +.I preadv2(2) +and +.I pwritev2(2) +with +.I offset +== -1. It'll use (and update) the current file position. This obviously comes +with the caveat that if the application has multiple reads or writes in flight, +then the end result will not be as expected. This is similar to threads sharing +a file descriptor and doing IO using the current file position. +.TP +.B IORING_FEAT_CUR_PERSONALITY +If this flag is set, then io_uring guarantees that both sync and async +execution of a request assumes the credentials of the task that called +.I +io_uring_enter(2) +to queue the requests. If this flag isn't set, then requests are issued with +the credentials of the task that originally registered the io_uring. If only +one task is using a ring, then this flag doesn't matter as the credentials +will always be the same. Note that this is the default behavior, tasks can +still register different personalities through +.I +io_uring_register(2) +with +.B IORING_REGISTER_PERSONALITY +and specify the personality to use in the sqe.
+ +.PP +The rest of the fields in the +.I struct io_uring_params +are filled in by the kernel, and provide the information necessary to +memory map the submission queue, completion queue, and the array of +submission queue entries. +.I sq_entries +specifies the number of submission queue entries allocated. +.I sq_off +describes the offsets of various ring buffer fields: +.PP +.in +4n +.EX +struct io_sqring_offsets { + __u32 head; + __u32 tail; + __u32 ring_mask; + __u32 ring_entries; + __u32 flags; + __u32 dropped; + __u32 array; + __u32 resv[3]; +}; +.EE +.in +.PP +Taken together, +.I sq_entries +and +.I sq_off +provide all of the information necessary for accessing the submission +queue ring buffer and the submission queue entry array. The +submission queue can be mapped with a call like: +.PP +.in +4n +.EX +ptr = mmap(0, sq_off.array + sq_entries * sizeof(__u32), + PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, + ring_fd, IORING_OFF_SQ_RING); +.EE +.in +.PP +where +.I sq_off +is the +.I io_sqring_offsets +structure, and +.I ring_fd +is the file descriptor returned from +.BR io_uring_setup (2). +The addition of +.I sq_off.array +to the length of the region accounts for the fact that the ring is +located at the end of the data structure. As an example, the ring +buffer head pointer can be accessed by adding +.I sq_off.head +to the address returned from +.BR mmap (2): +.PP +.in +4n +.EX +head = ptr + sq_off.head; +.EE +.in + +The +.I flags +field is used by the kernel to communicate state information to the +application. Currently, it is used to inform the application when a +call to +.BR io_uring_enter (2) +is necessary. See the documentation for the +.B IORING_SETUP_SQPOLL +flag above. +The +.I dropped +member is incremented for each invalid submission queue entry +encountered in the ring buffer. + +The head and tail track the ring buffer state.
The tail is +incremented by the application when submitting new I/O, and the head +is incremented by the kernel when the I/O has been successfully +submitted. Determining the index of the head or tail into the ring is +accomplished by applying a mask: +.PP +.in +4n +.EX +index = tail & ring_mask; +.EE +.in +.PP +The array of submission queue entries is mapped with: +.PP +.in +4n +.EX +sqentries = mmap(0, sq_entries * sizeof(struct io_uring_sqe), + PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, + ring_fd, IORING_OFF_SQES); +.EE +.in +.PP +The completion queue is described by +.I cq_entries +and +.I cq_off +shown here: +.PP +.in +4n +.EX +struct io_cqring_offsets { + __u32 head; + __u32 tail; + __u32 ring_mask; + __u32 ring_entries; + __u32 overflow; + __u32 cqes; + __u32 flags; + __u32 resv[3]; +}; +.EE +.in +.PP +The completion queue is simpler, since the entries are not separated +from the queue itself, and can be mapped with: +.PP +.in +4n +.EX +ptr = mmap(0, cq_off.cqes + cq_entries * sizeof(struct io_uring_cqe), + PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, ring_fd, + IORING_OFF_CQ_RING); +.EE +.in +.PP +Closing the file descriptor returned by +.BR io_uring_setup (2) +will free all resources associated with the io_uring context. +.PP +.SH RETURN VALUE +.BR io_uring_setup (2) +returns a new file descriptor on success. The application may then +provide the file descriptor in a subsequent +.BR mmap (2) +call to map the submission and completion queues, or to the +.BR io_uring_register (2) +or +.BR io_uring_enter (2) +system calls. + +On error, -1 is returned and +.I errno +is set appropriately. +.PP +.SH ERRORS +.TP +.B EFAULT +params is outside your accessible address space. 
+.TP +.B EINVAL +The resv array contains non-zero data, p.flags contains an unsupported +flag, +.I entries +is out of bounds, +.B IORING_SETUP_SQ_AFF +was specified, but +.B IORING_SETUP_SQPOLL +was not, or +.B IORING_SETUP_CQSIZE +was specified, but +.I io_uring_params.cq_entries +was invalid. +.TP +.B EMFILE +The per-process limit on the number of open file descriptors has been +reached (see the description of +.B RLIMIT_NOFILE +in +.BR getrlimit (2)). +.TP +.B ENFILE +The system-wide limit on the total number of open files has been +reached. +.TP +.B ENOMEM +Insufficient kernel resources are available. +.TP +.B EPERM +.B IORING_SETUP_SQPOLL +was specified, but the effective user ID of the caller did not have sufficient +privileges. +.SH SEE ALSO +.BR io_uring_register (2), +.BR io_uring_enter (2) diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..44a95ad --- /dev/null +++ b/src/Makefile @@ -0,0 +1,73 @@ +prefix ?= /usr +includedir ?= $(prefix)/include +libdir ?= $(prefix)/lib +libdevdir ?= $(prefix)/lib + +CFLAGS ?= -g -fomit-frame-pointer -O2 +override CFLAGS += -Wall -Wextra -Wno-unused-parameter -Wno-sign-compare\ + -Iinclude/ -include ../config-host.h +SO_CFLAGS=-fPIC $(CFLAGS) +L_CFLAGS=$(CFLAGS) +LINK_FLAGS= +LINK_FLAGS+=$(LDFLAGS) +ENABLE_SHARED ?= 1 + +soname=liburing.so.1 +minor=0 +micro=7 +libname=$(soname).$(minor).$(micro) +all_targets += liburing.a + +ifeq ($(ENABLE_SHARED),1) +all_targets += $(libname) +endif + +include ../Makefile.quiet + +ifneq ($(MAKECMDGOALS),clean) +include ../config-host.mak +endif + +all: $(all_targets) + +liburing_srcs := setup.c queue.c syscall.c register.c + +liburing_objs := $(patsubst %.c,%.ol,$(liburing_srcs)) +liburing_sobjs := $(patsubst %.c,%.os,$(liburing_srcs)) + +$(liburing_objs) $(liburing_sobjs): include/liburing/io_uring.h + +%.os: %.c + $(QUIET_CC)$(CC) $(SO_CFLAGS) -c -o $@ $< + +%.ol: %.c + $(QUIET_CC)$(CC) $(L_CFLAGS) -c -o $@ $< + +AR ?= ar +RANLIB ?= ranlib +liburing.a: 
$(liburing_objs) + @rm -f liburing.a + $(QUIET_AR)$(AR) r liburing.a $^ + $(QUIET_RANLIB)$(RANLIB) liburing.a + +$(libname): $(liburing_sobjs) liburing.map + $(QUIET_CC)$(CC) $(SO_CFLAGS) -shared -Wl,--version-script=liburing.map -Wl,-soname=$(soname) -o $@ $(liburing_sobjs) $(LINK_FLAGS) + +install: $(all_targets) + install -D -m 644 include/liburing/io_uring.h $(includedir)/liburing/io_uring.h + install -D -m 644 include/liburing.h $(includedir)/liburing.h + install -D -m 644 include/liburing/compat.h $(includedir)/liburing/compat.h + install -D -m 644 include/liburing/barrier.h $(includedir)/liburing/barrier.h + install -D -m 644 liburing.a $(libdevdir)/liburing.a +ifeq ($(ENABLE_SHARED),1) + install -D -m 755 $(libname) $(libdir)/$(libname) + ln -sf $(libname) $(libdir)/$(soname) + ln -sf $(relativelibdir)$(libname) $(libdevdir)/liburing.so +endif + +$(liburing_objs): include/liburing.h + +clean: + @rm -f $(all_targets) $(liburing_objs) $(liburing_sobjs) $(soname).new + @rm -f *.so* *.a *.o + @rm -f include/liburing/compat.h diff --git a/src/include/liburing.h b/src/include/liburing.h new file mode 100644 index 0000000..0505a4f --- /dev/null +++ b/src/include/liburing.h @@ -0,0 +1,521 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef LIB_URING_H +#define LIB_URING_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "liburing/compat.h" +#include "liburing/io_uring.h" +#include "liburing/barrier.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Library interface to io_uring + */ +struct io_uring_sq { + unsigned *khead; + unsigned *ktail; + unsigned *kring_mask; + unsigned *kring_entries; + unsigned *kflags; + unsigned *kdropped; + unsigned *array; + struct io_uring_sqe *sqes; + + unsigned sqe_head; + unsigned sqe_tail; + + size_t ring_sz; + void *ring_ptr; +}; + +struct io_uring_cq { + unsigned *khead; + unsigned *ktail; + unsigned *kring_mask; + unsigned *kring_entries; + unsigned *kflags; + unsigned 
*koverflow; + struct io_uring_cqe *cqes; + + size_t ring_sz; + void *ring_ptr; +}; + +struct io_uring { + struct io_uring_sq sq; + struct io_uring_cq cq; + unsigned flags; + int ring_fd; +}; + +/* + * Library interface + */ + +/* + * return an allocated io_uring_probe structure, or NULL if probe fails (for + * example, if it is not available). The caller is responsible for freeing it + */ +extern struct io_uring_probe *io_uring_get_probe_ring(struct io_uring *ring); +/* same as io_uring_get_probe_ring, but takes care of ring init and teardown */ +extern struct io_uring_probe *io_uring_get_probe(void); + +static inline int io_uring_opcode_supported(struct io_uring_probe *p, int op) +{ + if (op > p->last_op) + return 0; + return (p->ops[op].flags & IO_URING_OP_SUPPORTED) != 0; +} + +extern int io_uring_queue_init_params(unsigned entries, struct io_uring *ring, + struct io_uring_params *p); +extern int io_uring_queue_init(unsigned entries, struct io_uring *ring, + unsigned flags); +extern int io_uring_queue_mmap(int fd, struct io_uring_params *p, + struct io_uring *ring); +extern int io_uring_ring_dontfork(struct io_uring *ring); +extern void io_uring_queue_exit(struct io_uring *ring); +unsigned io_uring_peek_batch_cqe(struct io_uring *ring, + struct io_uring_cqe **cqes, unsigned count); +extern int io_uring_wait_cqes(struct io_uring *ring, + struct io_uring_cqe **cqe_ptr, unsigned wait_nr, + struct __kernel_timespec *ts, sigset_t *sigmask); +extern int io_uring_wait_cqe_timeout(struct io_uring *ring, + struct io_uring_cqe **cqe_ptr, struct __kernel_timespec *ts); +extern int io_uring_submit(struct io_uring *ring); +extern int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr); +extern struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring); + +extern int io_uring_register_buffers(struct io_uring *ring, + const struct iovec *iovecs, + unsigned nr_iovecs); +extern int io_uring_unregister_buffers(struct io_uring *ring); +extern int 
io_uring_register_files(struct io_uring *ring, const int *files, + unsigned nr_files); +extern int io_uring_unregister_files(struct io_uring *ring); +extern int io_uring_register_files_update(struct io_uring *ring, unsigned off, + int *files, unsigned nr_files); +extern int io_uring_register_eventfd(struct io_uring *ring, int fd); +extern int io_uring_register_eventfd_async(struct io_uring *ring, int fd); +extern int io_uring_unregister_eventfd(struct io_uring *ring); +extern int io_uring_register_probe(struct io_uring *ring, + struct io_uring_probe *p, unsigned nr); +extern int io_uring_register_personality(struct io_uring *ring); +extern int io_uring_unregister_personality(struct io_uring *ring, int id); + +/* + * Helper for the peek/wait single cqe functions. Exported because of that, + * but probably shouldn't be used directly in an application. + */ +extern int __io_uring_get_cqe(struct io_uring *ring, + struct io_uring_cqe **cqe_ptr, unsigned submit, + unsigned wait_nr, sigset_t *sigmask); + +#define LIBURING_UDATA_TIMEOUT ((__u64) -1) + +#define io_uring_for_each_cqe(ring, head, cqe) \ + /* \ + * io_uring_smp_load_acquire() enforces the order of tail \ + * and CQE reads. \ + */ \ + for (head = *(ring)->cq.khead; \ + (cqe = (head != io_uring_smp_load_acquire((ring)->cq.ktail) ? \ + &(ring)->cq.cqes[head & (*(ring)->cq.kring_mask)] : NULL)); \ + head++) \ + +/* + * Must be called after io_uring_for_each_cqe() + */ +static inline void io_uring_cq_advance(struct io_uring *ring, + unsigned nr) +{ + if (nr) { + struct io_uring_cq *cq = &ring->cq; + + /* + * Ensure that the kernel only sees the new value of the head + * index after the CQEs have been read. + */ + io_uring_smp_store_release(cq->khead, *cq->khead + nr); + } +} + +/* + * Must be called after io_uring_{peek,wait}_cqe() after the cqe has + * been processed by the application. 
+ */ +static inline void io_uring_cqe_seen(struct io_uring *ring, + struct io_uring_cqe *cqe) +{ + if (cqe) + io_uring_cq_advance(ring, 1); +} + +/* + * Command prep helpers + */ +static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data) +{ + sqe->user_data = (unsigned long) data; +} + +static inline void *io_uring_cqe_get_data(const struct io_uring_cqe *cqe) +{ + return (void *) (uintptr_t) cqe->user_data; +} + +static inline void io_uring_sqe_set_flags(struct io_uring_sqe *sqe, + unsigned flags) +{ + sqe->flags = flags; +} + +static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd, + const void *addr, unsigned len, + __u64 offset) +{ + sqe->opcode = op; + sqe->flags = 0; + sqe->ioprio = 0; + sqe->fd = fd; + sqe->off = offset; + sqe->addr = (unsigned long) addr; + sqe->len = len; + sqe->rw_flags = 0; + sqe->user_data = 0; + sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0; +} + +static inline void io_uring_prep_splice(struct io_uring_sqe *sqe, + int fd_in, uint64_t off_in, + int fd_out, uint64_t off_out, + unsigned int nbytes, + unsigned int splice_flags) +{ + io_uring_prep_rw(IORING_OP_SPLICE, sqe, fd_out, NULL, nbytes, off_out); + sqe->splice_off_in = off_in; + sqe->splice_fd_in = fd_in; + sqe->splice_flags = splice_flags; +} + +static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd, + const struct iovec *iovecs, + unsigned nr_vecs, off_t offset) +{ + io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset); +} + +static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd, + void *buf, unsigned nbytes, + off_t offset, int buf_index) +{ + io_uring_prep_rw(IORING_OP_READ_FIXED, sqe, fd, buf, nbytes, offset); + sqe->buf_index = buf_index; +} + +static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd, + const struct iovec *iovecs, + unsigned nr_vecs, off_t offset) +{ + io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset); +} + +static inline 
void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd, + const void *buf, unsigned nbytes, + off_t offset, int buf_index) +{ + io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset); + sqe->buf_index = buf_index; +} + +static inline void io_uring_prep_recvmsg(struct io_uring_sqe *sqe, int fd, + struct msghdr *msg, unsigned flags) +{ + io_uring_prep_rw(IORING_OP_RECVMSG, sqe, fd, msg, 1, 0); + sqe->msg_flags = flags; +} + +static inline void io_uring_prep_sendmsg(struct io_uring_sqe *sqe, int fd, + const struct msghdr *msg, unsigned flags) +{ + io_uring_prep_rw(IORING_OP_SENDMSG, sqe, fd, msg, 1, 0); + sqe->msg_flags = flags; +} + +static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd, + unsigned poll_mask) +{ + io_uring_prep_rw(IORING_OP_POLL_ADD, sqe, fd, NULL, 0, 0); +#if __BYTE_ORDER == __BIG_ENDIAN + poll_mask = __swahw32(poll_mask); +#endif + sqe->poll32_events = poll_mask; +} + +static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe, + void *user_data) +{ + io_uring_prep_rw(IORING_OP_POLL_REMOVE, sqe, -1, user_data, 0, 0); +} + +static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd, + unsigned fsync_flags) +{ + io_uring_prep_rw(IORING_OP_FSYNC, sqe, fd, NULL, 0, 0); + sqe->fsync_flags = fsync_flags; +} + +static inline void io_uring_prep_nop(struct io_uring_sqe *sqe) +{ + io_uring_prep_rw(IORING_OP_NOP, sqe, -1, NULL, 0, 0); +} + +static inline void io_uring_prep_timeout(struct io_uring_sqe *sqe, + struct __kernel_timespec *ts, + unsigned count, unsigned flags) +{ + io_uring_prep_rw(IORING_OP_TIMEOUT, sqe, -1, ts, 1, count); + sqe->timeout_flags = flags; +} + +static inline void io_uring_prep_timeout_remove(struct io_uring_sqe *sqe, + __u64 user_data, unsigned flags) +{ + io_uring_prep_rw(IORING_OP_TIMEOUT_REMOVE, sqe, -1, + (void *)(unsigned long)user_data, 0, 0); + sqe->timeout_flags = flags; +} + +static inline void io_uring_prep_accept(struct io_uring_sqe *sqe, int fd, + struct 
sockaddr *addr, + socklen_t *addrlen, int flags) +{ + io_uring_prep_rw(IORING_OP_ACCEPT, sqe, fd, addr, 0, + (__u64) (unsigned long) addrlen); + sqe->accept_flags = flags; +} + +static inline void io_uring_prep_cancel(struct io_uring_sqe *sqe, void *user_data, + int flags) +{ + io_uring_prep_rw(IORING_OP_ASYNC_CANCEL, sqe, -1, user_data, 0, 0); + sqe->cancel_flags = flags; +} + +static inline void io_uring_prep_link_timeout(struct io_uring_sqe *sqe, + struct __kernel_timespec *ts, + unsigned flags) +{ + io_uring_prep_rw(IORING_OP_LINK_TIMEOUT, sqe, -1, ts, 1, 0); + sqe->timeout_flags = flags; +} + +static inline void io_uring_prep_connect(struct io_uring_sqe *sqe, int fd, + const struct sockaddr *addr, + socklen_t addrlen) +{ + io_uring_prep_rw(IORING_OP_CONNECT, sqe, fd, addr, 0, addrlen); +} + +static inline void io_uring_prep_files_update(struct io_uring_sqe *sqe, + int *fds, unsigned nr_fds, + int offset) +{ + io_uring_prep_rw(IORING_OP_FILES_UPDATE, sqe, -1, fds, nr_fds, offset); +} + +static inline void io_uring_prep_fallocate(struct io_uring_sqe *sqe, int fd, + int mode, off_t offset, off_t len) +{ + + io_uring_prep_rw(IORING_OP_FALLOCATE, sqe, fd, + (const uintptr_t *) (unsigned long) len, mode, offset); +} + +static inline void io_uring_prep_openat(struct io_uring_sqe *sqe, int dfd, + const char *path, int flags, mode_t mode) +{ + io_uring_prep_rw(IORING_OP_OPENAT, sqe, dfd, path, mode, 0); + sqe->open_flags = flags; +} + +static inline void io_uring_prep_close(struct io_uring_sqe *sqe, int fd) +{ + io_uring_prep_rw(IORING_OP_CLOSE, sqe, fd, NULL, 0, 0); +} + +static inline void io_uring_prep_read(struct io_uring_sqe *sqe, int fd, + void *buf, unsigned nbytes, off_t offset) +{ + io_uring_prep_rw(IORING_OP_READ, sqe, fd, buf, nbytes, offset); +} + +static inline void io_uring_prep_write(struct io_uring_sqe *sqe, int fd, + const void *buf, unsigned nbytes, off_t offset) +{ + io_uring_prep_rw(IORING_OP_WRITE, sqe, fd, buf, nbytes, offset); +} + +struct statx; 
+static inline void io_uring_prep_statx(struct io_uring_sqe *sqe, int dfd, + const char *path, int flags, unsigned mask, + struct statx *statxbuf) +{ + io_uring_prep_rw(IORING_OP_STATX, sqe, dfd, path, mask, + (__u64) (unsigned long) statxbuf); + sqe->statx_flags = flags; +} + +static inline void io_uring_prep_fadvise(struct io_uring_sqe *sqe, int fd, + off_t offset, off_t len, int advice) +{ + io_uring_prep_rw(IORING_OP_FADVISE, sqe, fd, NULL, len, offset); + sqe->fadvise_advice = advice; +} + +static inline void io_uring_prep_madvise(struct io_uring_sqe *sqe, void *addr, + off_t length, int advice) +{ + io_uring_prep_rw(IORING_OP_MADVISE, sqe, -1, addr, length, 0); + sqe->fadvise_advice = advice; +} + +static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd, + const void *buf, size_t len, int flags) +{ + io_uring_prep_rw(IORING_OP_SEND, sqe, sockfd, buf, len, 0); + sqe->msg_flags = flags; +} + +static inline void io_uring_prep_recv(struct io_uring_sqe *sqe, int sockfd, + void *buf, size_t len, int flags) +{ + io_uring_prep_rw(IORING_OP_RECV, sqe, sockfd, buf, len, 0); + sqe->msg_flags = flags; +} + +static inline void io_uring_prep_openat2(struct io_uring_sqe *sqe, int dfd, + const char *path, struct open_how *how) +{ + io_uring_prep_rw(IORING_OP_OPENAT2, sqe, dfd, path, sizeof(*how), + (uint64_t) (uintptr_t) how); +} + +struct epoll_event; +static inline void io_uring_prep_epoll_ctl(struct io_uring_sqe *sqe, int epfd, + int fd, int op, + struct epoll_event *ev) +{ + io_uring_prep_rw(IORING_OP_EPOLL_CTL, sqe, epfd, ev, op, fd); +} + +static inline void io_uring_prep_provide_buffers(struct io_uring_sqe *sqe, + void *addr, int len, int nr, + int bgid, int bid) +{ + io_uring_prep_rw(IORING_OP_PROVIDE_BUFFERS, sqe, nr, addr, len, bid); + sqe->buf_group = bgid; +} + +static inline void io_uring_prep_remove_buffers(struct io_uring_sqe *sqe, + int nr, int bgid) +{ + io_uring_prep_rw(IORING_OP_REMOVE_BUFFERS, sqe, nr, NULL, 0, 0); + sqe->buf_group = 
bgid; +} + +static inline unsigned io_uring_sq_ready(struct io_uring *ring) +{ + /* always use real head, to avoid losing sync for short submit */ + return ring->sq.sqe_tail - *ring->sq.khead; +} + +static inline unsigned io_uring_sq_space_left(struct io_uring *ring) +{ + return *ring->sq.kring_entries - io_uring_sq_ready(ring); +} + +static inline unsigned io_uring_cq_ready(struct io_uring *ring) +{ + return io_uring_smp_load_acquire(ring->cq.ktail) - *ring->cq.khead; +} + +static inline bool io_uring_cq_eventfd_enabled(struct io_uring *ring) +{ + if (!ring->cq.kflags) + return true; + + return !(*ring->cq.kflags & IORING_CQ_EVENTFD_DISABLED); +} + +static inline int io_uring_cq_eventfd_toggle(struct io_uring *ring, + bool enabled) +{ + uint32_t flags; + + if (!!enabled == io_uring_cq_eventfd_enabled(ring)) + return 0; + + if (!ring->cq.kflags) + return -EOPNOTSUPP; + + flags = *ring->cq.kflags; + + if (enabled) + flags &= ~IORING_CQ_EVENTFD_DISABLED; + else + flags |= IORING_CQ_EVENTFD_DISABLED; + + IO_URING_WRITE_ONCE(*ring->cq.kflags, flags); + + return 0; +} + +/* + * Return an IO completion, waiting for 'wait_nr' completions if one isn't + * readily available. Returns 0 with cqe_ptr filled in on success, -errno on + * failure. + */ +static inline int io_uring_wait_cqe_nr(struct io_uring *ring, + struct io_uring_cqe **cqe_ptr, + unsigned wait_nr) +{ + return __io_uring_get_cqe(ring, cqe_ptr, 0, wait_nr, NULL); +} + +/* + * Return an IO completion, if one is readily available. Returns 0 with + * cqe_ptr filled in on success, -errno on failure. + */ +static inline int io_uring_peek_cqe(struct io_uring *ring, + struct io_uring_cqe **cqe_ptr) +{ + return io_uring_wait_cqe_nr(ring, cqe_ptr, 0); +} + +/* + * Return an IO completion, waiting for it if necessary. Returns 0 with + * cqe_ptr filled in on success, -errno on failure. 
+ */ +static inline int io_uring_wait_cqe(struct io_uring *ring, + struct io_uring_cqe **cqe_ptr) +{ + return io_uring_wait_cqe_nr(ring, cqe_ptr, 1); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/include/liburing/barrier.h b/src/include/liburing/barrier.h new file mode 100644 index 0000000..a4a59fb --- /dev/null +++ b/src/include/liburing/barrier.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef LIBURING_BARRIER_H +#define LIBURING_BARRIER_H + +/* +From the kernel documentation file refcount-vs-atomic.rst: + +A RELEASE memory ordering guarantees that all prior loads and +stores (all po-earlier instructions) on the same CPU are completed +before the operation. It also guarantees that all po-earlier +stores on the same CPU and all propagated stores from other CPUs +must propagate to all other CPUs before the release operation +(A-cumulative property). This is implemented using +:c:func:`smp_store_release`. + +An ACQUIRE memory ordering guarantees that all post loads and +stores (all po-later instructions) on the same CPU are +completed after the acquire operation. It also guarantees that all +po-later stores on the same CPU must propagate to all other CPUs +after the acquire operation executes. This is implemented using +:c:func:`smp_acquire__after_ctrl_dep`. 
+*/ + +#ifdef __cplusplus +#include + +template +static inline void IO_URING_WRITE_ONCE(T &var, T val) +{ + std::atomic_store_explicit(reinterpret_cast *>(&var), + val, std::memory_order_relaxed); +} +template +static inline T IO_URING_READ_ONCE(const T &var) +{ + return std::atomic_load_explicit( + reinterpret_cast *>(&var), + std::memory_order_relaxed); +} + +template +static inline void io_uring_smp_store_release(T *p, T v) +{ + std::atomic_store_explicit(reinterpret_cast *>(p), v, + std::memory_order_release); +} + +template +static inline T io_uring_smp_load_acquire(const T *p) +{ + return std::atomic_load_explicit( + reinterpret_cast *>(p), + std::memory_order_acquire); +} +#else +#include + +#define IO_URING_WRITE_ONCE(var, val) \ + atomic_store_explicit((_Atomic typeof(var) *)&(var), \ + (val), memory_order_relaxed) +#define IO_URING_READ_ONCE(var) \ + atomic_load_explicit((_Atomic typeof(var) *)&(var), \ + memory_order_relaxed) + +#define io_uring_smp_store_release(p, v) \ + atomic_store_explicit((_Atomic typeof(*(p)) *)(p), (v), \ + memory_order_release) +#define io_uring_smp_load_acquire(p) \ + atomic_load_explicit((_Atomic typeof(*(p)) *)(p), \ + memory_order_acquire) +#endif + +#endif /* defined(LIBURING_BARRIER_H) */ diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h new file mode 100644 index 0000000..d39b45f --- /dev/null +++ b/src/include/liburing/io_uring.h @@ -0,0 +1,301 @@ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ +/* + * Header file for the io_uring interface. 
+ * + * Copyright (C) 2019 Jens Axboe + * Copyright (C) 2019 Christoph Hellwig + */ +#ifndef LINUX_IO_URING_H +#define LINUX_IO_URING_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * IO submission data structure (Submission Queue Entry) + */ +struct io_uring_sqe { + __u8 opcode; /* type of operation for this sqe */ + __u8 flags; /* IOSQE_ flags */ + __u16 ioprio; /* ioprio for the request */ + __s32 fd; /* file descriptor to do IO on */ + union { + __u64 off; /* offset into file */ + __u64 addr2; + }; + union { + __u64 addr; /* pointer to buffer or iovecs */ + __u64 splice_off_in; + }; + __u32 len; /* buffer size or number of iovecs */ + union { + __kernel_rwf_t rw_flags; + __u32 fsync_flags; + __u16 poll_events; /* compatibility */ + __u32 poll32_events; /* word-reversed for BE */ + __u32 sync_range_flags; + __u32 msg_flags; + __u32 timeout_flags; + __u32 accept_flags; + __u32 cancel_flags; + __u32 open_flags; + __u32 statx_flags; + __u32 fadvise_advice; + __u32 splice_flags; + }; + __u64 user_data; /* data to be passed back at completion time */ + union { + struct { + /* pack this to avoid bogus arm OABI complaints */ + union { + /* index into fixed buffers, if used */ + __u16 buf_index; + /* for grouped buffer selection */ + __u16 buf_group; + } __attribute__((packed)); + /* personality to use, if used */ + __u16 personality; + __s32 splice_fd_in; + }; + __u64 __pad2[3]; + }; +}; + +enum { + IOSQE_FIXED_FILE_BIT, + IOSQE_IO_DRAIN_BIT, + IOSQE_IO_LINK_BIT, + IOSQE_IO_HARDLINK_BIT, + IOSQE_ASYNC_BIT, + IOSQE_BUFFER_SELECT_BIT, +}; + +/* + * sqe->flags + */ +/* use fixed fileset */ +#define IOSQE_FIXED_FILE (1U << IOSQE_FIXED_FILE_BIT) +/* issue after inflight IO */ +#define IOSQE_IO_DRAIN (1U << IOSQE_IO_DRAIN_BIT) +/* links next sqe */ +#define IOSQE_IO_LINK (1U << IOSQE_IO_LINK_BIT) +/* like LINK, but stronger */ +#define IOSQE_IO_HARDLINK (1U << IOSQE_IO_HARDLINK_BIT) +/* always go async */ +#define IOSQE_ASYNC (1U << 
IOSQE_ASYNC_BIT) +/* select buffer from sqe->buf_group */ +#define IOSQE_BUFFER_SELECT (1U << IOSQE_BUFFER_SELECT_BIT) + +/* + * io_uring_setup() flags + */ +#define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */ +#define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */ +#define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */ +#define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */ +#define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */ +#define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ + +enum { + IORING_OP_NOP, + IORING_OP_READV, + IORING_OP_WRITEV, + IORING_OP_FSYNC, + IORING_OP_READ_FIXED, + IORING_OP_WRITE_FIXED, + IORING_OP_POLL_ADD, + IORING_OP_POLL_REMOVE, + IORING_OP_SYNC_FILE_RANGE, + IORING_OP_SENDMSG, + IORING_OP_RECVMSG, + IORING_OP_TIMEOUT, + IORING_OP_TIMEOUT_REMOVE, + IORING_OP_ACCEPT, + IORING_OP_ASYNC_CANCEL, + IORING_OP_LINK_TIMEOUT, + IORING_OP_CONNECT, + IORING_OP_FALLOCATE, + IORING_OP_OPENAT, + IORING_OP_CLOSE, + IORING_OP_FILES_UPDATE, + IORING_OP_STATX, + IORING_OP_READ, + IORING_OP_WRITE, + IORING_OP_FADVISE, + IORING_OP_MADVISE, + IORING_OP_SEND, + IORING_OP_RECV, + IORING_OP_OPENAT2, + IORING_OP_EPOLL_CTL, + IORING_OP_SPLICE, + IORING_OP_PROVIDE_BUFFERS, + IORING_OP_REMOVE_BUFFERS, + IORING_OP_TEE, + + /* this goes last, obviously */ + IORING_OP_LAST, +}; + +/* + * sqe->fsync_flags + */ +#define IORING_FSYNC_DATASYNC (1U << 0) + +/* + * sqe->timeout_flags + */ +#define IORING_TIMEOUT_ABS (1U << 0) + +/* + * sqe->splice_flags + * extends splice(2) flags + */ +#define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */ + +/* + * IO completion data structure (Completion Queue Entry) + */ +struct io_uring_cqe { + __u64 user_data; /* sqe->data submission passed back */ + __s32 res; /* result code for this event */ + __u32 flags; +}; + +/* + * cqe->flags + * + * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID + */ +#define IORING_CQE_F_BUFFER (1U << 0) + +enum 
{ + IORING_CQE_BUFFER_SHIFT = 16, +}; + +/* + * Magic offsets for the application to mmap the data it needs + */ +#define IORING_OFF_SQ_RING 0ULL +#define IORING_OFF_CQ_RING 0x8000000ULL +#define IORING_OFF_SQES 0x10000000ULL + +/* + * Filled with the offset for mmap(2) + */ +struct io_sqring_offsets { + __u32 head; + __u32 tail; + __u32 ring_mask; + __u32 ring_entries; + __u32 flags; + __u32 dropped; + __u32 array; + __u32 resv1; + __u64 resv2; +}; + +/* + * sq_ring->flags + */ +#define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */ +#define IORING_SQ_CQ_OVERFLOW (1U << 1) /* CQ ring is overflown */ + +struct io_cqring_offsets { + __u32 head; + __u32 tail; + __u32 ring_mask; + __u32 ring_entries; + __u32 overflow; + __u32 cqes; + __u32 flags; + __u32 resv1; + __u64 resv2; +}; + +/* + * cq_ring->flags + */ + +/* disable eventfd notifications */ +#define IORING_CQ_EVENTFD_DISABLED (1U << 0) + +/* + * io_uring_enter(2) flags + */ +#define IORING_ENTER_GETEVENTS (1U << 0) +#define IORING_ENTER_SQ_WAKEUP (1U << 1) + +/* + * Passed in for io_uring_setup(2). 
Copied back with updated info on success + */ +struct io_uring_params { + __u32 sq_entries; + __u32 cq_entries; + __u32 flags; + __u32 sq_thread_cpu; + __u32 sq_thread_idle; + __u32 features; + __u32 wq_fd; + __u32 resv[3]; + struct io_sqring_offsets sq_off; + struct io_cqring_offsets cq_off; +}; + +/* + * io_uring_params->features flags + */ +#define IORING_FEAT_SINGLE_MMAP (1U << 0) +#define IORING_FEAT_NODROP (1U << 1) +#define IORING_FEAT_SUBMIT_STABLE (1U << 2) +#define IORING_FEAT_RW_CUR_POS (1U << 3) +#define IORING_FEAT_CUR_PERSONALITY (1U << 4) +#define IORING_FEAT_FAST_POLL (1U << 5) +#define IORING_FEAT_POLL_32BITS (1U << 6) + +/* + * io_uring_register(2) opcodes and arguments + */ +#define IORING_REGISTER_BUFFERS 0 +#define IORING_UNREGISTER_BUFFERS 1 +#define IORING_REGISTER_FILES 2 +#define IORING_UNREGISTER_FILES 3 +#define IORING_REGISTER_EVENTFD 4 +#define IORING_UNREGISTER_EVENTFD 5 +#define IORING_REGISTER_FILES_UPDATE 6 +#define IORING_REGISTER_EVENTFD_ASYNC 7 +#define IORING_REGISTER_PROBE 8 +#define IORING_REGISTER_PERSONALITY 9 +#define IORING_UNREGISTER_PERSONALITY 10 + +struct io_uring_files_update { + __u32 offset; + __u32 resv; + __aligned_u64 /* __s32 * */ fds; +}; + +#define IO_URING_OP_SUPPORTED (1U << 0) + +struct io_uring_probe_op { + __u8 op; + __u8 resv; + __u16 flags; /* IO_URING_OP_* flags */ + __u32 resv2; +}; + +struct io_uring_probe { + __u8 last_op; /* last opcode supported */ + __u8 ops_len; /* length of ops[] array below */ + __u16 resv; + __u32 resv2[3]; + struct io_uring_probe_op ops[0]; +}; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/liburing.map b/src/liburing.map new file mode 100644 index 0000000..38bd558 --- /dev/null +++ b/src/liburing.map @@ -0,0 +1,59 @@ +LIBURING_0.1 { + global: + io_uring_queue_init; + io_uring_queue_mmap; + io_uring_queue_exit; + io_uring_peek_cqe; + io_uring_wait_cqe; + io_uring_submit; + io_uring_submit_and_wait; + io_uring_get_sqe; + io_uring_register; + io_uring_setup; + 
io_uring_enter; + io_uring_mmap; + io_uring_register_buffers; + io_uring_unregister_buffers; + io_uring_register_files; + io_uring_unregister_files; + io_uring_register_eventfd; + io_uring_unregister_eventfd; + local: + *; +}; + +LIBURING_0.2 { + global: + io_uring_peek_batch_cqe; + io_uring_wait_cqe_timeout; + io_uring_wait_cqes; + + __io_uring_get_cqe; + + io_uring_queue_init_params; + io_uring_register_files_update; +} LIBURING_0.1; + +LIBURING_0.3 { +} LIBURING_0.2; + +LIBURING_0.4 { + global: + io_uring_ring_dontfork; + io_uring_register_probe; + io_uring_register_personality; + io_uring_unregister_personality; + io_uring_get_probe; + io_uring_get_probe_ring; +} LIBURING_0.3; + +LIBURING_0.5 { +} LIBURING_0.4; + +LIBURING_0.6 { + global: + io_uring_register_eventfd_async; +} LIBURING_0.5; + +LIBURING_0.7 { +} LIBURING_0.6; diff --git a/src/queue.c b/src/queue.c new file mode 100644 index 0000000..be80d7a --- /dev/null +++ b/src/queue.c @@ -0,0 +1,323 @@ +/* SPDX-License-Identifier: MIT */ +#include +#include +#include +#include +#include +#include +#include + +#include "liburing/compat.h" +#include "liburing/io_uring.h" +#include "liburing.h" +#include "liburing/barrier.h" + +#include "syscall.h" + +/* + * Returns true if we're not using SQ thread (thus nobody submits but us) + * or if IORING_SQ_NEED_WAKEUP is set, so submit thread must be explicitly + * awakened. For the latter case, we set the thread wakeup flag. 
+ */ +static inline bool sq_ring_needs_enter(struct io_uring *ring, + unsigned submitted, unsigned *flags) +{ + if (!(ring->flags & IORING_SETUP_SQPOLL) && submitted) + return true; + if (IO_URING_READ_ONCE(*ring->sq.kflags) & IORING_SQ_NEED_WAKEUP) { + *flags |= IORING_ENTER_SQ_WAKEUP; + return true; + } + + return false; +} + +static inline bool cq_ring_needs_flush(struct io_uring *ring) +{ + return IO_URING_READ_ONCE(*ring->sq.kflags) & IORING_SQ_CQ_OVERFLOW; +} + +static int __io_uring_peek_cqe(struct io_uring *ring, + struct io_uring_cqe **cqe_ptr) +{ + struct io_uring_cqe *cqe; + unsigned head; + int err = 0; + + do { + io_uring_for_each_cqe(ring, head, cqe) + break; + if (cqe) { + if (cqe->user_data == LIBURING_UDATA_TIMEOUT) { + if (cqe->res < 0) + err = cqe->res; + io_uring_cq_advance(ring, 1); + if (!err) + continue; + cqe = NULL; + } + } + break; + } while (1); + + *cqe_ptr = cqe; + return err; +} + +int __io_uring_get_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr, + unsigned submit, unsigned wait_nr, sigset_t *sigmask) +{ + struct io_uring_cqe *cqe = NULL; + const int to_wait = wait_nr; + int ret = 0, err; + + do { + bool cq_overflow_flush = false; + unsigned flags = 0; + + err = __io_uring_peek_cqe(ring, &cqe); + if (err) + break; + if (!cqe && !to_wait && !submit) { + if (!cq_ring_needs_flush(ring)) { + err = -EAGAIN; + break; + } + cq_overflow_flush = true; + } + if (wait_nr && cqe) + wait_nr--; + if (wait_nr || cq_overflow_flush) + flags = IORING_ENTER_GETEVENTS; + if (submit) + sq_ring_needs_enter(ring, submit, &flags); + if (wait_nr || submit || cq_overflow_flush) + ret = __sys_io_uring_enter(ring->ring_fd, submit, + wait_nr, flags, sigmask); + if (ret < 0) { + err = -errno; + } else if (ret == (int)submit) { + submit = 0; + /* + * When SETUP_IOPOLL is set, __sys_io_uring enter() + * must be called to reap new completions but the call + * won't be made if both wait_nr and submit are zero + * so preserve wait_nr. 
+ */ + if (!(ring->flags & IORING_SETUP_IOPOLL)) + wait_nr = 0; + } else { + submit -= ret; + } + if (cqe) + break; + } while (!err); + + *cqe_ptr = cqe; + return err; +} + +/* + * Fill in an array of IO completions up to count, if any are available. + * Returns the amount of IO completions filled. + */ +unsigned io_uring_peek_batch_cqe(struct io_uring *ring, + struct io_uring_cqe **cqes, unsigned count) +{ + unsigned ready; + bool overflow_checked = false; + +again: + ready = io_uring_cq_ready(ring); + if (ready) { + unsigned head = *ring->cq.khead; + unsigned mask = *ring->cq.kring_mask; + unsigned last; + int i = 0; + + count = count > ready ? ready : count; + last = head + count; + for (;head != last; head++, i++) + cqes[i] = &ring->cq.cqes[head & mask]; + + return count; + } + + if (overflow_checked) + goto done; + + if (cq_ring_needs_flush(ring)) { + __sys_io_uring_enter(ring->ring_fd, 0, 0, + IORING_ENTER_GETEVENTS, NULL); + overflow_checked = true; + goto again; + } + +done: + return 0; +} + +/* + * Sync internal state with kernel ring state on the SQ side. Returns the + * number of pending items in the SQ ring, for the shared ring. + */ +static int __io_uring_flush_sq(struct io_uring *ring) +{ + struct io_uring_sq *sq = &ring->sq; + const unsigned mask = *sq->kring_mask; + unsigned ktail, to_submit; + + if (sq->sqe_head == sq->sqe_tail) { + ktail = *sq->ktail; + goto out; + } + + /* + * Fill in sqes that we have queued up, adding them to the kernel ring + */ + ktail = *sq->ktail; + to_submit = sq->sqe_tail - sq->sqe_head; + while (to_submit--) { + sq->array[ktail & mask] = sq->sqe_head & mask; + ktail++; + sq->sqe_head++; + } + + /* + * Ensure that the kernel sees the SQE updates before it sees the tail + * update. + */ + io_uring_smp_store_release(sq->ktail, ktail); +out: + return ktail - *sq->khead; +} + +/* + * Like io_uring_wait_cqe(), except it accepts a timeout value as well. Note + * that an sqe is used internally to handle the timeout. 
Applications using + * this function must never set sqe->user_data to LIBURING_UDATA_TIMEOUT! + * + * If 'ts' is specified, the application need not call io_uring_submit() before + * calling this function, as we will do that on its behalf. From this it also + * follows that this function isn't safe to use for applications that split SQ + * and CQ handling between two threads and expect that to work without + * synchronization, as this function manipulates both the SQ and CQ side. + */ +int io_uring_wait_cqes(struct io_uring *ring, struct io_uring_cqe **cqe_ptr, + unsigned wait_nr, struct __kernel_timespec *ts, + sigset_t *sigmask) +{ + unsigned to_submit = 0; + + if (ts) { + struct io_uring_sqe *sqe; + int ret; + + /* + * If the SQ ring is full, we may need to submit IO first + */ + sqe = io_uring_get_sqe(ring); + if (!sqe) { + ret = io_uring_submit(ring); + if (ret < 0) + return ret; + sqe = io_uring_get_sqe(ring); + if (!sqe) + return -EAGAIN; + } + io_uring_prep_timeout(sqe, ts, wait_nr, 0); + sqe->user_data = LIBURING_UDATA_TIMEOUT; + to_submit = __io_uring_flush_sq(ring); + } + + return __io_uring_get_cqe(ring, cqe_ptr, to_submit, wait_nr, sigmask); +} + +/* + * See io_uring_wait_cqes() - this function is the same, it just always uses + * '1' as the wait_nr. + */ +int io_uring_wait_cqe_timeout(struct io_uring *ring, + struct io_uring_cqe **cqe_ptr, + struct __kernel_timespec *ts) +{ + return io_uring_wait_cqes(ring, cqe_ptr, 1, ts, NULL); +} + +/* + * Submit sqes acquired from io_uring_get_sqe() to the kernel. 
+ * + * Returns number of sqes submitted + */ +static int __io_uring_submit(struct io_uring *ring, unsigned submitted, + unsigned wait_nr) +{ + unsigned flags; + int ret; + + flags = 0; + if (sq_ring_needs_enter(ring, submitted, &flags) || wait_nr) { + if (wait_nr || (ring->flags & IORING_SETUP_IOPOLL)) + flags |= IORING_ENTER_GETEVENTS; + + ret = __sys_io_uring_enter(ring->ring_fd, submitted, wait_nr, + flags, NULL); + if (ret < 0) + return -errno; + } else + ret = submitted; + + return ret; +} + +static int __io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr) +{ + return __io_uring_submit(ring, __io_uring_flush_sq(ring), wait_nr); +} + +/* + * Submit sqes acquired from io_uring_get_sqe() to the kernel. + * + * Returns number of sqes submitted + */ +int io_uring_submit(struct io_uring *ring) +{ + return __io_uring_submit_and_wait(ring, 0); +} + +/* + * Like io_uring_submit(), but allows waiting for events as well. + * + * Returns number of sqes submitted + */ +int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr) +{ + return __io_uring_submit_and_wait(ring, wait_nr); +} + +static inline struct io_uring_sqe * +__io_uring_get_sqe(struct io_uring_sq *sq, unsigned int __head) +{ + unsigned int __next = (sq)->sqe_tail + 1; + struct io_uring_sqe *__sqe = NULL; + + if (__next - __head <= *(sq)->kring_entries) { + __sqe = &(sq)->sqes[(sq)->sqe_tail & *(sq)->kring_mask]; + (sq)->sqe_tail = __next; + } + return __sqe; +} + +/* + * Return an sqe to fill. Application must later call io_uring_submit() + * when it's ready to tell the kernel about it. The caller may call this + * function multiple times before calling io_uring_submit(). + * + * Returns a vacant sqe, or NULL if we're full. 
+ */ +struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring) +{ + struct io_uring_sq *sq = &ring->sq; + + return __io_uring_get_sqe(sq, io_uring_smp_load_acquire(sq->khead)); +} diff --git a/src/register.c b/src/register.c new file mode 100644 index 0000000..327a8ce --- /dev/null +++ b/src/register.c @@ -0,0 +1,161 @@ +/* SPDX-License-Identifier: MIT */ +#include +#include +#include +#include +#include +#include + +#include "liburing/compat.h" +#include "liburing/io_uring.h" +#include "liburing.h" + +#include "syscall.h" + +int io_uring_register_buffers(struct io_uring *ring, const struct iovec *iovecs, + unsigned nr_iovecs) +{ + int ret; + + ret = __sys_io_uring_register(ring->ring_fd, IORING_REGISTER_BUFFERS, + iovecs, nr_iovecs); + if (ret < 0) + return -errno; + + return 0; +} + +int io_uring_unregister_buffers(struct io_uring *ring) +{ + int ret; + + ret = __sys_io_uring_register(ring->ring_fd, IORING_UNREGISTER_BUFFERS, + NULL, 0); + if (ret < 0) + return -errno; + + return 0; +} + +/* + * Register an update for an existing file set. The updates will start at + * 'off' in the original array, and 'nr_files' is the number of files we'll + * update. + * + * Returns number of files updated on success, -ERROR on failure. 
+ */ +int io_uring_register_files_update(struct io_uring *ring, unsigned off, + int *files, unsigned nr_files) +{ + struct io_uring_files_update up = { + .offset = off, + .fds = (unsigned long) files, + }; + int ret; + + ret = __sys_io_uring_register(ring->ring_fd, + IORING_REGISTER_FILES_UPDATE, &up, + nr_files); + if (ret < 0) + return -errno; + + return ret; +} + +int io_uring_register_files(struct io_uring *ring, const int *files, + unsigned nr_files) +{ + int ret; + + ret = __sys_io_uring_register(ring->ring_fd, IORING_REGISTER_FILES, + files, nr_files); + if (ret < 0) + return -errno; + + return 0; +} + +int io_uring_unregister_files(struct io_uring *ring) +{ + int ret; + + ret = __sys_io_uring_register(ring->ring_fd, IORING_UNREGISTER_FILES, + NULL, 0); + if (ret < 0) + return -errno; + + return 0; +} + +int io_uring_register_eventfd(struct io_uring *ring, int event_fd) +{ + int ret; + + ret = __sys_io_uring_register(ring->ring_fd, IORING_REGISTER_EVENTFD, + &event_fd, 1); + if (ret < 0) + return -errno; + + return 0; +} + +int io_uring_unregister_eventfd(struct io_uring *ring) +{ + int ret; + + ret = __sys_io_uring_register(ring->ring_fd, IORING_UNREGISTER_EVENTFD, + NULL, 0); + if (ret < 0) + return -errno; + + return 0; +} + +int io_uring_register_eventfd_async(struct io_uring *ring, int event_fd) +{ + int ret; + + ret = __sys_io_uring_register(ring->ring_fd, IORING_REGISTER_EVENTFD_ASYNC, + &event_fd, 1); + if (ret < 0) + return -errno; + + return 0; +} + +int io_uring_register_probe(struct io_uring *ring, struct io_uring_probe *p, + unsigned int nr_ops) +{ + int ret; + + ret = __sys_io_uring_register(ring->ring_fd, IORING_REGISTER_PROBE, + p, nr_ops); + if (ret < 0) + return -errno; + + return 0; +} + +int io_uring_register_personality(struct io_uring *ring) +{ + int ret; + + ret = __sys_io_uring_register(ring->ring_fd, IORING_REGISTER_PERSONALITY, + NULL, 0); + if (ret < 0) + return -errno; + + return ret; +} + +int 
io_uring_unregister_personality(struct io_uring *ring, int id) +{ + int ret; + + ret = __sys_io_uring_register(ring->ring_fd, IORING_UNREGISTER_PERSONALITY, + NULL, id); + if (ret < 0) + return -errno; + + return ret; +} diff --git a/src/setup.c b/src/setup.c new file mode 100644 index 0000000..2b17b94 --- /dev/null +++ b/src/setup.c @@ -0,0 +1,205 @@ +/* SPDX-License-Identifier: MIT */ +#include +#include +#include +#include +#include +#include +#include + +#include "liburing/compat.h" +#include "liburing/io_uring.h" +#include "liburing.h" + +#include "syscall.h" + +static void io_uring_unmap_rings(struct io_uring_sq *sq, struct io_uring_cq *cq) +{ + munmap(sq->ring_ptr, sq->ring_sz); + if (cq->ring_ptr && cq->ring_ptr != sq->ring_ptr) + munmap(cq->ring_ptr, cq->ring_sz); +} + +static int io_uring_mmap(int fd, struct io_uring_params *p, + struct io_uring_sq *sq, struct io_uring_cq *cq) +{ + size_t size; + int ret; + + sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned); + cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe); + + if (p->features & IORING_FEAT_SINGLE_MMAP) { + if (cq->ring_sz > sq->ring_sz) + sq->ring_sz = cq->ring_sz; + cq->ring_sz = sq->ring_sz; + } + sq->ring_ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING); + if (sq->ring_ptr == MAP_FAILED) + return -errno; + + if (p->features & IORING_FEAT_SINGLE_MMAP) { + cq->ring_ptr = sq->ring_ptr; + } else { + cq->ring_ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING); + if (cq->ring_ptr == MAP_FAILED) { + cq->ring_ptr = NULL; + ret = -errno; + goto err; + } + } + + sq->khead = sq->ring_ptr + p->sq_off.head; + sq->ktail = sq->ring_ptr + p->sq_off.tail; + sq->kring_mask = sq->ring_ptr + p->sq_off.ring_mask; + sq->kring_entries = sq->ring_ptr + p->sq_off.ring_entries; + sq->kflags = sq->ring_ptr + p->sq_off.flags; + sq->kdropped = sq->ring_ptr + p->sq_off.dropped; + 
sq->array = sq->ring_ptr + p->sq_off.array; + + size = p->sq_entries * sizeof(struct io_uring_sqe); + sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, + IORING_OFF_SQES); + if (sq->sqes == MAP_FAILED) { + ret = -errno; +err: + io_uring_unmap_rings(sq, cq); + return ret; + } + + cq->khead = cq->ring_ptr + p->cq_off.head; + cq->ktail = cq->ring_ptr + p->cq_off.tail; + cq->kring_mask = cq->ring_ptr + p->cq_off.ring_mask; + cq->kring_entries = cq->ring_ptr + p->cq_off.ring_entries; + cq->koverflow = cq->ring_ptr + p->cq_off.overflow; + cq->cqes = cq->ring_ptr + p->cq_off.cqes; + if (p->cq_off.flags) + cq->kflags = cq->ring_ptr + p->cq_off.flags; + return 0; +} + +/* + * For users that want to specify sq_thread_cpu or sq_thread_idle, this + * interface is a convenient helper for mmap()ing the rings. + * Returns -errno on error, or zero on success. On success, 'ring' + * contains the necessary information to read/write to the rings. + */ +int io_uring_queue_mmap(int fd, struct io_uring_params *p, struct io_uring *ring) +{ + int ret; + + memset(ring, 0, sizeof(*ring)); + ret = io_uring_mmap(fd, p, &ring->sq, &ring->cq); + if (!ret) { + ring->flags = p->flags; + ring->ring_fd = fd; + } + return ret; +} + +/* + * Ensure that the mmap'ed rings aren't available to a child after a fork(2). + * This uses madvise(..., MADV_DONTFORK) on the mmap'ed ranges. 
+ */ +int io_uring_ring_dontfork(struct io_uring *ring) +{ + size_t len; + int ret; + + if (!ring->sq.ring_ptr || !ring->sq.sqes || !ring->cq.ring_ptr) + return -EINVAL; + + len = *ring->sq.kring_entries * sizeof(struct io_uring_sqe); + ret = madvise(ring->sq.sqes, len, MADV_DONTFORK); + if (ret == -1) + return -errno; + + len = ring->sq.ring_sz; + ret = madvise(ring->sq.ring_ptr, len, MADV_DONTFORK); + if (ret == -1) + return -errno; + + if (ring->cq.ring_ptr != ring->sq.ring_ptr) { + len = ring->cq.ring_sz; + ret = madvise(ring->cq.ring_ptr, len, MADV_DONTFORK); + if (ret == -1) + return -errno; + } + + return 0; +} + +int io_uring_queue_init_params(unsigned entries, struct io_uring *ring, + struct io_uring_params *p) +{ + int fd, ret; + + fd = __sys_io_uring_setup(entries, p); + if (fd < 0) + return -errno; + + ret = io_uring_queue_mmap(fd, p, ring); + if (ret) + close(fd); + + return ret; +} + +/* + * Returns -errno on error, or zero on success. On success, 'ring' + * contains the necessary information to read/write to the rings. 
+ */ +int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags) +{ + struct io_uring_params p; + + memset(&p, 0, sizeof(p)); + p.flags = flags; + + return io_uring_queue_init_params(entries, ring, &p); +} + +void io_uring_queue_exit(struct io_uring *ring) +{ + struct io_uring_sq *sq = &ring->sq; + struct io_uring_cq *cq = &ring->cq; + + munmap(sq->sqes, *sq->kring_entries * sizeof(struct io_uring_sqe)); + io_uring_unmap_rings(sq, cq); + close(ring->ring_fd); +} + +struct io_uring_probe *io_uring_get_probe_ring(struct io_uring *ring) +{ + struct io_uring_probe *probe; + int r; + + size_t len = sizeof(*probe) + 256 * sizeof(struct io_uring_probe_op); + probe = malloc(len); + memset(probe, 0, len); + r = io_uring_register_probe(ring, probe, 256); + if (r < 0) + goto fail; + + return probe; +fail: + free(probe); + return NULL; +} + +struct io_uring_probe *io_uring_get_probe(void) +{ + struct io_uring ring; + struct io_uring_probe* probe = NULL; + + int r = io_uring_queue_init(2, &ring, 0); + if (r < 0) + return NULL; + + probe = io_uring_get_probe_ring(&ring); + io_uring_queue_exit(&ring); + return probe; +} diff --git a/src/syscall.c b/src/syscall.c new file mode 100644 index 0000000..c41e099 --- /dev/null +++ b/src/syscall.c @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Will go away once libc support is there + */ +#include +#include +#include +#include +#include "liburing/compat.h" +#include "liburing/io_uring.h" +#include "syscall.h" + +#ifdef __alpha__ +/* + * alpha is the only exception, all other architectures + * have common numbers for new system calls. 
+ */ +# ifndef __NR_io_uring_setup +# define __NR_io_uring_setup 535 +# endif +# ifndef __NR_io_uring_enter +# define __NR_io_uring_enter 536 +# endif +# ifndef __NR_io_uring_register +# define __NR_io_uring_register 537 +# endif +#else /* !__alpha__ */ +# ifndef __NR_io_uring_setup +# define __NR_io_uring_setup 425 +# endif +# ifndef __NR_io_uring_enter +# define __NR_io_uring_enter 426 +# endif +# ifndef __NR_io_uring_register +# define __NR_io_uring_register 427 +# endif +#endif + +int __sys_io_uring_register(int fd, unsigned opcode, const void *arg, + unsigned nr_args) +{ + return syscall(__NR_io_uring_register, fd, opcode, arg, nr_args); +} + +int __sys_io_uring_setup(unsigned entries, struct io_uring_params *p) +{ + return syscall(__NR_io_uring_setup, entries, p); +} + +int __sys_io_uring_enter(int fd, unsigned to_submit, unsigned min_complete, + unsigned flags, sigset_t *sig) +{ + return syscall(__NR_io_uring_enter, fd, to_submit, min_complete, + flags, sig, _NSIG / 8); +} diff --git a/src/syscall.h b/src/syscall.h new file mode 100644 index 0000000..7e299d4 --- /dev/null +++ b/src/syscall.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef LIBURING_SYSCALL_H +#define LIBURING_SYSCALL_H + +/* + * System calls + */ +extern int __sys_io_uring_setup(unsigned entries, struct io_uring_params *p); +extern int __sys_io_uring_enter(int fd, unsigned to_submit, + unsigned min_complete, unsigned flags, sigset_t *sig); +extern int __sys_io_uring_register(int fd, unsigned int opcode, const void *arg, + unsigned int nr_args); + +#endif diff --git a/test/232c93d07b74-test.c b/test/232c93d07b74-test.c new file mode 100644 index 0000000..a0da3fd --- /dev/null +++ b/test/232c93d07b74-test.c @@ -0,0 +1,289 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Test case for socket read/write through IORING_OP_READV and + * IORING_OP_WRITEV, using both TCP and sockets and blocking and + * non-blocking IO. 
+ * + * Heavily based on a test case from Hrvoje Zeba + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +#define RECV_BUFF_SIZE 2 +#define SEND_BUFF_SIZE 3 + +#define PORT 0x1235 + +struct params { + int tcp; + int non_blocking; +}; + +pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t cond = PTHREAD_COND_INITIALIZER; +int rcv_ready = 0; + +static void set_rcv_ready(void) +{ + pthread_mutex_lock(&mutex); + + rcv_ready = 1; + pthread_cond_signal(&cond); + + pthread_mutex_unlock(&mutex); +} + +static void wait_for_rcv_ready(void) +{ + pthread_mutex_lock(&mutex); + + while (!rcv_ready) + pthread_cond_wait(&cond, &mutex); + + pthread_mutex_unlock(&mutex); +} + +static void *rcv(void *arg) +{ + struct params *p = arg; + int s0; + + if (p->tcp) { + int val = 1; + + s0 = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); + assert(setsockopt(s0, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)) != -1); + assert(setsockopt(s0, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)) != -1); + + struct sockaddr_in addr; + + addr.sin_family = AF_INET; + addr.sin_port = PORT; + addr.sin_addr.s_addr = 0x0100007fU; + assert(bind(s0, (struct sockaddr *) &addr, sizeof(addr)) != -1); + } else { + s0 = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + assert(s0 != -1); + + struct sockaddr_un addr; + memset(&addr, 0, sizeof(addr)); + + addr.sun_family = AF_UNIX; + memcpy(addr.sun_path, "\0sock", 6); + assert(bind(s0, (struct sockaddr *) &addr, sizeof(addr)) != -1); + } + + assert(listen(s0, 128) != -1); + + set_rcv_ready(); + + int s1 = accept(s0, NULL, NULL); + assert(s1 != -1); + + if (p->non_blocking) { + int flags = fcntl(s1, F_GETFL, 0); + assert(flags != -1); + + flags |= O_NONBLOCK; + assert(fcntl(s1, F_SETFL, flags) != -1); + } + + struct io_uring m_io_uring; + void *ret = NULL; + + assert(io_uring_queue_init(32, &m_io_uring, 0) >= 0); + + int bytes_read = 0; + int 
expected_byte = 0; + int done = 0; + + while (!done && bytes_read != 33) { + char buff[RECV_BUFF_SIZE]; + struct iovec iov; + + iov.iov_base = buff; + iov.iov_len = sizeof(buff); + + struct io_uring_sqe *sqe = io_uring_get_sqe(&m_io_uring); + assert(sqe != NULL); + + io_uring_prep_readv(sqe, s1, &iov, 1, 0); + + assert(io_uring_submit(&m_io_uring) != -1); + + struct io_uring_cqe *cqe; + unsigned head; + unsigned count = 0; + + while (!done && count != 1) { + io_uring_for_each_cqe(&m_io_uring, head, cqe) { + if (cqe->res < 0) + assert(cqe->res == -EAGAIN); + else { + int i; + + for (i = 0; i < cqe->res; i++) { + if (buff[i] != expected_byte) { + fprintf(stderr, + "Received %d, wanted %d\n", + buff[i], expected_byte); + ret++; + done = 1; + } + expected_byte++; + } + bytes_read += cqe->res; + } + + count++; + } + + assert(count <= 1); + io_uring_cq_advance(&m_io_uring, count); + } + } + + shutdown(s1, SHUT_RDWR); + close(s1); + close(s0); + io_uring_queue_exit(&m_io_uring); + return ret; +} + +static void *snd(void *arg) +{ + struct params *p = arg; + int s0; + + wait_for_rcv_ready(); + + if (p->tcp) { + int val = 1; + + s0 = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); + assert(setsockopt(s0, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)) != -1); + + struct sockaddr_in addr; + + addr.sin_family = AF_INET; + addr.sin_port = PORT; + addr.sin_addr.s_addr = 0x0100007fU; + assert(connect(s0, (struct sockaddr*) &addr, sizeof(addr)) != -1); + } else { + s0 = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + assert(s0 != -1); + + struct sockaddr_un addr; + memset(&addr, 0, sizeof(addr)); + + addr.sun_family = AF_UNIX; + memcpy(addr.sun_path, "\0sock", 6); + assert(connect(s0, (struct sockaddr*) &addr, sizeof(addr)) != -1); + } + + if (p->non_blocking) { + int flags = fcntl(s0, F_GETFL, 0); + assert(flags != -1); + + flags |= O_NONBLOCK; + assert(fcntl(s0, F_SETFL, flags) != -1); + } + + struct io_uring m_io_uring; + + assert(io_uring_queue_init(32, &m_io_uring, 
0) >= 0); + + int bytes_written = 0; + int done = 0; + + while (!done && bytes_written != 33) { + char buff[SEND_BUFF_SIZE]; + int i; + + for (i = 0; i < SEND_BUFF_SIZE; i++) + buff[i] = i + bytes_written; + + struct iovec iov; + + iov.iov_base = buff; + iov.iov_len = sizeof(buff); + + struct io_uring_sqe *sqe = io_uring_get_sqe(&m_io_uring); + assert(sqe != NULL); + + io_uring_prep_writev(sqe, s0, &iov, 1, 0); + + assert(io_uring_submit(&m_io_uring) != -1); + + struct io_uring_cqe *cqe; + unsigned head; + unsigned count = 0; + + while (!done && count != 1) { + io_uring_for_each_cqe(&m_io_uring, head, cqe) { + if (cqe->res < 0) { + if (cqe->res == -EPIPE) { + done = 1; + break; + } + assert(cqe->res == -EAGAIN); + } else { + bytes_written += cqe->res; + } + + count++; + } + + assert(count <= 1); + io_uring_cq_advance(&m_io_uring, count); + } + usleep(100000); + } + + shutdown(s0, SHUT_RDWR); + close(s0); + io_uring_queue_exit(&m_io_uring); + return NULL; +} + +int main(int argc, char *argv[]) +{ + struct params p; + pthread_t t1, t2; + void *res1, *res2; + int i, exit_val = 0; + + if (argc > 1) + return 0; + + for (i = 0; i < 4; i++) { + p.tcp = i & 1; + p.non_blocking = (i & 2) >> 1; + + rcv_ready = 0; + + pthread_create(&t1, NULL, rcv, &p); + pthread_create(&t2, NULL, snd, &p); + pthread_join(t1, &res1); + pthread_join(t2, &res2); + if (res1 || res2) { + fprintf(stderr, "Failed tcp=%d, non_blocking=%d\n", p.tcp, p.non_blocking); + exit_val = 1; + } + } + + return exit_val; +} diff --git a/test/35fa71a030ca-test.c b/test/35fa71a030ca-test.c new file mode 100644 index 0000000..4ecf211 --- /dev/null +++ b/test/35fa71a030ca-test.c @@ -0,0 +1,326 @@ +/* SPDX-License-Identifier: MIT */ +// autogenerated by syzkaller (https://github.com/google/syzkaller) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + 
+static void sleep_ms(uint64_t ms) +{ + usleep(ms * 1000); +} + +static uint64_t current_time_ms(void) +{ + struct timespec ts; + if (clock_gettime(CLOCK_MONOTONIC, &ts)) + exit(1); + return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000; +} + +static void thread_start(void* (*fn)(void*), void* arg) +{ + pthread_t th; + pthread_attr_t attr; + pthread_attr_init(&attr); + pthread_attr_setstacksize(&attr, 128 << 10); + int i; + for (i = 0; i < 100; i++) { + if (pthread_create(&th, &attr, fn, arg) == 0) { + pthread_attr_destroy(&attr); + return; + } + if (errno == EAGAIN) { + usleep(50); + continue; + } + break; + } + exit(1); +} + +typedef struct { + int state; +} event_t; + +static void event_init(event_t* ev) +{ + ev->state = 0; +} + +static void event_reset(event_t* ev) +{ + ev->state = 0; +} + +static void event_set(event_t* ev) +{ + if (ev->state) + exit(1); + __atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE); + syscall(SYS_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG); +} + +static void event_wait(event_t* ev) +{ + while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE)) + syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0); +} + +static int event_isset(event_t* ev) +{ + return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE); +} + +static int event_timedwait(event_t* ev, uint64_t timeout) +{ + uint64_t start = current_time_ms(); + uint64_t now = start; + for (;;) { + uint64_t remain = timeout - (now - start); + struct timespec ts; + ts.tv_sec = remain / 1000; + ts.tv_nsec = (remain % 1000) * 1000 * 1000; + syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts); + if (__atomic_load_n(&ev->state, __ATOMIC_RELAXED)) + return 1; + now = current_time_ms(); + if (now - start > timeout) + return 0; + } +} + +static bool write_file(const char* file, const char* what, ...) 
+{ + char buf[1024]; + va_list args; + va_start(args, what); + vsnprintf(buf, sizeof(buf), what, args); + va_end(args); + buf[sizeof(buf) - 1] = 0; + int len = strlen(buf); + int fd = open(file, O_WRONLY | O_CLOEXEC); + if (fd == -1) + return false; + if (write(fd, buf, len) != len) { + int err = errno; + close(fd); + errno = err; + return false; + } + close(fd); + return true; +} + +static void kill_and_wait(int pid, int* status) +{ + kill(-pid, SIGKILL); + kill(pid, SIGKILL); + int i; + for (i = 0; i < 100; i++) { + if (waitpid(-1, status, WNOHANG | __WALL) == pid) + return; + usleep(1000); + } + DIR* dir = opendir("/sys/fs/fuse/connections"); + if (dir) { + for (;;) { + struct dirent* ent = readdir(dir); + if (!ent) + break; + if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) + continue; + char abort[300]; + snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort", + ent->d_name); + int fd = open(abort, O_WRONLY); + if (fd == -1) { + continue; + } + if (write(fd, abort, 1) < 0) { + } + close(fd); + } + closedir(dir); + } else { + } + while (waitpid(-1, status, __WALL) != pid) { + } +} + +#define SYZ_HAVE_SETUP_TEST 1 +static void setup_test() +{ + prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); + setpgrp(); + write_file("/proc/self/oom_score_adj", "1000"); +} + +struct thread_t { + int created, call; + event_t ready, done; +}; + +static struct thread_t threads[16]; +static void execute_call(int call); +static int running; + +static void* thr(void* arg) +{ + struct thread_t* th = (struct thread_t*)arg; + for (;;) { + event_wait(&th->ready); + event_reset(&th->ready); + execute_call(th->call); + __atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED); + event_set(&th->done); + } + return 0; +} + +static void execute_one(void) +{ + int i, call, thread; + for (call = 0; call < 3; call++) { + for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0])); + thread++) { + struct thread_t* th = &threads[thread]; + if (!th->created) { + 
th->created = 1; + event_init(&th->ready); + event_init(&th->done); + event_set(&th->done); + thread_start(thr, th); + } + if (!event_isset(&th->done)) + continue; + event_reset(&th->done); + th->call = call; + __atomic_fetch_add(&running, 1, __ATOMIC_RELAXED); + event_set(&th->ready); + event_timedwait(&th->done, 45); + break; + } + } + for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++) + sleep_ms(1); +} + +static void execute_one(void); + +#define WAIT_FLAGS __WALL + +static void loop(void) +{ + int iter; + for (iter = 0;; iter++) { + int pid = fork(); + if (pid < 0) + exit(1); + if (pid == 0) { + setup_test(); + execute_one(); + exit(0); + } + int status = 0; + uint64_t start = current_time_ms(); + for (;;) { + if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid) + break; + sleep_ms(1); + if (current_time_ms() - start < 5 * 1000) + continue; + kill_and_wait(pid, &status); + break; + } + } +} + +#ifndef __NR_io_uring_register +#define __NR_io_uring_register 427 +#endif +#ifndef __NR_io_uring_setup +#define __NR_io_uring_setup 425 +#endif + +uint64_t r[1] = {0xffffffffffffffff}; + +void execute_call(int call) +{ + long res; + switch (call) { + case 0: + *(uint32_t*)0x20000040 = 0; + *(uint32_t*)0x20000044 = 0; + *(uint32_t*)0x20000048 = 0; + *(uint32_t*)0x2000004c = 0; + *(uint32_t*)0x20000050 = 0; + *(uint32_t*)0x20000054 = 0; + *(uint32_t*)0x20000058 = 0; + *(uint32_t*)0x2000005c = 0; + *(uint32_t*)0x20000060 = 0; + *(uint32_t*)0x20000064 = 0; + *(uint32_t*)0x20000068 = 0; + *(uint32_t*)0x2000006c = 0; + *(uint32_t*)0x20000070 = 0; + *(uint32_t*)0x20000074 = 0; + *(uint32_t*)0x20000078 = 0; + *(uint32_t*)0x2000007c = 0; + *(uint32_t*)0x20000080 = 0; + *(uint32_t*)0x20000084 = 0; + *(uint64_t*)0x20000088 = 0; + *(uint32_t*)0x20000090 = 0; + *(uint32_t*)0x20000094 = 0; + *(uint32_t*)0x20000098 = 0; + *(uint32_t*)0x2000009c = 0; + *(uint32_t*)0x200000a0 = 0; + *(uint32_t*)0x200000a4 = 0; + *(uint32_t*)0x200000a8 = 0; + *(uint32_t*)0x200000ac 
= 0; + *(uint64_t*)0x200000b0 = 0; + res = syscall(__NR_io_uring_setup, 0x64, 0x20000040); + if (res != -1) + r[0] = res; + break; + case 1: + syscall(__NR_io_uring_register, (long)r[0], 0, 0, 0); + break; + case 2: + syscall(__NR_io_uring_register, (long)r[0], 0, 0, 0); + break; + } +} + +static void sig_int(int sig) +{ + exit(0); +} + +int main(int argc, char *argv[]) +{ + if (argc > 1) + return 0; + signal(SIGINT, sig_int); + mmap((void *) 0x20000000, 0x1000000, 3, 0x32, -1, 0); + loop(); + return 0; +} diff --git a/test/500f9fbadef8-test.c b/test/500f9fbadef8-test.c new file mode 100644 index 0000000..9ebff43 --- /dev/null +++ b/test/500f9fbadef8-test.c @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: Single depth submit+wait poll hang test + * + */ +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +#define BLOCKS 4096 + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct iovec iov; + char buf[32]; + off_t offset; + unsigned blocks; + int ret, fd; + + if (argc > 1) + return 0; + + if (posix_memalign(&iov.iov_base, 4096, 4096)) { + fprintf(stderr, "memalign failed\n"); + return 1; + } + iov.iov_len = 4096; + + ret = io_uring_queue_init(1, &ring, IORING_SETUP_IOPOLL); + if (ret) { + fprintf(stderr, "ring setup failed\n"); + return 1; + + } + + sprintf(buf, "./XXXXXX"); + fd = mkostemp(buf, O_WRONLY | O_DIRECT | O_CREAT); + if (fd < 0) { + perror("mkostemp"); + return 1; + } + + offset = 0; + blocks = BLOCKS; + do { + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + io_uring_prep_writev(sqe, fd, &iov, 1, offset); + ret = io_uring_submit_and_wait(&ring, 1); + if (ret < 0) { + fprintf(stderr, "submit_and_wait: %d\n", ret); + goto err; + } + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion: %d\n", ret); + goto err; + } + if (cqe->res != 
4096) { + if (cqe->res == -EOPNOTSUPP) + goto skipped; + goto err; + } + io_uring_cqe_seen(&ring, cqe); + offset += 4096; + } while (--blocks); + + close(fd); + unlink(buf); + return 0; +err: + close(fd); + unlink(buf); + return 1; +skipped: + fprintf(stderr, "Polling not supported in current dir, test skipped\n"); + close(fd); + unlink(buf); + return 0; +} diff --git a/test/7ad0e4b2f83c-test.c b/test/7ad0e4b2f83c-test.c new file mode 100644 index 0000000..4d760e1 --- /dev/null +++ b/test/7ad0e4b2f83c-test.c @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: MIT */ +#include +#include +#include +#include "liburing.h" + +static unsigned long long mtime_since(const struct timeval *s, + const struct timeval *e) +{ + long long sec, usec; + + sec = e->tv_sec - s->tv_sec; + usec = (e->tv_usec - s->tv_usec); + if (sec > 0 && usec < 0) { + sec--; + usec += 1000000; + } + + sec *= 1000; + usec /= 1000; + return sec + usec; +} + +static unsigned long long mtime_since_now(struct timeval *tv) +{ + struct timeval end; + + gettimeofday(&end, NULL); + return mtime_since(tv, &end); +} + +int main(int argc, char *argv[]) +{ + struct __kernel_timespec ts1, ts2; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct io_uring ring; + unsigned long msec; + struct timeval tv; + int ret; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(32, &ring, 0); + if (ret) { + fprintf(stderr, "io_uring_queue_init=%d\n", ret); + return 1; + } + + sqe = io_uring_get_sqe(&ring); + io_uring_prep_nop(sqe); + ret = io_uring_submit(&ring); + if (ret != 1) { + fprintf(stderr, "io_uring_submit1=%d\n", ret); + return 1; + } + + + ts1.tv_sec = 5, + ts1.tv_nsec = 0; + ret = io_uring_wait_cqe_timeout(&ring, &cqe, &ts1); + if (ret) { + fprintf(stderr, "io_uring_wait_cqe_timeout=%d\n", ret); + return 1; + } + io_uring_cqe_seen(&ring, cqe); + gettimeofday(&tv, NULL); + + ts2.tv_sec = 1; + ts2.tv_nsec = 0; + sqe = io_uring_get_sqe(&ring); + io_uring_prep_timeout(sqe, &ts2, 0, 0); + sqe->user_data = 
89; + ret = io_uring_submit(&ring); + if (ret != 1) { + fprintf(stderr, "io_uring_submit2=%d\n", ret); + return 1; + } + + io_uring_wait_cqe(&ring, &cqe); + io_uring_cqe_seen(&ring, cqe); + msec = mtime_since_now(&tv); + if (msec >= 900 && msec <= 1100) { + io_uring_queue_exit(&ring); + return 0; + } + + fprintf(stderr, "%s: Timeout seems wonky (got %lu)\n", __FUNCTION__, + msec); + io_uring_queue_exit(&ring); + return 1; +} diff --git a/test/8a9973408177-test.c b/test/8a9973408177-test.c new file mode 100644 index 0000000..94bf781 --- /dev/null +++ b/test/8a9973408177-test.c @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: MIT */ +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static int register_file(struct io_uring *ring) +{ + char buf[32]; + int ret, fd; + + sprintf(buf, "./XXXXXX"); + fd = mkstemp(buf); + if (fd < 0) { + perror("open"); + return 1; + } + + ret = io_uring_register_files(ring, &fd, 1); + if (ret) { + fprintf(stderr, "file register %d\n", ret); + return 1; + } + + ret = io_uring_unregister_files(ring); + if (ret) { + fprintf(stderr, "file register %d\n", ret); + return 1; + } + + unlink(buf); + close(fd); + return 0; +} + +static int test_single_fsync(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + char buf[32]; + int fd, ret; + + sprintf(buf, "./XXXXXX"); + fd = mkstemp(buf); + if (fd < 0) { + perror("open"); + return 1; + } + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + io_uring_prep_fsync(sqe, fd, 0); + + ret = io_uring_submit(ring); + if (ret <= 0) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + printf("wait completion %d\n", ret); + goto err; + } + + io_uring_cqe_seen(ring, cqe); + unlink(buf); + return 0; +err: + unlink(buf); + return 1; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int ret; + + if (argc > 1) + return 0; + + 
ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + printf("ring setup failed\n"); + return 1; + } + + ret = register_file(&ring); + if (ret) + return ret; + ret = test_single_fsync(&ring); + if (ret) { + printf("test_single_fsync failed\n"); + return ret; + } + + return 0; +} diff --git a/test/917257daa0fe-test.c b/test/917257daa0fe-test.c new file mode 100644 index 0000000..2a3cb93 --- /dev/null +++ b/test/917257daa0fe-test.c @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: MIT */ +// autogenerated by syzkaller (https://github.com/google/syzkaller) + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef __NR_io_uring_setup +#define __NR_io_uring_setup 425 +#endif + +int main(int argc, char *argv[]) +{ + if (argc > 1) + return 0; + + mmap((void *) 0x20000000, 0x1000000, 3, 0x32, -1, 0); + + *(uint32_t*)0x20000000 = 0; + *(uint32_t*)0x20000004 = 0; + *(uint32_t*)0x20000008 = 6; + *(uint32_t*)0x2000000c = 0; + *(uint32_t*)0x20000010 = 0x3af; + *(uint32_t*)0x20000014 = 0; + *(uint32_t*)0x20000018 = 0; + *(uint32_t*)0x2000001c = 0; + *(uint32_t*)0x20000020 = 0; + *(uint32_t*)0x20000024 = 0; + *(uint32_t*)0x20000028 = 0; + *(uint32_t*)0x2000002c = 0; + *(uint32_t*)0x20000030 = 0; + *(uint32_t*)0x20000034 = 0; + *(uint32_t*)0x20000038 = 0; + *(uint32_t*)0x2000003c = 0; + *(uint32_t*)0x20000040 = 0; + *(uint32_t*)0x20000044 = 0; + *(uint64_t*)0x20000048 = 0; + *(uint32_t*)0x20000050 = 0; + *(uint32_t*)0x20000054 = 0; + *(uint32_t*)0x20000058 = 0; + *(uint32_t*)0x2000005c = 0; + *(uint32_t*)0x20000060 = 0; + *(uint32_t*)0x20000064 = 0; + *(uint32_t*)0x20000068 = 0; + *(uint32_t*)0x2000006c = 0; + *(uint64_t*)0x20000070 = 0; + syscall(__NR_io_uring_setup, 0x7a6, 0x20000000); + return 0; +} diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000..cbbd400 --- /dev/null +++ b/test/Makefile @@ -0,0 +1,102 @@ +prefix ?= /usr +datadir ?= $(prefix)/share + +INSTALL=install + +CFLAGS ?= -g -O2 +XCFLAGS = 
+override CFLAGS += -Wall -Wextra -Wno-unused-parameter -Wno-sign-compare\ + -D_GNU_SOURCE -D__SANE_USERSPACE_TYPES__ -L../src/ \ + -I../src/include/ -include ../config-host.h +CXXFLAGS += $(CFLAGS) -std=c++11 + +all_targets += poll poll-cancel ring-leak fsync io_uring_setup io_uring_register \ + io_uring_enter nop sq-full cq-full 35fa71a030ca-test \ + 917257daa0fe-test b19062a56726-test eeed8b54e0df-test link \ + send_recvmsg a4c0b3decb33-test 500f9fbadef8-test timeout \ + sq-space_left stdout cq-ready cq-peek-batch file-register \ + cq-size 8a9973408177-test a0908ae19763-test 232c93d07b74-test \ + socket-rw accept timeout-overflow defer read-write io-cancel \ + link-timeout cq-overflow link_drain fc2a85cb02ef-test \ + poll-link accept-link fixed-link poll-cancel-ton teardowns \ + poll-many b5837bd5311d-test accept-test d77a67ed5f27-test \ + connect 7ad0e4b2f83c-test submit-reuse fallocate open-close \ + file-update accept-reuse poll-v-poll fadvise madvise \ + short-read openat2 probe shared-wq personality eventfd \ + send_recv eventfd-ring across-fork sq-poll-kthread splice \ + lfs-openat lfs-openat-write iopoll d4ae271dfaae-test \ + eventfd-disable close-opath ce593a6c480a-test cq-overflow-peek + +include ../Makefile.quiet + +ifneq ($(MAKECMDGOALS),clean) +include ../config-host.mak +endif + +ifdef CONFIG_HAVE_STATX +all_targets += statx +endif + +ifdef CONFIG_HAVE_CXX +all_targets += sq-full-cpp +endif + +all: $(all_targets) + +%: %.c + $(QUIET_CC)$(CC) $(CFLAGS) -o $@ $< -luring $(XCFLAGS) + +%: %.cc + $(QUIET_CC)$(CXX) $(CXXFLAGS) -o $@ $< -luring $(XCFLAGS) + +test_srcs := poll.c poll-cancel.c ring-leak.c fsync.c io_uring_setup.c \ + io_uring_register.c io_uring_enter.c nop.c sq-full.c cq-full.c \ + 35fa71a030ca-test.c 917257daa0fe-test.c b19062a56726-test.c \ + eeed8b54e0df-test.c link.c send_recvmsg.c a4c0b3decb33-test.c \ + 500f9fbadef8-test.c timeout.c sq-space_left.c stdout.c cq-ready.c\ + cq-peek-batch.c file-register.c cq-size.c 8a9973408177-test.c \ 
+ a0908ae19763-test.c 232c93d07b74-test.c socket-rw.c accept.c \ + timeout-overflow.c defer.c read-write.c io-cancel.c link-timeout.c \ + cq-overflow.c link_drain.c fc2a85cb02ef-test.c poll-link.c \ + accept-link.c fixed-link.c poll-cancel-ton.c teardowns.c poll-many.c \ + b5837bd5311d-test.c accept-test.c d77a67ed5f27-test.c connect.c \ + 7ad0e4b2f83c-test.c submit-reuse.c fallocate.c open-close.c \ + file-update.c accept-reuse.c poll-v-poll.c fadvise.c \ + madvise.c short-read.c openat2.c probe.c shared-wq.c \ + personality.c eventfd.c eventfd-ring.c across-fork.c sq-poll-kthread.c \ + splice.c lfs-openat.c lfs-openat-write.c iopoll.c d4ae271dfaae-test.c \ + eventfd-disable.c close-opath.c ce593a6c480a-test.c cq-overflow-peek.c + +ifdef CONFIG_HAVE_STATX +test_srcs += statx.c +endif + +ifdef CONFIG_HAVE_CXX +test_srcs += sq-full-cpp +endif + +test_objs := $(patsubst %.c,%.ol,$(test_srcs)) + +35fa71a030ca-test: XCFLAGS = -lpthread +232c93d07b74-test: XCFLAGS = -lpthread +send_recv: XCFLAGS = -lpthread +send_recvmsg: XCFLAGS = -lpthread +poll-link: XCFLAGS = -lpthread +accept-link: XCFLAGS = -lpthread +submit-reuse: XCFLAGS = -lpthread +poll-v-poll: XCFLAGS = -lpthread +across-fork: XCFLAGS = -lpthread +ce593a6c480a-test: XCFLAGS = -lpthread + +install: $(all_targets) runtests.sh runtests-loop.sh + $(INSTALL) -D -d -m 755 $(datadir)/liburing-test/ + $(INSTALL) -D -m 755 $(all_targets) $(datadir)/liburing-test/ + $(INSTALL) -D -m 755 runtests.sh $(datadir)/liburing-test/ + $(INSTALL) -D -m 755 runtests-loop.sh $(datadir)/liburing-test/ +clean: + @rm -f $(all_targets) $(test_objs) + +runtests: all + @./runtests.sh $(all_targets) +runtests-loop: all + @./runtests-loop.sh $(all_targets) diff --git a/test/a0908ae19763-test.c b/test/a0908ae19763-test.c new file mode 100644 index 0000000..1d5741d --- /dev/null +++ b/test/a0908ae19763-test.c @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: MIT */ +// autogenerated by syzkaller (https://github.com/google/syzkaller) + +#include 
+#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef __NR_io_uring_register +#define __NR_io_uring_register 427 +#endif +#ifndef __NR_io_uring_setup +#define __NR_io_uring_setup 425 +#endif + +uint64_t r[1] = {0xffffffffffffffff}; + +int main(int argc, char *argv[]) +{ + if (argc > 1) + return 0; + mmap((void *) 0x20000000, 0x1000000, 3, 0x32, -1, 0); + intptr_t res = 0; + *(uint32_t*)0x20000080 = 0; + *(uint32_t*)0x20000084 = 0; + *(uint32_t*)0x20000088 = 0; + *(uint32_t*)0x2000008c = 0; + *(uint32_t*)0x20000090 = 0; + *(uint32_t*)0x20000094 = 0; + *(uint32_t*)0x20000098 = 0; + *(uint32_t*)0x2000009c = 0; + *(uint32_t*)0x200000a0 = 0; + *(uint32_t*)0x200000a4 = 0; + *(uint32_t*)0x200000a8 = 0; + *(uint32_t*)0x200000ac = 0; + *(uint32_t*)0x200000b0 = 0; + *(uint32_t*)0x200000b4 = 0; + *(uint32_t*)0x200000b8 = 0; + *(uint32_t*)0x200000bc = 0; + *(uint32_t*)0x200000c0 = 0; + *(uint32_t*)0x200000c4 = 0; + *(uint64_t*)0x200000c8 = 0; + *(uint32_t*)0x200000d0 = 0; + *(uint32_t*)0x200000d4 = 0; + *(uint32_t*)0x200000d8 = 0; + *(uint32_t*)0x200000dc = 0; + *(uint32_t*)0x200000e0 = 0; + *(uint32_t*)0x200000e4 = 0; + *(uint32_t*)0x200000e8 = 0; + *(uint32_t*)0x200000ec = 0; + *(uint64_t*)0x200000f0 = 0; + res = syscall(__NR_io_uring_setup, 0xa4, 0x20000080); + if (res != -1) + r[0] = res; + *(uint32_t*)0x20000280 = -1; + syscall(__NR_io_uring_register, r[0], 2, 0x20000280, 1); + return 0; +} diff --git a/test/a4c0b3decb33-test.c b/test/a4c0b3decb33-test.c new file mode 100644 index 0000000..e64118e --- /dev/null +++ b/test/a4c0b3decb33-test.c @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: MIT */ +// autogenerated by syzkaller (https://github.com/google/syzkaller) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void sleep_ms(uint64_t ms) +{ + usleep(ms * 1000); +} + +static uint64_t 
current_time_ms(void) +{ + struct timespec ts; + if (clock_gettime(CLOCK_MONOTONIC, &ts)) + exit(1); + return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000; +} + +static bool write_file(const char* file, const char* what, ...) +{ + char buf[1024]; + va_list args; + va_start(args, what); + vsnprintf(buf, sizeof(buf), what, args); + va_end(args); + buf[sizeof(buf) - 1] = 0; + int len = strlen(buf); + int fd = open(file, O_WRONLY | O_CLOEXEC); + if (fd == -1) + return false; + if (write(fd, buf, len) != len) { + int err = errno; + close(fd); + errno = err; + return false; + } + close(fd); + return true; +} + +static void kill_and_wait(int pid, int* status) +{ + kill(-pid, SIGKILL); + kill(pid, SIGKILL); + int i; + for (i = 0; i < 100; i++) { + if (waitpid(-1, status, WNOHANG | __WALL) == pid) + return; + usleep(1000); + } + DIR* dir = opendir("/sys/fs/fuse/connections"); + if (dir) { + for (;;) { + struct dirent* ent = readdir(dir); + if (!ent) + break; + if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) + continue; + char abort[300]; + snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort", + ent->d_name); + int fd = open(abort, O_WRONLY); + if (fd == -1) { + continue; + } + if (write(fd, abort, 1) < 0) { + } + close(fd); + } + closedir(dir); + } else { + } + while (waitpid(-1, status, __WALL) != pid) { + } +} + +static void setup_test() +{ + prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); + setpgrp(); + write_file("/proc/self/oom_score_adj", "1000"); +} + +static void execute_one(void); + +#define WAIT_FLAGS __WALL + +static void loop(void) +{ + int iter; + for (iter = 0;; iter++) { + int pid = fork(); + if (pid < 0) + exit(1); + if (pid == 0) { + setup_test(); + execute_one(); + exit(0); + } + int status = 0; + uint64_t start = current_time_ms(); + for (;;) { + if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid) + break; + sleep_ms(1); + if (current_time_ms() - start < 5 * 1000) + continue; + kill_and_wait(pid, &status); + 
break; + } + } +} + +#ifndef __NR_io_uring_setup +#define __NR_io_uring_setup 425 +#endif + +void execute_one(void) +{ + *(uint32_t*)0x20000080 = 0; + *(uint32_t*)0x20000084 = 0; + *(uint32_t*)0x20000088 = 3; + *(uint32_t*)0x2000008c = 3; + *(uint32_t*)0x20000090 = 0x175; + *(uint32_t*)0x20000094 = 0; + *(uint32_t*)0x20000098 = 0; + *(uint32_t*)0x2000009c = 0; + *(uint32_t*)0x200000a0 = 0; + *(uint32_t*)0x200000a4 = 0; + *(uint32_t*)0x200000a8 = 0; + *(uint32_t*)0x200000ac = 0; + *(uint32_t*)0x200000b0 = 0; + *(uint32_t*)0x200000b4 = 0; + *(uint32_t*)0x200000b8 = 0; + *(uint32_t*)0x200000bc = 0; + *(uint32_t*)0x200000c0 = 0; + *(uint32_t*)0x200000c4 = 0; + *(uint64_t*)0x200000c8 = 0; + *(uint32_t*)0x200000d0 = 0; + *(uint32_t*)0x200000d4 = 0; + *(uint32_t*)0x200000d8 = 0; + *(uint32_t*)0x200000dc = 0; + *(uint32_t*)0x200000e0 = 0; + *(uint32_t*)0x200000e4 = 0; + *(uint32_t*)0x200000e8 = 0; + *(uint32_t*)0x200000ec = 0; + *(uint64_t*)0x200000f0 = 0; + syscall(__NR_io_uring_setup, 0x983, 0x20000080); +} + +static void sig_int(int sig) +{ + exit(0); +} + +int main(int argc, char *argv[]) +{ + if (argc > 1) + return 0; + signal(SIGINT, sig_int); + mmap((void *) 0x20000000, 0x1000000, 3, 0x32, -1, 0); + loop(); + return 0; +} diff --git a/test/accept-link.c b/test/accept-link.c new file mode 100644 index 0000000..25825dc --- /dev/null +++ b/test/accept-link.c @@ -0,0 +1,239 @@ +/* SPDX-License-Identifier: MIT */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t cond = PTHREAD_COND_INITIALIZER; + +static int recv_thread_ready = 0; +static int recv_thread_done = 0; + +static void signal_var(int *var) +{ + pthread_mutex_lock(&mutex); + *var = 1; + pthread_cond_signal(&cond); + pthread_mutex_unlock(&mutex); +} + +static void wait_for_var(int *var) +{ + pthread_mutex_lock(&mutex); + + while (!*var) + 
pthread_cond_wait(&cond, &mutex); + + pthread_mutex_unlock(&mutex); +} + +struct data { + unsigned expected[2]; + unsigned just_positive[2]; + unsigned long timeout; + int port; + int stop; +}; + +static void *send_thread(void *arg) +{ + struct data *data = arg; + + wait_for_var(&recv_thread_ready); + + if (data->stop) + return NULL; + + int s0 = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + assert(s0 != -1); + + struct sockaddr_in addr; + + addr.sin_family = AF_INET; + addr.sin_port = data->port; + addr.sin_addr.s_addr = 0x0100007fU; + + assert(connect(s0, (struct sockaddr*)&addr, sizeof(addr)) != -1); + + wait_for_var(&recv_thread_done); + + close(s0); + return NULL; +} + +void *recv_thread(void *arg) +{ + struct data *data = arg; + struct io_uring ring; + int i; + + assert(io_uring_queue_init(8, &ring, 0) == 0); + + int s0 = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + assert(s0 != -1); + + int32_t val = 1; + assert(setsockopt(s0, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)) != -1); + assert(setsockopt(s0, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)) != -1); + + struct sockaddr_in addr; + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = 0x0100007fU; + + i = 0; + do { + data->port = 1025 + (rand() % 64510); + addr.sin_port = data->port; + + if (bind(s0, (struct sockaddr*)&addr, sizeof(addr)) != -1) + break; + } while (++i < 100); + + if (i >= 100) { + printf("Can't find good port, skipped\n"); + data->stop = 1; + signal_var(&recv_thread_ready); + goto out; + } + + assert(listen(s0, 128) != -1); + + signal_var(&recv_thread_ready); + + struct io_uring_sqe *sqe; + + sqe = io_uring_get_sqe(&ring); + assert(sqe != NULL); + + io_uring_prep_accept(sqe, s0, NULL, NULL, 0); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 1; + + sqe = io_uring_get_sqe(&ring); + assert(sqe != NULL); + + struct __kernel_timespec ts; + ts.tv_sec = data->timeout / 1000000000; + ts.tv_nsec = data->timeout % 1000000000; + io_uring_prep_link_timeout(sqe, &ts, 0); + sqe->user_data = 2; + + 
assert(io_uring_submit(&ring) == 2); + + for (i = 0; i < 2; i++) { + struct io_uring_cqe *cqe; + int idx; + + if (io_uring_wait_cqe(&ring, &cqe)) { + fprintf(stderr, "wait cqe failed\n"); + goto err; + } + idx = cqe->user_data - 1; + if (cqe->res != data->expected[idx]) { + if (cqe->res > 0 && data->just_positive[idx]) + goto ok; + fprintf(stderr, "cqe %llu got %d, wanted %d\n", + cqe->user_data, cqe->res, + data->expected[idx]); + goto err; + } +ok: + if (cqe->user_data == 1 && cqe->res > 0) + close(cqe->res); + + io_uring_cqe_seen(&ring, cqe); + } + + signal_var(&recv_thread_done); + +out: + close(s0); + return NULL; +err: + close(s0); + return (void *) 1; +} + +static int test_accept_timeout(int do_connect, unsigned long timeout) +{ + struct io_uring ring; + struct io_uring_params p = {}; + pthread_t t1, t2; + struct data d; + void *tret; + int ret, fast_poll; + + ret = io_uring_queue_init_params(1, &ring, &p); + if (ret) { + fprintf(stderr, "queue_init: %d\n", ret); + return 1; + }; + + fast_poll = (p.features & IORING_FEAT_FAST_POLL) != 0; + io_uring_queue_exit(&ring); + + recv_thread_ready = 0; + recv_thread_done = 0; + + memset(&d, 0, sizeof(d)); + d.timeout = timeout; + if (!do_connect) { + if (fast_poll) { + d.expected[0] = -ECANCELED; + d.expected[1] = -ETIME; + } else { + d.expected[0] = -EINTR; + d.expected[1] = -EALREADY; + } + } else { + d.expected[0] = -1U; + d.just_positive[0] = 1; + d.expected[1] = -ECANCELED; + } + + pthread_create(&t1, NULL, recv_thread, &d); + + if (do_connect) + pthread_create(&t2, NULL, send_thread, &d); + + pthread_join(t1, &tret); + if (tret) + ret++; + + if (do_connect) { + pthread_join(t2, &tret); + if (tret) + ret++; + } + + return ret; +} + +int main(int argc, char *argv[]) +{ + if (argc < 1) + return 0; + if (test_accept_timeout(0, 200000000)) { + fprintf(stderr, "accept timeout 0 failed\n"); + return 1; + } + + if (test_accept_timeout(1, 1000000000)) { + fprintf(stderr, "accept and connect timeout 0 failed\n"); + 
return 1; + } + + return 0; +} diff --git a/test/accept-reuse.c b/test/accept-reuse.c new file mode 100644 index 0000000..59a2f79 --- /dev/null +++ b/test/accept-reuse.c @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: MIT */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "liburing.h" +#include "../src/syscall.h" + +struct io_uring io_uring; + +int sys_io_uring_enter(const int fd, + const unsigned to_submit, + const unsigned min_complete, + const unsigned flags, sigset_t * const sig) +{ + return __sys_io_uring_enter(fd, to_submit, min_complete, flags, sig); +} + +int submit_sqe(void) +{ + struct io_uring_sq *sq = &io_uring.sq; + const unsigned tail = *sq->ktail; + + sq->array[tail & *sq->kring_mask] = 0; + io_uring_smp_store_release(sq->ktail, tail + 1); + + return sys_io_uring_enter(io_uring.ring_fd, 1, 0, 0, NULL); +} + +int main(int argc, char **argv) +{ + struct addrinfo *addr_info_list = NULL; + struct addrinfo *ai, *addr_info = NULL; + struct io_uring_params params; + struct io_uring_sqe *sqe; + struct addrinfo hints; + struct sockaddr sa; + socklen_t sa_size = sizeof(sa); + int ret, listen_fd, connect_fd, val, i; + + if (argc > 1) + return 0; + + memset(¶ms, 0, sizeof(params)); + ret = io_uring_queue_init_params(1024, &io_uring, ¶ms); + if (ret) { + fprintf(stderr, "io_uring_init_failed: %d\n", ret); + return 1; + } + if (!(params.features & IORING_FEAT_SUBMIT_STABLE)) { + fprintf(stdout, "FEAT_SUBMIT_STABLE not there, skipping\n"); + return 0; + } + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_PASSIVE | AI_NUMERICSERV; + + ret = getaddrinfo(NULL, "12345", &hints, &addr_info_list); + if (ret < 0) { + perror("getaddrinfo"); + return 1; + } + + for (ai = addr_info_list; ai; ai = ai->ai_next) { + if (ai->ai_family == AF_INET || ai->ai_family == AF_INET6) { + addr_info = ai; + break; + } + } + if (!addr_info) { + 
fprintf(stderr, "addrinfo not found\n"); + return 1; + } + + sqe = &io_uring.sq.sqes[0]; + listen_fd = -1; + + ret = socket(addr_info->ai_family, SOCK_STREAM, + addr_info->ai_protocol); + if (ret < 0) { + perror("socket"); + return 1; + } + listen_fd = ret; + + val = 1; + setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(int)); + setsockopt(listen_fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(int)); + + ret = bind(listen_fd, addr_info->ai_addr, addr_info->ai_addrlen); + if (ret < 0) { + perror("bind"); + return 1; + } + + ret = listen(listen_fd, SOMAXCONN); + if (ret < 0) { + perror("listen"); + return 1; + } + + memset(&sa, 0, sizeof(sa)); + + io_uring_prep_accept(sqe, listen_fd, &sa, &sa_size, 0); + sqe->user_data = 1; + ret = submit_sqe(); + if (ret != 1) { + fprintf(stderr, "submit failed: %d\n", ret); + return 1; + } + + connect_fd = -1; + ret = socket(addr_info->ai_family, SOCK_STREAM, addr_info->ai_protocol); + if (ret < 0) { + perror("socket"); + return 1; + } + connect_fd = ret; + + io_uring_prep_connect(sqe, connect_fd, addr_info->ai_addr, + addr_info->ai_addrlen); + sqe->user_data = 2; + ret = submit_sqe(); + if (ret != 1) { + fprintf(stderr, "submit failed: %d\n", ret); + return 1; + } + + for (i = 0; i < 2; i++) { + struct io_uring_cqe *cqe = NULL; + + ret = io_uring_wait_cqe(&io_uring, &cqe); + if (ret) { + fprintf(stderr, "io_uring_wait_cqe: %d\n", ret); + return 1; + } + + switch (cqe->user_data) { + case 1: + if (cqe->res < 0) { + fprintf(stderr, "accept failed: %d\n", cqe->res); + return 1; + } + break; + case 2: + if (cqe->res) { + fprintf(stderr, "connect failed: %d\n", cqe->res); + return 1; + } + break; + } + io_uring_cq_advance(&io_uring, 1); + } + + freeaddrinfo(addr_info_list); + io_uring_queue_exit(&io_uring); + return 0; +} diff --git a/test/accept-test.c b/test/accept-test.c new file mode 100644 index 0000000..52b4395 --- /dev/null +++ b/test/accept-test.c @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: 
Check to see if accept handles addr and addrlen + */ +#include +#include +#include +#include +#include +#include "liburing.h" + +int main(int argc, char *argv[]) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct io_uring ring; + struct sockaddr_un addr; + socklen_t addrlen = sizeof(addr); + int ret, fd; + struct __kernel_timespec ts = { + .tv_sec = 0, + .tv_nsec = 1000000 + }; + + if (argc > 1) + return 0; + + if (io_uring_queue_init(4, &ring, 0) != 0) { + fprintf(stderr, "ring setup failed\n"); + return 1; + } + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + assert(fd != -1); + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + memcpy(addr.sun_path, "\0sock", 6); + + assert(bind(fd, (struct sockaddr *)&addr, addrlen) != -1); + assert(listen(fd, 128) != -1); + + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + return 1; + } + io_uring_prep_accept(sqe, fd, (struct sockaddr*)&addr, &addrlen, 0); + sqe->user_data = 1; + + ret = io_uring_submit(&ring); + if (ret != 1) { + fprintf(stderr, "Got submit %d, expected 1\n", ret); + return 1; + } + + ret = io_uring_wait_cqe_timeout(&ring, &cqe, &ts); + if (ret != -ETIME) { + fprintf(stderr, "accept() failed to use addr & addrlen parameters!\n"); + return 1; + } + + io_uring_queue_exit(&ring); + return 0; +} diff --git a/test/accept.c b/test/accept.c new file mode 100644 index 0000000..646e904 --- /dev/null +++ b/test/accept.c @@ -0,0 +1,407 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Check that IORING_OP_ACCEPT works, and send some data across to verify we + * didn't get a junk fd. 
+ */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static int no_accept; + +struct data { + char buf[128]; + struct iovec iov; +}; + +static void queue_send(struct io_uring *ring, int fd) +{ + struct io_uring_sqe *sqe; + struct data *d; + + d = malloc(sizeof(*d)); + d->iov.iov_base = d->buf; + d->iov.iov_len = sizeof(d->buf); + + sqe = io_uring_get_sqe(ring); + io_uring_prep_writev(sqe, fd, &d->iov, 1, 0); +} + +static void queue_recv(struct io_uring *ring, int fd) +{ + struct io_uring_sqe *sqe; + struct data *d; + + d = malloc(sizeof(*d)); + d->iov.iov_base = d->buf; + d->iov.iov_len = sizeof(d->buf); + + sqe = io_uring_get_sqe(ring); + io_uring_prep_readv(sqe, fd, &d->iov, 1, 0); +} + +static int accept_conn(struct io_uring *ring, int fd) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + int ret; + + sqe = io_uring_get_sqe(ring); + io_uring_prep_accept(sqe, fd, NULL, NULL, 0); + + assert(io_uring_submit(ring) != -1); + + assert(!io_uring_wait_cqe(ring, &cqe)); + ret = cqe->res; + io_uring_cqe_seen(ring, cqe); + return ret; +} + +static int start_accept_listen(struct sockaddr_in *addr, int port_off) +{ + int fd; + + fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); + + int32_t val = 1; + assert(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)) != -1); + assert(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)) != -1); + + struct sockaddr_in laddr; + + if (!addr) + addr = &laddr; + + addr->sin_family = AF_INET; + addr->sin_port = 0x1235 + port_off; + addr->sin_addr.s_addr = 0x0100007fU; + + assert(bind(fd, (struct sockaddr*)addr, sizeof(*addr)) != -1); + assert(listen(fd, 128) != -1); + + return fd; +} + +static int test(struct io_uring *ring, int accept_should_error) +{ + struct io_uring_cqe *cqe; + struct sockaddr_in addr; + uint32_t head; + uint32_t count = 0; + int done = 0; + int p_fd[2]; + + int32_t val, 
recv_s0 = start_accept_listen(&addr, 0); + + p_fd[1] = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); + + val = 1; + assert(setsockopt(p_fd[1], IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)) != -1); + + int32_t flags = fcntl(p_fd[1], F_GETFL, 0); + assert(flags != -1); + + flags |= O_NONBLOCK; + assert(fcntl(p_fd[1], F_SETFL, flags) != -1); + + assert(connect(p_fd[1], (struct sockaddr*)&addr, sizeof(addr)) == -1); + + flags = fcntl(p_fd[1], F_GETFL, 0); + assert(flags != -1); + + flags &= ~O_NONBLOCK; + assert(fcntl(p_fd[1], F_SETFL, flags) != -1); + + p_fd[0] = accept_conn(ring, recv_s0); + if (p_fd[0] == -EINVAL) { + if (accept_should_error) + goto out; + fprintf(stdout, "Accept not supported, skipping\n"); + no_accept = 1; + goto out; + } else if (p_fd[0] < 0) { + if (accept_should_error && + (p_fd[0] == -EBADF || p_fd[0] == -EINVAL)) + goto out; + fprintf(stderr, "Accept got %d\n", p_fd[0]); + goto err; + } + + queue_send(ring, p_fd[1]); + queue_recv(ring, p_fd[0]); + + assert(io_uring_submit_and_wait(ring, 2) != -1); + + while (count < 2) { + io_uring_for_each_cqe(ring, head, cqe) { + if (cqe->res < 0) { + fprintf(stderr, "Got cqe res %d\n", cqe->res); + done = 1; + break; + } + assert(cqe->res == 128); + count++; + } + + assert(count <= 2); + io_uring_cq_advance(ring, count); + if (done) + goto err; + } + +out: + close(p_fd[0]); + close(p_fd[1]); + return 0; +err: + close(p_fd[0]); + close(p_fd[1]); + return 1; +} + +static void sig_alrm(int sig) +{ + exit(0); +} + +static int test_accept_pending_on_exit(void) +{ + struct io_uring m_io_uring; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int fd; + + assert(io_uring_queue_init(32, &m_io_uring, 0) >= 0); + + fd = start_accept_listen(NULL, 0); + + sqe = io_uring_get_sqe(&m_io_uring); + io_uring_prep_accept(sqe, fd, NULL, NULL, 0); + assert(io_uring_submit(&m_io_uring) != -1); + + signal(SIGALRM, sig_alrm); + alarm(1); + assert(!io_uring_wait_cqe(&m_io_uring, &cqe)); + 
io_uring_cqe_seen(&m_io_uring, cqe); + + io_uring_queue_exit(&m_io_uring); + return 0; +} + +/* + * Test issue many accepts and see if we handle cancellation on exit + */ +static int test_accept_many(unsigned nr, unsigned usecs) +{ + struct io_uring m_io_uring; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + unsigned long cur_lim; + struct rlimit rlim; + int *fds, i, ret = 0; + + if (getrlimit(RLIMIT_NPROC, &rlim) < 0) { + perror("getrlimit"); + return 1; + } + + cur_lim = rlim.rlim_cur; + rlim.rlim_cur = nr / 4; + + if (setrlimit(RLIMIT_NPROC, &rlim) < 0) { + perror("setrlimit"); + return 1; + } + + assert(io_uring_queue_init(2 * nr, &m_io_uring, 0) >= 0); + + fds = calloc(nr, sizeof(int)); + + for (i = 0; i < nr; i++) + fds[i] = start_accept_listen(NULL, i); + + for (i = 0; i < nr; i++) { + sqe = io_uring_get_sqe(&m_io_uring); + io_uring_prep_accept(sqe, fds[i], NULL, NULL, 0); + sqe->user_data = 1 + i; + assert(io_uring_submit(&m_io_uring) == 1); + } + + if (usecs) + usleep(usecs); + + for (i = 0; i < nr; i++) { + if (io_uring_peek_cqe(&m_io_uring, &cqe)) + break; + if (cqe->res != -ECANCELED) { + fprintf(stderr, "Expected cqe to be cancelled\n"); + goto err; + } + io_uring_cqe_seen(&m_io_uring, cqe); + } +out: + rlim.rlim_cur = cur_lim; + if (setrlimit(RLIMIT_NPROC, &rlim) < 0) { + perror("setrlimit"); + return 1; + } + + free(fds); + io_uring_queue_exit(&m_io_uring); + return ret; +err: + ret = 1; + goto out; +} + +static int test_accept_cancel(unsigned usecs) +{ + struct io_uring m_io_uring; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int fd, i; + + assert(io_uring_queue_init(32, &m_io_uring, 0) >= 0); + + fd = start_accept_listen(NULL, 0); + + sqe = io_uring_get_sqe(&m_io_uring); + io_uring_prep_accept(sqe, fd, NULL, NULL, 0); + sqe->user_data = 1; + assert(io_uring_submit(&m_io_uring) == 1); + + if (usecs) + usleep(usecs); + + sqe = io_uring_get_sqe(&m_io_uring); + io_uring_prep_cancel(sqe, (void *) 1, 0); + sqe->user_data = 2; + 
assert(io_uring_submit(&m_io_uring) == 1); + + for (i = 0; i < 2; i++) { + assert(!io_uring_wait_cqe(&m_io_uring, &cqe)); + /* + * Two cases here: + * + * 1) We cancel the accept4() before it got started, we should + * get '0' for the cancel request and '-ECANCELED' for the + * accept request. + * 2) We cancel the accept4() after it's already running, we + * should get '-EALREADY' for the cancel request and + * '-EINTR' for the accept request. + */ + if (cqe->user_data == 1) { + if (cqe->res != -EINTR && cqe->res != -ECANCELED) { + fprintf(stderr, "Cancelled accept got %d\n", cqe->res); + goto err; + } + } else if (cqe->user_data == 2) { + if (cqe->res != -EALREADY && cqe->res != 0) { + fprintf(stderr, "Cancel got %d\n", cqe->res); + goto err; + } + } + io_uring_cqe_seen(&m_io_uring, cqe); + } + + io_uring_queue_exit(&m_io_uring); + return 0; +err: + io_uring_queue_exit(&m_io_uring); + return 1; +} + +static int test_accept(void) +{ + struct io_uring m_io_uring; + int ret; + + assert(io_uring_queue_init(32, &m_io_uring, 0) >= 0); + ret = test(&m_io_uring, 0); + io_uring_queue_exit(&m_io_uring); + return ret; +} + +static int test_accept_sqpoll(void) +{ + struct io_uring m_io_uring; + int ret; + + ret = io_uring_queue_init(32, &m_io_uring, IORING_SETUP_SQPOLL); + if (ret && geteuid()) { + printf("%s: skipped, not root\n", __FUNCTION__); + return 0; + } else if (ret) + return ret; + + ret = test(&m_io_uring, 1); + io_uring_queue_exit(&m_io_uring); + return ret; +} + +int main(int argc, char *argv[]) +{ + int ret; + + if (argc > 1) + return 0; + + ret = test_accept(); + if (ret) { + fprintf(stderr, "test_accept failed\n"); + return ret; + } + if (no_accept) + return 0; + + ret = test_accept_sqpoll(); + if (ret) { + fprintf(stderr, "test_accept_sqpoll failed\n"); + return ret; + } + + ret = test_accept_cancel(0); + if (ret) { + fprintf(stderr, "test_accept_cancel nodelay failed\n"); + return ret; + } + + ret = test_accept_cancel(10000); + if (ret) { + fprintf(stderr, 
"test_accept_cancel delay failed\n"); + return ret; + } + + ret = test_accept_many(128, 0); + if (ret) { + fprintf(stderr, "test_accept_many failed\n"); + return ret; + } + + ret = test_accept_many(128, 100000); + if (ret) { + fprintf(stderr, "test_accept_many failed\n"); + return ret; + } + + ret = test_accept_pending_on_exit(); + if (ret) { + fprintf(stderr, "test_accept_pending_on_exit failed\n"); + return ret; + } + + return 0; +} diff --git a/test/across-fork.c b/test/across-fork.c new file mode 100644 index 0000000..14ee93a --- /dev/null +++ b/test/across-fork.c @@ -0,0 +1,275 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: test sharing a ring across a fork + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + + +struct forktestmem +{ + struct io_uring ring; + pthread_barrier_t barrier; + pthread_barrierattr_t barrierattr; +}; + +static int open_tempfile(const char *dir, const char *fname) +{ + int fd; + char buf[32]; + + snprintf(buf, sizeof(buf), "%s/%s", + dir, fname); + fd = open(buf, O_RDWR | O_CREAT | O_APPEND, S_IRUSR | S_IWUSR); + if (fd < 0) { + perror("open"); + exit(1); + } + + return fd; +} + +static int submit_write(struct io_uring *ring, int fd, const char *str, + int wait) +{ + struct io_uring_sqe *sqe; + struct iovec iovec; + int ret; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "could not get sqe\n"); + return 1; + } + + iovec.iov_base = (char *) str; + iovec.iov_len = strlen(str); + io_uring_prep_writev(sqe, fd, &iovec, 1, 0); + ret = io_uring_submit_and_wait(ring, wait); + if (ret < 0) { + fprintf(stderr, "submit failed: %s\n", strerror(-ret)); + return 1; + } + + return 0; +} + +static int wait_cqe(struct io_uring *ring, const char *stage) +{ + struct io_uring_cqe *cqe; + int ret; + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret) { + fprintf(stderr, "%s wait_cqe failed %d\n", stage, ret); + return 1; + } + if (cqe->res 
< 0) { + fprintf(stderr, "%s cqe failed %d\n", stage, cqe->res); + return 1; + } + + io_uring_cqe_seen(ring, cqe); + return 0; +} + +static int verify_file(const char *tmpdir, const char *fname, const char* expect) +{ + int fd; + char buf[512]; + int err = 0; + + memset(buf, 0, sizeof(buf)); + + fd = open_tempfile(tmpdir, fname); + if (fd < 0) + return 1; + + if (read(fd, buf, sizeof(buf) - 1) < 0) + return 1; + + if (strcmp(buf, expect) != 0) { + fprintf(stderr, "content mismatch for %s\n" + "got:\n%s\n" + "expected:\n%s\n", + fname, buf, expect); + err = 1; + } + + close(fd); + return err; +} + +static void cleanup(const char *tmpdir) +{ + char buf[32]; + + /* don't check errors, called during partial runs */ + + snprintf(buf, sizeof(buf), "%s/%s", tmpdir, "shared"); + unlink(buf); + + snprintf(buf, sizeof(buf), "%s/%s", tmpdir, "parent1"); + unlink(buf); + + snprintf(buf, sizeof(buf), "%s/%s", tmpdir, "parent2"); + unlink(buf); + + snprintf(buf, sizeof(buf), "%s/%s", tmpdir, "child"); + unlink(buf); + + rmdir(tmpdir); +} + +int main(int argc, char *argv[]) +{ + struct forktestmem *shmem; + char tmpdir[] = "forktmpXXXXXX"; + int shared_fd; + int ret; + pid_t p; + + if (argc > 1) + return 0; + + shmem = mmap(0, sizeof(struct forktestmem), PROT_READ|PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (shmem == MAP_FAILED) { + fprintf(stderr, "mmap failed\n"); + exit(1); + } + + pthread_barrierattr_init(&shmem->barrierattr); + pthread_barrierattr_setpshared(&shmem->barrierattr, PTHREAD_PROCESS_SHARED); + pthread_barrier_init(&shmem->barrier, &shmem->barrierattr, 2); + + ret = io_uring_queue_init(10, &shmem->ring, 0); + if (ret < 0) { + fprintf(stderr, "queue init failed\n"); + exit(1); + } + + if (mkdtemp(tmpdir) == NULL) { + fprintf(stderr, "temp directory creation failed\n"); + exit(1); + } + + shared_fd = open_tempfile(tmpdir, "shared"); + + /* + * First do a write before the fork, to test whether child can + * reap that + */ + if (submit_write(&shmem->ring, shared_fd, "before fork: write 
shared fd\n", 0)) + goto errcleanup; + + p = fork(); + switch (p) { + case -1: + fprintf(stderr, "fork failed\n"); + goto errcleanup; + + default: { + /* parent */ + int parent_fd1; + int parent_fd2; + int wstatus; + + /* wait till fork is started up */ + pthread_barrier_wait(&shmem->barrier); + + parent_fd1 = open_tempfile(tmpdir, "parent1"); + parent_fd2 = open_tempfile(tmpdir, "parent2"); + + /* do a parent write to the shared fd */ + if (submit_write(&shmem->ring, shared_fd, "parent: write shared fd\n", 0)) + goto errcleanup; + + /* do a parent write to an fd where same numbered fd exists in child */ + if (submit_write(&shmem->ring, parent_fd1, "parent: write parent fd 1\n", 0)) + goto errcleanup; + + /* do a parent write to an fd where no same numbered fd exists in child */ + if (submit_write(&shmem->ring, parent_fd2, "parent: write parent fd 2\n", 0)) + goto errcleanup; + + /* wait to switch read/writ roles with child */ + pthread_barrier_wait(&shmem->barrier); + + /* now wait for child to exit, to ensure we still can read completion */ + waitpid(p, &wstatus, 0); + if (WEXITSTATUS(wstatus) != 0) { + fprintf(stderr, "child failed\n"); + goto errcleanup; + } + + if (wait_cqe(&shmem->ring, "p cqe 1")) + goto errcleanup; + + if (wait_cqe(&shmem->ring, "p cqe 2")) + goto errcleanup; + + break; + } + case 0: { + /* child */ + int child_fd; + + /* wait till fork is started up */ + pthread_barrier_wait(&shmem->barrier); + + child_fd = open_tempfile(tmpdir, "child"); + + if (wait_cqe(&shmem->ring, "c cqe shared")) + exit(1); + + if (wait_cqe(&shmem->ring, "c cqe parent 1")) + exit(1); + + if (wait_cqe(&shmem->ring, "c cqe parent 2")) + exit(1); + + if (wait_cqe(&shmem->ring, "c cqe parent 3")) + exit(1); + + /* wait to switch read/writ roles with parent */ + pthread_barrier_wait(&shmem->barrier); + + if (submit_write(&shmem->ring, child_fd, "child: write child fd\n", 0)) + exit(1); + + /* ensure both writes have finished before child exits */ + if 
(submit_write(&shmem->ring, shared_fd, "child: write shared fd\n", 2)) + exit(1); + + exit(0); + } + } + + if (verify_file(tmpdir, "shared", + "before fork: write shared fd\n" + "parent: write shared fd\n" + "child: write shared fd\n") || + verify_file(tmpdir, "parent1", "parent: write parent fd 1\n") || + verify_file(tmpdir, "parent2", "parent: write parent fd 2\n") || + verify_file(tmpdir, "child", "child: write child fd\n")) + goto errcleanup; + + cleanup(tmpdir); + exit(0); + +errcleanup: + cleanup(tmpdir); + exit(1); +} diff --git a/test/b19062a56726-test.c b/test/b19062a56726-test.c new file mode 100644 index 0000000..697a416 --- /dev/null +++ b/test/b19062a56726-test.c @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: MIT */ +// autogenerated by syzkaller (https://github.com/google/syzkaller) + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef __NR_io_uring_setup +#define __NR_io_uring_setup 425 +#endif + +int main(int argc, char *argv[]) +{ + if (argc > 1) + return 0; + + mmap((void *) 0x20000000, 0x1000000, 3, 0x32, -1, 0); + + *(uint32_t*)0x20000200 = 0; + *(uint32_t*)0x20000204 = 0; + *(uint32_t*)0x20000208 = 5; + *(uint32_t*)0x2000020c = 0x400; + *(uint32_t*)0x20000210 = 0; + *(uint32_t*)0x20000214 = 0; + *(uint32_t*)0x20000218 = 0; + *(uint32_t*)0x2000021c = 0; + *(uint32_t*)0x20000220 = 0; + *(uint32_t*)0x20000224 = 0; + *(uint32_t*)0x20000228 = 0; + *(uint32_t*)0x2000022c = 0; + *(uint32_t*)0x20000230 = 0; + *(uint32_t*)0x20000234 = 0; + *(uint32_t*)0x20000238 = 0; + *(uint32_t*)0x2000023c = 0; + *(uint32_t*)0x20000240 = 0; + *(uint32_t*)0x20000244 = 0; + *(uint64_t*)0x20000248 = 0; + *(uint32_t*)0x20000250 = 0; + *(uint32_t*)0x20000254 = 0; + *(uint32_t*)0x20000258 = 0; + *(uint32_t*)0x2000025c = 0; + *(uint32_t*)0x20000260 = 0; + *(uint32_t*)0x20000264 = 0; + *(uint32_t*)0x20000268 = 0; + *(uint32_t*)0x2000026c = 0; + *(uint64_t*)0x20000270 = 0; + syscall(__NR_io_uring_setup, 0xc9f, 0x20000200); + 
return 0; +} diff --git a/test/b5837bd5311d-test.c b/test/b5837bd5311d-test.c new file mode 100644 index 0000000..57a2b58 --- /dev/null +++ b/test/b5837bd5311d-test.c @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: Check to see if wait_nr is being honored. + */ +#include +#include "liburing.h" + +int main(int argc, char *argv[]) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct io_uring ring; + int ret; + struct __kernel_timespec ts = { + .tv_sec = 0, + .tv_nsec = 10000000 + }; + + if (argc > 1) + return 0; + + if (io_uring_queue_init(4, &ring, 0) != 0) { + fprintf(stderr, "ring setup failed\n"); + return 1; + } + + /* + * First, submit the timeout sqe so we can actually finish the test + * if everything is in working order. + */ + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + return 1; + } + io_uring_prep_timeout(sqe, &ts, (unsigned)-1, 0); + + ret = io_uring_submit(&ring); + if (ret != 1) { + fprintf(stderr, "Got submit %d, expected 1\n", ret); + return 1; + } + + /* + * Next, submit a nop and wait for two events. If everything is working + * as it should, we should be waiting for more than a millisecond and we + * should see two cqes. Otherwise, execution continues immediately + * and we see only one cqe. 
+ */ + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + return 1; + } + io_uring_prep_nop(sqe); + + ret = io_uring_submit_and_wait(&ring, 2); + if (ret != 1) { + fprintf(stderr, "Got submit %d, expected 1\n", ret); + return 1; + } + + if (io_uring_peek_cqe(&ring, &cqe) != 0) { + fprintf(stderr, "Unable to peek cqe!\n"); + return 1; + } + + io_uring_cqe_seen(&ring, cqe); + + if (io_uring_peek_cqe(&ring, &cqe) != 0) { + fprintf(stderr, "Unable to peek cqe!\n"); + return 1; + } + + io_uring_queue_exit(&ring); + return 0; +} diff --git a/test/ce593a6c480a-test.c b/test/ce593a6c480a-test.c new file mode 100644 index 0000000..9aee2f2 --- /dev/null +++ b/test/ce593a6c480a-test.c @@ -0,0 +1,133 @@ +/* + * Test 5.7 regression with task_work not being run while a task is + * waiting on another event in the kernel. + */ +#include +#include +#include +#include +#include +#include +#include +#include "liburing.h" + +static int use_sqpoll = 0; + +void notify_fd(int fd) +{ + char buf[8] = {0, 0, 0, 0, 0, 0, 1}; + int ret; + + ret = write(fd, &buf, 8); + if (ret < 0) + perror("write"); +} + +void *delay_set_fd_from_thread(void *data) +{ + int fd = (intptr_t) data; + + sleep(1); + notify_fd(fd); + return NULL; +} + +int main(int argc, char *argv[]) +{ + struct io_uring_params p = {}; + struct io_uring ring; + int loop_fd, other_fd; + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe = NULL; + int ret, use_fd; + char buf[8] = {0, 0, 0, 0, 0, 0, 1}; + pthread_t tid; + + if (argc > 1) + return 0; + + /* Create an eventfd to be registered with the loop to be + * notified of events being ready + */ + loop_fd = eventfd(0, EFD_CLOEXEC); + if (loop_fd == -1) { + fprintf(stderr, "eventfd errno=%d\n", errno); + return 1; + } + + /* Create an eventfd that can create events */ + use_fd = other_fd = eventfd(0, EFD_CLOEXEC); + if (other_fd == -1) { + fprintf(stderr, "eventfd errno=%d\n", errno); + return 1; + } + + if (use_sqpoll) + p.flags = 
IORING_SETUP_SQPOLL; + + /* Setup the ring with a registered event fd to be notified on events */ + ret = io_uring_queue_init_params(8, &ring, &p); + if (ret) { + fprintf(stderr, "queue_init=%d\n", ret); + return 1; + } + ret = io_uring_register_eventfd(&ring, loop_fd); + if (ret < 0) { + fprintf(stderr, "register_eventfd=%d\n", ret); + return 1; + } + + if (use_sqpoll) { + ret = io_uring_register_files(&ring, &other_fd, 1); + if (ret < 0) { + fprintf(stderr, "register_files=%d\n", ret); + return 1; + } + use_fd = 0; + } + + /* Submit a poll operation to wait on an event in other_fd */ + sqe = io_uring_get_sqe(&ring); + io_uring_prep_poll_add(sqe, use_fd, POLLIN); + sqe->user_data = 1; + if (use_sqpoll) + sqe->flags |= IOSQE_FIXED_FILE; + ret = io_uring_submit(&ring); + if (ret != 1) { + fprintf(stderr, "submit=%d\n", ret); + return 1; + } + + /* + * CASE 3: Hangs forever in Linux 5.7.5; Works in Linux 5.6.0 When this + * code is uncommented, we don't se a notification on other_fd until + * _after_ we have started the read on loop_fd. In that case, the read() on + * loop_fd seems to hang forever. + */ + pthread_create(&tid, NULL, delay_set_fd_from_thread, + (void*) (intptr_t) other_fd); + + /* Wait on the event fd for an event to be ready */ + ret = read(loop_fd, buf, 8); + if (ret < 0) { + perror("read"); + return 1; + } else if (ret != 8) { + fprintf(stderr, "Odd-sized eventfd read: %d\n", ret); + return 1; + } + + + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret) { + fprintf(stderr, "wait_cqe=%d\n", ret); + return ret; + } + if (cqe->res < 0) { + fprintf(stderr, "cqe->res=%d\n", cqe->res); + return 1; + } + + io_uring_cqe_seen(&ring, cqe); + return 0; +} diff --git a/test/close-opath.c b/test/close-opath.c new file mode 100644 index 0000000..884bda9 --- /dev/null +++ b/test/close-opath.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: MIT + +#define _GNU_SOURCE 1 +#define _FILE_OFFSET_BITS 64 + +// Test program for io_uring IORING_OP_CLOSE with O_PATH file. 
+// Author: Clayton Harris , 2020-06-07 + +// linux 5.6.14-300.fc32.x86_64 +// gcc 10.1.1-1.fc32 +// liburing.x86_64 0.5-1.fc32 + +// gcc -O2 -Wall -Wextra -std=c11 -o close_opath close_opath.c -luring +// ./close_opath testfilepath + +#include +#include +#include +#include +#include +#include +#include + +typedef struct +{ + const char *const flnames; + const int oflags; +} oflgs_t; + +static int test_io_uring_close(struct io_uring *ring, int fd) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + int ret; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "io_uring_get_sqe() failed\n"); + return -ENOENT; + } + + io_uring_prep_close(sqe, fd); + + ret = io_uring_submit(ring); + if (ret < 0) { + fprintf(stderr, "io_uring_submit() failed, errno %d: %s\n", + -ret, strerror(-ret)); + return ret; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "io_uring_wait_cqe() failed, errno %d: %s\n", + -ret, strerror(-ret)); + return ret; + } + + ret = cqe->res; + io_uring_cqe_seen(ring, cqe); + + if (ret < 0 && ret != -EOPNOTSUPP) { + fprintf(stderr, "io_uring close() failed, errno %d: %s\n", + -ret, strerror(-ret)); + return ret; + } + + return 0; +} + +static int open_file(const char *path, const oflgs_t *oflgs) +{ + int fd; + + fd = openat(AT_FDCWD, path, oflgs->oflags, 0); + if (fd < 0) { + int err = errno; + fprintf(stderr, "openat(%s, %s) failed, errno %d: %s\n", + path, oflgs->flnames, err, strerror(err)); + return -err; + } + + return fd; +} + +int main(int argc, char *argv[]) +{ + const char *fname = "."; + struct io_uring ring; + int ret, i; + static const oflgs_t oflgs[] = { + { "O_RDONLY", O_RDONLY }, + { "O_PATH", O_PATH } + }; + + ret = io_uring_queue_init(2, &ring, 0); + if (ret < 0) { + fprintf(stderr, "io_uring_queue_init() failed, errno %d: %s\n", + -ret, strerror(-ret)); + return 0x02; + } + +#define OFLGS_SIZE (sizeof(oflgs) / sizeof(oflgs[0])) + + ret = 0; + for (i = 0; i < OFLGS_SIZE; i++) { + int fd; + 
+ fd = open_file(fname, &oflgs[i]); + if (fd < 0) { + ret |= 0x02; + break; + } + + /* Should always succeed */ + if (test_io_uring_close(&ring, fd) < 0) + ret |= 0x04 << i; + } +#undef OFLGS_SIZE + + io_uring_queue_exit(&ring); + return ret; +} diff --git a/test/config b/test/config new file mode 100644 index 0000000..80a5f46 --- /dev/null +++ b/test/config @@ -0,0 +1,4 @@ +# Define raw test devices (or files) for test cases, if any +# Copy this to config.local, and uncomment + define test files +# +# TEST_FILES="/dev/nvme0n1p2 /data/file" diff --git a/test/connect.c b/test/connect.c new file mode 100644 index 0000000..668997f --- /dev/null +++ b/test/connect.c @@ -0,0 +1,258 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Check that IORING_OP_CONNECT works, with and without other side + * being open. + */ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static int create_socket(void) +{ + int fd; + + fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (fd == -1) { + perror("socket()"); + return -1; + } + + return fd; +} + +static int submit_and_wait(struct io_uring *ring, int *res) +{ + struct io_uring_cqe *cqe; + int ret; + + ret = io_uring_submit_and_wait(ring, 1); + if (ret != 1) { + fprintf(stderr, "io_using_submit: got %d\n", ret); + return 1; + } + + ret = io_uring_peek_cqe(ring, &cqe); + if (ret) { + fprintf(stderr, "io_uring_peek_cqe(): no cqe returned"); + return 1; + } + + *res = cqe->res; + io_uring_cqe_seen(ring, cqe); + return 0; +} + +static int wait_for(struct io_uring *ring, int fd, int mask) +{ + struct io_uring_sqe *sqe; + int ret, res; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "unable to get sqe\n"); + return -1; + } + + io_uring_prep_poll_add(sqe, fd, mask); + sqe->user_data = 2; + + ret = submit_and_wait(ring, &res); + if (ret) + return -1; + + if (res < 0) { + fprintf(stderr, "poll(): failed with %d\n", res); + return -1; + } + + return 
res; +} + +static int listen_on_socket(int fd) +{ + struct sockaddr_in addr; + int ret; + + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = 0x1234; + addr.sin_addr.s_addr = 0x0100007fU; + + ret = bind(fd, (struct sockaddr*)&addr, sizeof(addr)); + if (ret == -1) { + perror("bind()"); + return -1; + } + + ret = listen(fd, 128); + if (ret == -1) { + perror("listen()"); + return -1; + } + + return 0; +} + +static int connect_socket(struct io_uring *ring, int fd, int *code) +{ + struct io_uring_sqe *sqe; + struct sockaddr_in addr; + int ret, res, val = 1; + socklen_t code_len = sizeof(*code); + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)); + if (ret == -1) { + perror("setsockopt()"); + return -1; + } + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); + if (ret == -1) { + perror("setsockopt()"); + return -1; + } + + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = 0x1234; + addr.sin_addr.s_addr = 0x0100007fU; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "unable to get sqe\n"); + return -1; + } + + io_uring_prep_connect(sqe, fd, (struct sockaddr*)&addr, sizeof(addr)); + sqe->user_data = 1; + + ret = submit_and_wait(ring, &res); + if (ret) + return -1; + + if (res == -EINPROGRESS) { + ret = wait_for(ring, fd, POLLOUT | POLLHUP | POLLERR); + if (ret == -1) + return -1; + + int ev = (ret & POLLOUT) || (ret & POLLHUP) || (ret & POLLERR); + if (!ev) { + fprintf(stderr, "poll(): returned invalid value %#x\n", ret); + return -1; + } + + ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, code, &code_len); + if (ret == -1) { + perror("getsockopt()"); + return -1; + } + } else + *code = res; + return 0; +} + +static int test_connect_with_no_peer(struct io_uring *ring) +{ + int connect_fd; + int ret, code; + + connect_fd = create_socket(); + if (connect_fd == -1) + return -1; + + ret = connect_socket(ring, connect_fd, &code); + if (ret == -1) + goto err; + + if (code 
!= -ECONNREFUSED) { + fprintf(stderr, "connect failed with %d\n", code); + goto err; + } + + close(connect_fd); + return 0; + +err: + close(connect_fd); + return -1; +} + +static int test_connect(struct io_uring *ring) +{ + int accept_fd; + int connect_fd; + int ret, code; + + accept_fd = create_socket(); + if (accept_fd == -1) + return -1; + + ret = listen_on_socket(accept_fd); + if (ret == -1) + goto err1; + + connect_fd = create_socket(); + if (connect_fd == -1) + goto err1; + + ret = connect_socket(ring, connect_fd, &code); + if (ret == -1) + goto err2; + + if (code != 0) { + fprintf(stderr, "connect failed with %d\n", code); + goto err2; + } + + close(connect_fd); + close(accept_fd); + + return 0; + +err2: + close(connect_fd); + +err1: + close(accept_fd); + return -1; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int ret; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + fprintf(stderr, "io_uring_queue_setup() = %d\n", ret); + return 1; + } + + ret = test_connect_with_no_peer(&ring); + if (ret == -1) { + fprintf(stderr, "test_connect_with_no_peer(): failed\n"); + return 1; + } + + ret = test_connect(&ring); + if (ret == -1) { + fprintf(stderr, "test_connect(): failed\n"); + return 1; + } + + io_uring_queue_exit(&ring); + return 0; +} diff --git a/test/cq-full.c b/test/cq-full.c new file mode 100644 index 0000000..5c4041b --- /dev/null +++ b/test/cq-full.c @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: test CQ ring overflow + * + */ +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static int queue_n_nops(struct io_uring *ring, int n) +{ + struct io_uring_sqe *sqe; + int i, ret; + + for (i = 0; i < n; i++) { + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + io_uring_prep_nop(sqe); + } + + ret = io_uring_submit(ring); + if (ret < n) { + printf("Submitted only %d\n", ret); + goto err; + } else 
if (ret < 0) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + return 0; +err: + return 1; +} + +int main(int argc, char *argv[]) +{ + struct io_uring_cqe *cqe; + struct io_uring_params p; + struct io_uring ring; + int i, ret; + + if (argc > 1) + return 0; + + memset(&p, 0, sizeof(p)); + ret = io_uring_queue_init_params(4, &ring, &p); + if (ret) { + printf("ring setup failed\n"); + return 1; + + } + + if (queue_n_nops(&ring, 4)) + goto err; + if (queue_n_nops(&ring, 4)) + goto err; + if (queue_n_nops(&ring, 4)) + goto err; + + i = 0; + do { + ret = io_uring_peek_cqe(&ring, &cqe); + if (ret < 0) { + if (ret == -EAGAIN) + break; + printf("wait completion %d\n", ret); + goto err; + } + io_uring_cqe_seen(&ring, cqe); + if (!cqe) + break; + i++; + } while (1); + + if (i < 8 || + ((*ring.cq.koverflow != 4) && !(p.features & IORING_FEAT_NODROP))) { + printf("CQ overflow fail: %d completions, %u overflow\n", i, + *ring.cq.koverflow); + goto err; + } + + io_uring_queue_exit(&ring); + return 0; +err: + io_uring_queue_exit(&ring); + return 1; +} diff --git a/test/cq-overflow-peek.c b/test/cq-overflow-peek.c new file mode 100644 index 0000000..72b6768 --- /dev/null +++ b/test/cq-overflow-peek.c @@ -0,0 +1,82 @@ +/* + * Check if the kernel sets IORING_SQ_CQ_OVERFLOW so that peeking events + * still enter the kernel to flush events, if the CQ side is overflown. 
+ */ +#include +#include +#include +#include "liburing.h" + +static int test_cq_overflow(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + unsigned flags; + int issued = 0; + int ret = 0; + + do { + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + ret = io_uring_submit(ring); + if (ret <= 0) { + if (ret != -EBUSY) + fprintf(stderr, "sqe submit failed: %d\n", ret); + break; + } + issued++; + } while (ret > 0); + + assert(ret == -EBUSY); + + flags = IO_URING_READ_ONCE(*ring->sq.kflags); + if (!(flags & IORING_SQ_CQ_OVERFLOW)) { + fprintf(stdout, "OVERFLOW not set on -EBUSY, skipping\n"); + goto done; + } + + while (issued) { + ret = io_uring_peek_cqe(ring, &cqe); + if (ret) { + if (ret != -EAGAIN) { + fprintf(stderr, "peek completion failed: %s\n", + strerror(-ret)); + break; + } + continue; + } + io_uring_cqe_seen(ring, cqe); + issued--; + } + +done: + return 0; +err: + return 1; +} + +int main(int argc, char *argv[]) +{ + int ret; + struct io_uring ring; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(16, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed: %d\n", ret); + return 1; + } + + ret = test_cq_overflow(&ring); + if (ret) { + fprintf(stderr, "test_cq_overflow failed\n"); + return 1; + } + + return 0; +} diff --git a/test/cq-overflow.c b/test/cq-overflow.c new file mode 100644 index 0000000..37b8d24 --- /dev/null +++ b/test/cq-overflow.c @@ -0,0 +1,534 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: run various CQ ring overflow tests + * + */ +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +#define FILE_SIZE (256 * 1024) +#define BS 4096 +#define BUFFERS (FILE_SIZE / BS) + +static struct iovec *vecs; + +static int create_buffers(void) +{ + int i; + + vecs = malloc(BUFFERS * sizeof(struct iovec)); + for (i = 0; i < BUFFERS; i++) { + if (posix_memalign(&vecs[i].iov_base, BS, BS)) + return 1; + 
vecs[i].iov_len = BS; + } + + return 0; +} + +static int create_file(const char *file) +{ + ssize_t ret; + char *buf; + int fd; + + buf = malloc(FILE_SIZE); + memset(buf, 0xaa, FILE_SIZE); + + fd = open(file, O_WRONLY | O_CREAT, 0644); + if (fd < 0) { + perror("open file"); + return 1; + } + ret = write(fd, buf, FILE_SIZE); + close(fd); + return ret != FILE_SIZE; +} + +#define ENTRIES 8 + +static int test_io(const char *file, unsigned long usecs, unsigned *drops, int fault) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct io_uring_params p; + unsigned reaped, total; + struct io_uring ring; + int nodrop, i, fd, ret; + + fd = open(file, O_RDONLY | O_DIRECT); + if (fd < 0) { + perror("file open"); + goto err; + } + + memset(&p, 0, sizeof(p)); + ret = io_uring_queue_init_params(ENTRIES, &ring, &p); + if (ret) { + fprintf(stderr, "ring create failed: %d\n", ret); + goto err; + } + nodrop = 0; + if (p.features & IORING_FEAT_NODROP) + nodrop = 1; + + total = 0; + for (i = 0; i < BUFFERS / 2; i++) { + off_t offset; + + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "sqe get failed\n"); + goto err; + } + offset = BS * (rand() % BUFFERS); + if (fault && i == ENTRIES + 4) + vecs[i].iov_base = NULL; + io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset); + + ret = io_uring_submit(&ring); + if (nodrop && ret == -EBUSY) { + *drops = 1; + total = i; + break; + } else if (ret != 1) { + fprintf(stderr, "submit got %d, wanted %d\n", ret, 1); + total = i; + break; + } + total++; + } + + if (*drops) + goto reap_it; + + usleep(usecs); + + for (i = total; i < BUFFERS; i++) { + off_t offset; + + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "sqe get failed\n"); + goto err; + } + offset = BS * (rand() % BUFFERS); + io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset); + + ret = io_uring_submit(&ring); + if (nodrop && ret == -EBUSY) { + *drops = 1; + break; + } else if (ret != 1) { + fprintf(stderr, "submit got %d, wanted %d\n", ret, 1); + 
break; + } + total++; + } + +reap_it: + reaped = 0; + do { + if (nodrop) { + /* nodrop should never lose events */ + if (reaped == total) + break; + } else { + if (reaped + *ring.cq.koverflow == total) + break; + } + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret) { + fprintf(stderr, "wait_cqe=%d\n", ret); + goto err; + } + if (cqe->res != BS) { + if (!(fault && cqe->res == -EFAULT)) { + fprintf(stderr, "cqe res %d, wanted %d\n", + cqe->res, BS); + goto err; + } + } + io_uring_cqe_seen(&ring, cqe); + reaped++; + } while (1); + + if (!io_uring_peek_cqe(&ring, &cqe)) { + fprintf(stderr, "found unexpected completion\n"); + goto err; + } + + if (!nodrop) { + *drops = *ring.cq.koverflow; + } else if (*ring.cq.koverflow) { + fprintf(stderr, "Found %u overflows\n", *ring.cq.koverflow); + goto err; + } + + io_uring_queue_exit(&ring); + close(fd); + return 0; +err: + if (fd != -1) + close(fd); + io_uring_queue_exit(&ring); + return 1; +} + +static int reap_events(struct io_uring *ring, unsigned nr_events, int do_wait) +{ + struct io_uring_cqe *cqe; + int i, ret = 0, seq = 0; + + for (i = 0; i < nr_events; i++) { + if (do_wait) + ret = io_uring_wait_cqe(ring, &cqe); + else + ret = io_uring_peek_cqe(ring, &cqe); + if (ret) { + if (ret != -EAGAIN) + fprintf(stderr, "cqe peek failed: %d\n", ret); + break; + } + if (cqe->user_data != seq) { + fprintf(stderr, "cqe sequence out-of-order\n"); + fprintf(stderr, "got %d, wanted %d\n", (int) cqe->user_data, + seq); + return -EINVAL; + } + seq++; + io_uring_cqe_seen(ring, cqe); + } + + return i ? i : ret; +} + +/* + * Setup ring with CQ_NODROP and check we get -EBUSY on trying to submit new IO + * on an overflown ring, and that we get all the events (even overflows) when + * we finally reap them. 
+ */ +static int test_overflow_nodrop(void) +{ + struct __kernel_timespec ts; + struct io_uring_sqe *sqe; + struct io_uring_params p; + struct io_uring ring; + unsigned pending; + int ret, i, j; + + memset(&p, 0, sizeof(p)); + ret = io_uring_queue_init_params(4, &ring, &p); + if (ret) { + fprintf(stderr, "io_uring_queue_init failed %d\n", ret); + return 1; + } + if (!(p.features & IORING_FEAT_NODROP)) { + fprintf(stdout, "FEAT_NODROP not supported, skipped\n"); + return 0; + } + + ts.tv_sec = 0; + ts.tv_nsec = 10000000; + + /* submit 4x4 SQEs, should overflow the ring by 8 */ + pending = 0; + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + + io_uring_prep_timeout(sqe, &ts, -1U, 0); + sqe->user_data = (i * 4) + j; + } + + ret = io_uring_submit(&ring); + if (ret <= 0) { + if (ret == -EBUSY) + break; + fprintf(stderr, "sqe submit failed: %d, %d\n", ret, pending); + goto err; + } + pending += ret; + } + + /* wait for timers to fire */ + usleep(2 * 10000); + + /* + * We should have 16 pending CQEs now, 8 of them in the overflow list. 
Any + * attempt to queue more IO should return -EBUSY + */ + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + + io_uring_prep_nop(sqe); + ret = io_uring_submit(&ring); + if (ret != -EBUSY) { + fprintf(stderr, "expected sqe submit busy: %d\n", ret); + goto err; + } + + /* reap the events we should have available */ + ret = reap_events(&ring, pending, 1); + if (ret < 0) { + fprintf(stderr, "ret=%d\n", ret); + goto err; + } + + if (*ring.cq.koverflow) { + fprintf(stderr, "cq ring overflow %d, expected 0\n", + *ring.cq.koverflow); + goto err; + } + + io_uring_queue_exit(&ring); + return 0; +err: + io_uring_queue_exit(&ring); + return 1; +} + +/* + * Submit some NOPs and watch if the overflow is correct + */ +static int test_overflow(void) +{ + struct io_uring ring; + struct io_uring_params p; + struct io_uring_sqe *sqe; + unsigned pending; + int ret, i, j; + + memset(&p, 0, sizeof(p)); + ret = io_uring_queue_init_params(4, &ring, &p); + if (ret) { + fprintf(stderr, "io_uring_queue_init failed %d\n", ret); + return 1; + } + + /* submit 4x4 SQEs, should overflow the ring by 8 */ + pending = 0; + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + + io_uring_prep_nop(sqe); + sqe->user_data = (i * 4) + j; + } + + ret = io_uring_submit(&ring); + if (ret == 4) { + pending += 4; + continue; + } + if (p.features & IORING_FEAT_NODROP) { + if (ret == -EBUSY) + break; + } + fprintf(stderr, "sqe submit failed: %d\n", ret); + goto err; + } + + /* we should now have 8 completions ready */ + ret = reap_events(&ring, pending, 0); + if (ret < 0) + goto err; + + if (!(p.features & IORING_FEAT_NODROP)) { + if (*ring.cq.koverflow != 8) { + fprintf(stderr, "cq ring overflow %d, expected 8\n", + *ring.cq.koverflow); + goto err; + } + } + io_uring_queue_exit(&ring); + return 0; +err: + io_uring_queue_exit(&ring); + return 1; +} + +/* + 
* Test attempted submit with overflown cq ring that can't get flushed + */ +static int test_overflow_nodrop_submit_ebusy(void) +{ + struct __kernel_timespec ts; + struct io_uring_sqe *sqe; + struct io_uring_params p; + struct io_uring ring; + unsigned pending; + int ret, i, j; + + memset(&p, 0, sizeof(p)); + ret = io_uring_queue_init_params(4, &ring, &p); + if (ret) { + fprintf(stderr, "io_uring_queue_init failed %d\n", ret); + return 1; + } + if (!(p.features & IORING_FEAT_NODROP)) { + fprintf(stdout, "FEAT_NODROP not supported, skipped\n"); + return 0; + } + + ts.tv_sec = 1; + ts.tv_nsec = 0; + + /* submit 4x4 SQEs, should overflow the ring by 8 */ + pending = 0; + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + + io_uring_prep_timeout(sqe, &ts, -1U, 0); + sqe->user_data = (i * 4) + j; + } + + ret = io_uring_submit(&ring); + if (ret <= 0) { + fprintf(stderr, "sqe submit failed: %d, %d\n", ret, pending); + goto err; + } + pending += ret; + } + + /* wait for timers to fire */ + usleep(1100000); + + /* + * We should have 16 pending CQEs now, 8 of them in the overflow list. Any + * attempt to queue more IO should return -EBUSY + */ + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + + io_uring_prep_nop(sqe); + ret = io_uring_submit(&ring); + if (ret != -EBUSY) { + fprintf(stderr, "expected sqe submit busy: %d\n", ret); + goto err; + } + + /* + * Now peek existing events so the CQ ring is empty, apart from the + * backlog + */ + ret = reap_events(&ring, pending, 0); + if (ret < 0) { + fprintf(stderr, "ret=%d\n", ret); + goto err; + } else if (ret < 8) { + fprintf(stderr, "only found %d events, expected 8\n", ret); + goto err; + } + + /* + * We should now be able to submit our previous nop that's still + * in the sq ring, as the kernel can flush the existing backlog + * to the now empty CQ ring. 
+ */ + ret = io_uring_submit(&ring); + if (ret != 1) { + fprintf(stderr, "submit got %d, expected 1\n", ret); + goto err; + } + + io_uring_queue_exit(&ring); + return 0; +err: + io_uring_queue_exit(&ring); + return 1; +} + + +int main(int argc, char *argv[]) +{ + unsigned iters, drops; + unsigned long usecs; + int ret; + + if (argc > 1) + return 0; + + ret = test_overflow(); + if (ret) { + printf("test_overflow failed\n"); + return ret; + } + + ret = test_overflow_nodrop(); + if (ret) { + printf("test_overflow_nodrop failed\n"); + return ret; + } + + ret = test_overflow_nodrop_submit_ebusy(); + if (ret) { + fprintf(stderr, "test_overflow_nodrop_submit_ebusy failed\n"); + return ret; + } + + if (create_file(".basic-rw")) { + fprintf(stderr, "file creation failed\n"); + goto err; + } + if (create_buffers()) { + fprintf(stderr, "buffer creation failed\n"); + goto err; + } + + iters = 0; + usecs = 1000; + do { + drops = 0; + + if (test_io(".basic-rw", usecs, &drops, 0)) { + fprintf(stderr, "test_io nofault failed\n"); + goto err; + } + if (drops) + break; + usecs = (usecs * 12) / 10; + iters++; + } while (iters < 40); + + if (test_io(".basic-rw", usecs, &drops, 0)) { + fprintf(stderr, "test_io nofault failed\n"); + goto err; + } + + if (test_io(".basic-rw", usecs, &drops, 1)) { + fprintf(stderr, "test_io fault failed\n"); + goto err; + } + + unlink(".basic-rw"); + return 0; +err: + unlink(".basic-rw"); + return 1; +} diff --git a/test/cq-peek-batch.c b/test/cq-peek-batch.c new file mode 100644 index 0000000..ee7537c --- /dev/null +++ b/test/cq-peek-batch.c @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: test CQ peek-batch + * + */ +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> + +#include "liburing.h" + +static int queue_n_nops(struct io_uring *ring, int n, int offset) +{ + struct io_uring_sqe *sqe; + int i, ret; + + for (i = 0; i < n; i++) { + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + 
} + + io_uring_prep_nop(sqe); + sqe->user_data = i + offset; + } + + ret = io_uring_submit(ring); + if (ret < n) { + printf("Submitted only %d\n", ret); + goto err; + } else if (ret < 0) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + return 0; +err: + return 1; +} + +#define CHECK_BATCH(ring, got, cqes, count, expected) do {\ + got = io_uring_peek_batch_cqe((ring), cqes, count);\ + if (got != expected) {\ + printf("Got %d CQs, expected %d\n", got, expected);\ + goto err;\ + }\ +} while(0) + +int main(int argc, char *argv[]) +{ + struct io_uring_cqe *cqes[8]; + struct io_uring ring; + int ret, i; + unsigned got; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(4, &ring, 0); + if (ret) { + printf("ring setup failed\n"); + return 1; + + } + + CHECK_BATCH(&ring, got, cqes, 4, 0); + if (queue_n_nops(&ring, 4, 0)) + goto err; + + CHECK_BATCH(&ring, got, cqes, 4, 4); + for (i=0;i<4;i++) { + if (i != cqes[i]->user_data) { + printf("Got user_data %lld, expected %d\n", cqes[i]->user_data, i); + goto err; + } + } + + if (queue_n_nops(&ring, 4, 4)) + goto err; + + io_uring_cq_advance(&ring, 4); + CHECK_BATCH(&ring, got, cqes, 4, 4); + for (i=0;i<4;i++) { + if (i + 4 != cqes[i]->user_data) { + printf("Got user_data %lld, expected %d\n", cqes[i]->user_data, i + 4); + goto err; + } + } + + io_uring_cq_advance(&ring, 8); + io_uring_queue_exit(&ring); + return 0; +err: + io_uring_queue_exit(&ring); + return 1; +} diff --git a/test/cq-ready.c b/test/cq-ready.c new file mode 100644 index 0000000..7af7e54 --- /dev/null +++ b/test/cq-ready.c @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: test CQ ready + * + */ +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static int queue_n_nops(struct io_uring *ring, int n) +{ + struct io_uring_sqe *sqe; + int i, ret; + + for (i = 0; i < n; i++) { + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + 
io_uring_prep_nop(sqe); + } + + ret = io_uring_submit(ring); + if (ret < n) { + printf("Submitted only %d\n", ret); + goto err; + } else if (ret < 0) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + return 0; +err: + return 1; +} + +#define CHECK_READY(ring, expected) do {\ + ready = io_uring_cq_ready((ring));\ + if (ready != expected) {\ + printf("Got %d CQs ready, expected %d\n", ready, expected);\ + goto err;\ + }\ +} while(0) + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int ret; + unsigned ready; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(4, &ring, 0); + if (ret) { + printf("ring setup failed\n"); + return 1; + + } + + CHECK_READY(&ring, 0); + if (queue_n_nops(&ring, 4)) + goto err; + + CHECK_READY(&ring, 4); + io_uring_cq_advance(&ring, 4); + CHECK_READY(&ring, 0); + if (queue_n_nops(&ring, 4)) + goto err; + + CHECK_READY(&ring, 4); + + io_uring_cq_advance(&ring, 1); + CHECK_READY(&ring, 3); + + io_uring_cq_advance(&ring, 2); + CHECK_READY(&ring, 1); + + io_uring_cq_advance(&ring, 1); + CHECK_READY(&ring, 0); + + io_uring_queue_exit(&ring); + return 0; +err: + io_uring_queue_exit(&ring); + return 1; +} diff --git a/test/cq-size.c b/test/cq-size.c new file mode 100644 index 0000000..b7dd5b4 --- /dev/null +++ b/test/cq-size.c @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: test CQ ring sizing + */ +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +int main(int argc, char *argv[]) +{ + struct io_uring_params p; + struct io_uring ring; + int ret; + + if (argc > 1) + return 0; + + memset(&p, 0, sizeof(p)); + p.flags = IORING_SETUP_CQSIZE; + p.cq_entries = 64; + + ret = io_uring_queue_init_params(4, &ring, &p); + if (ret) { + if (ret == -EINVAL) { + printf("Skipped, not supported on this kernel\n"); + goto done; + } + printf("ring setup failed\n"); + return 1; + } + + if (p.cq_entries < 64) { + printf("cq entries invalid (%d)\n", p.cq_entries); + goto 
err; + } + io_uring_queue_exit(&ring); + + memset(&p, 0, sizeof(p)); + p.flags = IORING_SETUP_CQSIZE; + p.cq_entries = 0; + + ret = io_uring_queue_init_params(4, &ring, &p); + if (ret >= 0 || errno != EINVAL) { + printf("zero sized cq ring succeeded\n"); + goto err; + } + +done: + return 0; +err: + io_uring_queue_exit(&ring); + return 1; +} diff --git a/test/d4ae271dfaae-test.c b/test/d4ae271dfaae-test.c new file mode 100644 index 0000000..6f263c6 --- /dev/null +++ b/test/d4ae271dfaae-test.c @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Test case for SQPOLL missing a 'ret' clear in case of busy. + * + * Heavily based on a test case from + * Xiaoguang Wang + */ +#include +#include +#include +#include +#include + +#include "liburing.h" + +#define FILE_SIZE (128 * 1024) + +static int create_file(const char *file) +{ + ssize_t ret; + char *buf; + int fd; + + buf = malloc(FILE_SIZE); + memset(buf, 0xaa, FILE_SIZE); + + fd = open(file, O_WRONLY | O_CREAT, 0644); + if (fd < 0) { + perror("open file"); + return 1; + } + ret = write(fd, buf, FILE_SIZE); + fsync(fd); + close(fd); + return ret != FILE_SIZE; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int i, fd, ret; + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct iovec *iovecs; + struct io_uring_params p; + char *fname; + void *buf; + + if (geteuid()) { + fprintf(stdout, "Test requires root, skipping\n"); + return 0; + } + + memset(&p, 0, sizeof(p)); + p.flags = IORING_SETUP_SQPOLL; + ret = io_uring_queue_init_params(4, &ring, &p); + if (ret < 0) { + fprintf(stderr, "queue_init: %s\n", strerror(-ret)); + return 1; + } + + if (argc > 1) { + fname = argv[1]; + } else { + fname = ".sqpoll.tmp"; + if (create_file(fname)) { + fprintf(stderr, "file creation failed\n"); + goto out; + } + } + + fd = open(fname, O_RDONLY | O_DIRECT); + if (fd < 0) { + perror("open"); + goto out; + } + + iovecs = calloc(10, sizeof(struct iovec)); + for (i = 0; i < 10; i++) { + if 
(posix_memalign(&buf, 4096, 4096)) + goto out; + iovecs[i].iov_base = buf; + iovecs[i].iov_len = 4096; + } + + ret = io_uring_register_files(&ring, &fd, 1); + if (ret < 0) { + fprintf(stderr, "register files %d\n", ret); + goto out; + } + + for (i = 0; i < 10; i++) { + sqe = io_uring_get_sqe(&ring); + if (!sqe) + break; + + io_uring_prep_readv(sqe, 0, &iovecs[i], 1, 0); + sqe->flags |= IOSQE_FIXED_FILE; + + ret = io_uring_submit(&ring); + usleep(1000); + } + + for (i = 0; i < 10; i++) { + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret) { + fprintf(stderr, "wait_cqe=%d\n", ret); + break; + } + if (cqe->res != 4096) { + fprintf(stderr, "ret=%d, wanted 4096\n", cqe->res); + ret = 1; + break; + } + io_uring_cqe_seen(&ring, cqe); + } + + close(fd); +out: + if (fname != argv[1]) + unlink(fname); + io_uring_queue_exit(&ring); + return ret; +} diff --git a/test/d77a67ed5f27-test.c b/test/d77a67ed5f27-test.c new file mode 100644 index 0000000..f3ef071 --- /dev/null +++ b/test/d77a67ed5f27-test.c @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: MIT */ +#include +#include +#include +#include +#include +#include "liburing.h" + +static void sig_alrm(int sig) +{ + fprintf(stderr, "Timed out!\n"); + exit(1); +} + +int main(int argc, char *argv[]) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct io_uring_params p; + struct io_uring ring; + int ret, data; + + if (argc > 1) + return 0; + + signal(SIGALRM, sig_alrm); + + memset(&p, 0, sizeof(p)); + p.sq_thread_idle = 100; + p.flags = IORING_SETUP_SQPOLL; + ret = io_uring_queue_init_params(4, &ring, &p); + if (ret) { + if (geteuid()) { + fprintf(stdout, "SQPOLL requires root, skipped\n"); + return 0; + } + fprintf(stderr, "ring create failed: %d\n", ret); + return 1; + } + + /* make sure sq thread is sleeping at this point */ + usleep(150000); + alarm(1); + + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "sqe get failed\n"); + return 1; + } + + io_uring_prep_nop(sqe); + io_uring_sqe_set_data(sqe, 
(void *) (unsigned long) 42); + io_uring_submit_and_wait(&ring, 1); + + ret = io_uring_peek_cqe(&ring, &cqe); + if (ret) { + fprintf(stderr, "cqe get failed\n"); + return 1; + } + + data = (unsigned long) io_uring_cqe_get_data(cqe); + if (data != 42) { + fprintf(stderr, "invalid data: %d\n", data); + return 1; + } + + return 0; +} diff --git a/test/defer.c b/test/defer.c new file mode 100644 index 0000000..05833d4 --- /dev/null +++ b/test/defer.c @@ -0,0 +1,273 @@ +/* SPDX-License-Identifier: MIT */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +struct test_context { + struct io_uring *ring; + struct io_uring_sqe **sqes; + struct io_uring_cqe *cqes; + int nr; +}; + +static void free_context(struct test_context *ctx) +{ + free(ctx->sqes); + free(ctx->cqes); + memset(ctx, 0, sizeof(*ctx)); +} + +static int init_context(struct test_context *ctx, struct io_uring *ring, int nr) +{ + struct io_uring_sqe *sqe; + int i; + + memset(ctx, 0, sizeof(*ctx)); + ctx->nr = nr; + ctx->ring = ring; + ctx->sqes = malloc(nr * sizeof(*ctx->sqes)); + ctx->cqes = malloc(nr * sizeof(*ctx->cqes)); + + if (!ctx->sqes || !ctx->cqes) + goto err; + + for (i = 0; i < nr; i++) { + sqe = io_uring_get_sqe(ring); + if (!sqe) + goto err; + io_uring_prep_nop(sqe); + sqe->user_data = i; + ctx->sqes[i] = sqe; + } + + return 0; +err: + free_context(ctx); + printf("init context failed\n"); + return 1; +} + +static int wait_cqes(struct test_context *ctx) +{ + int ret, i; + struct io_uring_cqe *cqe; + + for (i = 0; i < ctx->nr; i++) { + ret = io_uring_wait_cqe(ctx->ring, &cqe); + + if (ret < 0) { + printf("wait_cqes: wait completion %d\n", ret); + return 1; + } + memcpy(&ctx->cqes[i], cqe, sizeof(*cqe)); + io_uring_cqe_seen(ctx->ring, cqe); + } + + return 0; +} + +static int test_cancelled_userdata(struct io_uring *ring) +{ + struct test_context ctx; + int ret, i, nr = 100; + + if (init_context(&ctx, ring, nr)) + return 1; + + for (i = 0; i < nr; 
i++) + ctx.sqes[i]->flags |= IOSQE_IO_LINK; + + ret = io_uring_submit(ring); + if (ret <= 0) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + if (wait_cqes(&ctx)) + goto err; + + for (i = 0; i < nr; i++) { + if (i != ctx.cqes[i].user_data) { + printf("invalid user data\n"); + goto err; + } + } + + free_context(&ctx); + return 0; +err: + free_context(&ctx); + return 1; +} + +static int test_thread_link_cancel(struct io_uring *ring) +{ + struct test_context ctx; + int ret, i, nr = 100; + + if (init_context(&ctx, ring, nr)) + return 1; + + for (i = 0; i < nr; i++) + ctx.sqes[i]->flags |= IOSQE_IO_LINK; + + ret = io_uring_submit(ring); + if (ret <= 0) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + if (wait_cqes(&ctx)) + goto err; + + for (i = 0; i < nr; i++) { + bool fail = false; + + if (i == 0) + fail = (ctx.cqes[i].res != -EINVAL); + else + fail = (ctx.cqes[i].res != -ECANCELED); + + if (fail) { + printf("invalid status\n"); + goto err; + } + } + + free_context(&ctx); + return 0; +err: + free_context(&ctx); + return 1; +} + +static int run_drained(struct io_uring *ring, int nr) +{ + struct test_context ctx; + int ret, i; + + if (init_context(&ctx, ring, nr)) + return 1; + + for (i = 0; i < nr; i++) + ctx.sqes[i]->flags |= IOSQE_IO_DRAIN; + + ret = io_uring_submit(ring); + if (ret <= 0) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + if (wait_cqes(&ctx)) + goto err; + + free_context(&ctx); + return 0; +err: + free_context(&ctx); + return 1; +} + +static int test_overflow_hung(struct io_uring *ring) +{ + struct io_uring_sqe *sqe; + int ret, nr = 10; + + while (*ring->cq.koverflow != 1000) { + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + return 1; + } + + io_uring_prep_nop(sqe); + ret = io_uring_submit(ring); + if (ret <= 0) { + printf("sqe submit failed: %d\n", ret); + return 1; + } + } + + return run_drained(ring, nr); +} + +static int test_dropped_hung(struct io_uring *ring) +{ + int nr = 
10; + + *ring->sq.kdropped = 1000; + return run_drained(ring, nr); +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring, poll_ring, sqthread_ring; + struct io_uring_params p; + int ret, no_sqthread = 0; + + if (argc > 1) + return 0; + + memset(&p, 0, sizeof(p)); + ret = io_uring_queue_init_params(1000, &ring, &p); + if (ret) { + printf("ring setup failed\n"); + return 1; + } + + ret = io_uring_queue_init(1000, &poll_ring, IORING_SETUP_IOPOLL); + if (ret) { + printf("poll_ring setup failed\n"); + return 1; + } + + ret = io_uring_queue_init(1000, &sqthread_ring, + IORING_SETUP_SQPOLL | IORING_SETUP_IOPOLL); + if (ret) { + if (geteuid()) { + no_sqthread = 1; + } else { + printf("poll_ring setup failed\n"); + return 1; + } + } + + ret = test_cancelled_userdata(&poll_ring); + if (ret) { + printf("test_cancelled_userdata failed\n"); + return ret; + } + + if (no_sqthread) { + printf("test_thread_link_cancel: skipped, not root\n"); + } else { + ret = test_thread_link_cancel(&sqthread_ring); + if (ret) { + printf("test_thread_link_cancel failed\n"); + return ret; + } + } + + if (!(p.features & IORING_FEAT_NODROP)) { + ret = test_overflow_hung(&ring); + if (ret) { + printf("test_overflow_hung failed\n"); + return ret; + } + } + + ret = test_dropped_hung(&ring); + if (ret) { + printf("test_dropped_hung failed\n"); + return ret; + } + + return 0; +} diff --git a/test/eeed8b54e0df-test.c b/test/eeed8b54e0df-test.c new file mode 100644 index 0000000..b2702d6 --- /dev/null +++ b/test/eeed8b54e0df-test.c @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: -EAGAIN handling + * + */ +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +#define BLOCK 4096 + +#ifndef RWF_NOWAIT +#define RWF_NOWAIT 8 +#endif + +static int get_file_fd(void) +{ + ssize_t ret; + char *buf; + int fd; + + fd = open("testfile", O_RDWR | O_CREAT, 0644); + if (fd < 0) { + perror("open file"); + return -1; + } + + buf = malloc(BLOCK); + ret = 
write(fd, buf, BLOCK); + if (ret != BLOCK) { + if (ret < 0) + perror("write"); + else + printf("Short write\n"); + goto err; + } + fsync(fd); + + if (posix_fadvise(fd, 0, 4096, POSIX_FADV_DONTNEED)) { + perror("fadvise"); +err: + close(fd); + free(buf); + return -1; + } + + free(buf); + return fd; +} + +static void put_file_fd(int fd) +{ + close(fd); + unlink("testfile"); +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct iovec iov; + int ret, fd; + + if (argc > 1) + return 0; + + iov.iov_base = malloc(4096); + iov.iov_len = 4096; + + ret = io_uring_queue_init(2, &ring, 0); + if (ret) { + printf("ring setup failed\n"); + return 1; + + } + + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + printf("get sqe failed\n"); + return 1; + } + + fd = get_file_fd(); + if (fd < 0) + return 1; + + io_uring_prep_readv(sqe, fd, &iov, 1, 0); + sqe->rw_flags = RWF_NOWAIT; + + ret = io_uring_submit(&ring); + if (ret != 1) { + printf("Got submit %d, expected 1\n", ret); + goto err; + } + + ret = io_uring_peek_cqe(&ring, &cqe); + if (ret) { + printf("Ring peek got %d\n", ret); + goto err; + } + + if (cqe->res != -EAGAIN) { + printf("cqe error: %d\n", cqe->res); + goto err; + } + + put_file_fd(fd); + return 0; +err: + put_file_fd(fd); + return 1; +} diff --git a/test/eventfd-disable.c b/test/eventfd-disable.c new file mode 100644 index 0000000..f172fd7 --- /dev/null +++ b/test/eventfd-disable.c @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: test disable/enable notifications through eventfd + * + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +int main(int argc, char *argv[]) +{ + struct io_uring_params p = {}; + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct io_uring ring; + uint64_t ptr; + struct iovec vec = { + .iov_base = &ptr, + .iov_len = sizeof(ptr) + }; + int ret, evfd, i; + + if (argc > 1) + 
return 0; + + ret = io_uring_queue_init_params(64, &ring, &p); + if (ret) { + fprintf(stderr, "ring setup failed: %d\n", ret); + return 1; + } + + evfd = eventfd(0, EFD_CLOEXEC); + if (evfd < 0) { + perror("eventfd"); + return 1; + } + + ret = io_uring_register_eventfd(&ring, evfd); + if (ret) { + fprintf(stderr, "failed to register evfd: %d\n", ret); + return 1; + } + + if (!io_uring_cq_eventfd_enabled(&ring)) { + fprintf(stderr, "eventfd disabled\n"); + return 1; + } + + ret = io_uring_cq_eventfd_toggle(&ring, false); + if (ret) { + fprintf(stdout, "Skipping, CQ flags not available!\n"); + return 0; + } + + sqe = io_uring_get_sqe(&ring); + io_uring_prep_readv(sqe, evfd, &vec, 1, 0); + sqe->user_data = 1; + + ret = io_uring_submit(&ring); + if (ret != 1) { + fprintf(stderr, "submit: %d\n", ret); + return 1; + } + + for (i = 0; i < 63; i++) { + sqe = io_uring_get_sqe(&ring); + io_uring_prep_nop(sqe); + sqe->user_data = 2; + } + + ret = io_uring_submit(&ring); + if (ret != 63) { + fprintf(stderr, "submit: %d\n", ret); + return 1; + } + + for (i = 0; i < 63; i++) { + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret) { + fprintf(stderr, "wait: %d\n", ret); + return 1; + } + + switch (cqe->user_data) { + case 1: /* eventfd */ + fprintf(stderr, "eventfd unexpected: %d\n", (int)ptr); + return 1; + case 2: + if (cqe->res) { + fprintf(stderr, "nop: %d\n", cqe->res); + return 1; + } + break; + } + io_uring_cqe_seen(&ring, cqe); + } + + ret = io_uring_cq_eventfd_toggle(&ring, true); + if (ret) { + fprintf(stderr, "io_uring_cq_eventfd_toggle: %d\n", ret); + return 1; + } + + sqe = io_uring_get_sqe(&ring); + io_uring_prep_nop(sqe); + sqe->user_data = 2; + + ret = io_uring_submit(&ring); + if (ret != 1) { + fprintf(stderr, "submit: %d\n", ret); + return 1; + } + + for (i = 0; i < 2; i++) { + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret) { + fprintf(stderr, "wait: %d\n", ret); + return 1; + } + + switch (cqe->user_data) { + case 1: /* eventfd */ + if (cqe->res != sizeof(ptr)) 
{ + fprintf(stderr, "read res: %d\n", cqe->res); + return 1; + } + + if (ptr != 1) { + fprintf(stderr, "eventfd: %d\n", (int)ptr); + return 1; + } + break; + case 2: + if (cqe->res) { + fprintf(stderr, "nop: %d\n", cqe->res); + return 1; + } + break; + } + io_uring_cqe_seen(&ring, cqe); + } + + return 0; +} diff --git a/test/eventfd-ring.c b/test/eventfd-ring.c new file mode 100644 index 0000000..67e102c --- /dev/null +++ b/test/eventfd-ring.c @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: run various nop tests + * + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +int main(int argc, char *argv[]) +{ + struct io_uring_params p = {}; + struct io_uring ring1, ring2; + struct io_uring_sqe *sqe; + int ret, evfd1, evfd2; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init_params(8, &ring1, &p); + if (ret) { + fprintf(stderr, "ring setup failed: %d\n", ret); + return 1; + } + if (!(p.features & IORING_FEAT_CUR_PERSONALITY)) { + fprintf(stdout, "Skipping\n"); + return 0; + } + ret = io_uring_queue_init(8, &ring2, 0); + if (ret) { + fprintf(stderr, "ring setup failed: %d\n", ret); + return 1; + } + + evfd1 = eventfd(0, EFD_CLOEXEC); + if (evfd1 < 0) { + perror("eventfd"); + return 1; + } + + evfd2 = eventfd(0, EFD_CLOEXEC); + if (evfd2 < 0) { + perror("eventfd"); + return 1; + } + + ret = io_uring_register_eventfd(&ring1, evfd1); + if (ret) { + fprintf(stderr, "failed to register evfd: %d\n", ret); + return 1; + } + + ret = io_uring_register_eventfd(&ring2, evfd2); + if (ret) { + fprintf(stderr, "failed to register evfd: %d\n", ret); + return 1; + } + + sqe = io_uring_get_sqe(&ring1); + io_uring_prep_poll_add(sqe, evfd2, POLLIN); + sqe->user_data = 1; + + sqe = io_uring_get_sqe(&ring2); + io_uring_prep_poll_add(sqe, evfd1, POLLIN); + sqe->user_data = 1; + + ret = io_uring_submit(&ring1); + if (ret != 1) { + fprintf(stderr, "submit: %d\n", ret); + return 1; + } + + ret = 
io_uring_submit(&ring2); + if (ret != 1) { + fprintf(stderr, "submit: %d\n", ret); + return 1; + } + + sqe = io_uring_get_sqe(&ring1); + io_uring_prep_nop(sqe); + sqe->user_data = 3; + + ret = io_uring_submit(&ring1); + if (ret != 1) { + fprintf(stderr, "submit: %d\n", ret); + return 1; + } + + return 0; +} diff --git a/test/eventfd.c b/test/eventfd.c new file mode 100644 index 0000000..1a7e3f3 --- /dev/null +++ b/test/eventfd.c @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: run various nop tests + * + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +int main(int argc, char *argv[]) +{ + struct io_uring_params p = {}; + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct io_uring ring; + uint64_t ptr; + struct iovec vec = { + .iov_base = &ptr, + .iov_len = sizeof(ptr) + }; + int ret, evfd, i; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init_params(8, &ring, &p); + if (ret) { + fprintf(stderr, "ring setup failed: %d\n", ret); + return 1; + } + if (!(p.features & IORING_FEAT_CUR_PERSONALITY)) { + fprintf(stdout, "Skipping\n"); + return 0; + } + + evfd = eventfd(0, EFD_CLOEXEC); + if (evfd < 0) { + perror("eventfd"); + return 1; + } + + ret = io_uring_register_eventfd(&ring, evfd); + if (ret) { + fprintf(stderr, "failed to register evfd: %d\n", ret); + return 1; + } + + sqe = io_uring_get_sqe(&ring); + io_uring_prep_poll_add(sqe, evfd, POLLIN); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 1; + + sqe = io_uring_get_sqe(&ring); + io_uring_prep_readv(sqe, evfd, &vec, 1, 0); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 2; + + ret = io_uring_submit(&ring); + if (ret != 2) { + fprintf(stderr, "submit: %d\n", ret); + return 1; + } + + sqe = io_uring_get_sqe(&ring); + io_uring_prep_nop(sqe); + sqe->user_data = 3; + + ret = io_uring_submit(&ring); + if (ret != 1) { + fprintf(stderr, "submit: %d\n", ret); + return 1; + } + + for (i = 0; i < 3; i++) { + ret = 
io_uring_wait_cqe(&ring, &cqe); + if (ret) { + fprintf(stderr, "wait: %d\n", ret); + return 1; + } + switch (cqe->user_data) { + case 1: + /* POLLIN */ + if (cqe->res != 1) { + fprintf(stderr, "poll: %d\n", cqe->res); + return 1; + } + break; + case 2: + if (cqe->res != sizeof(ptr)) { + fprintf(stderr, "read: %d\n", cqe->res); + return 1; + } + break; + case 3: + if (cqe->res) { + fprintf(stderr, "nop: %d\n", cqe->res); + return 1; + } + break; + } + io_uring_cqe_seen(&ring, cqe); + } + + return 0; +} diff --git a/test/fadvise.c b/test/fadvise.c new file mode 100644 index 0000000..0759446 --- /dev/null +++ b/test/fadvise.c @@ -0,0 +1,222 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: basic fadvise test + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +#define FILE_SIZE (128 * 1024) +#define LOOPS 100 +#define MIN_LOOPS 10 + +static unsigned long long utime_since(const struct timeval *s, + const struct timeval *e) +{ + long long sec, usec; + + sec = e->tv_sec - s->tv_sec; + usec = (e->tv_usec - s->tv_usec); + if (sec > 0 && usec < 0) { + sec--; + usec += 1000000; + } + + sec *= 1000000; + return sec + usec; +} + +static unsigned long long utime_since_now(struct timeval *tv) +{ + struct timeval end; + + gettimeofday(&end, NULL); + return utime_since(tv, &end); +} + +static int create_file(const char *file) +{ + ssize_t ret; + char *buf; + int fd; + + buf = malloc(FILE_SIZE); + memset(buf, 0xaa, FILE_SIZE); + + fd = open(file, O_WRONLY | O_CREAT, 0644); + if (fd < 0) { + perror("open file"); + return 1; + } + ret = write(fd, buf, FILE_SIZE); + fsync(fd); + close(fd); + return ret != FILE_SIZE; +} + +static int do_fadvise(struct io_uring *ring, int fd, off_t offset, off_t len, + int advice) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + int ret; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "failed to get sqe\n"); + return 1; + } + + io_uring_prep_fadvise(sqe, 
fd, offset, len, advice); + sqe->user_data = advice; + ret = io_uring_submit_and_wait(ring, 1); + if (ret != 1) { + fprintf(stderr, "submit: %d\n", ret); + return 1; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret) { + fprintf(stderr, "wait: %d\n", ret); + return 1; + } + + ret = cqe->res; + if (ret == -EINVAL || ret == -EBADF) { + fprintf(stdout, "Fadvise not supported, skipping\n"); + unlink(".fadvise.tmp"); + exit(0); + } else if (ret) { + fprintf(stderr, "cqe->res=%d\n", cqe->res); + } + io_uring_cqe_seen(ring, cqe); + return ret; +} + +static long do_read(int fd, char *buf) +{ + struct timeval tv; + int ret; + long t; + + ret = lseek(fd, 0, SEEK_SET); + if (ret) { + perror("lseek"); + return -1; + } + + gettimeofday(&tv, NULL); + ret = read(fd, buf, FILE_SIZE); + t = utime_since_now(&tv); + if (ret < 0) { + perror("read"); + return -1; + } else if (ret != FILE_SIZE) { + fprintf(stderr, "short read1: %d\n", ret); + return -1; + } + + return t; +} + +static int test_fadvise(struct io_uring *ring, const char *filename) +{ + unsigned long cached_read, uncached_read, cached_read2; + int fd, ret; + char *buf; + + fd = open(filename, O_RDONLY); + if (fd < 0) { + perror("open"); + return 1; + } + + buf = malloc(FILE_SIZE); + + cached_read = do_read(fd, buf); + if (cached_read == -1) + return 1; + + ret = do_fadvise(ring, fd, 0, FILE_SIZE, POSIX_FADV_DONTNEED); + if (ret) + return 1; + + uncached_read = do_read(fd, buf); + if (uncached_read == -1) + return 1; + + ret = do_fadvise(ring, fd, 0, FILE_SIZE, POSIX_FADV_DONTNEED); + if (ret) + return 1; + + ret = do_fadvise(ring, fd, 0, FILE_SIZE, POSIX_FADV_WILLNEED); + if (ret) + return 1; + + fsync(fd); + + cached_read2 = do_read(fd, buf); + if (cached_read2 == -1) + return 1; + + if (cached_read < uncached_read && + cached_read2 < uncached_read) + return 0; + + return 2; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int ret, i, good, bad; + char *fname; + + if (argc > 1) { + fname = argv[1]; 
+ } else { + fname = ".fadvise.tmp"; + if (create_file(".fadvise.tmp")) { + fprintf(stderr, "file creation failed\n"); + goto err; + } + } + if (io_uring_queue_init(8, &ring, 0)) { + fprintf(stderr, "ring creation failed\n"); + goto err; + } + + good = bad = 0; + for (i = 0; i < LOOPS; i++) { + ret = test_fadvise(&ring, fname); + if (ret == 1) { + fprintf(stderr, "read_fadvise failed\n"); + goto err; + } else if (!ret) + good++; + else if (ret == 2) + bad++; + if (i >= MIN_LOOPS && !bad) + break; + } + if (bad > good) { + fprintf(stderr, "Suspicious timings\n"); + goto err; + } + + if (fname != argv[1]) + unlink(fname); + io_uring_queue_exit(&ring); + return 0; +err: + if (fname != argv[1]) + unlink(fname); + return 1; +} diff --git a/test/fallocate.c b/test/fallocate.c new file mode 100644 index 0000000..e662a6a --- /dev/null +++ b/test/fallocate.c @@ -0,0 +1,252 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: test io_uring fallocate + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static int no_fallocate; + +static int test_fallocate_rlimit(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct rlimit rlim; + char buf[32]; + int fd, ret; + + if (getrlimit(RLIMIT_FSIZE, &rlim) < 0) { + perror("getrlimit"); + return 1; + } + rlim.rlim_cur = 64 * 1024; + rlim.rlim_max = 64 * 1024; + if (setrlimit(RLIMIT_FSIZE, &rlim) < 0) { + perror("setrlimit"); + return 1; + } + + sprintf(buf, "./XXXXXX"); + fd = mkstemp(buf); + if (fd < 0) { + perror("open"); + return 1; + } + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + io_uring_prep_fallocate(sqe, fd, 0, 0, 128*1024); + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + goto err; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion %d\n", ret); + goto 
err; + } + + if (cqe->res == -EINVAL) { + fprintf(stdout, "Fallocate not supported, skipping\n"); + no_fallocate = 1; + goto out; + } else if (cqe->res != -EFBIG) { + fprintf(stderr, "Expected -EFBIG: %d\n", cqe->res); + goto err; + } + io_uring_cqe_seen(ring, cqe); +out: + unlink(buf); + return 0; +err: + unlink(buf); + return 1; +} + +static int test_fallocate(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct stat st; + char buf[32]; + int fd, ret; + + sprintf(buf, "./XXXXXX"); + fd = mkstemp(buf); + if (fd < 0) { + perror("open"); + return 1; + } + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + io_uring_prep_fallocate(sqe, fd, 0, 0, 128*1024); + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + goto err; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } + + if (cqe->res == -EINVAL) { + fprintf(stdout, "Fallocate not supported, skipping\n"); + no_fallocate = 1; + goto out; + } + if (cqe->res) { + fprintf(stderr, "cqe->res=%d\n", cqe->res); + goto err; + } + io_uring_cqe_seen(ring, cqe); + + if (fstat(fd, &st) < 0) { + perror("stat"); + goto err; + } + + if (st.st_size != 128*1024) { + fprintf(stderr, "Size mismatch: %llu\n", + (unsigned long long) st.st_size); + goto err; + } + +out: + unlink(buf); + return 0; +err: + unlink(buf); + return 1; +} + +static int test_fallocate_fsync(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct stat st; + char buf[32]; + int fd, ret, i; + + if (no_fallocate) + return 0; + + sprintf(buf, "./XXXXXX"); + fd = mkstemp(buf); + if (fd < 0) { + perror("open"); + return 1; + } + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + io_uring_prep_fallocate(sqe, fd, 0, 0, 128*1024); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data 
= 1; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + io_uring_prep_fsync(sqe, fd, 0); + sqe->user_data = 2; + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + goto err; + } + + for (i = 0; i < 2; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } + if (cqe->res) { + fprintf(stderr, "cqe->res=%d,data=%llu\n", cqe->res, + cqe->user_data); + goto err; + } + io_uring_cqe_seen(ring, cqe); + } + + if (fstat(fd, &st) < 0) { + perror("stat"); + goto err; + } + + if (st.st_size != 128*1024) { + fprintf(stderr, "Size mismatch: %llu\n", + (unsigned long long) st.st_size); + goto err; + } + + unlink(buf); + return 0; +err: + unlink(buf); + return 1; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int ret; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed\n"); + return 1; + } + + ret = test_fallocate(&ring); + if (ret) { + fprintf(stderr, "test_fallocate failed\n"); + return ret; + } + + ret = test_fallocate_fsync(&ring); + if (ret) { + fprintf(stderr, "test_fallocate_fsync failed\n"); + return ret; + } + + ret = test_fallocate_rlimit(&ring); + if (ret) { + fprintf(stderr, "test_fallocate_rlimit failed\n"); + return ret; + } + + return 0; +} diff --git a/test/fc2a85cb02ef-test.c b/test/fc2a85cb02ef-test.c new file mode 100644 index 0000000..e922d17 --- /dev/null +++ b/test/fc2a85cb02ef-test.c @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: MIT */ +// https://syzkaller.appspot.com/bug?id=1f2ecd7a23dba87e5ca3505ec44514a462cfe8c0 +// autogenerated by syzkaller (https://github.com/google/syzkaller) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static bool write_file(const char* file, const char* what, ...) 
+{ + char buf[1024]; + va_list args; + va_start(args, what); + vsnprintf(buf, sizeof(buf), what, args); + va_end(args); + buf[sizeof(buf) - 1] = 0; + int len = strlen(buf); + int fd = open(file, O_WRONLY | O_CLOEXEC); + if (fd == -1) + return false; + if (write(fd, buf, len) != len) { + int err = errno; + close(fd); + errno = err; + return false; + } + close(fd); + return true; +} + +static int inject_fault(int nth) +{ + int fd; + fd = open("/proc/thread-self/fail-nth", O_RDWR); + if (fd == -1) + exit(1); + char buf[16]; + sprintf(buf, "%d", nth + 1); + if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf)) + exit(1); + return fd; +} + +static int setup_fault() +{ + static struct { + const char* file; + const char* val; + bool fatal; + } files[] = { + {"/sys/kernel/debug/failslab/ignore-gfp-wait", "N", true}, + {"/sys/kernel/debug/failslab/verbose", "0", false}, + {"/sys/kernel/debug/fail_futex/ignore-private", "N", false}, + {"/sys/kernel/debug/fail_page_alloc/verbose", "0", false}, + {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem", "N", false}, + {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-wait", "N", false}, + {"/sys/kernel/debug/fail_page_alloc/min-order", "0", false}, + }; + unsigned i; + for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) { + if (!write_file(files[i].file, files[i].val)) { + if (files[i].fatal) + return 1; + } + } + return 0; +} + +#ifndef __NR_io_uring_register +#define __NR_io_uring_register 427 +#endif +#ifndef __NR_io_uring_setup +#define __NR_io_uring_setup 425 +#endif + +uint64_t r[2] = {0xffffffffffffffff, 0xffffffffffffffff}; + +int main(int argc, char *argv[]) +{ + if (argc > 1) + return 0; + mmap((void *) 0x20000000ul, 0x1000000ul, 3ul, 0x32ul, -1, 0); + if (setup_fault()) { + printf("Test needs failslab/fail_futex/fail_page_alloc enabled, skipped\n"); + return 0; + } + intptr_t res = 0; + *(uint32_t*)0x20000000 = 0; + *(uint32_t*)0x20000004 = 0; + *(uint32_t*)0x20000008 = 0; + *(uint32_t*)0x2000000c = 0; + 
*(uint32_t*)0x20000010 = 0; + *(uint32_t*)0x20000014 = 0; + *(uint32_t*)0x20000018 = 0; + *(uint32_t*)0x2000001c = 0; + *(uint32_t*)0x20000020 = 0; + *(uint32_t*)0x20000024 = 0; + *(uint32_t*)0x20000028 = 0; + *(uint32_t*)0x2000002c = 0; + *(uint32_t*)0x20000030 = 0; + *(uint32_t*)0x20000034 = 0; + *(uint32_t*)0x20000038 = 0; + *(uint32_t*)0x2000003c = 0; + *(uint32_t*)0x20000040 = 0; + *(uint32_t*)0x20000044 = 0; + *(uint64_t*)0x20000048 = 0; + *(uint32_t*)0x20000050 = 0; + *(uint32_t*)0x20000054 = 0; + *(uint32_t*)0x20000058 = 0; + *(uint32_t*)0x2000005c = 0; + *(uint32_t*)0x20000060 = 0; + *(uint32_t*)0x20000064 = 0; + *(uint32_t*)0x20000068 = 0; + *(uint32_t*)0x2000006c = 0; + *(uint64_t*)0x20000070 = 0; + res = syscall(__NR_io_uring_setup, 0x6a6, 0x20000000ul); + if (res != -1) + r[0] = res; + res = syscall(__NR_socket, 0x11ul, 2ul, 0x300ul); + if (res != -1) + r[1] = res; + *(uint32_t*)0x20000080 = r[1]; + inject_fault(1); + syscall(__NR_io_uring_register, r[0], 2ul, 0x20000080ul, 1ul); + return 0; +} diff --git a/test/file-register.c b/test/file-register.c new file mode 100644 index 0000000..7400b3a --- /dev/null +++ b/test/file-register.c @@ -0,0 +1,689 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: run various file registration tests + * + */ +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static int no_update = 0; + +static void close_files(int *files, int nr_files, int add) +{ + char fname[32]; + int i; + + for (i = 0; i < nr_files; i++) { + if (files) + close(files[i]); + if (!add) + sprintf(fname, ".reg.%d", i); + else + sprintf(fname, ".add.%d", i + add); + unlink(fname); + } + if (files) + free(files); +} + +static int *open_files(int nr_files, int extra, int add) +{ + char fname[32]; + int *files; + int i; + + files = calloc(nr_files + extra, sizeof(int)); + + for (i = 0; i < nr_files; i++) { + if (!add) + sprintf(fname, ".reg.%d", i); + else + sprintf(fname, ".add.%d", i + add); + files[i] = 
open(fname, O_RDWR | O_CREAT, 0644); + if (files[i] < 0) { + perror("open"); + free(files); + files = NULL; + break; + } + } + if (extra) { + for (i = nr_files; i < nr_files + extra; i++) + files[i] = -1; + } + + return files; +} + +static int test_shrink(struct io_uring *ring) +{ + int ret, off, fd; + int *files; + + files = open_files(50, 0, 0); + ret = io_uring_register_files(ring, files, 50); + if (ret) { + fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); + goto err; + } + + off = 0; + do { + fd = -1; + ret = io_uring_register_files_update(ring, off, &fd, 1); + if (ret != 1) { + if (off == 50 && ret == -EINVAL) + break; + fprintf(stderr, "%s: update ret=%d\n", __FUNCTION__, ret); + break; + } + off++; + } while (1); + + ret = io_uring_unregister_files(ring); + if (ret) { + fprintf(stderr, "%s: unregister ret=%d\n", __FUNCTION__, ret); + goto err; + } + + close_files(files, 50, 0); + return 0; +err: + close_files(files, 50, 0); + return 1; +} + + +static int test_grow(struct io_uring *ring) +{ + int ret, off; + int *files, *fds = NULL; + + files = open_files(50, 250, 0); + ret = io_uring_register_files(ring, files, 300); + if (ret) { + fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); + goto err; + } + + off = 50; + do { + fds = open_files(1, 0, off); + ret = io_uring_register_files_update(ring, off, fds, 1); + if (ret != 1) { + if (off == 300 && ret == -EINVAL) + break; + fprintf(stderr, "%s: update ret=%d\n", __FUNCTION__, ret); + break; + } + if (off >= 300) { + fprintf(stderr, "%s: Succeeded beyond end-of-list?\n", __FUNCTION__); + goto err; + } + off++; + } while (1); + + ret = io_uring_unregister_files(ring); + if (ret) { + fprintf(stderr, "%s: unregister ret=%d\n", __FUNCTION__, ret); + goto err; + } + + close_files(files, 100, 0); + close_files(NULL, 251, 50); + return 0; +err: + close_files(files, 100, 0); + close_files(NULL, 251, 50); + return 1; +} + +static int test_replace_all(struct io_uring *ring) +{ + int *files, *fds = 
NULL; + int ret, i; + + files = open_files(100, 0, 0); + ret = io_uring_register_files(ring, files, 100); + if (ret) { + fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); + goto err; + } + + fds = malloc(100 * sizeof(int)); + for (i = 0; i < 100; i++) + fds[i] = -1; + + ret = io_uring_register_files_update(ring, 0, fds, 100); + if (ret != 100) { + fprintf(stderr, "%s: update ret=%d\n", __FUNCTION__, ret); + goto err; + } + + ret = io_uring_unregister_files(ring); + if (ret) { + fprintf(stderr, "%s: unregister ret=%d\n", __FUNCTION__, ret); + goto err; + } + + close_files(files, 100, 0); + if (fds) + free(fds); + return 0; +err: + close_files(files, 100, 0); + if (fds) + free(fds); + return 1; +} + +static int test_replace(struct io_uring *ring) +{ + int *files, *fds = NULL; + int ret; + + files = open_files(100, 0, 0); + ret = io_uring_register_files(ring, files, 100); + if (ret) { + fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); + goto err; + } + + fds = open_files(10, 0, 1); + ret = io_uring_register_files_update(ring, 90, fds, 10); + if (ret != 10) { + fprintf(stderr, "%s: update ret=%d\n", __FUNCTION__, ret); + goto err; + } + + ret = io_uring_unregister_files(ring); + if (ret) { + fprintf(stderr, "%s: unregister ret=%d\n", __FUNCTION__, ret); + goto err; + } + + close_files(files, 100, 0); + if (fds) + close_files(fds, 10, 1); + return 0; +err: + close_files(files, 100, 0); + if (fds) + close_files(fds, 10, 1); + return 1; +} + +static int test_removals(struct io_uring *ring) +{ + int *files, *fds = NULL; + int ret, i; + + files = open_files(100, 0, 0); + ret = io_uring_register_files(ring, files, 100); + if (ret) { + fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); + goto err; + } + + fds = calloc(10, sizeof(int)); + for (i = 0; i < 10; i++) + fds[i] = -1; + + ret = io_uring_register_files_update(ring, 50, fds, 10); + if (ret != 10) { + fprintf(stderr, "%s: update ret=%d\n", __FUNCTION__, ret); + goto err; + } + + ret = 
io_uring_unregister_files(ring); + if (ret) { + fprintf(stderr, "%s: unregister ret=%d\n", __FUNCTION__, ret); + goto err; + } + + close_files(files, 100, 0); + if (fds) + free(fds); + return 0; +err: + close_files(files, 100, 0); + if (fds) + free(fds); + return 1; +} + +static int test_additions(struct io_uring *ring) +{ + int *files, *fds = NULL; + int ret; + + files = open_files(100, 100, 0); + ret = io_uring_register_files(ring, files, 200); + if (ret) { + fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); + goto err; + } + + fds = open_files(2, 0, 1); + ret = io_uring_register_files_update(ring, 100, fds, 2); + if (ret != 2) { + fprintf(stderr, "%s: update ret=%d\n", __FUNCTION__, ret); + goto err; + } + + ret = io_uring_unregister_files(ring); + if (ret) { + fprintf(stderr, "%s: unregister ret=%d\n", __FUNCTION__, ret); + goto err; + } + + close_files(files, 100, 0); + if (fds) + close_files(fds, 2, 1); + return 0; +err: + close_files(files, 100, 0); + if (fds) + close_files(fds, 2, 1); + return 1; +} + +static int test_sparse(struct io_uring *ring) +{ + int *files; + int ret; + + files = open_files(100, 100, 0); + ret = io_uring_register_files(ring, files, 200); + if (ret) { + if (ret == -EBADF) { + fprintf(stdout, "Sparse files not supported\n"); + no_update = 1; + goto done; + } + fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); + goto err; + } + ret = io_uring_unregister_files(ring); + if (ret) { + fprintf(stderr, "%s: unregister ret=%d\n", __FUNCTION__, ret); + goto err; + } +done: + close_files(files, 100, 0); + return 0; +err: + close_files(files, 100, 0); + return 1; +} + +static int test_basic_many(struct io_uring *ring) +{ + int *files; + int ret; + + files = open_files(768, 0, 0); + ret = io_uring_register_files(ring, files, 768); + if (ret) { + fprintf(stderr, "%s: register %d\n", __FUNCTION__, ret); + goto err; + } + ret = io_uring_unregister_files(ring); + if (ret) { + fprintf(stderr, "%s: unregister %d\n", __FUNCTION__, 
ret); + goto err; + } + close_files(files, 768, 0); + return 0; +err: + close_files(files, 768, 0); + return 1; +} + +static int test_basic(struct io_uring *ring) +{ + int *files; + int ret; + + files = open_files(100, 0, 0); + ret = io_uring_register_files(ring, files, 100); + if (ret) { + fprintf(stderr, "%s: register %d\n", __FUNCTION__, ret); + goto err; + } + ret = io_uring_unregister_files(ring); + if (ret) { + fprintf(stderr, "%s: unregister %d\n", __FUNCTION__, ret); + goto err; + } + close_files(files, 100, 0); + return 0; +err: + close_files(files, 100, 0); + return 1; +} + +/* + * Register 0 files, but reserve space for 10. Then add one file. + */ +static int test_zero(struct io_uring *ring) +{ + int *files, *fds = NULL; + int ret; + + files = open_files(0, 10, 0); + ret = io_uring_register_files(ring, files, 10); + if (ret) { + fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); + goto err; + } + + fds = open_files(1, 0, 1); + ret = io_uring_register_files_update(ring, 0, fds, 1); + if (ret != 1) { + fprintf(stderr, "%s: update ret=%d\n", __FUNCTION__, ret); + goto err; + } + + ret = io_uring_unregister_files(ring); + if (ret) { + fprintf(stderr, "%s: unregister ret=%d\n", __FUNCTION__, ret); + goto err; + } + + if (fds) + close_files(fds, 1, 1); + free(files); + return 0; +err: + if (fds) + close_files(fds, 1, 1); + free(files); + return 1; +} + +static int test_fixed_read_write(struct io_uring *ring, int index) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct iovec iov[2]; + int ret; + + iov[0].iov_base = malloc(4096); + iov[0].iov_len = 4096; + memset(iov[0].iov_base, 0x5a, 4096); + + iov[1].iov_base = malloc(4096); + iov[1].iov_len = 4096; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: failed to get sqe\n", __FUNCTION__); + return 1; + } + io_uring_prep_writev(sqe, index, &iov[0], 1, 0); + sqe->flags |= IOSQE_FIXED_FILE; + sqe->user_data = 1; + + ret = io_uring_submit(ring); + if (ret != 1) { + 
fprintf(stderr, "%s: got %d, wanted 1\n", __FUNCTION__, ret); + return 1; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "%s: io_uring_wait_cqe=%d\n", __FUNCTION__, ret); + return 1; + } + if (cqe->res != 4096) { + fprintf(stderr, "%s: write cqe->res=%d\n", __FUNCTION__, cqe->res); + return 1; + } + io_uring_cqe_seen(ring, cqe); + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: failed to get sqe\n", __FUNCTION__); + return 1; + } + io_uring_prep_readv(sqe, index, &iov[1], 1, 0); + sqe->flags |= IOSQE_FIXED_FILE; + sqe->user_data = 2; + + ret = io_uring_submit(ring); + if (ret != 1) { + fprintf(stderr, "%s: got %d, wanted 1\n", __FUNCTION__, ret); + return 1; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "%s: io_uring_wait_cqe=%d\n", __FUNCTION__, ret); + return 1; + } + if (cqe->res != 4096) { + fprintf(stderr, "%s: read cqe->res=%d\n", __FUNCTION__, cqe->res); + return 1; + } + io_uring_cqe_seen(ring, cqe); + + if (memcmp(iov[1].iov_base, iov[0].iov_base, 4096)) { + fprintf(stderr, "%s: data mismatch\n", __FUNCTION__); + return 1; + } + + free(iov[0].iov_base); + free(iov[1].iov_base); + return 0; +} + +/* + * Register 8K of sparse files, update one at a random spot, then do some + * file IO to verify it works. 
+ */ +static int test_huge(struct io_uring *ring) +{ + int *files; + int ret; + + files = open_files(0, 8192, 0); + ret = io_uring_register_files(ring, files, 8192); + if (ret) { + /* huge sets not supported */ + if (ret == -EMFILE) { + fprintf(stdout, "%s: No huge file set support, skipping\n", __FUNCTION__); + goto out; + } + fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); + goto err; + } + + files[7193] = open(".reg.7193", O_RDWR | O_CREAT, 0644); + if (files[7193] < 0) { + fprintf(stderr, "%s: open=%d\n", __FUNCTION__, errno); + goto err; + } + + ret = io_uring_register_files_update(ring, 7193, &files[7193], 1); + if (ret != 1) { + fprintf(stderr, "%s: update ret=%d\n", __FUNCTION__, ret); + goto err; + } + + if (test_fixed_read_write(ring, 7193)) + goto err; + + ret = io_uring_unregister_files(ring); + if (ret) { + fprintf(stderr, "%s: unregister ret=%d\n", __FUNCTION__, ret); + goto err; + } + + if (files[7193] != -1) { + close(files[7193]); + unlink(".reg.7193"); + } +out: + free(files); + return 0; +err: + if (files[7193] != -1) { + close(files[7193]); + unlink(".reg.7193"); + } + free(files); + return 1; +} + +static int test_sparse_updates(void) +{ + struct io_uring ring; + int ret, i, *fds, newfd; + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + fprintf(stderr, "queue_init: %d\n", ret); + return ret; + } + + fds = malloc(256 * sizeof(int)); + for (i = 0; i < 256; i++) + fds[i] = -1; + + ret = io_uring_register_files(&ring, fds, 256); + if (ret) { + fprintf(stderr, "file_register: %d\n", ret); + return ret; + } + + newfd = 1; + for (i = 0; i < 256; i++) { + ret = io_uring_register_files_update(&ring, i, &newfd, 1); + if (ret != 1) { + fprintf(stderr, "file_update: %d\n", ret); + return ret; + } + } + io_uring_unregister_files(&ring); + + for (i = 0; i < 256; i++) + fds[i] = 1; + + ret = io_uring_register_files(&ring, fds, 256); + if (ret) { + fprintf(stderr, "file_register: %d\n", ret); + return ret; + } + + newfd = -1; + for (i = 
0; i < 256; i++) { + ret = io_uring_register_files_update(&ring, i, &newfd, 1); + if (ret != 1) { + fprintf(stderr, "file_update: %d\n", ret); + return ret; + } + } + io_uring_unregister_files(&ring); + + io_uring_queue_exit(&ring); + return 0; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int ret; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + printf("ring setup failed\n"); + return 1; + } + + ret = test_basic(&ring); + if (ret) { + printf("test_basic failed\n"); + return ret; + } + + ret = test_basic_many(&ring); + if (ret) { + printf("test_basic_many failed\n"); + return ret; + } + + ret = test_sparse(&ring); + if (ret) { + printf("test_sparse failed\n"); + return ret; + } + + if (no_update) + return 0; + + ret = test_additions(&ring); + if (ret) { + printf("test_additions failed\n"); + return ret; + } + + ret = test_removals(&ring); + if (ret) { + printf("test_removals failed\n"); + return ret; + } + + ret = test_replace(&ring); + if (ret) { + printf("test_replace failed\n"); + return ret; + } + + ret = test_replace_all(&ring); + if (ret) { + printf("test_replace_all failed\n"); + return ret; + } + + ret = test_grow(&ring); + if (ret) { + printf("test_grow failed\n"); + return ret; + } + + ret = test_shrink(&ring); + if (ret) { + printf("test_shrink failed\n"); + return ret; + } + + ret = test_zero(&ring); + if (ret) { + printf("test_zero failed\n"); + return ret; + } + + ret = test_huge(&ring); + if (ret) { + printf("test_huge failed\n"); + return ret; + } + + ret = test_sparse_updates(); + if (ret) { + printf("test_sparse_updates failed\n"); + return ret; + } + + return 0; +} diff --git a/test/file-update.c b/test/file-update.c new file mode 100644 index 0000000..9c4715a --- /dev/null +++ b/test/file-update.c @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: run various file registration tests + * + */ +#include +#include +#include +#include +#include +#include + 
+#include "liburing.h" + +static void close_files(int *files, int nr_files, int add) +{ + char fname[32]; + int i; + + for (i = 0; i < nr_files; i++) { + if (files) + close(files[i]); + if (!add) + sprintf(fname, ".reg.%d", i); + else + sprintf(fname, ".add.%d", i + add); + unlink(fname); + } + if (files) + free(files); +} + +static int *open_files(int nr_files, int extra, int add) +{ + char fname[32]; + int *files; + int i; + + files = calloc(nr_files + extra, sizeof(int)); + + for (i = 0; i < nr_files; i++) { + if (!add) + sprintf(fname, ".reg.%d", i); + else + sprintf(fname, ".add.%d", i + add); + files[i] = open(fname, O_RDWR | O_CREAT, 0644); + if (files[i] < 0) { + perror("open"); + free(files); + files = NULL; + break; + } + } + if (extra) { + for (i = nr_files; i < nr_files + extra; i++) + files[i] = -1; + } + + return files; +} + +static int test_update_multiring(struct io_uring *r1, struct io_uring *r2, + struct io_uring *r3, int do_unreg) +{ + int *fds, *newfds; + + fds = open_files(10, 0, 0); + newfds = open_files(10, 0, 1); + + if (io_uring_register_files(r1, fds, 10) || + io_uring_register_files(r2, fds, 10) || + io_uring_register_files(r3, fds, 10)) { + fprintf(stderr, "%s: register files failed\n", __FUNCTION__); + goto err; + } + + if (io_uring_register_files_update(r1, 0, newfds, 10) != 10 || + io_uring_register_files_update(r2, 0, newfds, 10) != 10 || + io_uring_register_files_update(r3, 0, newfds, 10) != 10) { + fprintf(stderr, "%s: update files failed\n", __FUNCTION__); + goto err; + } + + if (!do_unreg) + goto done; + + if (io_uring_unregister_files(r1) || + io_uring_unregister_files(r2) || + io_uring_unregister_files(r3)) { + fprintf(stderr, "%s: unregister files failed\n", __FUNCTION__); + goto err; + } + +done: + close_files(fds, 10, 0); + close_files(newfds, 10, 1); + return 0; +err: + close_files(fds, 10, 0); + close_files(newfds, 10, 1); + return 1; +} + +static int test_sqe_update(struct io_uring *ring) +{ + struct io_uring_sqe *sqe; + 
struct io_uring_cqe *cqe; + int *fds, i, ret; + + fds = malloc(sizeof(int) * 10); + for (i = 0; i < 10; i++) + fds[i] = -1; + + sqe = io_uring_get_sqe(ring); + io_uring_prep_files_update(sqe, fds, 10, 0); + ret = io_uring_submit(ring); + if (ret != 1) { + fprintf(stderr, "submit: %d\n", ret); + return 1; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret) { + fprintf(stderr, "wait: %d\n", ret); + return 1; + } + + ret = cqe->res; + io_uring_cqe_seen(ring, cqe); + if (ret == -EINVAL) { + fprintf(stdout, "IORING_OP_FILES_UPDATE not supported, skipping\n"); + return 0; + } + return ret != 10; +} + +int main(int argc, char *argv[]) +{ + struct io_uring r1, r2, r3; + int ret; + + if (argc > 1) + return 0; + + if (io_uring_queue_init(8, &r1, 0) || + io_uring_queue_init(8, &r2, 0) || + io_uring_queue_init(8, &r3, 0)) { + fprintf(stderr, "ring setup failed\n"); + return 1; + } + + ret = test_update_multiring(&r1, &r2, &r3, 1); + if (ret) { + fprintf(stderr, "test_update_multiring w/unreg\n"); + return ret; + } + + ret = test_update_multiring(&r1, &r2, &r3, 0); + if (ret) { + fprintf(stderr, "test_update_multiring wo/unreg\n"); + return ret; + } + + ret = test_sqe_update(&r1); + if (ret) { + fprintf(stderr, "test_sqe_update failed\n"); + return ret; + } + + return 0; +} diff --git a/test/fixed-link.c b/test/fixed-link.c new file mode 100644 index 0000000..14def83 --- /dev/null +++ b/test/fixed-link.c @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: MIT */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +#define IOVECS_LEN 2 + +int main(int argc, char *argv[]) +{ + struct iovec iovecs[IOVECS_LEN]; + struct io_uring ring; + int i, fd, ret; + + if (argc > 1) + return 0; + + fd = open("/dev/zero", O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Failed to open /dev/zero\n"); + return 1; + } + + if (io_uring_queue_init(32, &ring, 0) < 0) { + fprintf(stderr, "Faild to init io_uring\n"); + close(fd); + return 1; + } + + for 
(i = 0; i < IOVECS_LEN; ++i) { + iovecs[i].iov_base = malloc(64); + iovecs[i].iov_len = 64; + }; + + ret = io_uring_register_buffers(&ring, iovecs, IOVECS_LEN); + if (ret) { + fprintf(stderr, "Failed to register buffers\n"); + return 1; + } + + for (i = 0; i < IOVECS_LEN; ++i) { + struct io_uring_sqe *sqe = io_uring_get_sqe(&ring); + const char *str = "#include "; + + iovecs[i].iov_len = strlen(str); + io_uring_prep_read_fixed(sqe, fd, iovecs[i].iov_base, strlen(str), 0, i); + if (i == 0) + io_uring_sqe_set_flags(sqe, IOSQE_IO_LINK); + io_uring_sqe_set_data(sqe, (void *)str); + } + + ret = io_uring_submit_and_wait(&ring, IOVECS_LEN); + if (ret < 0) { + fprintf(stderr, "Failed to submit IO\n"); + return 1; + } else if (ret < 2) { + fprintf(stderr, "Submitted %d, wanted %d\n", ret, IOVECS_LEN); + return 1; + } + + for (i = 0; i < IOVECS_LEN; i++) { + struct io_uring_cqe *cqe; + + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret) { + fprintf(stderr, "wait_cqe=%d\n", ret); + return 1; + } + if (cqe->res != iovecs[i].iov_len) { + fprintf(stderr, "read: wanted %ld, got %d\n", + (long) iovecs[i].iov_len, cqe->res); + return 1; + } + io_uring_cqe_seen(&ring, cqe); + } + + close(fd); + io_uring_queue_exit(&ring); + + for (i = 0; i < IOVECS_LEN; ++i) + free(iovecs[i].iov_base); + + return 0; +} diff --git a/test/fsync.c b/test/fsync.c new file mode 100644 index 0000000..343f383 --- /dev/null +++ b/test/fsync.c @@ -0,0 +1,245 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: test io_uring fsync handling + * + */ +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static int test_single_fsync(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + char buf[32]; + int fd, ret; + + sprintf(buf, "./XXXXXX"); + fd = mkstemp(buf); + if (fd < 0) { + perror("open"); + return 1; + } + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + + io_uring_prep_fsync(sqe, 
fd, 0); + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + goto err; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } + + io_uring_cqe_seen(ring, cqe); + unlink(buf); + return 0; +err: + unlink(buf); + return 1; +} + +static int test_barrier_fsync(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct iovec iovecs[4]; + int i, fd, ret; + off_t off; + + fd = open("testfile", O_WRONLY | O_CREAT, 0644); + if (fd < 0) { + perror("open"); + return 1; + } + + for (i = 0; i < 4; i++) { + iovecs[i].iov_base = malloc(4096); + iovecs[i].iov_len = 4096; + } + + off = 0; + for (i = 0; i < 4; i++) { + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + + io_uring_prep_writev(sqe, fd, &iovecs[i], 1, off); + sqe->user_data = 0; + off += 4096; + } + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + + io_uring_prep_fsync(sqe, fd, IORING_FSYNC_DATASYNC); + sqe->user_data = 1; + io_uring_sqe_set_flags(sqe, IOSQE_IO_DRAIN); + + ret = io_uring_submit(ring); + if (ret < 0) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + goto err; + } else if (ret < 5) { + fprintf(stderr, "Submitted only %d\n", ret); + goto err; + } + + for (i = 0; i < 5; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } + /* kernel doesn't support IOSQE_IO_DRAIN */ + if (cqe->res == -EINVAL) + break; + if (i <= 3) { + if (cqe->user_data) { + fprintf(stderr, "Got fsync early?\n"); + goto err; + } + } else { + if (!cqe->user_data) { + fprintf(stderr, "Got write late?\n"); + goto err; + } + } + io_uring_cqe_seen(ring, cqe); + } + + unlink("testfile"); + return 0; +err: + unlink("testfile"); + return 1; +} + +#define FILE_SIZE 1024 + +static int create_file(const char *file) +{ 
+ ssize_t ret; + char *buf; + int fd; + + buf = malloc(FILE_SIZE); + memset(buf, 0xaa, FILE_SIZE); + + fd = open(file, O_WRONLY | O_CREAT, 0644); + if (fd < 0) { + perror("open file"); + return 1; + } + ret = write(fd, buf, FILE_SIZE); + close(fd); + return ret != FILE_SIZE; +} + +static int test_sync_file_range(struct io_uring *ring) +{ + int ret, fd, save_errno; + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + + if (create_file(".sync_file_range")) { + fprintf(stderr, "file creation failed\n"); + return 1; + } + + fd = open(".sync_file_range", O_RDWR); + save_errno = errno; + unlink(".sync_file_range"); + errno = save_errno; + if (fd < 0) { + perror("file open"); + return 1; + } + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "sqe get failed\n"); + return 1; + } + memset(sqe, 0, sizeof(*sqe)); + sqe->opcode = IORING_OP_SYNC_FILE_RANGE; + sqe->off = 0; + sqe->len = 0; + sqe->sync_range_flags = 0; + sqe->user_data = 1; + sqe->fd = fd; + + ret = io_uring_submit(ring); + if (ret != 1) { + fprintf(stderr, "submit failed: %d\n", ret); + return 1; + } + ret = io_uring_wait_cqe(ring, &cqe); + if (ret) { + fprintf(stderr, "wait_cqe failed: %d\n", ret); + return 1; + } + if (cqe->res) { + fprintf(stderr, "sfr failed: %d\n", cqe->res); + return 1; + } + + io_uring_cqe_seen(ring, cqe); + return 0; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int ret; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed\n"); + return 1; + + } + + ret = test_single_fsync(&ring); + if (ret) { + fprintf(stderr, "test_single_fsync failed\n"); + return ret; + } + + ret = test_barrier_fsync(&ring); + if (ret) { + fprintf(stderr, "test_barrier_fsync failed\n"); + return ret; + } + + ret = test_sync_file_range(&ring); + if (ret) { + fprintf(stderr, "test_sync_file_range failed\n"); + return ret; + } + + return 0; +} diff --git a/test/io-cancel.c b/test/io-cancel.c new file mode 
100644 index 0000000..e0e8fed --- /dev/null +++ b/test/io-cancel.c @@ -0,0 +1,263 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: Basic IO cancel test + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +#define FILE_SIZE (128 * 1024) +#define BS 4096 +#define BUFFERS (FILE_SIZE / BS) + +static struct iovec *vecs; + +static int create_buffers(void) +{ + int i; + + vecs = malloc(BUFFERS * sizeof(struct iovec)); + for (i = 0; i < BUFFERS; i++) { + if (posix_memalign(&vecs[i].iov_base, BS, BS)) + return 1; + vecs[i].iov_len = BS; + } + + return 0; +} + +static int create_file(const char *file) +{ + ssize_t ret; + char *buf; + int fd; + + buf = malloc(FILE_SIZE); + memset(buf, 0xaa, FILE_SIZE); + + fd = open(file, O_WRONLY | O_CREAT, 0644); + if (fd < 0) { + perror("open file"); + return 1; + } + ret = write(fd, buf, FILE_SIZE); + close(fd); + return ret != FILE_SIZE; +} + +static unsigned long long utime_since(const struct timeval *s, + const struct timeval *e) +{ + long long sec, usec; + + sec = e->tv_sec - s->tv_sec; + usec = (e->tv_usec - s->tv_usec); + if (sec > 0 && usec < 0) { + sec--; + usec += 1000000; + } + + sec *= 1000000; + return sec + usec; +} + +static unsigned long long utime_since_now(struct timeval *tv) +{ + struct timeval end; + + gettimeofday(&end, NULL); + return utime_since(tv, &end); +} + +static int start_io(struct io_uring *ring, int fd, int do_write) +{ + struct io_uring_sqe *sqe; + int i, ret; + + for (i = 0; i < BUFFERS; i++) { + off_t offset; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "sqe get failed\n"); + goto err; + } + offset = BS * (rand() % BUFFERS); + if (do_write) { + io_uring_prep_writev(sqe, fd, &vecs[i], 1, offset); + } else { + io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset); + } + sqe->user_data = i + 1; + } + + ret = io_uring_submit(ring); + if (ret != BUFFERS) { + fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS); + 
goto err; + } + + return 0; +err: + return 1; +} + +static int wait_io(struct io_uring *ring, unsigned nr_io, int do_partial) +{ + struct io_uring_cqe *cqe; + int i, ret; + + for (i = 0; i < nr_io; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret) { + fprintf(stderr, "wait_cqe=%d\n", ret); + goto err; + } + if (do_partial && cqe->user_data) { + if (!(cqe->user_data & 1)) { + if (cqe->res != BS) { + fprintf(stderr, "IO %d wasn't cancelled but got error %d\n", (unsigned) cqe->user_data, cqe->res); + goto err; + } + } + } + io_uring_cqe_seen(ring, cqe); + } + return 0; +err: + return 1; + +} + +static int do_io(struct io_uring *ring, int fd, int do_write) +{ + if (start_io(ring, fd, do_write)) + return 1; + if (wait_io(ring, BUFFERS, 0)) + return 1; + return 0; +} + +static int start_cancel(struct io_uring *ring, int do_partial) +{ + struct io_uring_sqe *sqe; + int i, ret, submitted = 0; + + for (i = 0; i < BUFFERS; i++) { + if (do_partial && (i & 1)) + continue; + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "sqe get failed\n"); + goto err; + } + io_uring_prep_cancel(sqe, (void *) (unsigned long) i + 1, 0); + sqe->user_data = 0; + submitted++; + } + + ret = io_uring_submit(ring); + if (ret != submitted) { + fprintf(stderr, "submit got %d, wanted %d\n", ret, submitted); + goto err; + } + return 0; +err: + return 1; +} + +/* + * Test cancels. If 'do_partial' is set, then we only attempt to cancel half of + * the submitted IO. This is done to verify that cancelling one piece of IO doesn't + * impact others. 
+ */ +static int test_io_cancel(const char *file, int do_write, int do_partial) +{ + struct io_uring ring; + struct timeval start_tv; + unsigned long usecs; + unsigned to_wait; + int fd, ret; + + fd = open(file, O_RDWR | O_DIRECT); + if (fd < 0) { + perror("file open"); + goto err; + } + + ret = io_uring_queue_init(4 * BUFFERS, &ring, 0); + if (ret) { + fprintf(stderr, "ring create failed: %d\n", ret); + goto err; + } + + if (do_io(&ring, fd, do_write)) + goto err; + gettimeofday(&start_tv, NULL); + if (do_io(&ring, fd, do_write)) + goto err; + usecs = utime_since_now(&start_tv); + + if (start_io(&ring, fd, do_write)) + goto err; + /* sleep for 1/3 of the total time, to allow some to start/complete */ + usleep(usecs / 3); + if (start_cancel(&ring, do_partial)) + goto err; + to_wait = BUFFERS; + if (do_partial) + to_wait += BUFFERS / 2; + else + to_wait += BUFFERS; + if (wait_io(&ring, to_wait, do_partial)) + goto err; + + io_uring_queue_exit(&ring); + close(fd); + return 0; +err: + if (fd != -1) + close(fd); + return 1; +} + +int main(int argc, char *argv[]) +{ + int i, ret; + + if (argc > 1) + return 0; + + if (create_file(".basic-rw")) { + fprintf(stderr, "file creation failed\n"); + goto err; + } + if (create_buffers()) { + fprintf(stderr, "file creation failed\n"); + goto err; + } + + for (i = 0; i < 4; i++) { + int v1 = (i & 1) != 0; + int v2 = (i & 2) != 0; + + ret = test_io_cancel(".basic-rw", v1, v2); + if (ret) { + fprintf(stderr, "test_io_cancel %d %d failed\n", v1, v2); + goto err; + } + } + + unlink(".basic-rw"); + return 0; +err: + unlink(".basic-rw"); + return 1; +} diff --git a/test/io_uring_enter.c b/test/io_uring_enter.c new file mode 100644 index 0000000..1a5ff27 --- /dev/null +++ b/test/io_uring_enter.c @@ -0,0 +1,291 @@ +/* SPDX-License-Identifier: MIT */ +/* + * io_uring_enter.c + * + * Description: Unit tests for the io_uring_enter system call. + * + * Copyright 2019, Red Hat, Inc. 
+ * Author: Jeff Moyer + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "liburing.h" +#include "liburing/barrier.h" +#include "../src/syscall.h" + +#define IORING_MAX_ENTRIES 4096 + +int +expect_failed_submit(struct io_uring *ring, int error) +{ + int ret; + + ret = io_uring_submit(ring); + if (ret == 1) { + printf("expected failure, but io_uring_submit succeeded.\n"); + return 1; + } + + if (errno != error) { + printf("expected %d, got %d\n", error, errno); + return 1; + } + + return 0; +} + +int +expect_fail(int fd, unsigned int to_submit, unsigned int min_complete, + unsigned int flags, sigset_t *sig, int error) +{ + int ret; + + ret = __sys_io_uring_enter(fd, to_submit, min_complete, flags, sig); + if (ret != -1) { + printf("expected %s, but call succeeded\n", strerror(error)); + return 1; + } + + if (errno != error) { + printf("expected %d, got %d\n", error, errno); + return 1; + } + + return 0; +} + +int +try_io_uring_enter(int fd, unsigned int to_submit, unsigned int min_complete, + unsigned int flags, sigset_t *sig, int expect, int error) +{ + int ret; + + printf("io_uring_enter(%d, %u, %u, %u, %p)\n", fd, to_submit, + min_complete, flags, sig); + + if (expect == -1) + return expect_fail(fd, to_submit, min_complete, + flags, sig, error); + + ret = __sys_io_uring_enter(fd, to_submit, min_complete, flags, sig); + if (ret != expect) { + printf("Expected %d, got %d\n", expect, errno); + return 1; + } + + return 0; +} + +/* + * prep a read I/O. index is treated like a block number. 
+ */ +int +setup_file(char *template, off_t len) +{ + int fd, ret; + char buf[4096]; + + fd = mkstemp(template); + if (fd < 0) { + perror("mkstemp"); + exit(1); + } + ret = ftruncate(fd, len); + if (ret < 0) { + perror("ftruncate"); + exit(1); + } + + ret = read(fd, buf, 4096); + if (ret != 4096) { + printf("read returned %d, expected 4096\n", ret); + exit(1); + } + + return fd; +} + +void +io_prep_read(struct io_uring_sqe *sqe, int fd, off_t offset, size_t len) +{ + struct iovec *iov; + + iov = malloc(sizeof(*iov)); + assert(iov); + + iov->iov_base = malloc(len); + assert(iov->iov_base); + iov->iov_len = len; + + io_uring_prep_readv(sqe, fd, iov, 1, offset); + io_uring_sqe_set_data(sqe, iov); // free on completion +} + +void +reap_events(struct io_uring *ring, unsigned nr) +{ + int ret; + unsigned left = nr; + struct io_uring_cqe *cqe; + struct iovec *iov; + struct timeval start, now, elapsed; + + printf("Reaping %u I/Os\n", nr); + gettimeofday(&start, NULL); + while (left) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + printf("io_uring_wait_cqe returned %d\n", ret); + printf("expected success\n"); + exit(1); + } + if (cqe->res != 4096) + printf("cqe->res: %d, expected 4096\n", cqe->res); + iov = io_uring_cqe_get_data(cqe); + free(iov->iov_base); + free(iov); + left--; + io_uring_cqe_seen(ring, cqe); + + gettimeofday(&now, NULL); + timersub(&now, &start, &elapsed); + if (elapsed.tv_sec > 10) { + printf("Timed out waiting for I/Os to complete.\n"); + printf("%u expected, %u completed\n", nr, left); + break; + } + } +} + +void +submit_io(struct io_uring *ring, unsigned nr) +{ + int fd, ret; + off_t file_len; + unsigned i; + static char template[32] = "/tmp/io_uring_enter-test.XXXXXX"; + struct io_uring_sqe *sqe; + + printf("Allocating %u sqes\n", nr); + file_len = nr * 4096; + fd = setup_file(template, file_len); + for (i = 0; i < nr; i++) { + /* allocate an sqe */ + sqe = io_uring_get_sqe(ring); + /* fill it in */ + io_prep_read(sqe, fd, i * 4096, 
4096); + } + + /* submit the I/Os */ + printf("Submitting %u I/Os\n", nr); + ret = io_uring_submit(ring); + unlink(template); + if (ret < 0) { + perror("io_uring_enter"); + exit(1); + } + printf("Done\n"); +} + +int +main(int argc, char **argv) +{ + int ret; + unsigned int status = 0; + struct io_uring ring; + struct io_uring_sq *sq = &ring.sq; + unsigned ktail, mask, index; + unsigned sq_entries; + unsigned completed, dropped; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(IORING_MAX_ENTRIES, &ring, 0); + if (ret < 0) { + perror("io_uring_queue_init"); + exit(1); + } + mask = *sq->kring_mask; + + /* invalid flags */ + status |= try_io_uring_enter(ring.ring_fd, 1, 0, ~0U, NULL, -1, EINVAL); + + /* invalid fd, EBADF */ + status |= try_io_uring_enter(-1, 0, 0, 0, NULL, -1, EBADF); + + /* valid, non-ring fd, EOPNOTSUPP */ + status |= try_io_uring_enter(0, 0, 0, 0, NULL, -1, EOPNOTSUPP); + + /* to_submit: 0, flags: 0; should get back 0. */ + status |= try_io_uring_enter(ring.ring_fd, 1, 0, 0, NULL, 0, 0); + + /* fill the sq ring */ + sq_entries = *ring.sq.kring_entries; + submit_io(&ring, sq_entries); + printf("Waiting for %u events\n", sq_entries); + ret = __sys_io_uring_enter(ring.ring_fd, 0, sq_entries, + IORING_ENTER_GETEVENTS, NULL); + if (ret < 0) { + perror("io_uring_enter"); + status = 1; + } else { + /* + * This is a non-IOPOLL ring, which means that io_uring_enter + * should not return until min_complete events are available + * in the completion queue. + */ + completed = *ring.cq.ktail - *ring.cq.khead; + if (completed != sq_entries) { + printf("Submitted %u I/Os, but only got %u completions\n", + sq_entries, completed); + status = 1; + } + reap_events(&ring, sq_entries); + } + + /* + * Add an invalid index to the submission queue. This should + * result in the dropped counter increasing. 
+ */ + printf("Submitting invalid sqe index.\n"); + index = *sq->kring_entries + 1; // invalid index + dropped = *sq->kdropped; + ktail = *sq->ktail; + sq->array[ktail & mask] = index; + ++ktail; + /* + * Ensure that the kernel sees the SQE update before it sees the tail + * update. + */ + io_uring_smp_store_release(sq->ktail, ktail); + + ret = __sys_io_uring_enter(ring.ring_fd, 1, 0, 0, NULL); + /* now check to see if our sqe was dropped */ + if (*sq->kdropped == dropped) { + printf("dropped counter did not increase\n"); + status = 1; + } + + if (!status) { + printf("PASS\n"); + return 0; + } + + printf("FAIL\n"); + return -1; +} diff --git a/test/io_uring_register.c b/test/io_uring_register.c new file mode 100644 index 0000000..eb5b971 --- /dev/null +++ b/test/io_uring_register.c @@ -0,0 +1,543 @@ +/* SPDX-License-Identifier: MIT */ +/* + * io_uring_register.c + * + * Description: Unit tests for the io_uring_register system call. + * + * Copyright 2019, Red Hat, Inc. + * Author: Jeff Moyer + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "liburing.h" +#include "../src/syscall.h" + +static int pagesize; +static rlim_t mlock_limit; +static int devnull; + +int +expect_fail(int fd, unsigned int opcode, void *arg, + unsigned int nr_args, int error) +{ + int ret; + + printf("io_uring_register(%d, %u, %p, %u)\n", + fd, opcode, arg, nr_args); + ret = __sys_io_uring_register(fd, opcode, arg, nr_args); + if (ret != -1) { + int ret2 = 0; + + printf("expected %s, but call succeeded\n", strerror(error)); + if (opcode == IORING_REGISTER_BUFFERS) { + ret2 = __sys_io_uring_register(fd, + IORING_UNREGISTER_BUFFERS, 0, 0); + } else if (opcode == IORING_REGISTER_FILES) { + ret2 = __sys_io_uring_register(fd, + IORING_UNREGISTER_FILES, 0, 0); + } + if (ret2) { + printf("internal error: failed to unregister\n"); + exit(1); + } + return 1; + } + + if (errno != error) { + 
printf("expected %d, got %d\n", error, errno); + return 1; + } + return 0; +} + +int +new_io_uring(int entries, struct io_uring_params *p) +{ + int fd; + + fd = __sys_io_uring_setup(entries, p); + if (fd < 0) { + perror("io_uring_setup"); + exit(1); + } + return fd; +} + +#define MAXFDS (UINT_MAX * sizeof(int)) + +void * +map_filebacked(size_t size) +{ + int fd, ret; + void *addr; + char template[32] = "io_uring_register-test-XXXXXXXX"; + + fd = mkstemp(template); + if (fd < 0) { + perror("mkstemp"); + return NULL; + } + unlink(template); + + ret = ftruncate(fd, size); + if (ret < 0) { + perror("ftruncate"); + close(fd); + return NULL; + } + + addr = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) { + perror("mmap"); + close(fd); + return NULL; + } + + close(fd); + return addr; +} + +/* + * NOTE: this is now limited by SCM_MAX_FD (253). Keep the code for now, + * but probably should augment it to test 253 and 254, specifically. + */ +int +test_max_fds(int uring_fd) +{ + int status = 1; + int ret; + void *fd_as; /* file descriptor address space */ + int fdtable_fd; /* fd for the file that will be mapped over and over */ + int io_fd; /* the valid fd for I/O -- /dev/null */ + int *fds; /* used to map the file into the address space */ + char template[32] = "io_uring_register-test-XXXXXXXX"; + unsigned long long i, nr_maps, nr_fds; + + /* + * First, mmap anonymous the full size. That will guarantee the + * mapping will fit in the memory area selected by mmap. Then, + * over-write that mapping using a file-backed mapping, 128MiB at + * a time using MAP_FIXED. 
+ */ + fd_as = mmap(NULL, UINT_MAX * sizeof(int), PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (fd_as == MAP_FAILED) { + if (errno == ENOMEM) { + printf("Not enough memory for this test, skipping\n"); + return 0; + } + perror("mmap fd_as"); + exit(1); + } + printf("allocated %zu bytes of address space\n", UINT_MAX * sizeof(int)); + + fdtable_fd = mkstemp(template); + if (fdtable_fd < 0) { + perror("mkstemp"); + exit(1); + } + unlink(template); + ret = ftruncate(fdtable_fd, 128*1024*1024); + if (ret < 0) { + perror("ftruncate"); + exit(1); + } + + io_fd = open("/dev/null", O_RDWR); + if (io_fd < 0) { + perror("open /dev/null"); + exit(1); + } + fds = mmap(fd_as, 128*1024*1024, PROT_READ|PROT_WRITE, + MAP_SHARED|MAP_FIXED, fdtable_fd, 0); + if (fds == MAP_FAILED) { + perror("mmap fdtable"); + exit(1); + } + + /* fill the fd table */ + nr_fds = 128*1024*1024 / sizeof(int); + for (i = 0; i < nr_fds; i++) + fds[i] = io_fd; + + /* map the file through the rest of the address space */ + nr_maps = (UINT_MAX * sizeof(int)) / (128*1024*1024); + for (i = 0; i < nr_maps; i++) { + fds = &fds[nr_fds]; /* advance fds by 128MiB */ + fds = mmap(fds, 128*1024*1024, PROT_READ|PROT_WRITE, + MAP_SHARED|MAP_FIXED, fdtable_fd, 0); + if (fds == MAP_FAILED) { + printf("mmap failed at offset %lu\n", + (unsigned long)((char *)fd_as - (char *)fds)); + exit(1); + } + } + + /* Now fd_as points to the file descriptor array. */ + /* + * We may not be able to map all of these files. Let's back off + * until success. 
+ */ + nr_fds = UINT_MAX; + while (nr_fds) { + ret = __sys_io_uring_register(uring_fd, IORING_REGISTER_FILES, + fd_as, nr_fds); + if (ret != 0) { + nr_fds /= 2; + continue; + } + printf("io_uring_register(%d, IORING_REGISTER_FILES, %p, %llu)" + "...succeeded\n", uring_fd, fd_as, nr_fds); + status = 0; + printf("io_uring_register(%d, IORING_UNREGISTER_FILES, 0, 0)...", + uring_fd); + ret = __sys_io_uring_register(uring_fd, IORING_UNREGISTER_FILES, + 0, 0); + if (ret < 0) { + ret = errno; + printf("failed\n"); + errno = ret; + perror("io_uring_register UNREGISTER_FILES"); + exit(1); + } + printf("succeeded\n"); + break; + } + + close(io_fd); + close(fdtable_fd); + ret = munmap(fd_as, UINT_MAX * sizeof(int)); + if (ret != 0) { + printf("munmap(%zu) failed\n", UINT_MAX * sizeof(int)); + exit(1); + } + + return status; +} + +int +test_memlock_exceeded(int fd) +{ + int ret; + void *buf; + struct iovec iov; + + /* if limit is larger than 2gb, just skip this test */ + if (mlock_limit >= 2 * 1024 * 1024 * 1024ULL) + return 0; + + iov.iov_len = mlock_limit * 2; + buf = malloc(iov.iov_len); + assert(buf); + iov.iov_base = buf; + + while (iov.iov_len) { + ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1); + if (ret < 0) { + if (errno == ENOMEM) { + printf("io_uring_register of %zu bytes failed " + "with ENOMEM (expected).\n", iov.iov_len); + iov.iov_len /= 2; + continue; + } + printf("expected success or EFAULT, got %d\n", errno); + free(buf); + return 1; + } + printf("successfully registered %zu bytes (%d).\n", + iov.iov_len, ret); + ret = __sys_io_uring_register(fd, IORING_UNREGISTER_BUFFERS, + NULL, 0); + if (ret != 0) { + printf("error: unregister failed with %d\n", errno); + free(buf); + return 1; + } + break; + } + if (!iov.iov_len) + printf("Unable to register buffers. 
Check memlock rlimit.\n"); + + free(buf); + return 0; +} + +int +test_iovec_nr(int fd) +{ + int i, ret, status = 0; + unsigned int nr = UIO_MAXIOV + 1; + struct iovec *iovs; + void *buf; + + buf = malloc(pagesize); + assert(buf); + + iovs = malloc(nr * sizeof(struct iovec)); + assert(iovs); + + for (i = 0; i < nr; i++) { + iovs[i].iov_base = buf; + iovs[i].iov_len = pagesize; + } + + status |= expect_fail(fd, IORING_REGISTER_BUFFERS, iovs, nr, EINVAL); + + /* reduce to UIO_MAXIOV */ + nr--; + printf("io_uring_register(%d, %u, %p, %u)\n", + fd, IORING_REGISTER_BUFFERS, iovs, nr); + ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, iovs, nr); + if (ret != 0) { + printf("expected success, got %d\n", errno); + status = 1; + } else + __sys_io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0); + + free(buf); + free(iovs); + return status; +} + +/* + * io_uring limit is 1G. iov_len limit is ~OUL, I think + */ +int +test_iovec_size(int fd) +{ + unsigned int status = 0; + int ret; + struct iovec iov; + void *buf; + + /* NULL pointer for base */ + iov.iov_base = 0; + iov.iov_len = 4096; + status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT); + + /* valid base, 0 length */ + iov.iov_base = &buf; + iov.iov_len = 0; + status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT); + + /* valid base, length exceeds size */ + /* this requires an unampped page directly after buf */ + buf = mmap(NULL, 2 * pagesize, PROT_READ|PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + assert(buf != MAP_FAILED); + ret = munmap(buf + pagesize, pagesize); + assert(ret == 0); + iov.iov_base = buf; + iov.iov_len = 2 * pagesize; + status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT); + munmap(buf, pagesize); + + /* huge page */ + buf = mmap(NULL, 2*1024*1024, PROT_READ|PROT_WRITE, + MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS, + -1, 0); + if (buf == MAP_FAILED) { + printf("Unable to map a huge page. 
Try increasing " + "/proc/sys/vm/nr_hugepages by at least 1.\n"); + printf("Skipping the hugepage test\n"); + } else { + /* + * This should succeed, so long as RLIMIT_MEMLOCK is + * not exceeded + */ + iov.iov_base = buf; + iov.iov_len = 2*1024*1024; + ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1); + if (ret < 0) { + if (errno == ENOMEM) + printf("Unable to test registering of a huge " + "page. Try increasing the " + "RLIMIT_MEMLOCK resource limit by at " + "least 2MB."); + else { + printf("expected success, got %d\n", errno); + status = 1; + } + } else { + printf("Success!\n"); + ret = __sys_io_uring_register(fd, + IORING_UNREGISTER_BUFFERS, 0, 0); + if (ret < 0) { + perror("io_uring_unregister"); + status = 1; + } + } + } + ret = munmap(iov.iov_base, iov.iov_len); + assert(ret == 0); + + /* file-backed buffers -- not supported */ + buf = map_filebacked(2*1024*1024); + if (!buf) + status = 1; + iov.iov_base = buf; + iov.iov_len = 2*1024*1024; + printf("reserve file-backed buffers\n"); + status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EOPNOTSUPP); + munmap(buf, 2*1024*1024); + + /* bump up against the soft limit and make sure we get EFAULT + * or whatever we're supposed to get. NOTE: this requires + * running the test as non-root. 
*/ + if (getuid() != 0) + status |= test_memlock_exceeded(fd); + + return status; +} + +void +dump_sqe(struct io_uring_sqe *sqe) +{ + printf("\topcode: %d\n", sqe->opcode); + printf("\tflags: 0x%.8x\n", sqe->flags); + printf("\tfd: %d\n", sqe->fd); + if (sqe->opcode == IORING_OP_POLL_ADD) + printf("\tpoll_events: 0x%.8x\n", sqe->poll_events); +} + +int +ioring_poll(struct io_uring *ring, int fd, int fixed) +{ + int ret; + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + + sqe = io_uring_get_sqe(ring); + memset(sqe, 0, sizeof(*sqe)); + sqe->opcode = IORING_OP_POLL_ADD; + if (fixed) + sqe->flags = IOSQE_FIXED_FILE; + sqe->fd = fd; + sqe->poll_events = POLLIN|POLLOUT; + + printf("io_uring_submit:\n"); + dump_sqe(sqe); + ret = io_uring_submit(ring); + if (ret != 1) { + printf("failed to submit poll sqe: %d.\n", errno); + return 1; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + printf("io_uring_wait_cqe failed with %d\n", ret); + return 1; + } + ret = 0; + if (cqe->res != POLLOUT) { + printf("io_uring_wait_cqe: expected 0x%.8x, got 0x%.8x\n", + POLLOUT, cqe->res); + ret = 1; + } + + io_uring_cqe_seen(ring, cqe); + return ret; +} + +int +test_poll_ringfd(void) +{ + int status = 0; + int ret; + int fd; + struct io_uring ring; + + ret = io_uring_queue_init(1, &ring, 0); + if (ret) { + perror("io_uring_queue_init"); + return 1; + } + fd = ring.ring_fd; + + /* try polling the ring fd */ + status = ioring_poll(&ring, fd, 0); + + /* + * now register the ring fd, and try the poll again. This should + * fail, because the kernel does not allow registering of the + * ring_fd. 
+ */ + status |= expect_fail(fd, IORING_REGISTER_FILES, &fd, 1, EBADF); + + /* tear down queue */ + io_uring_queue_exit(&ring); + + return status; +} + +int +main(int argc, char **argv) +{ + int fd, ret; + unsigned int status = 0; + struct io_uring_params p; + struct rlimit rlim; + + if (argc > 1) + return 0; + + /* setup globals */ + pagesize = getpagesize(); + ret = getrlimit(RLIMIT_MEMLOCK, &rlim); + if (ret < 0) { + perror("getrlimit"); + return 1; + } + mlock_limit = rlim.rlim_cur; + printf("RELIMIT_MEMLOCK: %lu (%lu)\n", rlim.rlim_cur, rlim.rlim_max); + devnull = open("/dev/null", O_RDWR); + if (devnull < 0) { + perror("open /dev/null"); + exit(1); + } + + /* invalid fd */ + status |= expect_fail(-1, 0, NULL, 0, EBADF); + /* valid fd that is not an io_uring fd */ + status |= expect_fail(devnull, 0, NULL, 0, EOPNOTSUPP); + + /* invalid opcode */ + memset(&p, 0, sizeof(p)); + fd = new_io_uring(1, &p); + ret = expect_fail(fd, ~0U, NULL, 0, EINVAL); + if (ret) { + /* if this succeeds, tear down the io_uring instance + * and start clean for the next test. */ + close(fd); + fd = new_io_uring(1, &p); + } + + /* IORING_REGISTER_BUFFERS */ + status |= test_iovec_size(fd); + status |= test_iovec_nr(fd); + /* IORING_REGISTER_FILES */ + status |= test_max_fds(fd); + close(fd); + /* uring poll on the uring fd */ + status |= test_poll_ringfd(); + + if (!status) + printf("PASS\n"); + else + printf("FAIL\n"); + + return status; +} diff --git a/test/io_uring_setup.c b/test/io_uring_setup.c new file mode 100644 index 0000000..a0709a7 --- /dev/null +++ b/test/io_uring_setup.c @@ -0,0 +1,192 @@ +/* SPDX-License-Identifier: MIT */ +/* + * io_uring_setup.c + * + * Description: Unit tests for the io_uring_setup system call. + * + * Copyright 2019, Red Hat, Inc. 
+ * Author: Jeff Moyer + */ +#include +#include +#include +#include +#include +#include +#include +#include "liburing.h" + +#include "../syscall.h" + +char *features_string(struct io_uring_params *p) +{ + static char flagstr[64]; + + if (!p || !p->features) + return "none"; + + if (p->features & ~IORING_FEAT_SINGLE_MMAP) { + snprintf(flagstr, 64, "0x%.8x", p->features); + return flagstr; + } + + if (p->features & IORING_FEAT_SINGLE_MMAP) + strncat(flagstr, "IORING_FEAT_SINGLE_MMAP", 64 - strlen(flagstr)); + + return flagstr; +} + +/* + * Attempt the call with the given args. Return 0 when expect matches + * the return value of the system call, 1 otherwise. + */ +char * +flags_string(struct io_uring_params *p) +{ + static char flagstr[64]; + int add_pipe = 0; + + memset(flagstr, 0, sizeof(flagstr)); + + if (!p || p->flags == 0) + return "none"; + + /* + * If unsupported flags are present, just print the bitmask. + */ + if (p->flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL | + IORING_SETUP_SQ_AFF)) { + snprintf(flagstr, 64, "0x%.8x", p->flags); + return flagstr; + } + + if (p->flags & IORING_SETUP_IOPOLL) { + strncat(flagstr, "IORING_SETUP_IOPOLL", 64 - strlen(flagstr)); + add_pipe = 1; + } + if (p->flags & IORING_SETUP_SQPOLL) { + if (add_pipe) + strncat(flagstr, "|", 64 - strlen(flagstr)); + else + add_pipe = 1; + strncat(flagstr, "IORING_SETUP_SQPOLL", 64 - strlen(flagstr)); + } + if (p->flags & IORING_SETUP_SQ_AFF) { + if (add_pipe) + strncat(flagstr, "|", 64 - strlen(flagstr)); + strncat(flagstr, "IORING_SETUP_SQ_AFF", 64 - strlen(flagstr)); + } + + return flagstr; +} + +char * +dump_resv(struct io_uring_params *p) +{ + static char resvstr[4096]; + + if (!p) + return ""; + + sprintf(resvstr, "0x%.8x 0x%.8x 0x%.8x", p->resv[0], + p->resv[1], p->resv[2]); + + return resvstr; +} + +/* bogus: setup returns a valid fd on success... 
expect can't predict the + fd we'll get, so this really only takes 1 parameter: error */ +int +try_io_uring_setup(unsigned entries, struct io_uring_params *p, int expect, int error) +{ + int ret, __errno; + + printf("io_uring_setup(%u, %p), flags: %s, feat: %s, resv: %s, sq_thread_cpu: %u\n", + entries, p, flags_string(p), features_string(p), dump_resv(p), + p ? p->sq_thread_cpu : 0); + + ret = __sys_io_uring_setup(entries, p); + if (ret != expect) { + printf("expected %d, got %d\n", expect, ret); + /* if we got a valid uring, close it */ + if (ret > 0) + close(ret); + return 1; + } + __errno = errno; + if (expect == -1 && error != __errno) { + if (__errno == EPERM && geteuid() != 0) { + printf("Needs root, not flagging as an error\n"); + return 0; + } + printf("expected errno %d, got %d\n", error, __errno); + return 1; + } + + return 0; +} + +int +main(int argc, char **argv) +{ + int fd; + unsigned int status = 0; + struct io_uring_params p; + + if (argc > 1) + return 0; + + memset(&p, 0, sizeof(p)); + status |= try_io_uring_setup(0, &p, -1, EINVAL); + status |= try_io_uring_setup(1, NULL, -1, EFAULT); + + /* resv array is non-zero */ + memset(&p, 0, sizeof(p)); + p.resv[0] = p.resv[1] = p.resv[2] = 1; + status |= try_io_uring_setup(1, &p, -1, EINVAL); + + /* invalid flags */ + memset(&p, 0, sizeof(p)); + p.flags = ~0U; + status |= try_io_uring_setup(1, &p, -1, EINVAL); + + /* IORING_SETUP_SQ_AFF set but not IORING_SETUP_SQPOLL */ + memset(&p, 0, sizeof(p)); + p.flags = IORING_SETUP_SQ_AFF; + status |= try_io_uring_setup(1, &p, -1, EINVAL); + + /* attempt to bind to invalid cpu */ + memset(&p, 0, sizeof(p)); + p.flags = IORING_SETUP_SQPOLL | IORING_SETUP_SQ_AFF; + p.sq_thread_cpu = get_nprocs_conf(); + status |= try_io_uring_setup(1, &p, -1, EINVAL); + + /* I think we can limit a process to a set of cpus. I assume + * we shouldn't be able to setup a kernel thread outside of that. + * try to do that. 
(task->cpus_allowed) */ + + /* read/write on io_uring_fd */ + memset(&p, 0, sizeof(p)); + fd = __sys_io_uring_setup(1, &p); + if (fd < 0) { + printf("io_uring_setup failed with %d, expected success\n", + errno); + status = 1; + } else { + char buf[4096]; + int ret; + ret = read(fd, buf, 4096); + if (ret >= 0) { + printf("read from io_uring fd succeeded. expected fail\n"); + status = 1; + } + } + + if (!status) { + printf("PASS\n"); + return 0; + } + + printf("FAIL\n"); + return -1; +} diff --git a/test/iopoll.c b/test/iopoll.c new file mode 100644 index 0000000..296dad2 --- /dev/null +++ b/test/iopoll.c @@ -0,0 +1,332 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: basic read/write tests with polled IO + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "liburing.h" + +#define FILE_SIZE (128 * 1024) +#define BS 4096 +#define BUFFERS (FILE_SIZE / BS) + +static struct iovec *vecs; +static int no_buf_select; +static int no_iopoll; + +static int create_buffers(void) +{ + int i; + + vecs = malloc(BUFFERS * sizeof(struct iovec)); + for (i = 0; i < BUFFERS; i++) { + if (posix_memalign(&vecs[i].iov_base, BS, BS)) + return 1; + vecs[i].iov_len = BS; + } + + return 0; +} + +static int create_file(const char *file) +{ + ssize_t ret; + char *buf; + int fd; + + buf = malloc(FILE_SIZE); + memset(buf, 0xaa, FILE_SIZE); + + fd = open(file, O_WRONLY | O_CREAT, 0644); + if (fd < 0) { + perror("open file"); + return 1; + } + ret = write(fd, buf, FILE_SIZE); + close(fd); + return ret != FILE_SIZE; +} + +static int provide_buffers(struct io_uring *ring) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + int ret, i; + + for (i = 0; i < BUFFERS; i++) { + sqe = io_uring_get_sqe(ring); + io_uring_prep_provide_buffers(sqe, vecs[i].iov_base, + vecs[i].iov_len, 1, 1, i); + } + + ret = io_uring_submit(ring); + if (ret != BUFFERS) { + fprintf(stderr, "submit: %d\n", ret); + return 1; + } + + for (i = 0; i < 
BUFFERS; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (cqe->res < 0) { + fprintf(stderr, "cqe->res=%d\n", cqe->res); + return 1; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +} + +static int __test_io(const char *file, struct io_uring *ring, int write, int sqthread, + int fixed, int buf_select) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + int open_flags; + int i, fd, ret; + off_t offset; + + if (buf_select && write) + write = 0; + if (buf_select && fixed) + fixed = 0; + + if (buf_select && provide_buffers(ring)) + return 1; + + if (write) + open_flags = O_WRONLY; + else + open_flags = O_RDONLY; + open_flags |= O_DIRECT; + + fd = open(file, open_flags); + if (fd < 0) { + perror("file open"); + goto err; + } + + if (fixed) { + ret = io_uring_register_buffers(ring, vecs, BUFFERS); + if (ret) { + fprintf(stderr, "buffer reg failed: %d\n", ret); + goto err; + } + } + if (sqthread) { + ret = io_uring_register_files(ring, &fd, 1); + if (ret) { + fprintf(stderr, "file reg failed: %d\n", ret); + goto err; + } + } + + offset = 0; + for (i = 0; i < BUFFERS; i++) { + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "sqe get failed\n"); + goto err; + } + offset = BS * (rand() % BUFFERS); + if (write) { + int do_fixed = fixed; + int use_fd = fd; + + if (sqthread) + use_fd = 0; + if (fixed && (i & 1)) + do_fixed = 0; + if (do_fixed) { + io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base, + vecs[i].iov_len, + offset, i); + } else { + io_uring_prep_writev(sqe, use_fd, &vecs[i], 1, + offset); + } + } else { + int do_fixed = fixed; + int use_fd = fd; + + if (sqthread) + use_fd = 0; + if (fixed && (i & 1)) + do_fixed = 0; + if (do_fixed) { + io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base, + vecs[i].iov_len, + offset, i); + } else { + io_uring_prep_readv(sqe, use_fd, &vecs[i], 1, + offset); + } + + } + if (sqthread) + sqe->flags |= IOSQE_FIXED_FILE; + if (buf_select) { + sqe->flags |= IOSQE_BUFFER_SELECT; + sqe->buf_group = 
buf_select; + sqe->user_data = i; + } + } + + ret = io_uring_submit(ring); + if (ret != BUFFERS) { + fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS); + goto err; + } + + for (i = 0; i < BUFFERS; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret) { + fprintf(stderr, "wait_cqe=%d\n", ret); + goto err; + } else if (cqe->res == -EOPNOTSUPP) { + fprintf(stdout, "File/device/fs doesn't support polled IO\n"); + no_iopoll = 1; + break; + } else if (cqe->res != BS) { + fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, BS); + goto err; + } + io_uring_cqe_seen(ring, cqe); + } + + if (fixed) { + ret = io_uring_unregister_buffers(ring); + if (ret) { + fprintf(stderr, "buffer unreg failed: %d\n", ret); + goto err; + } + } + if (sqthread) { + ret = io_uring_unregister_files(ring); + if (ret) { + fprintf(stderr, "file unreg failed: %d\n", ret); + goto err; + } + } + + close(fd); +#ifdef VERBOSE + fprintf(stdout, "PASS\n"); +#endif + return 0; +err: +#ifdef VERBOSE + fprintf(stderr, "FAILED\n"); +#endif + if (fd != -1) + close(fd); + return 1; +} + +static int test_io(const char *file, int write, int sqthread, int fixed, + int buf_select) +{ + struct io_uring ring; + int ret, ring_flags; + + ring_flags = IORING_SETUP_IOPOLL; + if (sqthread) + ring_flags |= IORING_SETUP_SQPOLL; + + ret = io_uring_queue_init(64, &ring, ring_flags); + if (ret) { + fprintf(stderr, "ring create failed: %d\n", ret); + return 1; + } + + ret = __test_io(file, &ring, write, sqthread, fixed, buf_select); + + io_uring_queue_exit(&ring); + return ret; +} + +static int probe_buf_select(void) +{ + struct io_uring_probe *p; + struct io_uring ring; + int ret; + + ret = io_uring_queue_init(1, &ring, 0); + if (ret) { + fprintf(stderr, "ring create failed: %d\n", ret); + return 1; + } + + p = io_uring_get_probe_ring(&ring); + if (!p || !io_uring_opcode_supported(p, IORING_OP_PROVIDE_BUFFERS)) { + no_buf_select = 1; + fprintf(stdout, "Buffer select not supported, skipping\n"); + return 0; + } + 
free(p); + return 0; +} + +int main(int argc, char *argv[]) +{ + int i, ret, nr; + char *fname; + + if (geteuid()) { + fprintf(stdout, "iopoll requires root, skipping\n"); + return 0; + } + + if (probe_buf_select()) + return 1; + + if (argc > 1) { + fname = argv[1]; + } else { + fname = ".iopoll-rw"; + if (create_file(".iopoll-rw")) { + fprintf(stderr, "file creation failed\n"); + goto err; + } + } + + if (create_buffers()) { + fprintf(stderr, "file creation failed\n"); + goto err; + } + + nr = 16; + if (no_buf_select) + nr = 8; + for (i = 0; i < nr; i++) { + int v1, v2, v3, v4; + + v1 = (i & 1) != 0; + v2 = (i & 2) != 0; + v3 = (i & 4) != 0; + v4 = (i & 8) != 0; + ret = test_io(fname, v1, v2, v3, v4); + if (ret) { + fprintf(stderr, "test_io failed %d/%d/%d/%d\n", v1, v2, v3, v4); + goto err; + } + if (no_iopoll) + break; + } + + if (fname != argv[1]) + unlink(fname); + return 0; +err: + if (fname != argv[1]) + unlink(fname); + return 1; +} diff --git a/test/lfs-openat-write.c b/test/lfs-openat-write.c new file mode 100644 index 0000000..ac35e1b --- /dev/null +++ b/test/lfs-openat-write.c @@ -0,0 +1,117 @@ +#define _LARGEFILE_SOURCE +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const int RSIZE = 2; +static const int OPEN_FLAGS = O_RDWR | O_CREAT; +static const mode_t OPEN_MODE = S_IRUSR | S_IWUSR; + +#define DIE(...) 
do {\ + fprintf(stderr, __VA_ARGS__);\ + abort();\ + } while(0); + +static int do_write(struct io_uring *ring, int fd, off_t offset) +{ + char buf[] = "some test write buf"; + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + int res, ret; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "failed to get sqe\n"); + return 1; + } + io_uring_prep_write(sqe, fd, buf, sizeof(buf), offset); + + ret = io_uring_submit(ring); + if (ret < 0) { + fprintf(stderr, "failed to submit write: %s\n", strerror(-ret)); + return 1; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait_cqe failed: %s\n", strerror(-ret)); + return 1; + } + + res = cqe->res; + io_uring_cqe_seen(ring, cqe); + if (res < 0) { + fprintf(stderr, "write failed: %s\n", strerror(-res)); + return 1; + } + + return 0; +} + +static int test_open_write(struct io_uring *ring, int dfd, const char *fn) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + int ret, fd = -1; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "failed to get sqe\n"); + return 1; + } + io_uring_prep_openat(sqe, dfd, fn, OPEN_FLAGS, OPEN_MODE); + + ret = io_uring_submit(ring); + if (ret < 0) { + fprintf(stderr, "failed to submit openat: %s\n", strerror(-ret)); + return 1; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait_cqe failed: %s\n", strerror(-ret)); + return 1; + } + + fd = cqe->res; + io_uring_cqe_seen(ring, cqe); + if (fd < 0) { + fprintf(stderr, "openat failed: %s\n", strerror(-fd)); + return 1; + } + + return do_write(ring, fd, 1ULL << 32); +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int dfd, ret; + + if (argc > 1) + return 0; + + dfd = open("/tmp", O_RDONLY | O_DIRECTORY); + if (dfd < 0) + DIE("open /tmp: %s\n", strerror(errno)); + + ret = io_uring_queue_init(RSIZE, &ring, 0); + if (ret < 0) + DIE("failed to init io_uring: %s\n", strerror(-ret)); + + ret = test_open_write(&ring, dfd, 
"io_uring_openat_write_test1"); + + io_uring_queue_exit(&ring); + close(dfd); + unlink("/tmp/io_uring_openat_write_test1"); + return ret; +} diff --git a/test/lfs-openat.c b/test/lfs-openat.c new file mode 100644 index 0000000..502743f --- /dev/null +++ b/test/lfs-openat.c @@ -0,0 +1,104 @@ +#define _LARGEFILE_SOURCE +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +#define DIE(...) do {\ + fprintf(stderr, __VA_ARGS__);\ + abort();\ + } while(0); + +static const int RSIZE = 2; +static const int OPEN_FLAGS = O_RDWR | O_CREAT; +static const mode_t OPEN_MODE = S_IRUSR | S_IWUSR; + +static int open_io_uring(struct io_uring *ring, int dfd, const char *fn) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + int ret, fd; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "failed to get sqe\n"); + return 1; + } + io_uring_prep_openat(sqe, dfd, fn, OPEN_FLAGS, OPEN_MODE); + + ret = io_uring_submit(ring); + if (ret < 0) { + fprintf(stderr, "failed to submit openat: %s\n", strerror(-ret)); + return 1; + } + + ret = io_uring_wait_cqe(ring, &cqe); + fd = cqe->res; + io_uring_cqe_seen(ring, cqe); + if (ret < 0) { + fprintf(stderr, "wait_cqe failed: %s\n", strerror(-ret)); + return 1; + } else if (fd < 0) { + fprintf(stderr, "io_uring openat failed: %s\n", strerror(-fd)); + return 1; + } + + close(fd); + return 0; +} + +static int prepare_file(int dfd, const char* fn) +{ + const char buf[] = "foo"; + int fd, res; + + fd = openat(dfd, fn, OPEN_FLAGS, OPEN_MODE); + if (fd < 0) { + fprintf(stderr, "prepare/open: %s\n", strerror(errno)); + return -1; + } + + res = pwrite(fd, buf, sizeof(buf), 1ull << 32); + if (res < 0) + fprintf(stderr, "prepare/pwrite: %s\n", strerror(errno)); + + close(fd); + return res < 0 ? 
res : 0; +} + +int main(int argc, char *argv[]) +{ + const char *fn = "io_uring_openat_test"; + int dfd = open("/tmp", O_PATH); + struct io_uring ring; + int ret; + + if (argc > 1) + return 0; + + if (dfd < 0) + DIE("open /tmp: %s\n", strerror(errno)); + + ret = io_uring_queue_init(RSIZE, &ring, 0); + if (ret < 0) + DIE("failed to init io_uring: %s\n", strerror(-ret)); + + if (prepare_file(dfd, fn)) + return 1; + + ret = open_io_uring(&ring, dfd, fn); + + io_uring_queue_exit(&ring); + close(dfd); + unlink("/tmp/io_uring_openat_test"); + return ret; +} diff --git a/test/link-timeout.c b/test/link-timeout.c new file mode 100644 index 0000000..c9aff11 --- /dev/null +++ b/test/link-timeout.c @@ -0,0 +1,1094 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: run various linked timeout cases + * + */ +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static int test_fail_lone_link_timeouts(struct io_uring *ring) +{ + struct __kernel_timespec ts; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int ret; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + io_uring_prep_link_timeout(sqe, &ts, 0); + ts.tv_sec = 1; + ts.tv_nsec = 0; + sqe->user_data = 1; + sqe->flags |= IOSQE_IO_LINK; + + ret = io_uring_submit(ring); + if (ret != 1) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + printf("wait completion %d\n", ret); + goto err; + } + + if (cqe->user_data != 1) { + fprintf(stderr, "invalid user data %d\n", cqe->res); + goto err; + } + if (cqe->res != -EINVAL) { + fprintf(stderr, "got %d, wanted -EINVAL\n", cqe->res); + goto err; + } + io_uring_cqe_seen(ring, cqe); + + return 0; +err: + return 1; +} + +static int test_fail_two_link_timeouts(struct io_uring *ring) +{ + struct __kernel_timespec ts; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int ret, i; + + ts.tv_sec = 1; + ts.tv_nsec = 
0; + + /* + * sqe_1: write destined to fail + * use buf=NULL, to do that during the issuing stage + */ + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + io_uring_prep_writev(sqe, 0, NULL, 1, 0); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 1; + + + /* sqe_2: valid linked timeout */ + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + io_uring_prep_link_timeout(sqe, &ts, 0); + sqe->user_data = 2; + sqe->flags |= IOSQE_IO_LINK; + + + /* sqe_3: invalid linked timeout */ + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + io_uring_prep_link_timeout(sqe, &ts, 0); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 3; + + /* sqe_4: invalid linked timeout */ + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + io_uring_prep_link_timeout(sqe, &ts, 0); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 4; + + ret = io_uring_submit(ring); + if (ret != 4) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + for (i = 0; i < 4; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + printf("wait completion %d\n", ret); + goto err; + } + + switch (cqe->user_data) { + case 1: + if (cqe->res != -EFAULT && cqe->res != -ECANCELED) { + fprintf(stderr, "write got %d, wanted -EFAULT " + "or -ECANCELED\n", cqe->res); + goto err; + } + break; + case 2: + if (cqe->res != -ECANCELED) { + fprintf(stderr, "Link timeout got %d, wanted -ECANCELED\n", cqe->res); + goto err; + } + break; + case 3: + /* fall through */ + case 4: + if (cqe->res != -ECANCELED && cqe->res != -EINVAL) { + fprintf(stderr, "Invalid link timeout got %d" + ", wanted -ECANCELED || -EINVAL\n", cqe->res); + goto err; + } + break; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +/* + * Test linked timeout with timeout (timeoutception) + */ +static int test_single_link_timeout_ception(struct io_uring *ring)
+{ + struct __kernel_timespec ts1, ts2; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int ret, i; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + ts1.tv_sec = 1; + ts1.tv_nsec = 0; + io_uring_prep_timeout(sqe, &ts1, -1U, 0); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 1; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + ts2.tv_sec = 2; + ts2.tv_nsec = 0; + io_uring_prep_link_timeout(sqe, &ts2, 0); + sqe->user_data = 2; + + ret = io_uring_submit(ring); + if (ret != 2) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + for (i = 0; i < 2; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + printf("wait completion %d\n", ret); + goto err; + } + switch (cqe->user_data) { + case 1: + /* newer kernels allow timeout links */ + if (cqe->res != -EINVAL && cqe->res != -ETIME) { + fprintf(stderr, "Timeout got %d, wanted " + "-EINVAL or -ETIME\n", cqe->res); + goto err; + } + break; + case 2: + if (cqe->res != -ECANCELED) { + fprintf(stderr, "Link timeout got %d, wanted -ECANCELED\n", cqe->res); + goto err; + } + break; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +/* + * Test linked timeout with NOP + */ +static int test_single_link_timeout_nop(struct io_uring *ring) +{ + struct __kernel_timespec ts; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int ret, i; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + io_uring_prep_nop(sqe); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 1; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + ts.tv_sec = 1; + ts.tv_nsec = 0; + io_uring_prep_link_timeout(sqe, &ts, 0); + sqe->user_data = 2; + + ret = io_uring_submit(ring); + if (ret != 2) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + for (i = 0; i < 2; i++) { + ret = io_uring_wait_cqe(ring, 
&cqe); + if (ret < 0) { + printf("wait completion %d\n", ret); + goto err; + } + switch (cqe->user_data) { + case 1: + if (cqe->res) { + fprintf(stderr, "NOP got %d, wanted 0\n", cqe->res); + goto err; + } + break; + case 2: + if (cqe->res != -ECANCELED) { + fprintf(stderr, "Link timeout got %d, wanted -ECANCELED\n", cqe->res); + goto err; + } + break; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +/* + * Test read that will not complete, with a linked timeout behind it that + * has errors in the SQE + */ +static int test_single_link_timeout_error(struct io_uring *ring) +{ + struct __kernel_timespec ts; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int fds[2], ret, i; + struct iovec iov; + char buffer[128]; + + if (pipe(fds)) { + perror("pipe"); + return 1; + } + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + iov.iov_base = buffer; + iov.iov_len = sizeof(buffer); + io_uring_prep_readv(sqe, fds[0], &iov, 1, 0); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 1; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + ts.tv_sec = 1; + ts.tv_nsec = 0; + io_uring_prep_link_timeout(sqe, &ts, 0); + /* set invalid field, it'll get failed */ + sqe->ioprio = 89; + sqe->user_data = 2; + + ret = io_uring_submit(ring); + if (ret != 2) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + for (i = 0; i < 2; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + printf("wait completion %d\n", ret); + goto err; + } + switch (cqe->user_data) { + case 1: + if (cqe->res != -ECANCELED) { + fprintf(stderr, "Read got %d, wanted -ECANCELED\n", + cqe->res); + goto err; + } + break; + case 2: + if (cqe->res != -EINVAL) { + fprintf(stderr, "Link timeout got %d, wanted -EINVAL\n", cqe->res); + goto err; + } + break; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +/* + * Test read that will complete, with a
linked timeout behind it + */ +static int test_single_link_no_timeout(struct io_uring *ring) +{ + struct __kernel_timespec ts; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int fds[2], ret, i; + struct iovec iov; + char buffer[128]; + + if (pipe(fds)) { + perror("pipe"); + return 1; + } + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + iov.iov_base = buffer; + iov.iov_len = sizeof(buffer); + io_uring_prep_readv(sqe, fds[0], &iov, 1, 0); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 1; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + ts.tv_sec = 1; + ts.tv_nsec = 0; + io_uring_prep_link_timeout(sqe, &ts, 0); + sqe->user_data = 2; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + iov.iov_base = buffer; + iov.iov_len = sizeof(buffer); + io_uring_prep_writev(sqe, fds[1], &iov, 1, 0); + sqe->user_data = 3; + + ret = io_uring_submit(ring); + if (ret != 3) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + for (i = 0; i < 3; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + printf("wait completion %d\n", ret); + goto err; + } + switch (cqe->user_data) { + case 1: + case 3: + if (cqe->res != sizeof(buffer)) { + fprintf(stderr, "R/W got %d, wanted %d\n", cqe->res, + (int) sizeof(buffer)); + goto err; + } + break; + case 2: + if (cqe->res != -ECANCELED) { + fprintf(stderr, "Link timeout %d, wanted -ECANCELED\n", + cqe->res); + goto err; + } + break; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +/* + * Test read that will not complete, with a linked timeout behind it + */ +static int test_single_link_timeout(struct io_uring *ring, unsigned nsec) +{ + struct __kernel_timespec ts; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int fds[2], ret, i; + struct iovec iov; + char buffer[128]; + + if (pipe(fds)) { + perror("pipe"); + return 1; + } + + sqe = 
io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + iov.iov_base = buffer; + iov.iov_len = sizeof(buffer); + io_uring_prep_readv(sqe, fds[0], &iov, 1, 0); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 1; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + ts.tv_sec = 0; + ts.tv_nsec = nsec; + io_uring_prep_link_timeout(sqe, &ts, 0); + sqe->user_data = 2; + + ret = io_uring_submit(ring); + if (ret != 2) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + for (i = 0; i < 2; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + printf("wait completion %d\n", ret); + goto err; + } + switch (cqe->user_data) { + case 1: + if (cqe->res != -EINTR && cqe->res != -ECANCELED) { + fprintf(stderr, "Read got %d\n", cqe->res); + goto err; + } + break; + case 2: + if (cqe->res != -EALREADY && cqe->res != -ETIME && + cqe->res != 0) { + fprintf(stderr, "Link timeout got %d\n", cqe->res); + goto err; + } + break; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +static int test_timeout_link_chain1(struct io_uring *ring) +{ + struct __kernel_timespec ts; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int fds[2], ret, i; + struct iovec iov; + char buffer[128]; + + if (pipe(fds)) { + perror("pipe"); + return 1; + } + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + iov.iov_base = buffer; + iov.iov_len = sizeof(buffer); + io_uring_prep_readv(sqe, fds[0], &iov, 1, 0); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 1; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + ts.tv_sec = 0; + ts.tv_nsec = 1000000; + io_uring_prep_link_timeout(sqe, &ts, 0); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 2; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + io_uring_prep_nop(sqe); + sqe->user_data = 3; + + ret = 
io_uring_submit(ring); + if (ret != 3) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + for (i = 0; i < 3; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + printf("wait completion %d\n", ret); + goto err; + } + switch (cqe->user_data) { + case 1: + if (cqe->res != -EINTR && cqe->res != -ECANCELED) { + fprintf(stderr, "Req %llu got %d\n", cqe->user_data, + cqe->res); + goto err; + } + break; + case 2: + /* FASTPOLL kernels can cancel successfully */ + if (cqe->res != -EALREADY && cqe->res != -ETIME) { + fprintf(stderr, "Req %llu got %d\n", cqe->user_data, + cqe->res); + goto err; + } + break; + case 3: + if (cqe->res != -ECANCELED) { + fprintf(stderr, "Req %llu got %d\n", cqe->user_data, + cqe->res); + goto err; + } + break; + } + + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +static int test_timeout_link_chain2(struct io_uring *ring) +{ + struct __kernel_timespec ts; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int fds[2], ret, i; + + if (pipe(fds)) { + perror("pipe"); + return 1; + } + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + io_uring_prep_poll_add(sqe, fds[0], POLLIN); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 1; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + ts.tv_sec = 0; + ts.tv_nsec = 1000000; + io_uring_prep_link_timeout(sqe, &ts, 0); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 2; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + io_uring_prep_nop(sqe); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 3; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + io_uring_prep_nop(sqe); + sqe->user_data = 4; + + ret = io_uring_submit(ring); + if (ret != 4) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + for (i = 0; i < 4; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret 
< 0) { + printf("wait completion %d\n", ret); + goto err; + } + switch (cqe->user_data) { + /* poll cancel really should return -ECANCEL... */ + case 1: + if (cqe->res != -ECANCELED) { + fprintf(stderr, "Req %llu got %d\n", cqe->user_data, + cqe->res); + goto err; + } + break; + case 2: + if (cqe->res != -ETIME) { + fprintf(stderr, "Req %llu got %d\n", cqe->user_data, + cqe->res); + goto err; + } + break; + case 3: + case 4: + if (cqe->res != -ECANCELED) { + fprintf(stderr, "Req %llu got %d\n", cqe->user_data, + cqe->res); + goto err; + } + break; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +static int test_timeout_link_chain3(struct io_uring *ring) +{ + struct __kernel_timespec ts; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int fds[2], ret, i; + + if (pipe(fds)) { + perror("pipe"); + return 1; + } + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + io_uring_prep_poll_add(sqe, fds[0], POLLIN); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 1; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + ts.tv_sec = 0; + ts.tv_nsec = 1000000; + io_uring_prep_link_timeout(sqe, &ts, 0); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 2; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + io_uring_prep_nop(sqe); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 3; + + /* POLL -> TIMEOUT -> NOP */ + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + io_uring_prep_poll_add(sqe, fds[0], POLLIN); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 4; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + ts.tv_sec = 0; + ts.tv_nsec = 1000000; + io_uring_prep_link_timeout(sqe, &ts, 0); + sqe->user_data = 5; + + /* poll on pipe + timeout */ + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + 
goto err; + } + io_uring_prep_nop(sqe); + sqe->user_data = 6; + + /* nop */ + + ret = io_uring_submit(ring); + if (ret != 6) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + for (i = 0; i < 6; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + printf("wait completion %d\n", ret); + goto err; + } + switch (cqe->user_data) { + case 2: + if (cqe->res != -ETIME) { + fprintf(stderr, "Req %llu got %d\n", cqe->user_data, + cqe->res); + goto err; + } + break; + case 1: + case 3: + case 4: + case 5: + if (cqe->res != -ECANCELED) { + fprintf(stderr, "Req %llu got %d\n", cqe->user_data, + cqe->res); + goto err; + } + break; + case 6: + if (cqe->res) { + fprintf(stderr, "Req %llu got %d\n", cqe->user_data, + cqe->res); + goto err; + } + break; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +static int test_timeout_link_chain4(struct io_uring *ring) +{ + struct __kernel_timespec ts; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int fds[2], ret, i; + + if (pipe(fds)) { + perror("pipe"); + return 1; + } + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + io_uring_prep_nop(sqe); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 1; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + io_uring_prep_poll_add(sqe, fds[0], POLLIN); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 2; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + ts.tv_sec = 0; + ts.tv_nsec = 1000000; + io_uring_prep_link_timeout(sqe, &ts, 0); + sqe->user_data = 3; + + ret = io_uring_submit(ring); + if (ret != 3) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + for (i = 0; i < 3; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + printf("wait completion %d\n", ret); + goto err; + } + switch (cqe->user_data) { + /* poll cancel really should return -ECANCEL... 
*/ + case 1: + if (cqe->res) { + fprintf(stderr, "Req %llu got %d\n", cqe->user_data, + cqe->res); + goto err; + } + break; + case 2: + if (cqe->res != -ECANCELED) { + fprintf(stderr, "Req %llu got %d\n", cqe->user_data, + cqe->res); + goto err; + } + break; + case 3: + if (cqe->res != -ETIME) { + fprintf(stderr, "Req %llu got %d\n", cqe->user_data, + cqe->res); + goto err; + } + break; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +static int test_timeout_link_chain5(struct io_uring *ring) +{ + struct __kernel_timespec ts1, ts2; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int ret, i; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + io_uring_prep_nop(sqe); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 1; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + ts1.tv_sec = 1; + ts1.tv_nsec = 0; + io_uring_prep_link_timeout(sqe, &ts1, 0); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 2; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + ts2.tv_sec = 2; + ts2.tv_nsec = 0; + io_uring_prep_link_timeout(sqe, &ts2, 0); + sqe->user_data = 3; + + ret = io_uring_submit(ring); + if (ret != 3) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + for (i = 0; i < 3; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + printf("wait completion %d\n", ret); + goto err; + } + switch (cqe->user_data) { + case 1: + if (cqe->res) { + fprintf(stderr, "Timeout got %d, wanted -EINVAL\n", + cqe->res); + goto err; + } + break; + case 2: + if (cqe->res != -ECANCELED) { + fprintf(stderr, "Link timeout got %d, wanted -ECANCELED\n", cqe->res); + goto err; + } + break; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int ret; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(8, &ring, 0); + 
if (ret) { + printf("ring setup failed\n"); + return 1; + } + + ret = test_timeout_link_chain1(&ring); + if (ret) { + printf("test_single_link_chain1 failed\n"); + return ret; + } + + ret = test_timeout_link_chain2(&ring); + if (ret) { + printf("test_single_link_chain2 failed\n"); + return ret; + } + + ret = test_timeout_link_chain3(&ring); + if (ret) { + printf("test_single_link_chain3 failed\n"); + return ret; + } + + ret = test_timeout_link_chain4(&ring); + if (ret) { + printf("test_single_link_chain4 failed\n"); + return ret; + } + + ret = test_timeout_link_chain5(&ring); + if (ret) { + printf("test_single_link_chain5 failed\n"); + return ret; + } + + ret = test_single_link_timeout(&ring, 10); + if (ret) { + printf("test_single_link_timeout 10 failed\n"); + return ret; + } + + ret = test_single_link_timeout(&ring, 100000ULL); + if (ret) { + printf("test_single_link_timeout 100000 failed\n"); + return ret; + } + + ret = test_single_link_timeout(&ring, 500000000ULL); + if (ret) { + printf("test_single_link_timeout 500000000 failed\n"); + return ret; + } + + ret = test_single_link_no_timeout(&ring); + if (ret) { + printf("test_single_link_no_timeout failed\n"); + return ret; + } + + ret = test_single_link_timeout_error(&ring); + if (ret) { + printf("test_single_link_timeout_error failed\n"); + return ret; + } + + ret = test_single_link_timeout_nop(&ring); + if (ret) { + printf("test_single_link_timeout_nop failed\n"); + return ret; + } + + ret = test_single_link_timeout_ception(&ring); + if (ret) { + printf("test_single_link_timeout_ception failed\n"); + return ret; + } + + ret = test_fail_lone_link_timeouts(&ring); + if (ret) { + printf("test_fail_lone_link_timeouts failed\n"); + return ret; + } + + ret = test_fail_two_link_timeouts(&ring); + if (ret) { + printf("test_fail_two_link_timeouts failed\n"); + return ret; + } + + return 0; +} diff --git a/test/link.c b/test/link.c new file mode 100644 index 0000000..c89d6b2 --- /dev/null +++ b/test/link.c @@ -0,0 
+1,496 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: run various linked sqe tests + * + */ +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static int no_hardlink; + +/* + * Timer with single nop + */ +static int test_single_hardlink(struct io_uring *ring) +{ + struct __kernel_timespec ts; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int ret, i; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + ts.tv_sec = 0; + ts.tv_nsec = 10000000ULL; + io_uring_prep_timeout(sqe, &ts, 0, 0); + sqe->flags |= IOSQE_IO_LINK | IOSQE_IO_HARDLINK; + sqe->user_data = 1; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + io_uring_prep_nop(sqe); + sqe->user_data = 2; + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + goto err; + } + + for (i = 0; i < 2; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } + if (!cqe) { + fprintf(stderr, "failed to get cqe\n"); + goto err; + } + if (no_hardlink) + goto next; + if (cqe->user_data == 1 && cqe->res == -EINVAL) { + fprintf(stdout, "Hard links not supported, skipping\n"); + no_hardlink = 1; + goto next; + } + if (cqe->user_data == 1 && cqe->res != -ETIME) { + fprintf(stderr, "timeout failed with %d\n", cqe->res); + goto err; + } + if (cqe->user_data == 2 && cqe->res) { + fprintf(stderr, "nop failed with %d\n", cqe->res); + goto err; + } +next: + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +/* + * Timer -> timer -> nop + */ +static int test_double_hardlink(struct io_uring *ring) +{ + struct __kernel_timespec ts1, ts2; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int ret, i; + + if (no_hardlink) + return 0; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + 
ts1.tv_sec = 0; + ts1.tv_nsec = 10000000ULL; + io_uring_prep_timeout(sqe, &ts1, 0, 0); + sqe->flags |= IOSQE_IO_LINK | IOSQE_IO_HARDLINK; + sqe->user_data = 1; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + ts2.tv_sec = 0; + ts2.tv_nsec = 15000000ULL; + io_uring_prep_timeout(sqe, &ts2, 0, 0); + sqe->flags |= IOSQE_IO_LINK | IOSQE_IO_HARDLINK; + sqe->user_data = 2; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + io_uring_prep_nop(sqe); + sqe->user_data = 3; + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + goto err; + } + + for (i = 0; i < 3; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } + if (!cqe) { + fprintf(stderr, "failed to get cqe\n"); + goto err; + } + if (cqe->user_data == 1 && cqe->res != -ETIME) { + fprintf(stderr, "timeout failed with %d\n", cqe->res); + goto err; + } + if (cqe->user_data == 2 && cqe->res != -ETIME) { + fprintf(stderr, "timeout failed with %d\n", cqe->res); + goto err; + } + if (cqe->user_data == 3 && cqe->res) { + fprintf(stderr, "nop failed with %d\n", cqe->res); + goto err; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; + +} + +/* + * Test failing head of chain, and dependent getting -ECANCELED + */ +static int test_single_link_fail(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int ret, i; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + io_uring_prep_nop(sqe); + sqe->flags |= IOSQE_IO_LINK; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + io_uring_prep_nop(sqe); + + ret = io_uring_submit(ring); + if (ret <= 0) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + for (i = 0; i < 2; i++) { + ret = io_uring_peek_cqe(ring, 
&cqe); + if (ret < 0) { + printf("wait completion %d\n", ret); + goto err; + } + if (!cqe) { + printf("failed to get cqe\n"); + goto err; + } + if (i == 0 && cqe->res != -EINVAL) { + printf("sqe0 failed with %d, wanted -EINVAL\n", cqe->res); + goto err; + } + if (i == 1 && cqe->res != -ECANCELED) { + printf("sqe1 failed with %d, wanted -ECANCELED\n", cqe->res); + goto err; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +/* + * Test two independent chains + */ +static int test_double_chain(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int ret, i; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + io_uring_prep_nop(sqe); + sqe->flags |= IOSQE_IO_LINK; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + io_uring_prep_nop(sqe); + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + io_uring_prep_nop(sqe); + sqe->flags |= IOSQE_IO_LINK; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + io_uring_prep_nop(sqe); + + ret = io_uring_submit(ring); + if (ret <= 0) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + for (i = 0; i < 4; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + printf("wait completion %d\n", ret); + goto err; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +/* + * Test multiple dependents + */ +static int test_double_link(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int ret, i; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + io_uring_prep_nop(sqe); + sqe->flags |= IOSQE_IO_LINK; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + io_uring_prep_nop(sqe); + sqe->flags |= IOSQE_IO_LINK; + + sqe = io_uring_get_sqe(ring); + 
if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + io_uring_prep_nop(sqe); + + ret = io_uring_submit(ring); + if (ret <= 0) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + for (i = 0; i < 3; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + printf("wait completion %d\n", ret); + goto err; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +/* + * Test single dependency + */ +static int test_single_link(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int ret, i; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + io_uring_prep_nop(sqe); + sqe->flags |= IOSQE_IO_LINK; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + io_uring_prep_nop(sqe); + + ret = io_uring_submit(ring); + if (ret <= 0) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + for (i = 0; i < 2; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + printf("wait completion %d\n", ret); + goto err; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +static int test_early_fail_and_wait(void) +{ + struct io_uring ring; + struct io_uring_sqe *sqe; + int ret, invalid_fd = 42; + struct iovec iov = { .iov_base = NULL, .iov_len = 0 }; + + /* create a new ring as it leaves it dirty */ + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + printf("ring setup failed\n"); + return 1; + } + + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + io_uring_prep_readv(sqe, invalid_fd, &iov, 1, 0); + sqe->flags |= IOSQE_IO_LINK; + + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + printf("get sqe failed\n"); + goto err; + } + + io_uring_prep_nop(sqe); + + ret = io_uring_submit_and_wait(&ring, 2); + if (ret <= 0 && ret != -EAGAIN) { + printf("sqe submit failed: %d\n", ret); + goto err; + } + + io_uring_queue_exit(&ring); + return 0; +err: + 
io_uring_queue_exit(&ring); + return 1; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring, poll_ring; + int ret; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + printf("ring setup failed\n"); + return 1; + + } + + ret = io_uring_queue_init(8, &poll_ring, IORING_SETUP_IOPOLL); + if (ret) { + printf("poll_ring setup failed\n"); + return 1; + } + + ret = test_single_link(&ring); + if (ret) { + printf("test_single_link failed\n"); + return ret; + } + + ret = test_double_link(&ring); + if (ret) { + printf("test_double_link failed\n"); + return ret; + } + + ret = test_double_chain(&ring); + if (ret) { + printf("test_double_chain failed\n"); + return ret; + } + + ret = test_single_link_fail(&poll_ring); + if (ret) { + printf("test_single_link_fail failed\n"); + return ret; + } + + ret = test_single_hardlink(&ring); + if (ret) { + fprintf(stderr, "test_single_hardlink\n"); + return ret; + } + + ret = test_double_hardlink(&ring); + if (ret) { + fprintf(stderr, "test_double_hardlink\n"); + return ret; + } + + ret = test_early_fail_and_wait(); + if (ret) { + fprintf(stderr, "test_early_fail_and_wait\n"); + return ret; + } + + return 0; +} diff --git a/test/link_drain.c b/test/link_drain.c new file mode 100644 index 0000000..f2eff51 --- /dev/null +++ b/test/link_drain.c @@ -0,0 +1,229 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: test io_uring link io with drain io + * + */ +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static int test_link_drain_one(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe[5]; + struct iovec iovecs; + int i, fd, ret; + off_t off = 0; + char data[5] = {0}; + char expect[5] = {0, 1, 2, 3, 4}; + + fd = open("testfile", O_WRONLY | O_CREAT, 0644); + if (fd < 0) { + perror("open"); + return 1; + } + + iovecs.iov_base = malloc(4096); + iovecs.iov_len = 4096; + + for (i = 0; i < 5; i++) { + sqe[i] = 
io_uring_get_sqe(ring); + if (!sqe[i]) { + printf("get sqe failed\n"); + goto err; + } + } + + /* normal heavy io */ + io_uring_prep_writev(sqe[0], fd, &iovecs, 1, off); + sqe[0]->user_data = 0; + + /* link io */ + io_uring_prep_nop(sqe[1]); + sqe[1]->flags |= IOSQE_IO_LINK; + sqe[1]->user_data = 1; + + /* link drain io */ + io_uring_prep_nop(sqe[2]); + sqe[2]->flags |= (IOSQE_IO_LINK | IOSQE_IO_DRAIN); + sqe[2]->user_data = 2; + + /* link io */ + io_uring_prep_nop(sqe[3]); + sqe[3]->user_data = 3; + + /* normal nop io */ + io_uring_prep_nop(sqe[4]); + sqe[4]->user_data = 4; + + ret = io_uring_submit(ring); + if (ret < 0) { + printf("sqe submit failed\n"); + goto err; + } else if (ret < 5) { + printf("Submitted only %d\n", ret); + goto err; + } + + for (i = 0; i < 5; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + printf("child: wait completion %d\n", ret); + goto err; + } + + data[i] = cqe->user_data; + io_uring_cqe_seen(ring, cqe); + } + + if (memcmp(data, expect, 5) != 0) + goto err; + + free(iovecs.iov_base); + close(fd); + unlink("testfile"); + return 0; +err: + free(iovecs.iov_base); + close(fd); + unlink("testfile"); + return 1; +} + +int test_link_drain_multi(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe[9]; + struct iovec iovecs; + int i, fd, ret; + off_t off = 0; + char data[9] = {0}; + char expect[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; + + fd = open("testfile", O_WRONLY | O_CREAT, 0644); + if (fd < 0) { + perror("open"); + return 1; + } + + iovecs.iov_base = malloc(4096); + iovecs.iov_len = 4096; + + for (i = 0; i < 9; i++) { + sqe[i] = io_uring_get_sqe(ring); + if (!sqe[i]) { + printf("get sqe failed\n"); + goto err; + } + } + + /* normal heavy io */ + io_uring_prep_writev(sqe[0], fd, &iovecs, 1, off); + sqe[0]->user_data = 0; + + /* link1 io head */ + io_uring_prep_nop(sqe[1]); + sqe[1]->flags |= IOSQE_IO_LINK; + sqe[1]->user_data = 1; + + /* link1 drain io */ + io_uring_prep_nop(sqe[2]); + sqe[2]->flags 
|= (IOSQE_IO_LINK | IOSQE_IO_DRAIN); + sqe[2]->user_data = 2; + + /* link1 io end*/ + io_uring_prep_nop(sqe[3]); + sqe[3]->user_data = 3; + + /* link2 io head */ + io_uring_prep_nop(sqe[4]); + sqe[4]->flags |= IOSQE_IO_LINK; + sqe[4]->user_data = 4; + + /* link2 io */ + io_uring_prep_nop(sqe[5]); + sqe[5]->flags |= IOSQE_IO_LINK; + sqe[5]->user_data = 5; + + /* link2 drain io */ + io_uring_prep_writev(sqe[6], fd, &iovecs, 1, off); + sqe[6]->flags |= (IOSQE_IO_LINK | IOSQE_IO_DRAIN); + sqe[6]->user_data = 6; + + /* link2 io end */ + io_uring_prep_nop(sqe[7]); + sqe[7]->user_data = 7; + + /* normal io */ + io_uring_prep_nop(sqe[8]); + sqe[8]->user_data = 8; + + ret = io_uring_submit(ring); + if (ret < 0) { + printf("sqe submit failed\n"); + goto err; + } else if (ret < 9) { + printf("Submitted only %d\n", ret); + goto err; + } + + for (i = 0; i < 9; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + printf("child: wait completion %d\n", ret); + goto err; + } + + data[i] = cqe->user_data; + io_uring_cqe_seen(ring, cqe); + } + + if (memcmp(data, expect, 9) != 0) + goto err; + + free(iovecs.iov_base); + close(fd); + unlink("testfile"); + return 0; +err: + free(iovecs.iov_base); + close(fd); + unlink("testfile"); + return 1; + +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int i, ret; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(100, &ring, 0); + if (ret) { + printf("ring setup failed\n"); + return 1; + } + + for (i = 0; i < 1000; i++) { + ret = test_link_drain_one(&ring); + if (ret) { + fprintf(stderr, "test_link_drain_one failed\n"); + break; + } + ret = test_link_drain_multi(&ring); + if (ret) { + fprintf(stderr, "test_link_drain_multi failed\n"); + break; + } + } + + return ret; +} diff --git a/test/madvise.c b/test/madvise.c new file mode 100644 index 0000000..e3af4f1 --- /dev/null +++ b/test/madvise.c @@ -0,0 +1,216 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: basic madvise test + */ +#include 
+#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +#define FILE_SIZE (128 * 1024) + +#define LOOPS 100 +#define MIN_LOOPS 10 + +static unsigned long long utime_since(const struct timeval *s, + const struct timeval *e) +{ + long long sec, usec; + + sec = e->tv_sec - s->tv_sec; + usec = (e->tv_usec - s->tv_usec); + if (sec > 0 && usec < 0) { + sec--; + usec += 1000000; + } + + sec *= 1000000; + return sec + usec; +} + +static unsigned long long utime_since_now(struct timeval *tv) +{ + struct timeval end; + + gettimeofday(&end, NULL); + return utime_since(tv, &end); +} + +static int create_file(const char *file) +{ + ssize_t ret; + char *buf; + int fd; + + buf = malloc(FILE_SIZE); + memset(buf, 0xaa, FILE_SIZE); + + fd = open(file, O_WRONLY | O_CREAT, 0644); + if (fd < 0) { + perror("open file"); + return 1; + } + ret = write(fd, buf, FILE_SIZE); + fsync(fd); + close(fd); + return ret != FILE_SIZE; +} + +static int do_madvise(struct io_uring *ring, void *addr, off_t len, int advice) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + int ret; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "failed to get sqe\n"); + return 1; + } + + io_uring_prep_madvise(sqe, addr, len, advice); + sqe->user_data = advice; + ret = io_uring_submit_and_wait(ring, 1); + if (ret != 1) { + fprintf(stderr, "submit: %d\n", ret); + return 1; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret) { + fprintf(stderr, "wait: %d\n", ret); + return 1; + } + + ret = cqe->res; + if (ret == -EINVAL || ret == -EBADF) { + fprintf(stdout, "Madvise not supported, skipping\n"); + unlink(".madvise.tmp"); + exit(0); + } else if (ret) { + fprintf(stderr, "cqe->res=%d\n", cqe->res); + } + io_uring_cqe_seen(ring, cqe); + return ret; +} + +static long do_copy(int fd, char *buf, void *ptr) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + memcpy(buf, ptr, FILE_SIZE); + return utime_since_now(&tv); +} + +static int 
test_madvise(struct io_uring *ring, const char *filename) +{ + unsigned long cached_read, uncached_read, cached_read2; + int fd, ret; + char *buf; + void *ptr; + + fd = open(filename, O_RDONLY); + if (fd < 0) { + perror("open"); + return 1; + } + + buf = malloc(FILE_SIZE); + + ptr = mmap(NULL, FILE_SIZE, PROT_READ, MAP_PRIVATE, fd, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + return 1; + } + + cached_read = do_copy(fd, buf, ptr); + if (cached_read == -1) + return 1; + + cached_read = do_copy(fd, buf, ptr); + if (cached_read == -1) + return 1; + + ret = do_madvise(ring, ptr, FILE_SIZE, MADV_DONTNEED); + if (ret) + return 1; + + uncached_read = do_copy(fd, buf, ptr); + if (uncached_read == -1) + return 1; + + ret = do_madvise(ring, ptr, FILE_SIZE, MADV_DONTNEED); + if (ret) + return 1; + + ret = do_madvise(ring, ptr, FILE_SIZE, MADV_WILLNEED); + if (ret) + return 1; + + msync(ptr, FILE_SIZE, MS_SYNC); + + cached_read2 = do_copy(fd, buf, ptr); + if (cached_read2 == -1) + return 1; + + if (cached_read < uncached_read && + cached_read2 < uncached_read) + return 0; + + return 2; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int ret, i, good, bad; + char *fname; + + if (argc > 1) { + fname = argv[1]; + } else { + fname = ".madvise.tmp"; + if (create_file(".madvise.tmp")) { + fprintf(stderr, "file creation failed\n"); + goto err; + } + } + + if (io_uring_queue_init(8, &ring, 0)) { + fprintf(stderr, "ring creation failed\n"); + goto err; + } + + good = bad = 0; + for (i = 0; i < LOOPS; i++) { + ret = test_madvise(&ring, fname); + if (ret == 1) { + fprintf(stderr, "test_madvise failed\n"); + goto err; + } else if (!ret) + good++; + else if (ret == 2) + bad++; + if (i >= MIN_LOOPS && !bad) + break; + } + + if (bad > good) + fprintf(stderr, "Suspicious timings (%u > %u)\n", bad, good); + if (fname != argv[1]) + unlink(fname); + io_uring_queue_exit(&ring); + return 0; +err: + if (fname != argv[1]) + unlink(fname); + return 1; +} diff --git 
a/test/nop.c b/test/nop.c new file mode 100644 index 0000000..82201bd --- /dev/null +++ b/test/nop.c @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: run various nop tests + * + */ +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static int test_single_nop(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int ret; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + + io_uring_prep_nop(sqe); + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + goto err; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } + + io_uring_cqe_seen(ring, cqe); + return 0; +err: + return 1; +} + +static int test_barrier_nop(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int ret, i; + + for (i = 0; i < 8; i++) { + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + + io_uring_prep_nop(sqe); + if (i == 4) + sqe->flags = IOSQE_IO_DRAIN; + } + + ret = io_uring_submit(ring); + if (ret < 0) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + goto err; + } else if (ret < 8) { + fprintf(stderr, "Submitted only %d\n", ret); + goto err; + } + + for (i = 0; i < 8; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int ret; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed: %d\n", ret); + return 1; + } + + ret = test_single_nop(&ring); + if (ret) { + fprintf(stderr, "test_single_nop failed\n"); + return ret; + } + + ret = test_barrier_nop(&ring); + if (ret) { + 
fprintf(stderr, "test_barrier_nop failed\n"); + return ret; + } + + return 0; +} diff --git a/test/open-close.c b/test/open-close.c new file mode 100644 index 0000000..cb74d91 --- /dev/null +++ b/test/open-close.c @@ -0,0 +1,168 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: run various openat(2) tests + * + */ +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static int create_file(const char *file, size_t size) +{ + ssize_t ret; + char *buf; + int fd; + + buf = malloc(size); + memset(buf, 0xaa, size); + + fd = open(file, O_WRONLY | O_CREAT, 0644); + if (fd < 0) { + perror("open file"); + return 1; + } + ret = write(fd, buf, size); + close(fd); + return ret != size; +} + +static int test_close(struct io_uring *ring, int fd, int is_ring_fd) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int ret; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + io_uring_prep_close(sqe, fd); + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + goto err; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + if (!(is_ring_fd && ret == -EBADF)) { + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } + return ret; + } + ret = cqe->res; + io_uring_cqe_seen(ring, cqe); + return ret; +err: + return -1; +} + +static int test_openat(struct io_uring *ring, const char *path, int dfd) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int ret; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + io_uring_prep_openat(sqe, dfd, path, O_RDONLY, 0); + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + goto err; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } + ret = cqe->res; + io_uring_cqe_seen(ring, cqe); + return ret; 
+err: + return -1; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + const char *path, *path_rel; + int ret, do_unlink; + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed\n"); + return 1; + } + + if (argc > 1) { + path = "/tmp/.open.close"; + path_rel = argv[1]; + do_unlink = 0; + } else { + path = "/tmp/.open.close"; + path_rel = ".open.close"; + do_unlink = 1; + } + + if (create_file(path, 4096)) { + fprintf(stderr, "file create failed\n"); + return 1; + } + if (do_unlink && create_file(path_rel, 4096)) { + fprintf(stderr, "file create failed\n"); + return 1; + } + + ret = test_openat(&ring, path, -1); + if (ret < 0) { + if (ret == -EINVAL) { + fprintf(stdout, "Open not supported, skipping\n"); + goto done; + } + fprintf(stderr, "test_openat absolute failed: %d\n", ret); + goto err; + } + + ret = test_openat(&ring, path_rel, AT_FDCWD); + if (ret < 0) { + fprintf(stderr, "test_openat relative failed: %d\n", ret); + goto err; + } + + ret = test_close(&ring, ret, 0); + if (ret) { + fprintf(stderr, "test_close normal failed\n"); + goto err; + } + + ret = test_close(&ring, ring.ring_fd, 1); + if (ret != -EBADF) { + fprintf(stderr, "test_close ring_fd failed\n"); + goto err; + } + +done: + unlink(path); + if (do_unlink) + unlink(path_rel); + return 0; +err: + unlink(path); + if (do_unlink) + unlink(path_rel); + return 1; +} diff --git a/test/openat2.c b/test/openat2.c new file mode 100644 index 0000000..197821a --- /dev/null +++ b/test/openat2.c @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: run various openat(2) tests + * + */ +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static int create_file(const char *file, size_t size) +{ + ssize_t ret; + char *buf; + int fd; + + buf = malloc(size); + memset(buf, 0xaa, size); + + fd = open(file, O_WRONLY | O_CREAT, 0644); + if (fd < 0) { + perror("open file"); + return 1; + } + ret = write(fd, buf, 
size); + close(fd); + return ret != size; +} + +static int test_openat2(struct io_uring *ring, const char *path, int dfd) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct open_how how; + int ret; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + memset(&how, 0, sizeof(how)); + how.flags = O_RDONLY; + io_uring_prep_openat2(sqe, dfd, path, &how); + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + goto err; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } + ret = cqe->res; + io_uring_cqe_seen(ring, cqe); + return ret; +err: + return -1; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + const char *path, *path_rel; + int ret, do_unlink; + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed\n"); + return 1; + } + + if (argc > 1) { + path = "/tmp/.open.close"; + path_rel = argv[1]; + do_unlink = 0; + } else { + path = "/tmp/.open.close"; + path_rel = ".open.close"; + do_unlink = 1; + } + + if (create_file(path, 4096)) { + fprintf(stderr, "file create failed\n"); + return 1; + } + if (do_unlink && create_file(path_rel, 4096)) { + fprintf(stderr, "file create failed\n"); + return 1; + } + + ret = test_openat2(&ring, path, -1); + if (ret < 0) { + if (ret == -EINVAL) { + fprintf(stdout, "openat2 not supported, skipping\n"); + goto done; + } + fprintf(stderr, "test_openat2 absolute failed: %d\n", ret); + goto err; + } + + ret = test_openat2(&ring, path_rel, AT_FDCWD); + if (ret < 0) { + fprintf(stderr, "test_openat2 relative failed: %d\n", ret); + goto err; + } + +done: + unlink(path); + if (do_unlink) + unlink(path_rel); + return 0; +err: + unlink(path); + if (do_unlink) + unlink(path_rel); + return 1; +} diff --git a/test/personality.c b/test/personality.c new file mode 100644 index 0000000..591ec83 --- /dev/null +++ 
b/test/personality.c @@ -0,0 +1,204 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: test if personalities work + * + */ +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +#define FNAME "/tmp/.tmp.access" +#define USE_UID 1000 + +static int no_personality; + +static int open_file(struct io_uring *ring, int cred_id, int with_link) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int ret, i, to_submit = 1; + + if (with_link) { + sqe = io_uring_get_sqe(ring); + io_uring_prep_nop(sqe); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 1; + to_submit++; + } + + sqe = io_uring_get_sqe(ring); + io_uring_prep_openat(sqe, -1, FNAME, O_RDONLY, 0); + sqe->user_data = 2; + + if (cred_id != -1) + sqe->personality = cred_id; + + ret = io_uring_submit(ring); + if (ret != to_submit) { + fprintf(stderr, "submit got: %d\n", ret); + goto err; + } + + for (i = 0; i < to_submit; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } + + ret = cqe->res; + io_uring_cqe_seen(ring, cqe); + } +err: + return ret; +} + +static int test_personality(struct io_uring *ring) +{ + int ret, cred_id; + + ret = io_uring_register_personality(ring); + if (ret < 0) { + if (ret == -EINVAL) { + fprintf(stdout, "Personalities not supported, skipping\n"); + no_personality = 1; + goto out; + } + fprintf(stderr, "register_personality: %d\n", ret); + goto err; + } + cred_id = ret; + + /* create file only owner can open */ + ret = open(FNAME, O_RDONLY | O_CREAT, 0600); + if (ret < 0) { + perror("open"); + goto err; + } + close(ret); + + /* verify we can open it */ + ret = open_file(ring, -1, 0); + if (ret < 0) { + fprintf(stderr, "current open got: %d\n", ret); + goto err; + } + + if (seteuid(USE_UID) < 0) { + fprintf(stdout, "Can't switch to UID %u, skipping\n", USE_UID); + goto out; + } + + /* verify we can't open it with current credentials */ + ret = open_file(ring, -1, 0); + if 
(ret != -EACCES) { + fprintf(stderr, "open got: %d\n", ret); + goto err; + } + + /* verify we can open with registered credentials */ + ret = open_file(ring, cred_id, 0); + if (ret < 0) { + fprintf(stderr, "credential open: %d\n", ret); + goto err; + } + close(ret); + + /* verify we can open with registered credentials and as a link */ + ret = open_file(ring, cred_id, 1); + if (ret < 0) { + fprintf(stderr, "credential open: %d\n", ret); + goto err; + } + + if (seteuid(0)) + perror("seteuid"); + + ret = io_uring_unregister_personality(ring, cred_id); + if (ret) { + fprintf(stderr, "register_personality: %d\n", ret); + goto err; + } + +out: + unlink(FNAME); + return 0; +err: + unlink(FNAME); + return 1; +} + +static int test_invalid_personality(struct io_uring *ring) +{ + int ret; + + ret = open_file(ring, 2, 0); + if (ret != -EINVAL) { + fprintf(stderr, "invalid personality got: %d\n", ret); + goto err; + } + return 0; +err: + return 1; +} + +static int test_invalid_unregister(struct io_uring *ring) +{ + int ret; + + ret = io_uring_unregister_personality(ring, 2); + if (ret != -EINVAL) { + fprintf(stderr, "invalid personality unregister got: %d\n", ret); + goto err; + } + return 0; +err: + return 1; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int ret; + + if (argc > 1) + return 0; + + if (geteuid()) { + fprintf(stderr, "Not root, skipping\n"); + return 0; + } + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed: %d\n", ret); + return 1; + } + + ret = test_personality(&ring); + if (ret) { + fprintf(stderr, "test_personality failed\n"); + return ret; + } + if (no_personality) + return 0; + + ret = test_invalid_personality(&ring); + if (ret) { + fprintf(stderr, "test_invalid_personality failed\n"); + return ret; + } + + ret = test_invalid_unregister(&ring); + if (ret) { + fprintf(stderr, "test_invalid_unregister failed\n"); + return ret; + } + + return 0; +} diff --git a/test/poll-cancel-ton.c 
b/test/poll-cancel-ton.c new file mode 100644 index 0000000..1a75463 --- /dev/null +++ b/test/poll-cancel-ton.c @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: test massive amounts of poll with cancel + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +#define POLL_COUNT 30000 + +static void *sqe_index[POLL_COUNT]; + +static int reap_events(struct io_uring *ring, unsigned nr_events, int nowait) +{ + struct io_uring_cqe *cqe; + int i, ret = 0; + + for (i = 0; i < nr_events; i++) { + if (!i && !nowait) + ret = io_uring_wait_cqe(ring, &cqe); + else + ret = io_uring_peek_cqe(ring, &cqe); + if (ret) { + if (ret != -EAGAIN) + fprintf(stderr, "cqe peek failed: %d\n", ret); + break; + } + io_uring_cqe_seen(ring, cqe); + } + + return i ? i : ret; +} + +static int del_polls(struct io_uring *ring, int fd, int nr) +{ + int batch, i, ret; + struct io_uring_sqe *sqe; + + while (nr) { + batch = 1024; + if (batch > nr) + batch = nr; + + for (i = 0; i < batch; i++) { + void *data; + + sqe = io_uring_get_sqe(ring); + data = sqe_index[lrand48() % nr]; + io_uring_prep_poll_remove(sqe, data); + } + + ret = io_uring_submit(ring); + if (ret != batch) { + fprintf(stderr, "%s: failed submit, %d\n", __FUNCTION__, ret); + return 1; + } + nr -= batch; + ret = reap_events(ring, 2 * batch, 0); + } + return 0; +} + +static int add_polls(struct io_uring *ring, int fd, int nr) +{ + int pending, batch, i, count, ret; + struct io_uring_sqe *sqe; + + pending = count = 0; + while (nr) { + batch = 1024; + if (batch > nr) + batch = nr; + + for (i = 0; i < batch; i++) { + sqe = io_uring_get_sqe(ring); + io_uring_prep_poll_add(sqe, fd, POLLIN); + sqe_index[count++] = sqe; + sqe->user_data = (unsigned long) sqe; + } + + ret = io_uring_submit(ring); + if (ret != batch) { + fprintf(stderr, "%s: failed submit, %d\n", __FUNCTION__, ret); + return 1; + } + nr -= batch; + pending += batch; + reap_events(ring, 
batch, 1); + } + return 0; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int pipe1[2]; + int ret; + + if (argc > 1) + return 0; + + if (pipe(pipe1) != 0) { + perror("pipe"); + return 1; + } + + ret = io_uring_queue_init(1024, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed: %d\n", ret); + return 1; + } + + add_polls(&ring, pipe1[0], 30000); +#if 0 + usleep(1000); +#endif + del_polls(&ring, pipe1[0], 30000); + + io_uring_queue_exit(&ring); + return 0; +} diff --git a/test/poll-cancel.c b/test/poll-cancel.c new file mode 100644 index 0000000..a74e915 --- /dev/null +++ b/test/poll-cancel.c @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: test io_uring poll cancel handling + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +struct poll_data { + unsigned is_poll; + unsigned is_cancel; +}; + +static void sig_alrm(int sig) +{ + fprintf(stderr, "Timed out!\n"); + exit(1); +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int pipe1[2]; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct poll_data *pd, pds[2]; + struct sigaction act; + int ret; + + if (argc > 1) + return 0; + + if (pipe(pipe1) != 0) { + perror("pipe"); + return 1; + } + + ret = io_uring_queue_init(2, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed: %d\n", ret); + return 1; + } + + memset(&act, 0, sizeof(act)); + act.sa_handler = sig_alrm; + act.sa_flags = SA_RESTART; + sigaction(SIGALRM, &act, NULL); + alarm(1); + + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + return 1; + } + + io_uring_prep_poll_add(sqe, pipe1[0], POLLIN); + + pds[0].is_poll = 1; + pds[0].is_cancel = 0; + io_uring_sqe_set_data(sqe, &pds[0]); + + ret = io_uring_submit(&ring); + if (ret <= 0) { + fprintf(stderr, "sqe submit failed\n"); + return 1; + } + + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "get sqe 
failed\n"); + return 1; + } + + pds[1].is_poll = 0; + pds[1].is_cancel = 1; + io_uring_prep_poll_remove(sqe, &pds[0]); + io_uring_sqe_set_data(sqe, &pds[1]); + + ret = io_uring_submit(&ring); + if (ret <= 0) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + return 1; + } + + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait cqe failed: %d\n", ret); + return 1; + } + + pd = io_uring_cqe_get_data(cqe); + if (pd->is_poll && cqe->res != -ECANCELED) { + fprintf(stderr ,"sqe (add=%d/remove=%d) failed with %ld\n", + pd->is_poll, pd->is_cancel, + (long) cqe->res); + return 1; + } else if (pd->is_cancel && cqe->res) { + fprintf(stderr, "sqe (add=%d/remove=%d) failed with %ld\n", + pd->is_poll, pd->is_cancel, + (long) cqe->res); + return 1; + } + io_uring_cqe_seen(&ring, cqe); + + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait_cqe: %d\n", ret); + return 1; + } + + pd = io_uring_cqe_get_data(cqe); + if (pd->is_poll && cqe->res != -ECANCELED) { + fprintf(stderr, "sqe (add=%d/remove=%d) failed with %ld\n", + pd->is_poll, pd->is_cancel, + (long) cqe->res); + return 1; + } else if (pd->is_cancel && cqe->res) { + fprintf(stderr, "sqe (add=%d/remove=%d) failed with %ld\n", + pd->is_poll, pd->is_cancel, + (long) cqe->res); + return 1; + } + + io_uring_cqe_seen(&ring, cqe); + return 0; +} diff --git a/test/poll-link.c b/test/poll-link.c new file mode 100644 index 0000000..d0786d4 --- /dev/null +++ b/test/poll-link.c @@ -0,0 +1,222 @@ +/* SPDX-License-Identifier: MIT */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t cond = PTHREAD_COND_INITIALIZER; + +static int recv_thread_ready = 0; +static int recv_thread_done = 0; + +static void signal_var(int *var) +{ + pthread_mutex_lock(&mutex); + *var = 1; + pthread_cond_signal(&cond); + 
pthread_mutex_unlock(&mutex); +} + +static void wait_for_var(int *var) +{ + pthread_mutex_lock(&mutex); + + while (!*var) + pthread_cond_wait(&cond, &mutex); + + pthread_mutex_unlock(&mutex); +} + +struct data { + unsigned expected[2]; + unsigned is_mask[2]; + unsigned long timeout; + int port; + int stop; +}; + +static void *send_thread(void *arg) +{ + struct data *data = arg; + + wait_for_var(&recv_thread_ready); + + int s0 = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + assert(s0 != -1); + + struct sockaddr_in addr; + + addr.sin_family = AF_INET; + addr.sin_port = data->port; + addr.sin_addr.s_addr = 0x0100007fU; + + if (connect(s0, (struct sockaddr*)&addr, sizeof(addr)) != -1) + wait_for_var(&recv_thread_done); + + close(s0); + return 0; +} + +void *recv_thread(void *arg) +{ + struct data *data = arg; + struct io_uring_sqe *sqe; + struct io_uring ring; + int i; + + assert(io_uring_queue_init(8, &ring, 0) == 0); + + int s0 = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + assert(s0 != -1); + + int32_t val = 1; + assert(setsockopt(s0, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)) != -1); + assert(setsockopt(s0, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)) != -1); + + struct sockaddr_in addr; + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = 0x0100007fU; + + i = 0; + do { + data->port = 1025 + (rand() % 64510); + addr.sin_port = data->port; + + if (bind(s0, (struct sockaddr*)&addr, sizeof(addr)) != -1) + break; + } while (++i < 100); + + if (i >= 100) { + fprintf(stderr, "Can't find good port, skipped\n"); + data->stop = 1; + signal_var(&recv_thread_ready); + goto out; + } + + assert(listen(s0, 128) != -1); + + signal_var(&recv_thread_ready); + + sqe = io_uring_get_sqe(&ring); + assert(sqe != NULL); + + io_uring_prep_poll_add(sqe, s0, POLLIN | POLLHUP | POLLERR); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 1; + + sqe = io_uring_get_sqe(&ring); + assert(sqe != NULL); + + struct __kernel_timespec ts; + ts.tv_sec = data->timeout / 1000000000; + ts.tv_nsec = 
data->timeout % 1000000000; + io_uring_prep_link_timeout(sqe, &ts, 0); + sqe->user_data = 2; + + assert(io_uring_submit(&ring) == 2); + + for (i = 0; i < 2; i++) { + struct io_uring_cqe *cqe; + int idx; + + if (io_uring_wait_cqe(&ring, &cqe)) { + fprintf(stderr, "wait cqe failed\n"); + goto err; + } + idx = cqe->user_data - 1; + if (data->is_mask[idx] && !(data->expected[idx] & cqe->res)) { + fprintf(stderr, "cqe %llu got %x, wanted mask %x\n", + cqe->user_data, cqe->res, + data->expected[idx]); + goto err; + } else if (!data->is_mask[idx] && cqe->res != data->expected[idx]) { + fprintf(stderr, "cqe %llu got %d, wanted %d\n", + cqe->user_data, cqe->res, + data->expected[idx]); + goto err; + } + io_uring_cqe_seen(&ring, cqe); + } + +out: + signal_var(&recv_thread_done); + close(s0); + io_uring_queue_exit(&ring); + return NULL; +err: + signal_var(&recv_thread_done); + close(s0); + io_uring_queue_exit(&ring); + return (void *) 1; +} + +static int test_poll_timeout(int do_connect, unsigned long timeout) +{ + pthread_t t1, t2; + struct data d; + void *tret; + int ret = 0; + + recv_thread_ready = 0; + recv_thread_done = 0; + + memset(&d, 0, sizeof(d)); + d.timeout = timeout; + if (!do_connect) { + d.expected[0] = -ECANCELED; + d.expected[1] = -ETIME; + } else { + d.expected[0] = POLLIN; + d.is_mask[0] = 1; + d.expected[1] = -ECANCELED; + } + + pthread_create(&t1, NULL, recv_thread, &d); + + if (do_connect) + pthread_create(&t2, NULL, send_thread, &d); + + pthread_join(t1, &tret); + if (tret) + ret++; + + if (do_connect) { + pthread_join(t2, &tret); + if (tret) + ret++; + } + + return ret; +} + +int main(int argc, char *argv[]) +{ + if (argc > 1) + return 0; + + srand(getpid()); + + if (test_poll_timeout(0, 200000000)) { + fprintf(stderr, "poll timeout 0 failed\n"); + return 1; + } + + if (test_poll_timeout(1, 1000000000)) { + fprintf(stderr, "poll timeout 1 failed\n"); + return 1; + } + + return 0; +} diff --git a/test/poll-many.c b/test/poll-many.c new file mode 100644 
index 0000000..723a353 --- /dev/null +++ b/test/poll-many.c @@ -0,0 +1,198 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: test many files being polled for + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +#define NFILES 5000 +#define BATCH 500 +#define NLOOPS 1000 + +#define RING_SIZE 512 + +struct p { + int fd[2]; + int triggered; +}; + +static struct p p[NFILES]; + +static int arm_poll(struct io_uring *ring, int off) +{ + struct io_uring_sqe *sqe; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "failed getting sqe\n"); + return 1; + } + + io_uring_prep_poll_add(sqe, p[off].fd[0], POLLIN); + sqe->user_data = off; + return 0; +} + +static int reap_polls(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + int i, ret, off; + char c; + + for (i = 0; i < BATCH; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret) { + fprintf(stderr, "wait cqe %d\n", ret); + return ret; + } + off = cqe->user_data; + p[off].triggered = 0; + ret = read(p[off].fd[0], &c, 1); + if (ret != 1) { + fprintf(stderr, "read got %d/%d\n", ret, errno); + break; + } + if (arm_poll(ring, off)) + break; + io_uring_cqe_seen(ring, cqe); + } + + if (i != BATCH) { + fprintf(stderr, "gave up at %d\n", i); + return 1; + } + + ret = io_uring_submit(ring); + if (ret != BATCH) { + fprintf(stderr, "submitted %d, %d\n", ret, BATCH); + return 1; + } + + return 0; +} + +static int trigger_polls(void) +{ + char c = 89; + int i, ret; + + for (i = 0; i < BATCH; i++) { + int off; + + do { + off = rand() % NFILES; + if (!p[off].triggered) + break; + } while (1); + + p[off].triggered = 1; + ret = write(p[off].fd[1], &c, 1); + if (ret != 1) { + fprintf(stderr, "write got %d/%d\n", ret, errno); + return 1; + } + } + + return 0; +} + +static int arm_polls(struct io_uring *ring) +{ + int ret, to_arm = NFILES, i, off; + + off = 0; + while (to_arm) { + int this_arm; + + this_arm = to_arm; + if (this_arm > 
RING_SIZE) + this_arm = RING_SIZE; + + for (i = 0; i < this_arm; i++) { + if (arm_poll(ring, off)) { + fprintf(stderr, "arm failed at %d\n", off); + return 1; + } + off++; + } + + ret = io_uring_submit(ring); + if (ret != this_arm) { + fprintf(stderr, "submitted %d, %d\n", ret, this_arm); + return 1; + } + to_arm -= this_arm; + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + struct rlimit rlim; + int i, ret; + + if (argc > 1) + return 0; + + if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) { + perror("getrlimit"); + goto err_noring; + } + + if (rlim.rlim_cur < (2 * NFILES + 5)) { + rlim.rlim_cur = (2 * NFILES + 5); + rlim.rlim_max = rlim.rlim_cur; + if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) { + if (errno == EPERM) + goto err_nofail; + perror("setrlimit"); + goto err_noring; + } + } + + for (i = 0; i < NFILES; i++) { + if (pipe(p[i].fd) < 0) { + perror("pipe"); + goto err_noring; + } + } + + if (io_uring_queue_init(RING_SIZE, &ring, 0)) { + fprintf(stderr, "failed ring init\n"); + goto err_noring; + } + + if (arm_polls(&ring)) + goto err; + + for (i = 0; i < NLOOPS; i++) { + trigger_polls(); + ret = reap_polls(&ring); + if (ret) + goto err; + } + + io_uring_queue_exit(&ring); + return 0; +err: + io_uring_queue_exit(&ring); +err_noring: + fprintf(stderr, "poll-many failed\n"); + return 1; +err_nofail: + fprintf(stderr, "poll-many: not enough files available (and not root), " + "skipped\n"); + return 0; +} diff --git a/test/poll-v-poll.c b/test/poll-v-poll.c new file mode 100644 index 0000000..c8ba6f1 --- /dev/null +++ b/test/poll-v-poll.c @@ -0,0 +1,353 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: test io_uring poll handling + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +struct thread_data { + struct io_uring *ring; + int fd; + int events; + const char *test; + int out[2]; +}; + +static void *epoll_wait_fn(void *data) 
+{ + struct thread_data *td = data; + struct epoll_event ev; + + if (epoll_wait(td->fd, &ev, 1, -1) < 0) { + perror("epoll_wait"); + goto err; + } + + return NULL; +err: + return (void *) 1; +} + +static void *iou_poll(void *data) +{ + struct thread_data *td = data; + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + int ret; + + sqe = io_uring_get_sqe(td->ring); + io_uring_prep_poll_add(sqe, td->fd, td->events); + + ret = io_uring_submit(td->ring); + if (ret != 1) { + fprintf(stderr, "submit got %d\n", ret); + goto err; + } + + ret = io_uring_wait_cqe(td->ring, &cqe); + if (ret) { + fprintf(stderr, "wait_cqe: %d\n", ret); + goto err; + } + + td->out[0] = cqe->res & 0x3f; + io_uring_cqe_seen(td->ring, cqe); + return NULL; +err: + return (void *) 1; +} + +static void *poll_pipe(void *data) +{ + struct thread_data *td = data; + struct pollfd pfd; + int ret; + + pfd.fd = td->fd; + pfd.events = td->events; + + ret = poll(&pfd, 1, -1); + if (ret < 0) + perror("poll"); + + td->out[1] = pfd.revents; + return NULL; +} + +static int do_pipe_pollin_test(struct io_uring *ring) +{ + struct thread_data td; + pthread_t threads[2]; + int ret, pipe1[2]; + char buf; + + if (pipe(pipe1) < 0) { + perror("pipe"); + return 1; + } + + td.ring = ring; + td.fd = pipe1[0]; + td.events = POLLIN; + td.test = __FUNCTION__; + + pthread_create(&threads[1], NULL, iou_poll, &td); + pthread_create(&threads[0], NULL, poll_pipe, &td); + usleep(100000); + + buf = 0x89; + ret = write(pipe1[1], &buf, sizeof(buf)); + if (ret != sizeof(buf)) { + fprintf(stderr, "write failed: %d\n", ret); + return 1; + } + + pthread_join(threads[0], NULL); + pthread_join(threads[1], NULL); + + if (td.out[0] != td.out[1]) { + fprintf(stderr, "%s: res %x/%x differ\n", __FUNCTION__, + td.out[0], td.out[1]); + return 1; + } + return 0; +} + +static int do_pipe_pollout_test(struct io_uring *ring) +{ + struct thread_data td; + pthread_t threads[2]; + int ret, pipe1[2]; + char buf; + + if (pipe(pipe1) < 0) { + 
perror("pipe"); + return 1; + } + + td.ring = ring; + td.fd = pipe1[1]; + td.events = POLLOUT; + td.test = __FUNCTION__; + + pthread_create(&threads[0], NULL, poll_pipe, &td); + pthread_create(&threads[1], NULL, iou_poll, &td); + usleep(100000); + + buf = 0x89; + ret = write(pipe1[1], &buf, sizeof(buf)); + if (ret != sizeof(buf)) { + fprintf(stderr, "write failed: %d\n", ret); + return 1; + } + + pthread_join(threads[0], NULL); + pthread_join(threads[1], NULL); + + if (td.out[0] != td.out[1]) { + fprintf(stderr, "%s: res %x/%x differ\n", __FUNCTION__, + td.out[0], td.out[1]); + return 1; + } + + return 0; +} + +static int do_fd_test(struct io_uring *ring, const char *fname, int events) +{ + struct thread_data td; + pthread_t threads[2]; + int fd; + + fd = open(fname, O_RDONLY); + if (fd < 0) { + perror("open"); + return 1; + } + + td.ring = ring; + td.fd = fd; + td.events = events; + td.test = __FUNCTION__; + + pthread_create(&threads[0], NULL, poll_pipe, &td); + pthread_create(&threads[1], NULL, iou_poll, &td); + + pthread_join(threads[0], NULL); + pthread_join(threads[1], NULL); + + if (td.out[0] != td.out[1]) { + fprintf(stderr, "%s: res %x/%x differ\n", __FUNCTION__, + td.out[0], td.out[1]); + return 1; + } + + return 0; +} + +static int iou_epoll_ctl(struct io_uring *ring, int epfd, int fd, + struct epoll_event *ev) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + int ret; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "Failed to get sqe\n"); + return 1; + } + + io_uring_prep_epoll_ctl(sqe, epfd, fd, EPOLL_CTL_ADD, ev); + + ret = io_uring_submit(ring); + if (ret != 1) { + fprintf(stderr, "submit: %d\n", ret); + return 1; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret) { + fprintf(stderr, "wait_cqe: %d\n", ret); + return 1; + } + + ret = cqe->res; + io_uring_cqe_seen(ring, cqe); + return ret; +} + +static int do_test_epoll(struct io_uring *ring, int iou_epoll_add) +{ + struct epoll_event ev; + struct thread_data td; + 
pthread_t threads[2]; + int ret, pipe1[2]; + char buf; + int fd; + + fd = epoll_create1(0); + if (fd < 0) { + perror("epoll_create"); + return 1; + } + + if (pipe(pipe1) < 0) { + perror("pipe"); + return 1; + } + + ev.events = EPOLLIN; + ev.data.fd = pipe1[0]; + + if (!iou_epoll_add) { + if (epoll_ctl(fd, EPOLL_CTL_ADD, pipe1[0], &ev) < 0) { + perror("epoll_ctrl"); + return 1; + } + } else { + ret = iou_epoll_ctl(ring, fd, pipe1[0], &ev); + if (ret == -EINVAL) { + fprintf(stdout, "epoll not supported, skipping\n"); + return 0; + } else if (ret < 0) { + return 1; + } + } + + td.ring = ring; + td.fd = fd; + td.events = POLLIN; + td.test = __FUNCTION__; + + pthread_create(&threads[0], NULL, iou_poll, &td); + pthread_create(&threads[1], NULL, epoll_wait_fn, &td); + usleep(100000); + + buf = 0x89; + ret = write(pipe1[1], &buf, sizeof(buf)); + if (ret != sizeof(buf)) { + fprintf(stderr, "write failed: %d\n", ret); + return 1; + } + + pthread_join(threads[0], NULL); + pthread_join(threads[1], NULL); + return 0; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + const char *fname; + int ret; + + ret = io_uring_queue_init(1, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed\n"); + return 1; + } + + ret = do_pipe_pollin_test(&ring); + if (ret) { + fprintf(stderr, "pipe pollin test failed\n"); + return ret; + } + + ret = do_pipe_pollout_test(&ring); + if (ret) { + fprintf(stderr, "pipe pollout test failed\n"); + return ret; + } + + ret = do_test_epoll(&ring, 0); + if (ret) { + fprintf(stderr, "epoll test 0 failed\n"); + return ret; + } + + ret = do_test_epoll(&ring, 1); + if (ret) { + fprintf(stderr, "epoll test 1 failed\n"); + return ret; + } + + if (argc > 1) + fname = argv[1]; + else + fname = argv[0]; + + ret = do_fd_test(&ring, fname, POLLIN); + if (ret) { + fprintf(stderr, "fd test IN failed\n"); + return ret; + } + + ret = do_fd_test(&ring, fname, POLLOUT); + if (ret) { + fprintf(stderr, "fd test OUT failed\n"); + return ret; + } + + ret = 
do_fd_test(&ring, fname, POLLOUT | POLLIN); + if (ret) { + fprintf(stderr, "fd test IN|OUT failed\n"); + return ret; + } + + return 0; + +} diff --git a/test/poll.c b/test/poll.c new file mode 100644 index 0000000..f9a89d0 --- /dev/null +++ b/test/poll.c @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: test io_uring poll handling + * + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static void sig_alrm(int sig) +{ + fprintf(stderr, "Timed out!\n"); + exit(1); +} + +int main(int argc, char *argv[]) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct io_uring ring; + int pipe1[2]; + pid_t p; + int ret; + + if (argc > 1) + return 0; + + if (pipe(pipe1) != 0) { + perror("pipe"); + return 1; + } + + p = fork(); + switch (p) { + case -1: + perror("fork"); + exit(2); + case 0: { + struct sigaction act; + + ret = io_uring_queue_init(1, &ring, 0); + if (ret) { + fprintf(stderr, "child: ring setup failed: %d\n", ret); + return 1; + } + + memset(&act, 0, sizeof(act)); + act.sa_handler = sig_alrm; + act.sa_flags = SA_RESTART; + sigaction(SIGALRM, &act, NULL); + alarm(1); + + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + return 1; + } + + io_uring_prep_poll_add(sqe, pipe1[0], POLLIN); + io_uring_sqe_set_data(sqe, sqe); + + ret = io_uring_submit(&ring); + if (ret <= 0) { + fprintf(stderr, "child: sqe submit failed: %d\n", ret); + return 1; + } + + do { + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret < 0) { + fprintf(stderr, "child: wait completion %d\n", ret); + break; + } + io_uring_cqe_seen(&ring, cqe); + } while (ret != 0); + + if (ret < 0) + return 1; + if (cqe->user_data != (unsigned long) sqe) { + fprintf(stderr, "child: cqe doesn't match sqe\n"); + return 1; + } + if ((cqe->res & POLLIN) != POLLIN) { + fprintf(stderr, "child: bad return value %ld\n", + (long) cqe->res); + return 1; + } + exit(0); + } + default: + do { + errno 
= 0; + ret = write(pipe1[1], "foo", 3); + } while (ret == -1 && errno == EINTR); + + if (ret != 3) { + fprintf(stderr, "parent: bad write return %d\n", ret); + return 1; + } + return 0; + } +} diff --git a/test/probe.c b/test/probe.c new file mode 100644 index 0000000..1961176 --- /dev/null +++ b/test/probe.c @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: test IORING_REGISTER_PROBE + */ +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static int no_probe; + +static int verify_probe(struct io_uring_probe *p, int full) +{ + if (!full && p->ops_len) { + fprintf(stderr, "Got ops_len=%u\n", p->ops_len); + return 1; + } + if (!p->last_op) { + fprintf(stderr, "Got last_op=%u\n", p->last_op); + return 1; + } + if (!full) + return 0; + /* check a few ops that must be supported */ + if (!(p->ops[IORING_OP_NOP].flags & IO_URING_OP_SUPPORTED)) { + fprintf(stderr, "NOP not supported!?\n"); + return 1; + } + if (!(p->ops[IORING_OP_READV].flags & IO_URING_OP_SUPPORTED)) { + fprintf(stderr, "READV not supported!?\n"); + return 1; + } + if (!(p->ops[IORING_OP_WRITE].flags & IO_URING_OP_SUPPORTED)) { + fprintf(stderr, "READV not supported!?\n"); + return 1; + } + + return 0; +} + +static int test_probe_helper(struct io_uring *ring) +{ + struct io_uring_probe *p; + + p = io_uring_get_probe_ring(ring); + if (!p) { + fprintf(stderr, "Failed getting probe data\n"); + return 1; + } + + if (verify_probe(p, 1)) { + free(p); + return 1; + } + + return 0; +} + +static int test_probe(struct io_uring *ring) +{ + struct io_uring_probe *p; + size_t len; + int ret; + + len = sizeof(*p) + 256 * sizeof(struct io_uring_probe_op); + p = calloc(1, len); + ret = io_uring_register_probe(ring, p, 0); + if (ret == -EINVAL) { + fprintf(stdout, "Probe not supported, skipping\n"); + no_probe = 1; + goto out; + } else if (ret) { + fprintf(stdout, "Probe returned %d\n", ret); + goto err; + } + + if (verify_probe(p, 0)) + goto err; + + /* now 
grab for all entries */ + memset(p, 0, len); + ret = io_uring_register_probe(ring, p, 256); + if (ret == -EINVAL) { + fprintf(stdout, "Probe not supported, skipping\n"); + goto err; + } else if (ret) { + fprintf(stdout, "Probe returned %d\n", ret); + goto err; + } + + if (verify_probe(p, 1)) + goto err; + +out: + free(p); + return 0; +err: + free(p); + return 1; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int ret; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed\n"); + return 1; + } + + ret = test_probe(&ring); + if (ret) { + fprintf(stderr, "test_probe failed\n"); + return ret; + } + if (no_probe) + return 0; + + ret = test_probe_helper(&ring); + if (ret) { + fprintf(stderr, "test_probe failed\n"); + return ret; + } + + + return 0; +} diff --git a/test/read-write.c b/test/read-write.c new file mode 100644 index 0000000..3bea26f --- /dev/null +++ b/test/read-write.c @@ -0,0 +1,770 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: basic read/write tests with buffered, O_DIRECT, and SQPOLL + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "liburing.h" + +#define FILE_SIZE (128 * 1024) +#define BS 4096 +#define BUFFERS (FILE_SIZE / BS) + +static struct iovec *vecs; +static int no_read; +static int no_buf_select; +static int warned; + +static int create_buffers(void) +{ + int i; + + vecs = malloc(BUFFERS * sizeof(struct iovec)); + for (i = 0; i < BUFFERS; i++) { + if (posix_memalign(&vecs[i].iov_base, BS, BS)) + return 1; + vecs[i].iov_len = BS; + } + + return 0; +} + +static int create_file(const char *file) +{ + ssize_t ret; + char *buf; + int fd; + + buf = malloc(FILE_SIZE); + memset(buf, 0xaa, FILE_SIZE); + + fd = open(file, O_WRONLY | O_CREAT, 0644); + if (fd < 0) { + perror("open file"); + return 1; + } + ret = write(fd, buf, FILE_SIZE); + close(fd); + return ret != FILE_SIZE; +} + +static 
int __test_io(const char *file, struct io_uring *ring, int write, int buffered, + int sqthread, int fixed, int mixed_fixed, int nonvec, + int buf_select, int seq, int exp_len) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + int open_flags; + int i, fd, ret; + off_t offset; + +#ifdef VERBOSE + fprintf(stdout, "%s: start %d/%d/%d/%d/%d/%d: ", __FUNCTION__, write, + buffered, sqthread, + fixed, mixed_fixed, + nonvec); +#endif + if (sqthread && geteuid()) { +#ifdef VERBOSE + fprintf(stdout, "SKIPPED (not root)\n"); +#endif + return 0; + } + + if (write) + open_flags = O_WRONLY; + else + open_flags = O_RDONLY; + if (!buffered) + open_flags |= O_DIRECT; + + fd = open(file, open_flags); + if (fd < 0) { + perror("file open"); + goto err; + } + + if (fixed) { + ret = io_uring_register_buffers(ring, vecs, BUFFERS); + if (ret) { + fprintf(stderr, "buffer reg failed: %d\n", ret); + goto err; + } + } + if (sqthread) { + ret = io_uring_register_files(ring, &fd, 1); + if (ret) { + fprintf(stderr, "file reg failed: %d\n", ret); + goto err; + } + } + + offset = 0; + for (i = 0; i < BUFFERS; i++) { + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "sqe get failed\n"); + goto err; + } + if (!seq) + offset = BS * (rand() % BUFFERS); + if (write) { + int do_fixed = fixed; + int use_fd = fd; + + if (sqthread) + use_fd = 0; + if (fixed && (i & 1)) + do_fixed = 0; + if (do_fixed) { + io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base, + vecs[i].iov_len, + offset, i); + } else if (nonvec) { + io_uring_prep_write(sqe, use_fd, vecs[i].iov_base, + vecs[i].iov_len, offset); + } else { + io_uring_prep_writev(sqe, use_fd, &vecs[i], 1, + offset); + } + } else { + int do_fixed = fixed; + int use_fd = fd; + + if (sqthread) + use_fd = 0; + if (fixed && (i & 1)) + do_fixed = 0; + if (do_fixed) { + io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base, + vecs[i].iov_len, + offset, i); + } else if (nonvec) { + io_uring_prep_read(sqe, use_fd, vecs[i].iov_base, + 
vecs[i].iov_len, offset); + } else { + io_uring_prep_readv(sqe, use_fd, &vecs[i], 1, + offset); + } + + } + if (sqthread) + sqe->flags |= IOSQE_FIXED_FILE; + if (buf_select) { + if (nonvec) + sqe->addr = 0; + sqe->flags |= IOSQE_BUFFER_SELECT; + sqe->buf_group = buf_select; + sqe->user_data = i; + } + if (seq) + offset += BS; + } + + ret = io_uring_submit(ring); + if (ret != BUFFERS) { + fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS); + goto err; + } + + for (i = 0; i < BUFFERS; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret) { + fprintf(stderr, "wait_cqe=%d\n", ret); + goto err; + } + if (cqe->res == -EINVAL && nonvec) { + if (!warned) { + fprintf(stdout, "Non-vectored IO not " + "supported, skipping\n"); + warned = 1; + no_read = 1; + } + } else if (cqe->res != exp_len) { + fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, exp_len); + goto err; + } + if (buf_select && exp_len == BS) { + int bid = cqe->flags >> 16; + unsigned char *ptr = vecs[bid].iov_base; + int j; + + for (j = 0; j < BS; j++) { + if (ptr[j] == cqe->user_data) + continue; + + fprintf(stderr, "Data mismatch! 
bid=%d, " + "wanted=%d, got=%d\n", bid, + (int)cqe->user_data, ptr[j]); + return 1; + } + } + io_uring_cqe_seen(ring, cqe); + } + + if (fixed) { + ret = io_uring_unregister_buffers(ring); + if (ret) { + fprintf(stderr, "buffer unreg failed: %d\n", ret); + goto err; + } + } + if (sqthread) { + ret = io_uring_unregister_files(ring); + if (ret) { + fprintf(stderr, "file unreg failed: %d\n", ret); + goto err; + } + } + + close(fd); +#ifdef VERBOSE + fprintf(stdout, "PASS\n"); +#endif + return 0; +err: +#ifdef VERBOSE + fprintf(stderr, "FAILED\n"); +#endif + if (fd != -1) + close(fd); + return 1; +} +static int test_io(const char *file, int write, int buffered, int sqthread, + int fixed, int mixed_fixed, int nonvec) +{ + struct io_uring ring; + int ret, ring_flags; + + if (sqthread) { + if (geteuid()) { + if (!warned) { + fprintf(stderr, "SQPOLL requires root, skipping\n"); + warned = 1; + } + return 0; + } + ring_flags = IORING_SETUP_SQPOLL; + } else { + ring_flags = 0; + } + + ret = io_uring_queue_init(64, &ring, ring_flags); + if (ret) { + fprintf(stderr, "ring create failed: %d\n", ret); + return 1; + } + + ret = __test_io(file, &ring, write, buffered, sqthread, fixed, + mixed_fixed, nonvec, 0, 0, BS); + + io_uring_queue_exit(&ring); + return ret; +} + +static int read_poll_link(const char *file) +{ + struct __kernel_timespec ts; + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct io_uring ring; + int i, fd, ret, fds[2]; + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) + return ret; + + fd = open(file, O_WRONLY); + if (fd < 0) { + perror("open"); + return 1; + } + + if (pipe(fds)) { + perror("pipe"); + return 1; + } + + sqe = io_uring_get_sqe(&ring); + io_uring_prep_writev(sqe, fd, &vecs[0], 1, 0); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 1; + + sqe = io_uring_get_sqe(&ring); + io_uring_prep_poll_add(sqe, fds[0], POLLIN); + sqe->flags |= IOSQE_IO_LINK; + sqe->user_data = 2; + + ts.tv_sec = 1; + ts.tv_nsec = 0; + sqe = 
io_uring_get_sqe(&ring); + io_uring_prep_link_timeout(sqe, &ts, 0); + sqe->user_data = 3; + + ret = io_uring_submit(&ring); + if (ret != 3) { + fprintf(stderr, "submitted %d\n", ret); + return 1; + } + + for (i = 0; i < 3; i++) { + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret) { + fprintf(stderr, "wait_cqe=%d\n", ret); + return 1; + } + io_uring_cqe_seen(&ring, cqe); + } + + return 0; +} + +static int has_nonvec_read(void) +{ + struct io_uring_probe *p; + struct io_uring ring; + int ret; + + ret = io_uring_queue_init(1, &ring, 0); + if (ret) { + fprintf(stderr, "queue init failed: %d\n", ret); + exit(ret); + } + + p = calloc(1, sizeof(*p) + 256 * sizeof(struct io_uring_probe_op)); + ret = io_uring_register_probe(&ring, p, 256); + /* if we don't have PROBE_REGISTER, we don't have OP_READ/WRITE */ + if (ret == -EINVAL) { +out: + io_uring_queue_exit(&ring); + return 0; + } else if (ret) { + fprintf(stderr, "register_probe: %d\n", ret); + goto out; + } + + if (p->ops_len <= IORING_OP_READ) + goto out; + if (!(p->ops[IORING_OP_READ].flags & IO_URING_OP_SUPPORTED)) + goto out; + io_uring_queue_exit(&ring); + return 1; +} + +static int test_eventfd_read(void) +{ + struct io_uring ring; + int fd, ret; + eventfd_t event; + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + + if (no_read) + return 0; + ret = io_uring_queue_init(8, &ring, 0); + if (ret) + return ret; + + fd = eventfd(1, 0); + if (fd < 0) { + perror("eventfd"); + return 1; + } + sqe = io_uring_get_sqe(&ring); + io_uring_prep_read(sqe, fd, &event, sizeof(eventfd_t), 0); + ret = io_uring_submit(&ring); + if (ret != 1) { + fprintf(stderr, "submitted %d\n", ret); + return 1; + } + eventfd_write(fd, 1); + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret) { + fprintf(stderr, "wait_cqe=%d\n", ret); + return 1; + } + if (cqe->res == -EINVAL) { + fprintf(stdout, "eventfd IO not supported, skipping\n"); + } else if (cqe->res != sizeof(eventfd_t)) { + fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, + (int) 
sizeof(eventfd_t)); + return 1; + } + io_uring_cqe_seen(&ring, cqe); + return 0; +} + +static int test_buf_select_short(const char *filename, int nonvec) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct io_uring ring; + int ret, i, exp_len; + + if (no_buf_select) + return 0; + + ret = io_uring_queue_init(64, &ring, 0); + if (ret) { + fprintf(stderr, "ring create failed: %d\n", ret); + return 1; + } + + exp_len = 0; + for (i = 0; i < BUFFERS; i++) { + sqe = io_uring_get_sqe(&ring); + io_uring_prep_provide_buffers(sqe, vecs[i].iov_base, + vecs[i].iov_len / 2, 1, 1, i); + if (!exp_len) + exp_len = vecs[i].iov_len / 2; + } + + ret = io_uring_submit(&ring); + if (ret != BUFFERS) { + fprintf(stderr, "submit: %d\n", ret); + return -1; + } + + for (i = 0; i < BUFFERS; i++) { + ret = io_uring_wait_cqe(&ring, &cqe); + if (cqe->res < 0) { + fprintf(stderr, "cqe->res=%d\n", cqe->res); + return 1; + } + io_uring_cqe_seen(&ring, cqe); + } + + ret = __test_io(filename, &ring, 0, 0, 0, 0, 0, nonvec, 1, 1, exp_len); + + io_uring_queue_exit(&ring); + return ret; +} + +static int test_buf_select(const char *filename, int nonvec) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct io_uring_probe *p; + struct io_uring ring; + int ret, i; + + ret = io_uring_queue_init(64, &ring, 0); + if (ret) { + fprintf(stderr, "ring create failed: %d\n", ret); + return 1; + } + + p = io_uring_get_probe_ring(&ring); + if (!p || !io_uring_opcode_supported(p, IORING_OP_PROVIDE_BUFFERS)) { + no_buf_select = 1; + fprintf(stdout, "Buffer select not supported, skipping\n"); + return 0; + } + free(p); + + /* + * Write out data with known pattern + */ + for (i = 0; i < BUFFERS; i++) + memset(vecs[i].iov_base, i, vecs[i].iov_len); + + ret = __test_io(filename, &ring, 1, 0, 0, 0, 0, 0, 0, 1, BS); + if (ret) { + fprintf(stderr, "failed writing data\n"); + return 1; + } + + for (i = 0; i < BUFFERS; i++) + memset(vecs[i].iov_base, 0x55, vecs[i].iov_len); + + for (i = 0; i < 
BUFFERS; i++) { + sqe = io_uring_get_sqe(&ring); + io_uring_prep_provide_buffers(sqe, vecs[i].iov_base, + vecs[i].iov_len, 1, 1, i); + } + + ret = io_uring_submit(&ring); + if (ret != BUFFERS) { + fprintf(stderr, "submit: %d\n", ret); + return -1; + } + + for (i = 0; i < BUFFERS; i++) { + ret = io_uring_wait_cqe(&ring, &cqe); + if (cqe->res < 0) { + fprintf(stderr, "cqe->res=%d\n", cqe->res); + return 1; + } + io_uring_cqe_seen(&ring, cqe); + } + + ret = __test_io(filename, &ring, 0, 0, 0, 0, 0, nonvec, 1, 1, BS); + + io_uring_queue_exit(&ring); + return ret; +} + +static int test_io_link(const char *file) +{ + const int nr_links = 100; + const int link_len = 100; + const int nr_sqes = nr_links * link_len; + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct io_uring ring; + int i, j, fd, ret; + + fd = open(file, O_WRONLY); + if (fd < 0) { + perror("file open"); + goto err; + } + + ret = io_uring_queue_init(nr_sqes, &ring, 0); + if (ret) { + fprintf(stderr, "ring create failed: %d\n", ret); + goto err; + } + + for (i = 0; i < nr_links; ++i) { + for (j = 0; j < link_len; ++j) { + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "sqe get failed\n"); + goto err; + } + io_uring_prep_writev(sqe, fd, &vecs[0], 1, 0); + sqe->flags |= IOSQE_ASYNC; + if (j != link_len - 1) + sqe->flags |= IOSQE_IO_LINK; + } + } + + ret = io_uring_submit(&ring); + if (ret != nr_sqes) { + ret = io_uring_peek_cqe(&ring, &cqe); + if (!ret && cqe->res == -EINVAL) { + fprintf(stdout, "IOSQE_ASYNC not supported, skipped\n"); + goto out; + } + fprintf(stderr, "submit got %d, wanted %d\n", ret, nr_sqes); + goto err; + } + + for (i = 0; i < nr_sqes; i++) { + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret) { + fprintf(stderr, "wait_cqe=%d\n", ret); + goto err; + } + if (cqe->res == -EINVAL) { + if (!warned) { + fprintf(stdout, "Non-vectored IO not " + "supported, skipping\n"); + warned = 1; + no_read = 1; + } + } else if (cqe->res != BS) { + fprintf(stderr, "cqe res %d, 
wanted %d\n", cqe->res, BS); + goto err; + } + io_uring_cqe_seen(&ring, cqe); + } + +out: + io_uring_queue_exit(&ring); + close(fd); + return 0; +err: + if (fd != -1) + close(fd); + return 1; +} + +static int test_write_efbig(void) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct io_uring ring; + struct rlimit rlim; + int i, fd, ret; + loff_t off; + + if (getrlimit(RLIMIT_FSIZE, &rlim) < 0) { + perror("getrlimit"); + return 1; + } + rlim.rlim_cur = 64 * 1024; + rlim.rlim_max = 64 * 1024; + if (setrlimit(RLIMIT_FSIZE, &rlim) < 0) { + perror("setrlimit"); + return 1; + } + + fd = open(".efbig", O_WRONLY | O_CREAT, 0644); + if (fd < 0) { + perror("file open"); + goto err; + } + + ret = io_uring_queue_init(32, &ring, 0); + if (ret) { + fprintf(stderr, "ring create failed: %d\n", ret); + goto err; + } + + off = 0; + for (i = 0; i < 32; i++) { + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "sqe get failed\n"); + goto err; + } + io_uring_prep_writev(sqe, fd, &vecs[i], 1, off); + off += BS; + } + + ret = io_uring_submit(&ring); + if (ret != 32) { + fprintf(stderr, "submit got %d, wanted %d\n", ret, 32); + goto err; + } + + for (i = 0; i < 32; i++) { + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret) { + fprintf(stderr, "wait_cqe=%d\n", ret); + goto err; + } + if (i < 16) { + if (cqe->res != BS) { + fprintf(stderr, "bad write: %d\n", cqe->res); + goto err; + } + } else { + if (cqe->res != -EFBIG) { + fprintf(stderr, "Expected -EFBIG: %d\n", cqe->res); + goto err; + } + } + io_uring_cqe_seen(&ring, cqe); + } + + io_uring_queue_exit(&ring); + close(fd); + unlink(".efbig"); + return 0; +err: + if (fd != -1) + close(fd); + unlink(".efbig"); + return 1; +} + +int main(int argc, char *argv[]) +{ + int i, ret, nr; + char *fname; + + if (argc > 1) { + fname = argv[1]; + } else { + fname = ".basic-rw"; + if (create_file(fname)) { + fprintf(stderr, "file creation failed\n"); + goto err; + } + } + + if (create_buffers()) { + fprintf(stderr, "file 
creation failed\n"); + goto err; + } + + /* if we don't have nonvec read, skip testing that */ + if (has_nonvec_read()) + nr = 64; + else + nr = 32; + + for (i = 0; i < nr; i++) { + int v1, v2, v3, v4, v5, v6; + + v1 = (i & 1) != 0; + v2 = (i & 2) != 0; + v3 = (i & 4) != 0; + v4 = (i & 8) != 0; + v5 = (i & 16) != 0; + v6 = (i & 32) != 0; + ret = test_io(fname, v1, v2, v3, v4, v5, v6); + if (ret) { + fprintf(stderr, "test_io failed %d/%d/%d/%d/%d/%d\n", + v1, v2, v3, v4, v5, v6); + goto err; + } + } + + ret = test_buf_select(fname, 1); + if (ret) { + fprintf(stderr, "test_buf_select nonvec failed\n"); + goto err; + } + + ret = test_buf_select(fname, 0); + if (ret) { + fprintf(stderr, "test_buf_select vec failed\n"); + goto err; + } + + ret = test_buf_select_short(fname, 1); + if (ret) { + fprintf(stderr, "test_buf_select_short nonvec failed\n"); + goto err; + } + + ret = test_buf_select_short(fname, 0); + if (ret) { + fprintf(stderr, "test_buf_select_short vec failed\n"); + goto err; + } + + ret = test_eventfd_read(); + if (ret) { + fprintf(stderr, "test_eventfd_read failed\n"); + goto err; + } + + ret = read_poll_link(fname); + if (ret) { + fprintf(stderr, "read_poll_link failed\n"); + goto err; + } + + ret = test_io_link(fname); + if (ret) { + fprintf(stderr, "test_io_link failed\n"); + goto err; + } + + ret = test_write_efbig(); + if (ret) { + fprintf(stderr, "test_write_efbig failed\n"); + goto err; + } + + if (fname != argv[1]) + unlink(fname); + return 0; +err: + if (fname != argv[1]) + unlink(fname); + return 1; +} diff --git a/test/ring-leak.c b/test/ring-leak.c new file mode 100644 index 0000000..4ddc8ff --- /dev/null +++ b/test/ring-leak.c @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Based on description from Al Viro - this demonstrates a leak of the + * io_uring instance, by sending the io_uring fd over a UNIX socket. 
+ * + * See: + * + * https://lore.kernel.org/linux-block/20190129192702.3605-1-axboe@kernel.dk/T/#m6c87fc64e4d063786af6ec6fadce3ac1e95d3184 + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" +#include "../src/syscall.h" + +static int __io_uring_register_files(int ring_fd, int fd1, int fd2) +{ + __s32 fds[2] = { fd1, fd2 }; + + return __sys_io_uring_register(ring_fd, IORING_REGISTER_FILES, fds, 2); +} + +static int get_ring_fd(void) +{ + struct io_uring_params p; + int fd; + + memset(&p, 0, sizeof(p)); + + fd = __sys_io_uring_setup(2, &p); + if (fd < 0) { + perror("io_uring_setup"); + return -1; + } + + return fd; +} + +static void send_fd(int socket, int fd) +{ + char buf[CMSG_SPACE(sizeof(fd))]; + struct cmsghdr *cmsg; + struct msghdr msg; + + memset(buf, 0, sizeof(buf)); + memset(&msg, 0, sizeof(msg)); + + msg.msg_control = buf; + msg.msg_controllen = sizeof(buf); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(fd)); + + memmove(CMSG_DATA(cmsg), &fd, sizeof(fd)); + + msg.msg_controllen = CMSG_SPACE(sizeof(fd)); + + if (sendmsg(socket, &msg, 0) < 0) + perror("sendmsg"); +} + +int main(int argc, char *argv[]) +{ + int sp[2], pid, ring_fd, ret; + + if (argc > 1) + return 0; + + if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sp) != 0) { + perror("Failed to create Unix-domain socket pair\n"); + return 1; + } + + ring_fd = get_ring_fd(); + if (ring_fd < 0) + return 1; + + ret = __io_uring_register_files(ring_fd, sp[0], sp[1]); + if (ret < 0) { + perror("register files"); + return 1; + } + + pid = fork(); + if (pid) + send_fd(sp[0], ring_fd); + + close(ring_fd); + close(sp[0]); + close(sp[1]); + return 0; +} diff --git a/test/runtests-loop.sh b/test/runtests-loop.sh new file mode 100755 index 0000000..4019eba --- /dev/null +++ b/test/runtests-loop.sh @@ -0,0 +1,16 @@ +#!/bin/bash + 
+TESTS="$@" +ITER=0 + +while true; do + ./runtests.sh "$TESTS" + RET="$?" + if [ "${RET}" -ne 0 ]; then + echo "Tests failed at loop $ITER" + break + fi + echo "Finished loop $ITER" + ((ITER++)) +done + diff --git a/test/runtests.sh b/test/runtests.sh new file mode 100755 index 0000000..a891f4b --- /dev/null +++ b/test/runtests.sh @@ -0,0 +1,128 @@ +#!/bin/bash + +TESTS="$@" +RET=0 + +TIMEOUT=60 +FAILED="" +MAYBE_FAILED="" + +do_kmsg="1" +if ! [ $(id -u) = 0 ]; then + do_kmsg="0" +fi + +TEST_DIR=$(dirname $0) +TEST_FILES="" +if [ -f "$TEST_DIR/config.local" ]; then + . $TEST_DIR/config.local + for dev in $TEST_FILES; do + if [ ! -e "$dev" ]; then + echo "Test file $dev not valid" + exit 1 + fi + done +fi + +_check_dmesg() +{ + local dmesg_marker="$1" + local seqres="$2.seqres" + + if [[ $do_kmsg -eq 0 ]]; then + return 0 + fi + + dmesg | bash -c "$DMESG_FILTER" | grep -A 9999 "$dmesg_marker" >"${seqres}.dmesg" + grep -q -e "kernel BUG at" \ + -e "WARNING:" \ + -e "BUG:" \ + -e "Oops:" \ + -e "possible recursive locking detected" \ + -e "Internal error" \ + -e "INFO: suspicious RCU usage" \ + -e "INFO: possible circular locking dependency detected" \ + -e "general protection fault:" \ + -e "blktests failure" \ + "${seqres}.dmesg" + # shellcheck disable=SC2181 + if [[ $? -eq 0 ]]; then + return 1 + else + rm -f "${seqres}.dmesg" + return 0 + fi +} + +run_test() +{ + T="$1" + D="$2" + DMESG_FILTER="cat" + + if [ "$do_kmsg" -eq 1 ]; then + if [ -z "$D" ]; then + local dmesg_marker="Running test $T:" + else + local dmesg_marker="Running test $T $D:" + fi + echo $dmesg_marker | tee /dev/kmsg + else + local dmesg_marker="" + echo Running test $T $D + fi + timeout --preserve-status -s INT $TIMEOUT ./$T $D + r=$? + if [ "${r}" -eq 124 ]; then + echo "Test $T timed out (may not be a failure)" + elif [ "${r}" -ne 0 ]; then + echo "Test $T failed with ret ${r}" + if [ -z "$D" ]; then + FAILED="$FAILED <$T>" + else + FAILED="$FAILED <$T $D>" + fi + RET=1 + elif ! 
_check_dmesg "$dmesg_marker" "$T"; then + echo "Test $T failed dmesg check" + if [ -z "$D" ]; then + FAILED="$FAILED <$T>" + else + FAILED="$FAILED <$T $D>" + fi + RET=1 + elif [ ! -z "$D" ]; then + sleep .1 + ps aux | grep "\[io_wq_manager\]" > /dev/null + R="$?" + if [ "$R" -eq 0 ]; then + MAYBE_FAILED="$MAYBE_FAILED $T" + fi + fi +} + +for t in $TESTS; do + run_test $t + if [ ! -z "$TEST_FILES" ]; then + for dev in $TEST_FILES; do + run_test $t $dev + done + fi +done + +if [ "${RET}" -ne 0 ]; then + echo "Tests $FAILED failed" + exit $RET +else + sleep 1 + ps aux | grep "\[io_wq_manager\]" > /dev/null + R="$?" + if [ "$R" -ne 0 ]; then + MAYBE_FAILED="" + fi + if [ ! -z "$MAYBE_FAILED" ]; then + echo "Tests _maybe_ failed: $MAYBE_FAILED" + fi + echo "All tests passed" + exit 0 +fi diff --git a/test/send_recv.c b/test/send_recv.c new file mode 100644 index 0000000..bc13235 --- /dev/null +++ b/test/send_recv.c @@ -0,0 +1,267 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Simple test case showing using send and recv through io_uring + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static char str[] = "This is a test of send and recv over io_uring!"; + +#define MAX_MSG 128 + +#define PORT 10200 +#define HOST "127.0.0.1" + +#if 0 +# define io_uring_prep_send io_uring_prep_write +# define io_uring_prep_recv io_uring_prep_read +#endif + +static int recv_prep(struct io_uring *ring, struct iovec *iov, int *sock) +{ + struct sockaddr_in saddr; + struct io_uring_sqe *sqe; + int sockfd, ret, val; + + memset(&saddr, 0, sizeof(saddr)); + saddr.sin_family = AF_INET; + saddr.sin_addr.s_addr = htonl(INADDR_ANY); + saddr.sin_port = htons(PORT); + + sockfd = socket(AF_INET, SOCK_DGRAM, 0); + if (sockfd < 0) { + perror("socket"); + return 1; + } + + val = 1; + setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); + + ret = bind(sockfd, (struct sockaddr *)&saddr, sizeof(saddr)); + if (ret < 0) { + 
perror("bind"); + goto err; + } + + ret = io_uring_register_files(ring, &sockfd, 1); + if (ret) { + fprintf(stderr, "file reg failed\n"); + goto err; + } + + sqe = io_uring_get_sqe(ring); + io_uring_prep_recv(sqe, 0, iov->iov_base, iov->iov_len, 0); + sqe->flags |= IOSQE_FIXED_FILE; + sqe->user_data = 2; + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "submit failed: %d\n", ret); + goto err; + } + + *sock = sockfd; + return 0; +err: + close(sockfd); + return 1; +} + +static int do_recv(struct io_uring *ring, struct iovec *iov) +{ + struct io_uring_cqe *cqe; + int ret; + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret) { + fprintf(stdout, "wait_cqe: %d\n", ret); + goto err; + } + if (cqe->res == -EINVAL) { + fprintf(stdout, "recv not supported, skipping\n"); + return 0; + } + if (cqe->res < 0) { + fprintf(stderr, "failed cqe: %d\n", cqe->res); + goto err; + } + + if (cqe->res -1 != strlen(str)) { + fprintf(stderr, "got wrong length: %d/%d\n", cqe->res, + (int) strlen(str) + 1); + goto err; + } + + if (strcmp(str, iov->iov_base)) { + fprintf(stderr, "string mismatch\n"); + goto err; + } + + return 0; +err: + return 1; +} + +struct recv_data { + pthread_mutex_t mutex; + int use_sqthread; +}; + +static void *recv_fn(void *data) +{ + struct recv_data *rd = data; + char buf[MAX_MSG + 1]; + struct iovec iov = { + .iov_base = buf, + .iov_len = sizeof(buf) - 1, + }; + struct io_uring_params p = { }; + struct io_uring ring; + int ret, sock; + + if (rd->use_sqthread) + p.flags = IORING_SETUP_SQPOLL; + ret = io_uring_queue_init_params(1, &ring, &p); + if (ret) { + fprintf(stderr, "queue init failed: %d\n", ret); + goto err; + } + + ret = recv_prep(&ring, &iov, &sock); + if (ret) { + fprintf(stderr, "recv_prep failed: %d\n", ret); + goto err; + } + pthread_mutex_unlock(&rd->mutex); + ret = do_recv(&ring, &iov); + + close(sock); + io_uring_queue_exit(&ring); +err: + return (void *)(intptr_t)ret; +} + +static int do_send(void) +{ + struct sockaddr_in saddr; 
+ struct iovec iov = { + .iov_base = str, + .iov_len = sizeof(str), + }; + struct io_uring ring; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int sockfd, ret; + + ret = io_uring_queue_init(1, &ring, 0); + if (ret) { + fprintf(stderr, "queue init failed: %d\n", ret); + return 1; + } + + memset(&saddr, 0, sizeof(saddr)); + saddr.sin_family = AF_INET; + saddr.sin_port = htons(PORT); + inet_pton(AF_INET, HOST, &saddr.sin_addr); + + sockfd = socket(AF_INET, SOCK_DGRAM, 0); + if (sockfd < 0) { + perror("socket"); + return 1; + } + + ret = connect(sockfd, &saddr, sizeof(saddr)); + if (ret < 0) { + perror("connect"); + return 1; + } + + sqe = io_uring_get_sqe(&ring); + io_uring_prep_send(sqe, sockfd, iov.iov_base, iov.iov_len, 0); + sqe->user_data = 1; + + ret = io_uring_submit(&ring); + if (ret <= 0) { + fprintf(stderr, "submit failed: %d\n", ret); + goto err; + } + + ret = io_uring_wait_cqe(&ring, &cqe); + if (cqe->res == -EINVAL) { + fprintf(stdout, "send not supported, skipping\n"); + close(sockfd); + return 0; + } + if (cqe->res != iov.iov_len) { + fprintf(stderr, "failed cqe: %d\n", cqe->res); + goto err; + } + + close(sockfd); + return 0; +err: + close(sockfd); + return 1; +} + +static int test(int use_sqthread) +{ + pthread_mutexattr_t attr; + pthread_t recv_thread; + struct recv_data rd; + int ret; + void *retval; + + pthread_mutexattr_init(&attr); + pthread_mutexattr_setpshared(&attr, 1); + pthread_mutex_init(&rd.mutex, &attr); + pthread_mutex_lock(&rd.mutex); + rd.use_sqthread = use_sqthread; + + ret = pthread_create(&recv_thread, NULL, recv_fn, &rd); + if (ret) { + fprintf(stderr, "Thread create failed: %d\n", ret); + return 1; + } + + pthread_mutex_lock(&rd.mutex); + do_send(); + pthread_join(recv_thread, &retval); + return (int)(intptr_t)retval; +} + +int main(int argc, char *argv[]) +{ + int ret; + + if (argc > 1) + return 0; + + ret = test(0); + if (ret) { + fprintf(stderr, "test sqthread=0 failed\n"); + return ret; + } + + if (geteuid()) { + 
fprintf(stdout, "%s: skipping SQPOLL variant\n", argv[0]); + return 0; + } + + ret = test(1); + if (ret) { + fprintf(stderr, "test sqthread=1 failed\n"); + return ret; + } + + return 0; +} diff --git a/test/send_recvmsg.c b/test/send_recvmsg.c new file mode 100644 index 0000000..1be8cc6 --- /dev/null +++ b/test/send_recvmsg.c @@ -0,0 +1,316 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Simple test case showing using sendmsg and recvmsg through io_uring + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static char str[] = "This is a test of sendmsg and recvmsg over io_uring!"; + +#define MAX_MSG 128 + +#define PORT 10200 +#define HOST "127.0.0.1" + +#define BUF_BGID 10 +#define BUF_BID 89 + +static int recv_prep(struct io_uring *ring, struct iovec *iov, int bgid) +{ + struct sockaddr_in saddr; + struct msghdr msg; + struct io_uring_sqe *sqe; + int sockfd, ret; + int val = 1; + + memset(&saddr, 0, sizeof(saddr)); + saddr.sin_family = AF_INET; + saddr.sin_addr.s_addr = htonl(INADDR_ANY); + saddr.sin_port = htons(PORT); + + sockfd = socket(AF_INET, SOCK_DGRAM, 0); + if (sockfd < 0) { + perror("socket"); + return 1; + } + + val = 1; + setsockopt(sockfd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)); + setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); + + ret = bind(sockfd, (struct sockaddr *)&saddr, sizeof(saddr)); + if (ret < 0) { + perror("bind"); + goto err; + } + + memset(&msg, 0, sizeof(msg)); + msg.msg_namelen = sizeof(struct sockaddr_in); + msg.msg_iov = iov; + msg.msg_iovlen = 1; + + sqe = io_uring_get_sqe(ring); + io_uring_prep_recvmsg(sqe, sockfd, &msg, 0); + if (bgid) { + sqe->user_data = (unsigned long) iov->iov_base; + iov->iov_base = NULL; + sqe->flags |= IOSQE_BUFFER_SELECT; + sqe->buf_group = bgid; + } + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "submit failed: %d\n", ret); + goto err; + } + + close(sockfd); + return 0; +err: + close(sockfd); 
+ return 1; +} + +struct recv_data { + pthread_mutex_t *mutex; + int buf_select; + int no_buf_add; +}; + +static int do_recvmsg(struct io_uring *ring, struct iovec *iov, + struct recv_data *rd) +{ + struct io_uring_cqe *cqe; + int ret; + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret) { + fprintf(stdout, "wait_cqe: %d\n", ret); + goto err; + } + if (cqe->res < 0) { + if (rd->no_buf_add && rd->buf_select) + return 0; + fprintf(stderr, "%s: failed cqe: %d\n", __FUNCTION__, cqe->res); + goto err; + } + if (cqe->flags) { + int bid = cqe->flags >> 16; + if (bid != BUF_BID) + fprintf(stderr, "Buffer ID mismatch %d\n", bid); + /* just for passing the pointer to str */ + iov->iov_base = (void *) (uintptr_t) cqe->user_data; + } + + if (rd->no_buf_add && rd->buf_select) { + fprintf(stderr, "Expected -ENOBUFS: %d\n", cqe->res); + goto err; + } + + if (cqe->res -1 != strlen(str)) { + fprintf(stderr, "got wrong length: %d/%d\n", cqe->res, + (int) strlen(str) + 1); + goto err; + } + + if (strcmp(str, iov->iov_base)) { + fprintf(stderr, "string mismatch\n"); + goto err; + } + + return 0; +err: + return 1; +} + +static void *recv_fn(void *data) +{ + struct recv_data *rd = data; + pthread_mutex_t *mutex = rd->mutex; + char buf[MAX_MSG + 1]; + struct iovec iov = { + .iov_base = buf, + .iov_len = sizeof(buf) - 1, + }; + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct io_uring ring; + int ret; + + ret = io_uring_queue_init(1, &ring, 0); + if (ret) { + fprintf(stderr, "queue init failed: %d\n", ret); + goto err; + } + + if (rd->buf_select && !rd->no_buf_add) { + sqe = io_uring_get_sqe(&ring); + io_uring_prep_provide_buffers(sqe, buf, sizeof(buf) -1, 1, + BUF_BGID, BUF_BID); + ret = io_uring_submit(&ring); + if (ret != 1) { + fprintf(stderr, "submit ret=%d\n", ret); + goto err; + } + + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret) { + fprintf(stderr, "wait_cqe=%d\n", ret); + goto err; + } + ret = cqe->res; + io_uring_cqe_seen(&ring, cqe); + if (ret == -EINVAL) { + 
fprintf(stdout, "PROVIDE_BUFFERS not supported, skip\n"); + goto out; + goto err; + } else if (ret < 0) { + fprintf(stderr, "PROVIDER_BUFFERS %d\n", ret); + goto err; + } + } + + ret = recv_prep(&ring, &iov, rd->buf_select ? BUF_BGID : 0); + if (ret) { + fprintf(stderr, "recv_prep failed: %d\n", ret); + goto err; + } + + pthread_mutex_unlock(mutex); + ret = do_recvmsg(&ring, &iov, rd); + + io_uring_queue_exit(&ring); + +err: + return (void *)(intptr_t)ret; +out: + pthread_mutex_unlock(mutex); + io_uring_queue_exit(&ring); + return NULL; +} + +static int do_sendmsg(void) +{ + struct sockaddr_in saddr; + struct iovec iov = { + .iov_base = str, + .iov_len = sizeof(str), + }; + struct msghdr msg; + struct io_uring ring; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int sockfd, ret; + + ret = io_uring_queue_init(1, &ring, 0); + if (ret) { + fprintf(stderr, "queue init failed: %d\n", ret); + return 1; + } + + memset(&saddr, 0, sizeof(saddr)); + saddr.sin_family = AF_INET; + saddr.sin_port = htons(PORT); + inet_pton(AF_INET, HOST, &saddr.sin_addr); + + memset(&msg, 0, sizeof(msg)); + msg.msg_name = &saddr; + msg.msg_namelen = sizeof(struct sockaddr_in); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + sockfd = socket(AF_INET, SOCK_DGRAM, 0); + if (sockfd < 0) { + perror("socket"); + return 1; + } + + sqe = io_uring_get_sqe(&ring); + io_uring_prep_sendmsg(sqe, sockfd, &msg, 0); + + ret = io_uring_submit(&ring); + if (ret <= 0) { + fprintf(stderr, "submit failed: %d\n", ret); + goto err; + } + + ret = io_uring_wait_cqe(&ring, &cqe); + if (cqe->res < 0) { + fprintf(stderr, "%s: failed cqe: %d\n", __FUNCTION__, cqe->res); + goto err; + } + + close(sockfd); + return 0; +err: + close(sockfd); + return 1; +} + +static int test(int buf_select, int no_buf_add) +{ + struct recv_data rd; + pthread_mutexattr_t attr; + pthread_t recv_thread; + pthread_mutex_t mutex; + int ret; + void *retval; + + pthread_mutexattr_init(&attr); + pthread_mutexattr_setpshared(&attr, 1); + 
pthread_mutex_init(&mutex, &attr); + pthread_mutex_lock(&mutex); + + rd.mutex = &mutex; + rd.buf_select = buf_select; + rd.no_buf_add = no_buf_add; + ret = pthread_create(&recv_thread, NULL, recv_fn, &rd); + if (ret) { + fprintf(stderr, "Thread create failed\n"); + return 1; + } + + pthread_mutex_lock(&mutex); + do_sendmsg(); + pthread_join(recv_thread, &retval); + ret = (int)(intptr_t)retval; + + return ret; +} + +int main(int argc, char *argv[]) +{ + int ret; + + if (argc > 1) + return 0; + + ret = test(0, 0); + if (ret) { + fprintf(stderr, "send_recvmsg 0 failed\n"); + return 1; + } + + ret = test(1, 0); + if (ret) { + fprintf(stderr, "send_recvmsg 1 0 failed\n"); + return 1; + } + + ret = test(1, 1); + if (ret) { + fprintf(stderr, "send_recvmsg 1 1 failed\n"); + return 1; + } + + return 0; +} diff --git a/test/shared-wq.c b/test/shared-wq.c new file mode 100644 index 0000000..c0571e6 --- /dev/null +++ b/test/shared-wq.c @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: test wq sharing + */ +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static int test_attach_invalid(int ringfd) +{ + struct io_uring_params p; + struct io_uring ring; + int ret; + + memset(&p, 0, sizeof(p)); + p.flags = IORING_SETUP_ATTACH_WQ; + p.wq_fd = ringfd; + ret = io_uring_queue_init_params(1, &ring, &p); + if (ret != -EINVAL) { + fprintf(stderr, "Attach to zero: %d\n", ret); + goto err; + } + return 0; +err: + return 1; +} + +static int test_attach(int ringfd) +{ + struct io_uring_params p; + struct io_uring ring2; + int ret; + + memset(&p, 0, sizeof(p)); + p.flags = IORING_SETUP_ATTACH_WQ; + p.wq_fd = ringfd; + ret = io_uring_queue_init_params(1, &ring2, &p); + if (ret == -EINVAL) { + fprintf(stdout, "Sharing not supported, skipping\n"); + return 0; + } else if (ret) { + fprintf(stderr, "Attach to id: %d\n", ret); + goto err; + } + io_uring_queue_exit(&ring2); + return 0; +err: + return 1; +} + +int main(int argc, char 
*argv[]) +{ + struct io_uring ring; + int ret; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed\n"); + return 1; + } + + /* stdout is definitely not an io_uring descriptor */ + ret = test_attach_invalid(2); + if (ret) { + fprintf(stderr, "test_attach_invalid failed\n"); + return ret; + } + + ret = test_attach(ring.ring_fd); + if (ret) { + fprintf(stderr, "test_attach failed\n"); + return ret; + } + + return 0; +} diff --git a/test/short-read.c b/test/short-read.c new file mode 100644 index 0000000..aba7ea0 --- /dev/null +++ b/test/short-read.c @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: MIT */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +#define BUF_SIZE 4096 +#define FILE_SIZE 1024 + +static int create_file(const char *file) +{ + ssize_t ret; + char *buf; + int fd; + + buf = malloc(FILE_SIZE); + memset(buf, 0xaa, FILE_SIZE); + + fd = open(file, O_WRONLY | O_CREAT, 0644); + if (fd < 0) { + perror("open file"); + return 1; + } + ret = write(fd, buf, FILE_SIZE); + close(fd); + return ret != FILE_SIZE; +} + +int main(int argc, char *argv[]) +{ + int ret, fd, save_errno; + struct io_uring ring; + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct iovec vec; + + if (argc > 1) + return 0; + + vec.iov_base = malloc(BUF_SIZE); + vec.iov_len = BUF_SIZE; + + if (create_file(".short-read")) { + fprintf(stderr, "file creation failed\n"); + return 1; + } + + fd = open(".short-read", O_RDONLY); + save_errno = errno; + unlink(".short-read"); + errno = save_errno; + if (fd < 0) { + perror("file open"); + return 1; + } + + ret = io_uring_queue_init(32, &ring, 0); + if (ret) { + fprintf(stderr, "queue init failed: %d\n", ret); + return ret; + } + + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "sqe get failed\n"); + return 1; + } + io_uring_prep_readv(sqe, fd, &vec, 1, 0); + + ret = io_uring_submit(&ring); + if (ret 
!= 1) { + fprintf(stderr, "submit failed: %d\n", ret); + return 1; + } + + ret = io_uring_wait_cqes(&ring, &cqe, 1, 0, 0); + if (ret) { + fprintf(stderr, "wait_cqe failed: %d\n", ret); + return 1; + } + + if (cqe->res != FILE_SIZE) { + fprintf(stderr, "Read failed: %d\n", cqe->res); + return 1; + } + + io_uring_cqe_seen(&ring, cqe); + return 0; +} diff --git a/test/socket-rw.c b/test/socket-rw.c new file mode 100644 index 0000000..45daf57 --- /dev/null +++ b/test/socket-rw.c @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Check that a readv on a socket queued before a writev doesn't hang + * the processing. + * + * From Hrvoje Zeba + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +int main(int argc, char *argv[]) +{ + int p_fd[2]; + int32_t recv_s0; + int32_t val = 1; + struct sockaddr_in addr; + + if (argc > 1) + return 0; + + recv_s0 = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); + + assert(setsockopt(recv_s0, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)) != -1); + assert(setsockopt(recv_s0, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)) != -1); + + addr.sin_family = AF_INET; + addr.sin_port = 0x1235; + addr.sin_addr.s_addr = 0x0100007fU; + + assert(bind(recv_s0, (struct sockaddr*)&addr, sizeof(addr)) != -1); + assert(listen(recv_s0, 128) != -1); + + + p_fd[1] = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); + + val = 1; + assert(setsockopt(p_fd[1], IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)) != -1); + + int32_t flags = fcntl(p_fd[1], F_GETFL, 0); + assert(flags != -1); + + flags |= O_NONBLOCK; + assert(fcntl(p_fd[1], F_SETFL, flags) != -1); + + assert(connect(p_fd[1], (struct sockaddr*)&addr, sizeof(addr)) == -1); + + flags = fcntl(p_fd[1], F_GETFL, 0); + assert(flags != -1); + + flags &= ~O_NONBLOCK; + assert(fcntl(p_fd[1], F_SETFL, flags) != -1); + + p_fd[0] = accept(recv_s0, NULL, NULL); + assert(p_fd[0] != -1); + + while (1) { + 
int32_t code; + socklen_t code_len = sizeof(code); + + assert(getsockopt(p_fd[1], SOL_SOCKET, SO_ERROR, &code, &code_len) != -1); + + if (!code) + break; + } + + struct io_uring m_io_uring; + + assert(io_uring_queue_init(32, &m_io_uring, 0) >= 0); + + char recv_buff[128]; + char send_buff[128]; + + { + struct iovec iov[1]; + + iov[0].iov_base = recv_buff; + iov[0].iov_len = sizeof(recv_buff); + + struct io_uring_sqe* sqe = io_uring_get_sqe(&m_io_uring); + assert(sqe != NULL); + + io_uring_prep_readv(sqe, p_fd[0], iov, 1, 0); + } + + { + struct iovec iov[1]; + + iov[0].iov_base = send_buff; + iov[0].iov_len = sizeof(send_buff); + + struct io_uring_sqe* sqe = io_uring_get_sqe(&m_io_uring); + assert(sqe != NULL); + + io_uring_prep_writev(sqe, p_fd[1], iov, 1, 0); + } + + assert(io_uring_submit_and_wait(&m_io_uring, 2) != -1); + + struct io_uring_cqe* cqe; + uint32_t head; + uint32_t count = 0; + + while (count != 2) { + io_uring_for_each_cqe(&m_io_uring, head, cqe) { + assert(cqe->res == 128); + count++; + } + + assert(count <= 2); + io_uring_cq_advance(&m_io_uring, count); + } + + io_uring_queue_exit(&m_io_uring); + return 0; +} diff --git a/test/splice.c b/test/splice.c new file mode 100644 index 0000000..e67bb10 --- /dev/null +++ b/test/splice.c @@ -0,0 +1,509 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +#define BUF_SIZE (16 * 4096) + +struct test_ctx { + int real_pipe1[2]; + int real_pipe2[2]; + int real_fd_in; + int real_fd_out; + + /* fds or for registered files */ + int pipe1[2]; + int pipe2[2]; + int fd_in; + int fd_out; + + void *buf_in; + void *buf_out; +}; + +static unsigned int splice_flags = 0; +static unsigned int sqe_flags = 0; +static int has_splice = 0; +static int has_tee = 0; + +static int read_buf(int fd, void *buf, int len) +{ + int ret; + + while (len) { + ret = read(fd, buf, len); + if (ret < 0) + return ret; + len -= ret; + buf += ret; + } + return 0; +} + +static int write_buf(int fd, 
const void *buf, int len) +{ + int ret; + + while (len) { + ret = write(fd, buf, len); + if (ret < 0) + return ret; + len -= ret; + buf += ret; + } + return 0; +} + +static int check_content(int fd, void *buf, int len, const void *src) +{ + int ret; + + ret = read_buf(fd, buf, len); + if (ret) + return ret; + + ret = memcmp(buf, src, len); + return (ret != 0) ? -1 : 0; +} + +static int create_file(const char *filename) +{ + int fd, save_errno; + + fd = open(filename, O_RDWR | O_CREAT, 0644); + save_errno = errno; + unlink(filename); + errno = save_errno; + return fd; +} + +static int init_splice_ctx(struct test_ctx *ctx) +{ + int ret, rnd_fd; + + ctx->buf_in = calloc(BUF_SIZE, 1); + if (!ctx->buf_in) + return 1; + ctx->buf_out = calloc(BUF_SIZE, 1); + if (!ctx->buf_out) + return 1; + + ctx->fd_in = create_file(".splice-test-in"); + if (ctx->fd_in < 0) { + perror("file open"); + return 1; + } + + ctx->fd_out = create_file(".splice-test-out"); + if (ctx->fd_out < 0) { + perror("file open"); + return 1; + } + + /* get random data */ + rnd_fd = open("/dev/urandom", O_RDONLY); + if (rnd_fd < 0) + return 1; + + ret = read_buf(rnd_fd, ctx->buf_in, BUF_SIZE); + if (ret != 0) + return 1; + close(rnd_fd); + + /* populate file */ + ret = write_buf(ctx->fd_in, ctx->buf_in, BUF_SIZE); + if (ret) + return ret; + + if (pipe(ctx->pipe1) < 0) + return 1; + if (pipe(ctx->pipe2) < 0) + return 1; + + ctx->real_pipe1[0] = ctx->pipe1[0]; + ctx->real_pipe1[1] = ctx->pipe1[1]; + ctx->real_pipe2[0] = ctx->pipe2[0]; + ctx->real_pipe2[1] = ctx->pipe2[1]; + ctx->real_fd_in = ctx->fd_in; + ctx->real_fd_out = ctx->fd_out; + return 0; +} + +static int do_splice_op(struct io_uring *ring, + int fd_in, loff_t off_in, + int fd_out, loff_t off_out, + unsigned int len, + __u8 opcode) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + int ret = -1; + + do { + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + return -1; + } + io_uring_prep_splice(sqe, fd_in, 
off_in, fd_out, off_out, + len, splice_flags); + sqe->flags |= sqe_flags; + sqe->user_data = 42; + sqe->opcode = opcode; + + ret = io_uring_submit(ring); + if (ret != 1) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + return ret; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion %d\n", cqe->res); + return ret; + } + + if (cqe->res <= 0) { + io_uring_cqe_seen(ring, cqe); + return cqe->res; + } + + len -= cqe->res; + if (off_in != -1) + off_in += cqe->res; + if (off_out != -1) + off_out += cqe->res; + io_uring_cqe_seen(ring, cqe); + } while (len); + + return 0; +} + +static int do_splice(struct io_uring *ring, + int fd_in, loff_t off_in, + int fd_out, loff_t off_out, + unsigned int len) +{ + return do_splice_op(ring, fd_in, off_in, fd_out, off_out, len, + IORING_OP_SPLICE); +} + +static int do_tee(struct io_uring *ring, int fd_in, int fd_out, + unsigned int len) +{ + return do_splice_op(ring, fd_in, 0, fd_out, 0, len, IORING_OP_TEE); +} + +static void check_splice_support(struct io_uring *ring, struct test_ctx *ctx) +{ + int ret; + + ret = do_splice(ring, -1, 0, -1, 0, BUF_SIZE); + has_splice = (ret == -EBADF); +} + +static void check_tee_support(struct io_uring *ring, struct test_ctx *ctx) +{ + int ret; + + ret = do_tee(ring, -1, -1, BUF_SIZE); + has_tee = (ret == -EBADF); +} + +static int check_zero_splice(struct io_uring *ring, struct test_ctx *ctx) +{ + int ret; + + ret = do_splice(ring, ctx->fd_in, -1, ctx->pipe1[1], -1, 0); + if (ret) + return ret; + + ret = do_splice(ring, ctx->pipe2[0], -1, ctx->pipe1[1], -1, 0); + if (ret) + return ret; + + return 0; +} + +static int splice_to_pipe(struct io_uring *ring, struct test_ctx *ctx) +{ + int ret; + + ret = lseek(ctx->real_fd_in, 0, SEEK_SET); + if (ret) + return ret; + + /* implicit file offset */ + ret = do_splice(ring, ctx->fd_in, -1, ctx->pipe1[1], -1, BUF_SIZE); + if (ret) + return ret; + + ret = check_content(ctx->real_pipe1[0], ctx->buf_out, BUF_SIZE, + 
ctx->buf_in); + if (ret) + return ret; + + /* explicit file offset */ + ret = do_splice(ring, ctx->fd_in, 0, ctx->pipe1[1], -1, BUF_SIZE); + if (ret) + return ret; + + return check_content(ctx->real_pipe1[0], ctx->buf_out, BUF_SIZE, + ctx->buf_in); +} + +static int splice_from_pipe(struct io_uring *ring, struct test_ctx *ctx) +{ + int ret; + + ret = write_buf(ctx->real_pipe1[1], ctx->buf_in, BUF_SIZE); + if (ret) + return ret; + ret = do_splice(ring, ctx->pipe1[0], -1, ctx->fd_out, 0, BUF_SIZE); + if (ret) + return ret; + ret = check_content(ctx->real_fd_out, ctx->buf_out, BUF_SIZE, + ctx->buf_in); + if (ret) + return ret; + + ret = ftruncate(ctx->real_fd_out, 0); + if (ret) + return ret; + return lseek(ctx->real_fd_out, 0, SEEK_SET); +} + +static int splice_pipe_to_pipe(struct io_uring *ring, struct test_ctx *ctx) +{ + int ret; + + ret = do_splice(ring, ctx->fd_in, 0, ctx->pipe1[1], -1, BUF_SIZE); + if (ret) + return ret; + ret = do_splice(ring, ctx->pipe1[0], -1, ctx->pipe2[1], -1, BUF_SIZE); + if (ret) + return ret; + + return check_content(ctx->real_pipe2[0], ctx->buf_out, BUF_SIZE, + ctx->buf_in); +} + +static int fail_splice_pipe_offset(struct io_uring *ring, struct test_ctx *ctx) +{ + int ret; + + ret = do_splice(ring, ctx->fd_in, 0, ctx->pipe1[1], 0, BUF_SIZE); + if (ret != -ESPIPE && ret != -EINVAL) + return ret; + + ret = do_splice(ring, ctx->pipe1[0], 0, ctx->fd_out, 0, BUF_SIZE); + if (ret != -ESPIPE && ret != -EINVAL) + return ret; + + return 0; +} + +static int fail_tee_nonpipe(struct io_uring *ring, struct test_ctx *ctx) +{ + int ret; + + ret = do_tee(ring, ctx->fd_in, ctx->pipe1[1], BUF_SIZE); + if (ret != -ESPIPE && ret != -EINVAL) + return ret; + + return 0; +} + +static int fail_tee_offset(struct io_uring *ring, struct test_ctx *ctx) +{ + int ret; + + ret = do_splice_op(ring, ctx->pipe2[0], -1, ctx->pipe1[1], 0, + BUF_SIZE, IORING_OP_TEE); + if (ret != -ESPIPE && ret != -EINVAL) + return ret; + + ret = do_splice_op(ring, ctx->pipe2[0], 0, 
ctx->pipe1[1], -1, + BUF_SIZE, IORING_OP_TEE); + if (ret != -ESPIPE && ret != -EINVAL) + return ret; + + return 0; +} + +static int check_tee(struct io_uring *ring, struct test_ctx *ctx) +{ + int ret; + + ret = write_buf(ctx->real_pipe1[1], ctx->buf_in, BUF_SIZE); + if (ret) + return ret; + ret = do_tee(ring, ctx->pipe1[0], ctx->pipe2[1], BUF_SIZE); + if (ret) + return ret; + + ret = check_content(ctx->real_pipe1[0], ctx->buf_out, BUF_SIZE, + ctx->buf_in); + if (ret) { + fprintf(stderr, "tee(), invalid src data\n"); + return ret; + } + + ret = check_content(ctx->real_pipe2[0], ctx->buf_out, BUF_SIZE, + ctx->buf_in); + if (ret) { + fprintf(stderr, "tee(), invalid dst data\n"); + return ret; + } + + return 0; +} + +static int check_zero_tee(struct io_uring *ring, struct test_ctx *ctx) +{ + return do_tee(ring, ctx->pipe2[0], ctx->pipe1[1], 0); +} + +static int test_splice(struct io_uring *ring, struct test_ctx *ctx) +{ + int ret; + + if (has_splice) { + ret = check_zero_splice(ring, ctx); + if (ret) { + fprintf(stderr, "check_zero_splice failed %i %i\n", + ret, errno); + return ret; + } + + ret = splice_to_pipe(ring, ctx); + if (ret) { + fprintf(stderr, "splice_to_pipe failed %i %i\n", + ret, errno); + return ret; + } + + ret = splice_from_pipe(ring, ctx); + if (ret) { + fprintf(stderr, "splice_from_pipe failed %i %i\n", + ret, errno); + return ret; + } + + ret = splice_pipe_to_pipe(ring, ctx); + if (ret) { + fprintf(stderr, "splice_pipe_to_pipe failed %i %i\n", + ret, errno); + return ret; + } + + ret = fail_splice_pipe_offset(ring, ctx); + if (ret) { + fprintf(stderr, "fail_splice_pipe_offset failed %i %i\n", + ret, errno); + return ret; + } + } + + if (has_tee) { + ret = check_zero_tee(ring, ctx); + if (ret) { + fprintf(stderr, "check_zero_tee() failed %i %i\n", + ret, errno); + return ret; + } + + ret = fail_tee_nonpipe(ring, ctx); + if (ret) { + fprintf(stderr, "fail_tee_nonpipe() failed %i %i\n", + ret, errno); + return ret; + } + + ret = fail_tee_offset(ring, 
ctx); + if (ret) { + fprintf(stderr, "fail_tee_offset failed %i %i\n", + ret, errno); + return ret; + } + + ret = check_tee(ring, ctx); + if (ret) { + fprintf(stderr, "check_tee() failed %i %i\n", + ret, errno); + return ret; + } + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + struct test_ctx ctx; + int ret; + int reg_fds[6]; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed\n"); + return 1; + } + + ret = init_splice_ctx(&ctx); + if (ret) { + fprintf(stderr, "init failed %i %i\n", ret, errno); + return 1; + } + + check_splice_support(&ring, &ctx); + if (!has_splice) + fprintf(stdout, "skip, doesn't support splice()\n"); + check_tee_support(&ring, &ctx); + if (!has_tee) + fprintf(stdout, "skip, doesn't support tee()\n"); + + ret = test_splice(&ring, &ctx); + if (ret) { + fprintf(stderr, "basic splice tests failed\n"); + return ret; + } + + reg_fds[0] = ctx.real_pipe1[0]; + reg_fds[1] = ctx.real_pipe1[1]; + reg_fds[2] = ctx.real_pipe2[0]; + reg_fds[3] = ctx.real_pipe2[1]; + reg_fds[4] = ctx.real_fd_in; + reg_fds[5] = ctx.real_fd_out; + ret = io_uring_register_files(&ring, reg_fds, 6); + if (ret) { + fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); + return 1; + } + + /* remap fds to registered */ + ctx.pipe1[0] = 0; + ctx.pipe1[1] = 1; + ctx.pipe2[0] = 2; + ctx.pipe2[1] = 3; + ctx.fd_in = 4; + ctx.fd_out = 5; + + splice_flags = SPLICE_F_FD_IN_FIXED; + sqe_flags = IOSQE_FIXED_FILE; + ret = test_splice(&ring, &ctx); + if (ret) { + fprintf(stderr, "registered fds splice tests failed\n"); + return ret; + } + return 0; +} diff --git a/test/sq-full-cpp.cc b/test/sq-full-cpp.cc new file mode 100644 index 0000000..ba40099 --- /dev/null +++ b/test/sq-full-cpp.cc @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: test SQ queue full condition + * + */ +#include +#include +#include +#include +#include +#include + +#include 
"liburing.h" + +int main(int argc, char *argv[]) +{ + struct io_uring_sqe *sqe; + struct io_uring ring; + int ret, i; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed: %d\n", ret); + return 1; + + } + + i = 0; + while ((sqe = io_uring_get_sqe(&ring)) != NULL) + i++; + + if (i != 8) { + fprintf(stderr, "Got %d SQEs, wanted 8\n", i); + goto err; + } + + io_uring_queue_exit(&ring); + return 0; +err: + io_uring_queue_exit(&ring); + return 1; +} diff --git a/test/sq-full.c b/test/sq-full.c new file mode 100644 index 0000000..ba40099 --- /dev/null +++ b/test/sq-full.c @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: test SQ queue full condition + * + */ +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +int main(int argc, char *argv[]) +{ + struct io_uring_sqe *sqe; + struct io_uring ring; + int ret, i; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed: %d\n", ret); + return 1; + + } + + i = 0; + while ((sqe = io_uring_get_sqe(&ring)) != NULL) + i++; + + if (i != 8) { + fprintf(stderr, "Got %d SQEs, wanted 8\n", i); + goto err; + } + + io_uring_queue_exit(&ring); + return 0; +err: + io_uring_queue_exit(&ring); + return 1; +} diff --git a/test/sq-poll-kthread.c b/test/sq-poll-kthread.c new file mode 100644 index 0000000..ed7d0bf --- /dev/null +++ b/test/sq-poll-kthread.c @@ -0,0 +1,170 @@ +/* + * Description: test if io_uring SQ poll kthread is stopped when the userspace + * process ended with or without closing the io_uring fd + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +#define SQ_THREAD_IDLE 2000 +#define BUF_SIZE 128 +#define KTHREAD_NAME "io_uring-sq" + +enum { + TEST_OK = 0, + TEST_SKIPPED = 1, + TEST_FAILED = 2, +}; + +static int 
do_test_sq_poll_kthread_stopped(bool do_exit) +{ + int ret = 0, pipe1[2]; + struct io_uring_params param; + struct io_uring ring; + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + uint8_t buf[BUF_SIZE]; + struct iovec iov; + + if (geteuid()) { + fprintf(stderr, "sqpoll requires root!\n"); + return TEST_SKIPPED; + } + + if (pipe(pipe1) != 0) { + perror("pipe"); + return TEST_FAILED; + } + + memset(¶m, 0, sizeof(param)); + + param.flags |= IORING_SETUP_SQPOLL; + param.sq_thread_idle = SQ_THREAD_IDLE; + + ret = io_uring_queue_init_params(16, &ring, ¶m); + if (ret) { + fprintf(stderr, "ring setup failed\n"); + ret = TEST_FAILED; + goto err_pipe; + } + + ret = io_uring_register_files(&ring, &pipe1[1], 1); + if (ret) { + fprintf(stderr, "file reg failed: %d\n", ret); + ret = TEST_FAILED; + goto err_uring; + } + + iov.iov_base = buf; + iov.iov_len = BUF_SIZE; + + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "io_uring_get_sqe failed\n"); + ret = TEST_FAILED; + goto err_uring; + } + + io_uring_prep_writev(sqe, 0, &iov, 1, 0); + sqe->flags |= IOSQE_FIXED_FILE; + + ret = io_uring_submit(&ring); + if (ret < 0) { + fprintf(stderr, "io_uring_submit failed - ret: %d\n", + ret); + ret = TEST_FAILED; + goto err_uring; + } + + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret < 0) { + fprintf(stderr, "io_uring_wait_cqe - ret: %d\n", + ret); + ret = TEST_FAILED; + goto err_uring; + } + + if (cqe->res != BUF_SIZE) { + fprintf(stderr, "unexpected cqe->res %d [expected %d]\n", + cqe->res, BUF_SIZE); + ret = TEST_FAILED; + goto err_uring; + + } + + io_uring_cqe_seen(&ring, cqe); + + ret = TEST_OK; + +err_uring: + if (do_exit) + io_uring_queue_exit(&ring); +err_pipe: + close(pipe1[0]); + close(pipe1[1]); + + return ret; +} + +int test_sq_poll_kthread_stopped(bool do_exit) +{ + pid_t pid; + int status = 0; + + pid = fork(); + + if (pid == 0) { + int ret = do_test_sq_poll_kthread_stopped(do_exit); + exit(ret); + } + + pid = wait(&status); + if (status != 0) + return 
WEXITSTATUS(status); + + sleep(1); + if (system("ps --ppid 2 | grep " KTHREAD_NAME) == 0) { + fprintf(stderr, "%s kthread still running!\n", KTHREAD_NAME); + return TEST_FAILED; + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + int ret; + + if (argc > 1) + return 0; + + ret = test_sq_poll_kthread_stopped(true); + if (ret == TEST_SKIPPED) { + printf("test_sq_poll_kthread_stopped_exit: skipped\n"); + } else if (ret == TEST_FAILED) { + fprintf(stderr, "test_sq_poll_kthread_stopped_exit failed\n"); + return ret; + } + + ret = test_sq_poll_kthread_stopped(false); + if (ret == TEST_SKIPPED) { + printf("test_sq_poll_kthread_stopped_noexit: skipped\n"); + } else if (ret == TEST_FAILED) { + fprintf(stderr, "test_sq_poll_kthread_stopped_noexit failed\n"); + return ret; + } + + return 0; +} diff --git a/test/sq-space_left.c b/test/sq-space_left.c new file mode 100644 index 0000000..69f554c --- /dev/null +++ b/test/sq-space_left.c @@ -0,0 +1,159 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: test SQ queue space left + * + */ +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static int test_left(void) +{ + struct io_uring_sqe *sqe; + struct io_uring ring; + int ret, i = 0, s; + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed: %d\n", ret); + return 1; + + } + + if ((s = io_uring_sq_space_left(&ring)) != 8) { + fprintf(stderr, "Got %d SQEs left, expected %d\n", s, 8); + goto err; + } + + i = 0; + while ((sqe = io_uring_get_sqe(&ring)) != NULL) { + i++; + if ((s = io_uring_sq_space_left(&ring)) != 8 - i) { + fprintf(stderr, "Got %d SQEs left, expected %d\n", s, 8 - i); + goto err; + } + } + + if (i != 8) { + fprintf(stderr, "Got %d SQEs, expected %d\n", i, 8); + goto err; + } + + io_uring_queue_exit(&ring); + return 0; +err: + io_uring_queue_exit(&ring); + return 1; +} + +static int test_sync(void) +{ + struct io_uring_sqe *sqe; + struct io_uring ring; + int ret, i; + + ret = 
io_uring_queue_init(32, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed: %d\n", ret); + return 1; + + } + + /* prep 8 NOPS */ + for (i = 0; i < 8; i++) { + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + io_uring_prep_nop(sqe); + } + + /* prep known bad command, this should terminate submission */ + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + io_uring_prep_nop(sqe); + sqe->opcode = 0xfe; + + /* prep 8 NOPS */ + for (i = 0; i < 8; i++) { + sqe = io_uring_get_sqe(&ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + io_uring_prep_nop(sqe); + } + + /* we should have 8 + 1 + 8 pending now */ + ret = io_uring_sq_ready(&ring); + if (ret != 17) { + fprintf(stderr, "%d ready, wanted 17\n", ret); + goto err; + } + + ret = io_uring_submit(&ring); + + /* should submit 8 successfully, then error #9 and stop */ + if (ret != 9) { + fprintf(stderr, "submitted %d, wanted 9\n", ret); + goto err; + } + + /* should now have 8 ready, with 9 gone */ + ret = io_uring_sq_ready(&ring); + if (ret != 8) { + fprintf(stderr, "%d ready, wanted 8\n", ret); + goto err; + } + + ret = io_uring_submit(&ring); + + /* the last 8 should submit fine */ + if (ret != 8) { + fprintf(stderr, "submitted %d, wanted 8\n", ret); + goto err; + } + + ret = io_uring_sq_ready(&ring); + if (ret) { + fprintf(stderr, "%d ready, wanted 0\n", ret); + goto err; + } + + io_uring_queue_exit(&ring); + return 0; +err: + io_uring_queue_exit(&ring); + return 1; +} + +int main(int argc, char *argv[]) +{ + int ret; + + if (argc > 1) + return 0; + + ret = test_left(); + if (ret) { + fprintf(stderr, "test_left failed\n"); + return ret; + } + + ret = test_sync(); + if (ret) { + fprintf(stderr, "test_sync failed\n"); + return ret; + } + + return 0; +} diff --git a/test/statx.c b/test/statx.c new file mode 100644 index 0000000..c846a4a --- /dev/null +++ b/test/statx.c @@ -0,0 +1,193 @@ 
+/* SPDX-License-Identifier: MIT */ +/* + * Description: run various statx(2) tests + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +#ifdef __NR_statx +static int do_statx(int dfd, const char *path, int flags, unsigned mask, + struct statx *statxbuf) +{ + return syscall(__NR_statx, dfd, path, flags, mask, statxbuf); +} +#else +static int do_statx(int dfd, const char *path, int flags, unsigned mask, + struct statx *statxbuf) +{ + errno = ENOSYS; + return -1; +} +#endif + +static int create_file(const char *file, size_t size) +{ + ssize_t ret; + char *buf; + int fd; + + buf = malloc(size); + memset(buf, 0xaa, size); + + fd = open(file, O_WRONLY | O_CREAT, 0644); + if (fd < 0) { + perror("open file"); + return 1; + } + ret = write(fd, buf, size); + close(fd); + return ret != size; +} + +static int statx_syscall_supported(void) +{ + return errno == ENOSYS ? 0 : -1; +} + +static int test_statx(struct io_uring *ring, const char *path) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct statx x1, x2; + int ret; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + io_uring_prep_statx(sqe, -1, path, 0, STATX_ALL, &x1); + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + goto err; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } + ret = cqe->res; + io_uring_cqe_seen(ring, cqe); + if (ret) + return ret; + ret = do_statx(-1, path, 0, STATX_ALL, &x2); + if (ret < 0) + return statx_syscall_supported(); + if (memcmp(&x1, &x2, sizeof(x1))) { + fprintf(stderr, "Miscompare between io_uring and statx\n"); + goto err; + } + return 0; +err: + return -1; +} + +static int test_statx_fd(struct io_uring *ring, const char *path) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct statx x1, x2; + int ret, 
fd; + + fd = open(path, O_RDONLY); + if (fd < 0) { + perror("open"); + return 1; + } + + memset(&x1, 0, sizeof(x1)); + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + io_uring_prep_statx(sqe, fd, "", AT_EMPTY_PATH, STATX_ALL, &x1); + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + goto err; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } + ret = cqe->res; + io_uring_cqe_seen(ring, cqe); + if (ret) + return ret; + memset(&x2, 0, sizeof(x2)); + ret = do_statx(fd, "", AT_EMPTY_PATH, STATX_ALL, &x2); + if (ret < 0) + return statx_syscall_supported(); + if (memcmp(&x1, &x2, sizeof(x1))) { + fprintf(stderr, "Miscompare between io_uring and statx\n"); + goto err; + } + return 0; +err: + return -1; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + const char *fname; + int ret; + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed\n"); + return 1; + } + + if (argc > 1) { + fname = argv[1]; + } else { + fname = "/tmp/.statx"; + if (create_file(fname, 4096)) { + fprintf(stderr, "file create failed\n"); + return 1; + } + } + + ret = test_statx(&ring, fname); + if (ret) { + if (ret == -EINVAL) { + fprintf(stdout, "statx not supported, skipping\n"); + goto done; + } + fprintf(stderr, "test_statx failed: %d\n", ret); + goto err; + } + + ret = test_statx_fd(&ring, fname); + if (ret) { + fprintf(stderr, "test_statx_fd failed: %d\n", ret); + goto err; + } +done: + if (fname != argv[1]) + unlink(fname); + return 0; +err: + if (fname != argv[1]) + unlink(fname); + return 1; +} diff --git a/test/stdout.c b/test/stdout.c new file mode 100644 index 0000000..25585dc --- /dev/null +++ b/test/stdout.c @@ -0,0 +1,237 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: check that STDOUT write works + */ +#include +#include +#include +#include 
+#include +#include + +#include "liburing.h" + +static int test_pipe_io_fixed(struct io_uring *ring) +{ + const char str[] = "This is a fixed pipe test\n"; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct iovec vecs[2]; + char buffer[128]; + int i, ret, fds[2]; + + if (posix_memalign(&vecs[0].iov_base, 4096, 4096)) { + fprintf(stderr, "Failed to alloc mem\n"); + return 1; + } + memcpy(vecs[0].iov_base, str, strlen(str)); + vecs[0].iov_len = strlen(str); + + if (pipe(fds) < 0) { + perror("pipe"); + return 1; + } + + ret = io_uring_register_buffers(ring, vecs, 1); + if (ret) { + fprintf(stderr, "Failed to register buffers: %d\n", ret); + return 1; + } + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + io_uring_prep_write_fixed(sqe, fds[1], vecs[0].iov_base, + vecs[0].iov_len, 0, 0); + sqe->user_data = 1; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + vecs[1].iov_base = buffer; + vecs[1].iov_len = sizeof(buffer); + io_uring_prep_readv(sqe, fds[0], &vecs[1], 1, 0); + sqe->user_data = 2; + + ret = io_uring_submit(ring); + if (ret < 0) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + goto err; + } else if (ret != 2) { + fprintf(stderr, "Submitted only %d\n", ret); + goto err; + } + + for (i = 0; i < 2; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } + if (cqe->res < 0) { + fprintf(stderr, "I/O write error on %lu: %s\n", + (unsigned long) cqe->user_data, + strerror(-cqe->res)); + goto err; + } + if (cqe->res != strlen(str)) { + fprintf(stderr, "Got %d bytes, wanted %d on %lu\n", + cqe->res, (int)strlen(str), + (unsigned long) cqe->user_data); + goto err; + } + if (cqe->user_data == 2 && memcmp(str, buffer, strlen(str))) { + fprintf(stderr, "read data mismatch\n"); + goto err; + } + io_uring_cqe_seen(ring, cqe); + } + io_uring_unregister_buffers(ring); + return 0; 
+err: + return 1; +} + +static int test_stdout_io_fixed(struct io_uring *ring) +{ + const char str[] = "This is a fixed pipe test\n"; + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct iovec vecs; + int ret; + + if (posix_memalign(&vecs.iov_base, 4096, 4096)) { + fprintf(stderr, "Failed to alloc mem\n"); + return 1; + } + memcpy(vecs.iov_base, str, strlen(str)); + vecs.iov_len = strlen(str); + + ret = io_uring_register_buffers(ring, &vecs, 1); + if (ret) { + fprintf(stderr, "Failed to register buffers: %d\n", ret); + return 1; + } + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + io_uring_prep_write_fixed(sqe, STDOUT_FILENO, vecs.iov_base, vecs.iov_len, 0, 0); + + ret = io_uring_submit(ring); + if (ret < 0) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + goto err; + } else if (ret < 1) { + fprintf(stderr, "Submitted only %d\n", ret); + goto err; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } + if (cqe->res < 0) { + fprintf(stderr, "STDOUT write error: %s\n", strerror(-cqe->res)); + goto err; + } + if (cqe->res != vecs.iov_len) { + fprintf(stderr, "Got %d write, wanted %d\n", cqe->res, (int)vecs.iov_len); + goto err; + } + io_uring_cqe_seen(ring, cqe); + io_uring_unregister_buffers(ring); + return 0; +err: + return 1; +} + +static int test_stdout_io(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct iovec vecs; + int ret; + + vecs.iov_base = "This is a pipe test\n"; + vecs.iov_len = strlen(vecs.iov_base); + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "get sqe failed\n"); + goto err; + } + io_uring_prep_writev(sqe, STDOUT_FILENO, &vecs, 1, 0); + + ret = io_uring_submit(ring); + if (ret < 0) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + goto err; + } else if (ret < 1) { + fprintf(stderr, "Submitted only %d\n", ret); + goto err; + } + + ret = 
io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } + if (cqe->res < 0) { + fprintf(stderr, "STDOUT write error: %s\n", + strerror(-cqe->res)); + goto err; + } + if (cqe->res != vecs.iov_len) { + fprintf(stderr, "Got %d write, wanted %d\n", cqe->res, + (int)vecs.iov_len); + goto err; + } + io_uring_cqe_seen(ring, cqe); + + return 0; +err: + return 1; +} + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int ret; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed\n"); + return 1; + } + + ret = test_stdout_io(&ring); + if (ret) { + fprintf(stderr, "test_pipe_io failed\n"); + return ret; + } + + ret = test_stdout_io_fixed(&ring); + if (ret) { + fprintf(stderr, "test_pipe_io_fixed failed\n"); + return ret; + } + + ret = test_pipe_io_fixed(&ring); + if (ret) { + fprintf(stderr, "test_pipe_io_fixed failed\n"); + return ret; + } + + return 0; +} diff --git a/test/submit-reuse.c b/test/submit-reuse.c new file mode 100644 index 0000000..c8d2d27 --- /dev/null +++ b/test/submit-reuse.c @@ -0,0 +1,200 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Test reads that will punt to blocking context, with immediate overwrite + * of iovec->iov_base to NULL. If the kernel doesn't properly handle + * reuse of the iovec, we should get -EFAULT. 
+ */ +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +#define STR_SIZE 32768 +#define FILE_SIZE 65536 + +struct thread_data { + int fd1, fd2; + volatile int do_exit; +}; + +static void *flusher(void *__data) +{ + struct thread_data *data = __data; + int i = 0; + + while (!data->do_exit) { + posix_fadvise(data->fd1, 0, FILE_SIZE, POSIX_FADV_DONTNEED); + posix_fadvise(data->fd2, 0, FILE_SIZE, POSIX_FADV_DONTNEED); + i++; + } + + return NULL; +} + +static int create_file(const char *file) +{ + ssize_t ret; + char *buf; + int fd; + + buf = malloc(FILE_SIZE); + memset(buf, 0xaa, FILE_SIZE); + + fd = open(file, O_WRONLY | O_CREAT, 0644); + if (fd < 0) { + perror("open file"); + return 1; + } + ret = write(fd, buf, FILE_SIZE); + fsync(fd); + close(fd); + return ret != FILE_SIZE; +} + +static char str1[STR_SIZE]; +static char str2[STR_SIZE]; + +static struct io_uring ring; + +static int prep(int fd, char *str) +{ + struct io_uring_sqe *sqe; + struct iovec iov = { + .iov_base = str, + .iov_len = STR_SIZE, + }; + int ret; + + sqe = io_uring_get_sqe(&ring); + io_uring_prep_readv(sqe, fd, &iov, 1, 0); + sqe->user_data = fd; + ret = io_uring_submit(&ring); + if (ret != 1) { + fprintf(stderr, "submit got %d\n", ret); + return 1; + } + iov.iov_base = NULL; + return 0; +} + +static int wait_nr(int nr) +{ + int i, ret; + + for (i = 0; i < nr; i++) { + struct io_uring_cqe *cqe; + + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret) + return ret; + if (cqe->res < 0) { + fprintf(stderr, "cqe->res=%d\n", cqe->res); + return 1; + } + io_uring_cqe_seen(&ring, cqe); + } + + return 0; +} + +static unsigned long long mtime_since(const struct timeval *s, + const struct timeval *e) +{ + long long sec, usec; + + sec = e->tv_sec - s->tv_sec; + usec = (e->tv_usec - s->tv_usec); + if (sec > 0 && usec < 0) { + sec--; + usec += 1000000; + } + + sec *= 1000; + usec /= 1000; + return sec + usec; +} + +static unsigned long long mtime_since_now(struct 
timeval *tv) +{ + struct timeval end; + + gettimeofday(&end, NULL); + return mtime_since(tv, &end); +} + +int main(int argc, char *argv[]) +{ + struct thread_data data; + int fd1, fd2, ret, i; + struct timeval tv; + pthread_t thread; + void *tret; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(32, &ring, 0); + if (ret) { + fprintf(stderr, "io_uring_queue_init: %d\n", ret); + return 1; + } + + if (create_file(".reuse.1")) { + fprintf(stderr, "file creation failed\n"); + goto err; + } + if (create_file(".reuse.2")) { + fprintf(stderr, "file creation failed\n"); + goto err; + } + + fd1 = open(".reuse.1", O_RDONLY); + fd2 = open(".reuse.2", O_RDONLY); + + data.fd1 = fd1; + data.fd2 = fd2; + data.do_exit = 0; + pthread_create(&thread, NULL, flusher, &data); + usleep(10000); + + gettimeofday(&tv, NULL); + for (i = 0; i < 1000; i++) { + ret = prep(fd1, str1); + if (ret) { + fprintf(stderr, "prep1 failed: %d\n", ret); + goto err; + } + ret = prep(fd2, str2); + if (ret) { + fprintf(stderr, "prep1 failed: %d\n", ret); + goto err; + } + ret = wait_nr(2); + if (ret) { + fprintf(stderr, "wait_nr: %d\n", ret); + goto err; + } + if (mtime_since_now(&tv) > 5000) + break; + } + + data.do_exit = 1; + pthread_join(thread, &tret); + + close(fd2); + close(fd1); + io_uring_queue_exit(&ring); + unlink(".reuse.1"); + unlink(".reuse.2"); + return 0; +err: + io_uring_queue_exit(&ring); + unlink(".reuse.1"); + unlink(".reuse.2"); + return 1; +} diff --git a/test/teardowns.c b/test/teardowns.c new file mode 100644 index 0000000..f78fe22 --- /dev/null +++ b/test/teardowns.c @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: MIT */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" + +static void loop(void) +{ + int i, ret = 0; + + for (i = 0; i < 100; i++) { + struct io_uring ring; + int fd; + + memset(&ring, 0, sizeof(ring)); + fd = io_uring_queue_init(0xa4, &ring, 0); + if (fd >= 0) { + close(fd); + continue; + } + if 
(fd != -ENOMEM) + ret++; + } + exit(ret); +} + +int main(int argc, char *argv[]) +{ + int i, ret, status; + + if (argc > 1) + return 0; + + for (i = 0; i < 12; i++) { + if (!fork()) { + loop(); + break; + } + } + + ret = 0; + for (i = 0; i < 12; i++) { + if (waitpid(-1, &status, 0) < 0) { + perror("waitpid"); + return 1; + } + if (WEXITSTATUS(status)) + ret++; + } + + return ret; +} diff --git a/test/timeout-overflow.c b/test/timeout-overflow.c new file mode 100644 index 0000000..1074e2b --- /dev/null +++ b/test/timeout-overflow.c @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: run timeout overflow test + * + */ +#include +#include +#include +#include + +#include "liburing.h" + +#define TIMEOUT_MSEC 200 +static int not_supported; + +static void msec_to_ts(struct __kernel_timespec *ts, unsigned int msec) +{ + ts->tv_sec = msec / 1000; + ts->tv_nsec = (msec % 1000) * 1000000; +} + +static int check_timeout_support() +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct __kernel_timespec ts; + struct io_uring ring; + int ret; + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed: %d\n", ret); + return 1; + } + sqe = io_uring_get_sqe(&ring); + msec_to_ts(&ts, TIMEOUT_MSEC); + io_uring_prep_timeout(sqe, &ts, 1, 0); + + ret = io_uring_submit(&ring); + if (ret < 0) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + goto err; + } + + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } + + if (cqe->res == -EINVAL) { + not_supported = 1; + fprintf(stdout, "Timeout not supported, ignored\n"); + return 0; + } + + io_uring_cqe_seen(&ring, cqe); + io_uring_queue_exit(&ring); + return 0; +err: + io_uring_queue_exit(&ring); + return 1; +} + +/* + * We first setup 4 timeout requests, which require a count value of 1, 1, 2, + * UINT_MAX, so the sequence is 1, 2, 4, 2. 
Before really timeout, this 4 + * requests will not lead the change of cq_cached_tail, so as sq_dropped. + * + * And before this patch. The order of this four requests will be req1->req2-> + * req4->req3. Actually, it should be req1->req2->req3->req4. + * + * Then, if there is 2 nop req. All timeout requests expect req4 will completed + * successful after the patch. And req1/req2 will completed successful with + * req3/req4 return -ETIME without this patch! + */ +static int test_timeout_overflow() +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct __kernel_timespec ts; + struct io_uring ring; + int i, ret; + + ret = io_uring_queue_init(16, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed: %d\n", ret); + return 1; + } + + msec_to_ts(&ts, TIMEOUT_MSEC); + for (i = 0; i < 4; i++) { + unsigned num; + sqe = io_uring_get_sqe(&ring); + switch (i) { + case 0: + case 1: + num = 1; + break; + case 2: + num = 2; + break; + case 3: + num = UINT_MAX; + break; + } + io_uring_prep_timeout(sqe, &ts, num, 0); + } + + for (i = 0; i < 2; i++) { + sqe = io_uring_get_sqe(&ring); + io_uring_prep_nop(sqe); + io_uring_sqe_set_data(sqe, (void *) 1); + } + ret = io_uring_submit(&ring); + if (ret < 0) { + fprintf(stderr, "sqe submit failed: %d\n", ret); + goto err; + } + + i = 0; + while (i < 6) { + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret < 0) { + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } + + /* + * cqe1: first nop req + * cqe2: first timeout req, because of cqe1 + * cqe3: second timeout req because of cqe1 + cqe2 + * cqe4: second nop req + * cqe5~cqe6: the left three timeout req + */ + switch (i) { + case 0: + case 3: + if (io_uring_cqe_get_data(cqe) != (void *) 1) { + fprintf(stderr, "nop not seen as 1 or 2\n"); + goto err; + } + break; + case 1: + case 2: + case 4: + if (cqe->res == -ETIME) { + fprintf(stderr, "expected not return -ETIME " + "for the #%d timeout req\n", i - 1); + goto err; + } + break; + case 5: + if (cqe->res != 
-ETIME) { + fprintf(stderr, "expected return -ETIME for " + "the #%d timeout req\n", i - 1); + goto err; + } + break; + } + io_uring_cqe_seen(&ring, cqe); + i++; + } + + return 0; +err: + return 1; +} + +int main(int argc, char *argv[]) +{ + int ret; + + if (argc > 1) + return 0; + + ret = check_timeout_support(); + if (ret) { + fprintf(stderr, "check_timeout_support failed: %d\n", ret); + return 1; + } + + if (not_supported) + return 0; + + ret = test_timeout_overflow(); + if (ret) { + fprintf(stderr, "test_timeout_overflow failed\n"); + return 1; + } + + return 0; +} diff --git a/test/timeout.c b/test/timeout.c new file mode 100644 index 0000000..7e9f11d --- /dev/null +++ b/test/timeout.c @@ -0,0 +1,1067 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: run various timeout tests + * + */ +#include +#include +#include +#include +#include +#include +#include + +#include "liburing.h" +#include "../src/syscall.h" + +#define TIMEOUT_MSEC 200 +static int not_supported; +static int no_modify; + +static void msec_to_ts(struct __kernel_timespec *ts, unsigned int msec) +{ + ts->tv_sec = msec / 1000; + ts->tv_nsec = (msec % 1000) * 1000000; +} + +static unsigned long long mtime_since(const struct timeval *s, + const struct timeval *e) +{ + long long sec, usec; + + sec = e->tv_sec - s->tv_sec; + usec = (e->tv_usec - s->tv_usec); + if (sec > 0 && usec < 0) { + sec--; + usec += 1000000; + } + + sec *= 1000; + usec /= 1000; + return sec + usec; +} + +static unsigned long long mtime_since_now(struct timeval *tv) +{ + struct timeval end; + + gettimeofday(&end, NULL); + return mtime_since(tv, &end); +} + +/* + * Test that we return to userspace if a timeout triggers, even if we + * don't satisfy the number of events asked for. 
+ */ +static int test_single_timeout_many(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + unsigned long long exp; + struct __kernel_timespec ts; + struct timeval tv; + int ret; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + + msec_to_ts(&ts, TIMEOUT_MSEC); + io_uring_prep_timeout(sqe, &ts, 0, 0); + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); + goto err; + } + + gettimeofday(&tv, NULL); + ret = __sys_io_uring_enter(ring->ring_fd, 0, 4, IORING_ENTER_GETEVENTS, + NULL); + if (ret < 0) { + fprintf(stderr, "%s: io_uring_enter %d\n", __FUNCTION__, ret); + goto err; + } + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); + goto err; + } + ret = cqe->res; + io_uring_cqe_seen(ring, cqe); + if (ret == -EINVAL) { + fprintf(stdout, "Timeout not supported, ignored\n"); + not_supported = 1; + return 0; + } else if (ret != -ETIME) { + fprintf(stderr, "Timeout: %s\n", strerror(-ret)); + goto err; + } + + exp = mtime_since_now(&tv); + if (exp >= TIMEOUT_MSEC / 2 && exp <= (TIMEOUT_MSEC * 3) / 2) + return 0; + fprintf(stderr, "%s: Timeout seems wonky (got %llu)\n", __FUNCTION__, exp); +err: + return 1; +} + +/* + * Test numbered trigger of timeout + */ +static int test_single_timeout_nr(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct __kernel_timespec ts; + int i, ret; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + + msec_to_ts(&ts, TIMEOUT_MSEC); + io_uring_prep_timeout(sqe, &ts, 2, 0); + + sqe = io_uring_get_sqe(ring); + io_uring_prep_nop(sqe); + io_uring_sqe_set_data(sqe, (void *) 1); + sqe = io_uring_get_sqe(ring); + io_uring_prep_nop(sqe); + io_uring_sqe_set_data(sqe, (void *) 1); + + ret = 
io_uring_submit_and_wait(ring, 3); + if (ret <= 0) { + fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); + goto err; + } + + i = 0; + while (i < 3) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); + goto err; + } + + /* + * NOP commands have user_data as 1. Check that we get the + * two NOPs first, then the successfully removed timout as + * the last one. + */ + switch (i) { + case 0: + case 1: + if (io_uring_cqe_get_data(cqe) != (void *) 1) { + fprintf(stderr, "%s: nop not seen as 1 or 2\n", __FUNCTION__); + goto err; + } + break; + case 2: + if (io_uring_cqe_get_data(cqe) != NULL) { + fprintf(stderr, "%s: timeout not last\n", __FUNCTION__); + goto err; + } + break; + } + + ret = cqe->res; + io_uring_cqe_seen(ring, cqe); + if (ret < 0) { + fprintf(stderr, "Timeout: %s\n", strerror(-ret)); + goto err; + } else if (ret) { + fprintf(stderr, "res: %d\n", ret); + goto err; + } + i++; + }; + + return 0; +err: + return 1; +} + +static int test_single_timeout_wait(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct __kernel_timespec ts; + int i, ret; + + sqe = io_uring_get_sqe(ring); + io_uring_prep_nop(sqe); + io_uring_sqe_set_data(sqe, (void *) 1); + + sqe = io_uring_get_sqe(ring); + io_uring_prep_nop(sqe); + io_uring_sqe_set_data(sqe, (void *) 1); + + msec_to_ts(&ts, 1000); + + i = 0; + do { + ret = io_uring_wait_cqes(ring, &cqe, 2, &ts, NULL); + if (ret == -ETIME) + break; + if (ret < 0) { + fprintf(stderr, "%s: wait timeout failed: %d\n", __FUNCTION__, ret); + goto err; + } + + ret = cqe->res; + io_uring_cqe_seen(ring, cqe); + if (ret < 0) { + fprintf(stderr, "res: %d\n", ret); + goto err; + } + i++; + } while (1); + + if (i != 2) { + fprintf(stderr, "got %d completions\n", i); + goto err; + } + return 0; +err: + return 1; +} + +/* + * Test single timeout waking us up + */ +static int test_single_timeout(struct io_uring *ring) +{ + struct 
io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + unsigned long long exp; + struct __kernel_timespec ts; + struct timeval tv; + int ret; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + + msec_to_ts(&ts, TIMEOUT_MSEC); + io_uring_prep_timeout(sqe, &ts, 0, 0); + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); + goto err; + } + + gettimeofday(&tv, NULL); + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); + goto err; + } + ret = cqe->res; + io_uring_cqe_seen(ring, cqe); + if (ret == -EINVAL) { + fprintf(stdout, "%s: Timeout not supported, ignored\n", __FUNCTION__); + not_supported = 1; + return 0; + } else if (ret != -ETIME) { + fprintf(stderr, "%s: Timeout: %s\n", __FUNCTION__, strerror(-ret)); + goto err; + } + + exp = mtime_since_now(&tv); + if (exp >= TIMEOUT_MSEC / 2 && exp <= (TIMEOUT_MSEC * 3) / 2) + return 0; + fprintf(stderr, "%s: Timeout seems wonky (got %llu)\n", __FUNCTION__, exp); +err: + return 1; +} + +static int test_single_timeout_remove_notfound(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct __kernel_timespec ts; + int ret, i; + + if (no_modify) + return 0; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + + msec_to_ts(&ts, TIMEOUT_MSEC); + io_uring_prep_timeout(sqe, &ts, 2, 0); + sqe->user_data = 1; + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); + goto err; + } + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + + io_uring_prep_timeout_remove(sqe, 2, 0); + sqe->user_data = 2; + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "%s: sqe submit failed: %d\n", 
__FUNCTION__, ret); + goto err; + } + + /* + * We should get two completions. One is our modify request, which should + * complete with -ENOENT. The other is the timeout that will trigger after + * TIMEOUT_MSEC. + */ + for (i = 0; i < 2; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); + goto err; + } + if (cqe->user_data == 2) { + if (cqe->res != -ENOENT) { + fprintf(stderr, "%s: modify ret %d, wanted ENOENT\n", __FUNCTION__, cqe->res); + break; + } + } else if (cqe->user_data == 1) { + if (cqe->res != -ETIME) { + fprintf(stderr, "%s: timeout ret %d, wanted -ETIME\n", __FUNCTION__, cqe->res); + break; + } + } + io_uring_cqe_seen(ring, cqe); + } + return 0; +err: + return 1; +} + +static int test_single_timeout_remove(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct __kernel_timespec ts; + int ret, i; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + + msec_to_ts(&ts, TIMEOUT_MSEC); + io_uring_prep_timeout(sqe, &ts, 0, 0); + sqe->user_data = 1; + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); + goto err; + } + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + + io_uring_prep_timeout_remove(sqe, 1, 0); + sqe->user_data = 2; + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); + goto err; + } + + /* + * We should have two completions ready. One is for the original timeout + * request, user_data == 1, that should have a ret of -ECANCELED. The other + * is for our modify request, user_data == 2, that should have a ret of 0. 
+ */ + for (i = 0; i < 2; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); + goto err; + } + if (no_modify) + goto seen; + if (cqe->res == -EINVAL && cqe->user_data == 2) { + fprintf(stdout, "Timeout modify not supported, ignoring\n"); + no_modify = 1; + goto seen; + } + if (cqe->user_data == 1) { + if (cqe->res != -ECANCELED) { + fprintf(stderr, "%s: timeout ret %d, wanted canceled\n", __FUNCTION__, cqe->res); + break; + } + } else if (cqe->user_data == 2) { + if (cqe->res) { + fprintf(stderr, "%s: modify ret %d, wanted 0\n", __FUNCTION__, cqe->res); + break; + } + } +seen: + io_uring_cqe_seen(ring, cqe); + } + return 0; +err: + return 1; +} + +/* + * Test single absolute timeout waking us up + */ +static int test_single_timeout_abs(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + unsigned long long exp; + struct __kernel_timespec ts; + struct timespec abs_ts; + struct timeval tv; + int ret; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + + clock_gettime(CLOCK_MONOTONIC, &abs_ts); + ts.tv_sec = abs_ts.tv_sec + 1; + ts.tv_nsec = abs_ts.tv_nsec; + io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_ABS); + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); + goto err; + } + + gettimeofday(&tv, NULL); + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); + goto err; + } + ret = cqe->res; + io_uring_cqe_seen(ring, cqe); + if (ret == -EINVAL) { + fprintf(stdout, "Absolute timeouts not supported, ignored\n"); + return 0; + } else if (ret != -ETIME) { + fprintf(stderr, "Timeout: %s\n", strerror(-ret)); + goto err; + } + + exp = mtime_since_now(&tv); + if (exp >= 1000 / 2 && exp <= (1000 * 3) / 2) + return 0; + fprintf(stderr, "%s: Timeout seems wonky 
(got %llu)\n", __FUNCTION__, exp); +err: + return 1; +} + +/* + * Test that timeout is canceled on exit + */ +static int test_single_timeout_exit(struct io_uring *ring) +{ + struct io_uring_sqe *sqe; + struct __kernel_timespec ts; + int ret; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + + msec_to_ts(&ts, 30000); + io_uring_prep_timeout(sqe, &ts, 0, 0); + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); + goto err; + } + + io_uring_queue_exit(ring); + return 0; +err: + io_uring_queue_exit(ring); + return 1; +} + +/* + * Test multi timeouts waking us up + */ +static int test_multi_timeout(struct io_uring *ring) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct __kernel_timespec ts[2]; + unsigned int timeout[2]; + unsigned long long exp; + struct timeval tv; + int ret, i; + + /* req_1: timeout req, count = 1, time = (TIMEOUT_MSEC * 2) */ + timeout[0] = TIMEOUT_MSEC * 2; + msec_to_ts(&ts[0], timeout[0]); + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + io_uring_prep_timeout(sqe, &ts[0], 1, 0); + sqe->user_data = 1; + + /* req_2: timeout req, count = 1, time = TIMEOUT_MSEC */ + timeout[1] = TIMEOUT_MSEC; + msec_to_ts(&ts[1], timeout[1]); + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + io_uring_prep_timeout(sqe, &ts[1], 1, 0); + sqe->user_data = 2; + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); + goto err; + } + + gettimeofday(&tv, NULL); + for (i = 0; i < 2; i++) { + unsigned int time; + __u64 user_data; + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); + goto err; + } + + /* + * Both of these two reqs should timeout, but 
req_2 should + * return before req_1. + */ + switch (i) { + case 0: + user_data = 2; + time = timeout[1]; + break; + case 1: + user_data = 1; + time = timeout[0]; + break; + } + + if (cqe->user_data != user_data) { + fprintf(stderr, "%s: unexpected timeout req %d sequece\n", + __FUNCTION__, i+1); + goto err; + } + if (cqe->res != -ETIME) { + fprintf(stderr, "%s: Req %d timeout: %s\n", + __FUNCTION__, i+1, strerror(cqe->res)); + goto err; + } + exp = mtime_since_now(&tv); + if (exp < time / 2 || exp > (time * 3) / 2) { + fprintf(stderr, "%s: Req %d timeout seems wonky (got %llu)\n", + __FUNCTION__, i+1, exp); + goto err; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +/* + * Test multi timeout req with different count + */ +static int test_multi_timeout_nr(struct io_uring *ring) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct __kernel_timespec ts; + int ret, i; + + msec_to_ts(&ts, TIMEOUT_MSEC); + + /* req_1: timeout req, count = 2 */ + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + io_uring_prep_timeout(sqe, &ts, 2, 0); + sqe->user_data = 1; + + /* req_2: timeout req, count = 1 */ + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + io_uring_prep_timeout(sqe, &ts, 1, 0); + sqe->user_data = 2; + + /* req_3: nop req */ + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + io_uring_prep_nop(sqe); + io_uring_sqe_set_data(sqe, (void *) 1); + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); + goto err; + } + + /* + * req_2 (count=1) should return without error and req_1 (count=2) + * should timeout. 
+ */ + for (i = 0; i < 3; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); + goto err; + } + + switch (i) { + case 0: + /* Should be nop req */ + if (io_uring_cqe_get_data(cqe) != (void *) 1) { + fprintf(stderr, "%s: nop not seen as 1 or 2\n", __FUNCTION__); + goto err; + } + break; + case 1: + /* Should be timeout req_2 */ + if (cqe->user_data != 2) { + fprintf(stderr, "%s: unexpected timeout req %d sequece\n", + __FUNCTION__, i+1); + goto err; + } + if (cqe->res < 0) { + fprintf(stderr, "%s: Req %d res %d\n", + __FUNCTION__, i+1, cqe->res); + goto err; + } + break; + case 2: + /* Should be timeout req_1 */ + if (cqe->user_data != 1) { + fprintf(stderr, "%s: unexpected timeout req %d sequece\n", + __FUNCTION__, i+1); + goto err; + } + if (cqe->res != -ETIME) { + fprintf(stderr, "%s: Req %d timeout: %s\n", + __FUNCTION__, i+1, strerror(cqe->res)); + goto err; + } + break; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +/* + * Test timeout timeout timeout + */ +static int test_timeout_flags1(struct io_uring *ring) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct __kernel_timespec ts; + int ret, i; + + msec_to_ts(&ts, TIMEOUT_MSEC); + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + io_uring_prep_timeout(sqe, &ts, 0, 0); + sqe->user_data = 1; + sqe->flags |= IOSQE_IO_LINK; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + io_uring_prep_timeout(sqe, &ts, 0, 0); + sqe->user_data = 2; + sqe->flags |= IOSQE_IO_DRAIN; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + io_uring_prep_timeout(sqe, &ts, 0, 0); + sqe->user_data = 3; + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "%s: sqe submit failed: %d\n", 
__FUNCTION__, ret); + goto err; + } + + for (i = 0; i < 3; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); + goto err; + } + + if (cqe->res == -EINVAL) { + if (!i) + fprintf(stdout, "%s: timeout flags not supported\n", + __FUNCTION__); + io_uring_cqe_seen(ring, cqe); + continue; + } + + switch (cqe->user_data) { + case 1: + if (cqe->res != -ETIME) { + fprintf(stderr, "%s: got %d, wanted %d\n", + __FUNCTION__, cqe->res, -ETIME); + goto err; + } + break; + case 2: + if (cqe->res != -ECANCELED) { + fprintf(stderr, "%s: got %d, wanted %d\n", + __FUNCTION__, cqe->res, + -ECANCELED); + goto err; + } + break; + case 3: + if (cqe->res != -ETIME) { + fprintf(stderr, "%s: got %d, wanted %d\n", + __FUNCTION__, cqe->res, -ETIME); + goto err; + } + break; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +/* + * Test timeout timeout timeout + */ +static int test_timeout_flags2(struct io_uring *ring) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct __kernel_timespec ts; + int ret, i; + + msec_to_ts(&ts, TIMEOUT_MSEC); + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + io_uring_prep_timeout(sqe, &ts, 0, 0); + sqe->user_data = 1; + sqe->flags |= IOSQE_IO_LINK; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + io_uring_prep_timeout(sqe, &ts, 0, 0); + sqe->user_data = 2; + sqe->flags |= IOSQE_IO_LINK; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + io_uring_prep_timeout(sqe, &ts, 0, 0); + sqe->user_data = 3; + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); + goto err; + } + + for (i = 0; i < 3; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + 
fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); + goto err; + } + + if (cqe->res == -EINVAL) { + if (!i) + fprintf(stdout, "%s: timeout flags not supported\n", + __FUNCTION__); + io_uring_cqe_seen(ring, cqe); + continue; + } + + switch (cqe->user_data) { + case 1: + if (cqe->res != -ETIME) { + fprintf(stderr, "%s: got %d, wanted %d\n", + __FUNCTION__, cqe->res, -ETIME); + goto err; + } + break; + case 2: + case 3: + if (cqe->res != -ECANCELED) { + fprintf(stderr, "%s: got %d, wanted %d\n", + __FUNCTION__, cqe->res, + -ECANCELED); + goto err; + } + break; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + +/* + * Test timeout timeout timeout + */ +static int test_timeout_flags3(struct io_uring *ring) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct __kernel_timespec ts; + int ret, i; + + msec_to_ts(&ts, TIMEOUT_MSEC); + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + io_uring_prep_timeout(sqe, &ts, 0, 0); + sqe->user_data = 1; + sqe->flags |= IOSQE_IO_DRAIN; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + io_uring_prep_timeout(sqe, &ts, 0, 0); + sqe->user_data = 2; + sqe->flags |= IOSQE_IO_LINK; + + sqe = io_uring_get_sqe(ring); + if (!sqe) { + fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); + goto err; + } + io_uring_prep_timeout(sqe, &ts, 0, 0); + sqe->user_data = 3; + + ret = io_uring_submit(ring); + if (ret <= 0) { + fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); + goto err; + } + + for (i = 0; i < 3; i++) { + ret = io_uring_wait_cqe(ring, &cqe); + if (ret < 0) { + fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); + goto err; + } + + if (cqe->res == -EINVAL) { + if (!i) + fprintf(stdout, "%s: timeout flags not supported\n", + __FUNCTION__); + io_uring_cqe_seen(ring, cqe); + continue; + } + + switch (cqe->user_data) { 
+ case 1: + case 2: + if (cqe->res != -ETIME) { + fprintf(stderr, "%s: got %d, wanted %d\n", + __FUNCTION__, cqe->res, -ETIME); + goto err; + } + break; + case 3: + if (cqe->res != -ECANCELED) { + fprintf(stderr, "%s: got %d, wanted %d\n", + __FUNCTION__, cqe->res, + -ECANCELED); + goto err; + } + break; + } + io_uring_cqe_seen(ring, cqe); + } + + return 0; +err: + return 1; +} + + +int main(int argc, char *argv[]) +{ + struct io_uring ring; + int ret; + + if (argc > 1) + return 0; + + ret = io_uring_queue_init(8, &ring, 0); + if (ret) { + fprintf(stderr, "ring setup failed\n"); + return 1; + } + + ret = test_single_timeout(&ring); + if (ret) { + fprintf(stderr, "test_single_timeout failed\n"); + return ret; + } + if (not_supported) + return 0; + + ret = test_multi_timeout(&ring); + if (ret) { + fprintf(stderr, "test_multi_timeout failed\n"); + return ret; + } + + ret = test_single_timeout_abs(&ring); + if (ret) { + fprintf(stderr, "test_single_timeout_abs failed\n"); + return ret; + } + + ret = test_single_timeout_remove(&ring); + if (ret) { + fprintf(stderr, "test_single_timeout_remove failed\n"); + return ret; + } + + ret = test_single_timeout_remove_notfound(&ring); + if (ret) { + fprintf(stderr, "test_single_timeout_remove_notfound failed\n"); + return ret; + } + + ret = test_single_timeout_many(&ring); + if (ret) { + fprintf(stderr, "test_single_timeout_many failed\n"); + return ret; + } + + ret = test_single_timeout_nr(&ring); + if (ret) { + fprintf(stderr, "test_single_timeout_nr failed\n"); + return ret; + } + + ret = test_multi_timeout_nr(&ring); + if (ret) { + fprintf(stderr, "test_multi_timeout_nr failed\n"); + return ret; + } + + ret = test_timeout_flags1(&ring); + if (ret) { + fprintf(stderr, "test_timeout_flags1 failed\n"); + return ret; + } + + ret = test_timeout_flags2(&ring); + if (ret) { + fprintf(stderr, "test_timeout_flags2 failed\n"); + return ret; + } + + ret = test_timeout_flags3(&ring); + if (ret) { + fprintf(stderr, "test_timeout_flags3 
failed\n"); + return ret; + } + + ret = test_single_timeout_wait(&ring); + if (ret) { + fprintf(stderr, "test_single_timeout_wait failed\n"); + return ret; + } + + /* + * this test must go last, it kills the ring + */ + ret = test_single_timeout_exit(&ring); + if (ret) { + fprintf(stderr, "test_single_timeout_exit failed\n"); + return ret; + } + + return 0; +}